AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: fix possible fence leaks from job structure
@ 2025-10-27 22:02 Alex Deucher
  2025-10-30 12:51 ` Alex Deucher
  2025-10-31  8:40 ` Christian König
  0 siblings, 2 replies; 15+ messages in thread
From: Alex Deucher @ 2025-10-27 22:02 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher, Jesse Zhang

If we don't end up initializing the fences, free them when
we free the job.

v2: take a reference to the fences if we emit them

Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com> (v1)
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 18 ++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 ++
 3 files changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 39229ece83f83..0596114377600 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -302,6 +302,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 		return r;
 	}
 	*f = &af->base;
+	/* get a ref for the job */
+	dma_fence_get(*f);
 
 	if (ring->funcs->insert_end)
 		ring->funcs->insert_end(ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 55c7e104d5ca0..dc970f5fe601b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -295,6 +295,15 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
 
 	amdgpu_sync_free(&job->explicit_sync);
 
+	if (job->hw_fence->base.ops)
+		dma_fence_put(&job->hw_fence->base);
+	else
+		kfree(job->hw_fence);
+	if (job->hw_vm_fence->base.ops)
+		dma_fence_put(&job->hw_vm_fence->base);
+	else
+		kfree(job->hw_vm_fence);
+
 	kfree(job);
 }
 
@@ -324,6 +333,15 @@ void amdgpu_job_free(struct amdgpu_job *job)
 	if (job->gang_submit != &job->base.s_fence->scheduled)
 		dma_fence_put(job->gang_submit);
 
+	if (job->hw_fence->base.ops)
+		dma_fence_put(&job->hw_fence->base);
+	else
+		kfree(job->hw_fence);
+	if (job->hw_vm_fence->base.ops)
+		dma_fence_put(&job->hw_vm_fence->base);
+	else
+		kfree(job->hw_vm_fence);
+
 	kfree(job);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index db66b4232de02..f8c67840f446f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -845,6 +845,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 		if (r)
 			return r;
 		fence = &job->hw_vm_fence->base;
+		/* get a ref for the job */
+		dma_fence_get(fence);
 	}
 
 	if (vm_flush_needed) {
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH] drm/amdgpu: fix possible fence leaks from job structure
@ 2025-10-31 17:43 Alex Deucher
  2025-11-04 13:36 ` Alex Deucher
  2025-11-04 13:50 ` Christian König
  0 siblings, 2 replies; 15+ messages in thread
From: Alex Deucher @ 2025-10-31 17:43 UTC (permalink / raw)
  To: amd-gfx, christian.koenig; +Cc: Alex Deucher, Jesse Zhang

If we don't end up initializing the fences, free them when
we free the job.  We can't set the hw_fence to NULL after
emitting it because we need it in the cleanup path for the
direct submit case.

v2: take a reference to the fences if we emit them
v3: handle non-job fence in error paths

Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com> (v1)
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  | 19 +++++++++++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 18 ++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 ++
 3 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 39229ece83f83..586a58facca10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -176,18 +176,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 
 	if (!ring->sched.ready) {
 		dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
-		return -EINVAL;
+		r = -EINVAL;
+		goto free_fence;
 	}
 
 	if (vm && !job->vmid) {
 		dev_err(adev->dev, "VM IB without ID\n");
-		return -EINVAL;
+		r = -EINVAL;
+		goto free_fence;
 	}
 
 	if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) &&
 	    (!ring->funcs->secure_submission_supported)) {
 		dev_err(adev->dev, "secure submissions not supported on ring <%s>\n", ring->name);
-		return -EINVAL;
+		r = -EINVAL;
+		goto free_fence;
 	}
 
 	alloc_size = ring->funcs->emit_frame_size + num_ibs *
@@ -196,7 +199,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 	r = amdgpu_ring_alloc(ring, alloc_size);
 	if (r) {
 		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
-		return r;
+		goto free_fence;
 	}
 
 	need_ctx_switch = ring->current_ctx != fence_ctx;
@@ -302,6 +305,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 		return r;
 	}
 	*f = &af->base;
+	/* get a ref for the job */
+	if (job)
+		dma_fence_get(*f);
 
 	if (ring->funcs->insert_end)
 		ring->funcs->insert_end(ring);
@@ -328,6 +334,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 	amdgpu_ring_commit(ring);
 
 	return 0;
+
+free_fence:
+	if (!job)
+		kfree(af);
+	return r;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index fd6aade7ee9e3..efa3281145f6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -293,6 +293,15 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
 
 	amdgpu_sync_free(&job->explicit_sync);
 
+	if (job->hw_fence->base.ops)
+		dma_fence_put(&job->hw_fence->base);
+	else
+		kfree(job->hw_fence);
+	if (job->hw_vm_fence->base.ops)
+		dma_fence_put(&job->hw_vm_fence->base);
+	else
+		kfree(job->hw_vm_fence);
+
 	kfree(job);
 }
 
@@ -322,6 +331,15 @@ void amdgpu_job_free(struct amdgpu_job *job)
 	if (job->gang_submit != &job->base.s_fence->scheduled)
 		dma_fence_put(job->gang_submit);
 
+	if (job->hw_fence->base.ops)
+		dma_fence_put(&job->hw_fence->base);
+	else
+		kfree(job->hw_fence);
+	if (job->hw_vm_fence->base.ops)
+		dma_fence_put(&job->hw_vm_fence->base);
+	else
+		kfree(job->hw_vm_fence);
+
 	kfree(job);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c3dfb949a9b87..82e897cd5feac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -849,6 +849,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 		if (r)
 			return r;
 		fence = &job->hw_vm_fence->base;
+		/* get a ref for the job */
+		dma_fence_get(fence);
 	}
 
 	if (vm_flush_needed) {
-- 
2.51.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH] drm/amdgpu: fix possible fence leaks from job structure
@ 2025-10-22 21:20 Alex Deucher
  2025-10-23 19:02 ` Alex Deucher
  2025-10-24  9:48 ` Zhang, Jesse(Jie)
  0 siblings, 2 replies; 15+ messages in thread
From: Alex Deucher @ 2025-10-22 21:20 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher

If we don't end up initializing the fences, free them when
we free the job.

Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 3d396ab625f33..8ad8b16e96760 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -295,6 +295,11 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
 
 	amdgpu_sync_free(&job->explicit_sync);
 
+	if (!job->hw_fence->base.ops)
+		kfree(job->hw_fence);
+	if (!job->hw_vm_fence->base.ops)
+		kfree(job->hw_vm_fence);
+
 	kfree(job);
 }
 
@@ -324,6 +329,11 @@ void amdgpu_job_free(struct amdgpu_job *job)
 	if (job->gang_submit != &job->base.s_fence->scheduled)
 		dma_fence_put(job->gang_submit);
 
+	if (!job->hw_fence->base.ops)
+		kfree(job->hw_fence);
+	if (!job->hw_vm_fence->base.ops)
+		kfree(job->hw_vm_fence);
+
 	kfree(job);
 }
 
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2025-11-04 13:50 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-27 22:02 [PATCH] drm/amdgpu: fix possible fence leaks from job structure Alex Deucher
2025-10-30 12:51 ` Alex Deucher
2025-10-31  8:40 ` Christian König
2025-10-31 13:53   ` Alex Deucher
2025-10-31 14:01     ` Christian König
2025-10-31 14:05       ` Alex Deucher
2025-10-31 15:28       ` Alex Deucher
2025-11-03 10:51         ` Christian König
2025-11-03 14:14           ` Alex Deucher
  -- strict thread matches above, loose matches on Subject: below --
2025-10-31 17:43 Alex Deucher
2025-11-04 13:36 ` Alex Deucher
2025-11-04 13:50 ` Christian König
2025-10-22 21:20 Alex Deucher
2025-10-23 19:02 ` Alex Deucher
2025-10-24  9:48 ` Zhang, Jesse(Jie)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox