AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: fix possible fence leaks from job structure
@ 2025-10-22 21:20 Alex Deucher
  2025-10-23 19:02 ` Alex Deucher
  2025-10-24  9:48 ` Zhang, Jesse(Jie)
  0 siblings, 2 replies; 15+ messages in thread
From: Alex Deucher @ 2025-10-22 21:20 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher

If we don't end up initializing the fences, free them when
we free the job.

Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 3d396ab625f33..8ad8b16e96760 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -295,6 +295,11 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
 
 	amdgpu_sync_free(&job->explicit_sync);
 
+	if (!job->hw_fence->base.ops)
+		kfree(job->hw_fence);
+	if (!job->hw_vm_fence->base.ops)
+		kfree(job->hw_vm_fence);
+
 	kfree(job);
 }
 
@@ -324,6 +329,11 @@ void amdgpu_job_free(struct amdgpu_job *job)
 	if (job->gang_submit != &job->base.s_fence->scheduled)
 		dma_fence_put(job->gang_submit);
 
+	if (!job->hw_fence->base.ops)
+		kfree(job->hw_fence);
+	if (!job->hw_vm_fence->base.ops)
+		kfree(job->hw_vm_fence);
+
 	kfree(job);
 }
 
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/amdgpu: fix possible fence leaks from job structure
  2025-10-22 21:20 Alex Deucher
@ 2025-10-23 19:02 ` Alex Deucher
  2025-10-24  9:48 ` Zhang, Jesse(Jie)
  1 sibling, 0 replies; 15+ messages in thread
From: Alex Deucher @ 2025-10-23 19:02 UTC (permalink / raw)
  To: Alex Deucher; +Cc: amd-gfx

Ping?

Alex

On Wed, Oct 22, 2025 at 5:26 PM Alex Deucher <alexander.deucher@amd.com> wrote:
>
> If we don't end up initializing the fences, free them when
> we free the job.
>
> Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 10 ++++++++++
>  1 file changed, 10 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 3d396ab625f33..8ad8b16e96760 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -295,6 +295,11 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
>
>         amdgpu_sync_free(&job->explicit_sync);
>
> +       if (!job->hw_fence->base.ops)
> +               kfree(job->hw_fence);
> +       if (!job->hw_vm_fence->base.ops)
> +               kfree(job->hw_vm_fence);
> +
>         kfree(job);
>  }
>
> @@ -324,6 +329,11 @@ void amdgpu_job_free(struct amdgpu_job *job)
>         if (job->gang_submit != &job->base.s_fence->scheduled)
>                 dma_fence_put(job->gang_submit);
>
> +       if (!job->hw_fence->base.ops)
> +               kfree(job->hw_fence);
> +       if (!job->hw_vm_fence->base.ops)
> +               kfree(job->hw_vm_fence);
> +
>         kfree(job);
>  }
>
> --
> 2.51.0
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH] drm/amdgpu: fix possible fence leaks from job structure
  2025-10-22 21:20 Alex Deucher
  2025-10-23 19:02 ` Alex Deucher
@ 2025-10-24  9:48 ` Zhang, Jesse(Jie)
  1 sibling, 0 replies; 15+ messages in thread
From: Zhang, Jesse(Jie) @ 2025-10-24  9:48 UTC (permalink / raw)
  To: Deucher, Alexander, amd-gfx@lists.freedesktop.org; +Cc: Deucher, Alexander

[AMD Official Use Only - AMD Internal Distribution Only]

this patch is Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com>

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Alex
> Deucher
> Sent: Thursday, October 23, 2025 5:20 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> Subject: [PATCH] drm/amdgpu: fix possible fence leaks from job structure
>
> If we don't end up initializing the fences, free them when we free the job.
>
> Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 10 ++++++++++
>  1 file changed, 10 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 3d396ab625f33..8ad8b16e96760 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -295,6 +295,11 @@ static void amdgpu_job_free_cb(struct drm_sched_job
> *s_job)
>
>       amdgpu_sync_free(&job->explicit_sync);
>
> +     if (!job->hw_fence->base.ops)
> +             kfree(job->hw_fence);
> +     if (!job->hw_vm_fence->base.ops)
> +             kfree(job->hw_vm_fence);
> +
>       kfree(job);
>  }
>
> @@ -324,6 +329,11 @@ void amdgpu_job_free(struct amdgpu_job *job)
>       if (job->gang_submit != &job->base.s_fence->scheduled)
>               dma_fence_put(job->gang_submit);
>
> +     if (!job->hw_fence->base.ops)
> +             kfree(job->hw_fence);
> +     if (!job->hw_vm_fence->base.ops)
> +             kfree(job->hw_vm_fence);
> +
>       kfree(job);
>  }
>
> --
> 2.51.0


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH] drm/amdgpu: fix possible fence leaks from job structure
@ 2025-10-27 22:02 Alex Deucher
  2025-10-30 12:51 ` Alex Deucher
  2025-10-31  8:40 ` Christian König
  0 siblings, 2 replies; 15+ messages in thread
From: Alex Deucher @ 2025-10-27 22:02 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher, Jesse Zhang

If we don't end up initializing the fences, free them when
we free the job.

v2: take a reference to the fences if we emit them

Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com> (v1)
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 18 ++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 ++
 3 files changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 39229ece83f83..0596114377600 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -302,6 +302,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 		return r;
 	}
 	*f = &af->base;
+	/* get a ref for the job */
+	dma_fence_get(*f);
 
 	if (ring->funcs->insert_end)
 		ring->funcs->insert_end(ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 55c7e104d5ca0..dc970f5fe601b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -295,6 +295,15 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
 
 	amdgpu_sync_free(&job->explicit_sync);
 
+	if (job->hw_fence->base.ops)
+		dma_fence_put(&job->hw_fence->base);
+	else
+		kfree(job->hw_fence);
+	if (job->hw_vm_fence->base.ops)
+		dma_fence_put(&job->hw_vm_fence->base);
+	else
+		kfree(job->hw_vm_fence);
+
 	kfree(job);
 }
 
@@ -324,6 +333,15 @@ void amdgpu_job_free(struct amdgpu_job *job)
 	if (job->gang_submit != &job->base.s_fence->scheduled)
 		dma_fence_put(job->gang_submit);
 
+	if (job->hw_fence->base.ops)
+		dma_fence_put(&job->hw_fence->base);
+	else
+		kfree(job->hw_fence);
+	if (job->hw_vm_fence->base.ops)
+		dma_fence_put(&job->hw_vm_fence->base);
+	else
+		kfree(job->hw_vm_fence);
+
 	kfree(job);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index db66b4232de02..f8c67840f446f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -845,6 +845,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 		if (r)
 			return r;
 		fence = &job->hw_vm_fence->base;
+		/* get a ref for the job */
+		dma_fence_get(fence);
 	}
 
 	if (vm_flush_needed) {
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/amdgpu: fix possible fence leaks from job structure
  2025-10-27 22:02 [PATCH] drm/amdgpu: fix possible fence leaks from job structure Alex Deucher
@ 2025-10-30 12:51 ` Alex Deucher
  2025-10-31  8:40 ` Christian König
  1 sibling, 0 replies; 15+ messages in thread
From: Alex Deucher @ 2025-10-30 12:51 UTC (permalink / raw)
  To: Alex Deucher, Christian Koenig; +Cc: amd-gfx, Jesse Zhang

Ping?

On Mon, Oct 27, 2025 at 6:03 PM Alex Deucher <alexander.deucher@amd.com> wrote:
>
> If we don't end up initializing the fences, free them when
> we free the job.
>
> v2: take a reference to the fences if we emit them
>
> Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
> Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com> (v1)
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 18 ++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 ++
>  3 files changed, 22 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 39229ece83f83..0596114377600 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -302,6 +302,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>                 return r;
>         }
>         *f = &af->base;
> +       /* get a ref for the job */
> +       dma_fence_get(*f);
>
>         if (ring->funcs->insert_end)
>                 ring->funcs->insert_end(ring);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 55c7e104d5ca0..dc970f5fe601b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -295,6 +295,15 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
>
>         amdgpu_sync_free(&job->explicit_sync);
>
> +       if (job->hw_fence->base.ops)
> +               dma_fence_put(&job->hw_fence->base);
> +       else
> +               kfree(job->hw_fence);
> +       if (job->hw_vm_fence->base.ops)
> +               dma_fence_put(&job->hw_vm_fence->base);
> +       else
> +               kfree(job->hw_vm_fence);
> +
>         kfree(job);
>  }
>
> @@ -324,6 +333,15 @@ void amdgpu_job_free(struct amdgpu_job *job)
>         if (job->gang_submit != &job->base.s_fence->scheduled)
>                 dma_fence_put(job->gang_submit);
>
> +       if (job->hw_fence->base.ops)
> +               dma_fence_put(&job->hw_fence->base);
> +       else
> +               kfree(job->hw_fence);
> +       if (job->hw_vm_fence->base.ops)
> +               dma_fence_put(&job->hw_vm_fence->base);
> +       else
> +               kfree(job->hw_vm_fence);
> +
>         kfree(job);
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index db66b4232de02..f8c67840f446f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -845,6 +845,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>                 if (r)
>                         return r;
>                 fence = &job->hw_vm_fence->base;
> +               /* get a ref for the job */
> +               dma_fence_get(fence);
>         }
>
>         if (vm_flush_needed) {
> --
> 2.51.0
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/amdgpu: fix possible fence leaks from job structure
  2025-10-27 22:02 [PATCH] drm/amdgpu: fix possible fence leaks from job structure Alex Deucher
  2025-10-30 12:51 ` Alex Deucher
@ 2025-10-31  8:40 ` Christian König
  2025-10-31 13:53   ` Alex Deucher
  1 sibling, 1 reply; 15+ messages in thread
From: Christian König @ 2025-10-31  8:40 UTC (permalink / raw)
  To: Alex Deucher, amd-gfx; +Cc: Jesse Zhang

On 10/27/25 23:02, Alex Deucher wrote:
> If we don't end up initializing the fences, free them when
> we free the job.
> 
> v2: take a reference to the fences if we emit them
> 
> Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
> Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com> (v1)
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 18 ++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 ++
>  3 files changed, 22 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 39229ece83f83..0596114377600 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -302,6 +302,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>  		return r;
>  	}
>  	*f = &af->base;
> +	/* get a ref for the job */
> +	dma_fence_get(*f);

I think it would be better to set the fence inside the job to NULL as soon as it is consumed/initialized.

>  
>  	if (ring->funcs->insert_end)
>  		ring->funcs->insert_end(ring);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 55c7e104d5ca0..dc970f5fe601b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -295,6 +295,15 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
>  
>  	amdgpu_sync_free(&job->explicit_sync);
>  
> +	if (job->hw_fence->base.ops)
> +		dma_fence_put(&job->hw_fence->base);
> +	else
> +		kfree(job->hw_fence);
> +	if (job->hw_vm_fence->base.ops)
> +		dma_fence_put(&job->hw_vm_fence->base);
> +	else
> +		kfree(job->hw_vm_fence);
> +

This way that here can just be a kfree(..).

Regards,
Christian.

>  	kfree(job);
>  }
>  
> @@ -324,6 +333,15 @@ void amdgpu_job_free(struct amdgpu_job *job)
>  	if (job->gang_submit != &job->base.s_fence->scheduled)
>  		dma_fence_put(job->gang_submit);
>  
> +	if (job->hw_fence->base.ops)
> +		dma_fence_put(&job->hw_fence->base);
> +	else
> +		kfree(job->hw_fence);
> +	if (job->hw_vm_fence->base.ops)
> +		dma_fence_put(&job->hw_vm_fence->base);
> +	else
> +		kfree(job->hw_vm_fence);
> +
>  	kfree(job);
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index db66b4232de02..f8c67840f446f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -845,6 +845,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>  		if (r)
>  			return r;
>  		fence = &job->hw_vm_fence->base;
> +		/* get a ref for the job */
> +		dma_fence_get(fence);
>  	}
>  
>  	if (vm_flush_needed) {


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/amdgpu: fix possible fence leaks from job structure
  2025-10-31  8:40 ` Christian König
@ 2025-10-31 13:53   ` Alex Deucher
  2025-10-31 14:01     ` Christian König
  0 siblings, 1 reply; 15+ messages in thread
From: Alex Deucher @ 2025-10-31 13:53 UTC (permalink / raw)
  To: Christian König; +Cc: Alex Deucher, amd-gfx, Jesse Zhang

On Fri, Oct 31, 2025 at 4:40 AM Christian König
<christian.koenig@amd.com> wrote:
>
> On 10/27/25 23:02, Alex Deucher wrote:
> > If we don't end up initializing the fences, free them when
> > we free the job.
> >
> > v2: take a reference to the fences if we emit them
> >
> > Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
> > Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com> (v1)
> > Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 ++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 18 ++++++++++++++++++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 ++
> >  3 files changed, 22 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > index 39229ece83f83..0596114377600 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > @@ -302,6 +302,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
> >               return r;
> >       }
> >       *f = &af->base;
> > +     /* get a ref for the job */
> > +     dma_fence_get(*f);
>
> I think it would be better to set the fence inside the job to NULL as soon as it is consumed/initialized.

We need the pointer for the job timed out handling.

Alex

>
> >
> >       if (ring->funcs->insert_end)
> >               ring->funcs->insert_end(ring);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > index 55c7e104d5ca0..dc970f5fe601b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > @@ -295,6 +295,15 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
> >
> >       amdgpu_sync_free(&job->explicit_sync);
> >
> > +     if (job->hw_fence->base.ops)
> > +             dma_fence_put(&job->hw_fence->base);
> > +     else
> > +             kfree(job->hw_fence);
> > +     if (job->hw_vm_fence->base.ops)
> > +             dma_fence_put(&job->hw_vm_fence->base);
> > +     else
> > +             kfree(job->hw_vm_fence);
> > +
>
> This way that here can just be a kfree(..).
>
> Regards,
> Christian.
>
> >       kfree(job);
> >  }
> >
> > @@ -324,6 +333,15 @@ void amdgpu_job_free(struct amdgpu_job *job)
> >       if (job->gang_submit != &job->base.s_fence->scheduled)
> >               dma_fence_put(job->gang_submit);
> >
> > +     if (job->hw_fence->base.ops)
> > +             dma_fence_put(&job->hw_fence->base);
> > +     else
> > +             kfree(job->hw_fence);
> > +     if (job->hw_vm_fence->base.ops)
> > +             dma_fence_put(&job->hw_vm_fence->base);
> > +     else
> > +             kfree(job->hw_vm_fence);
> > +
> >       kfree(job);
> >  }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index db66b4232de02..f8c67840f446f 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -845,6 +845,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
> >               if (r)
> >                       return r;
> >               fence = &job->hw_vm_fence->base;
> > +             /* get a ref for the job */
> > +             dma_fence_get(fence);
> >       }
> >
> >       if (vm_flush_needed) {
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/amdgpu: fix possible fence leaks from job structure
  2025-10-31 13:53   ` Alex Deucher
@ 2025-10-31 14:01     ` Christian König
  2025-10-31 14:05       ` Alex Deucher
  2025-10-31 15:28       ` Alex Deucher
  0 siblings, 2 replies; 15+ messages in thread
From: Christian König @ 2025-10-31 14:01 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Alex Deucher, amd-gfx, Jesse Zhang

On 10/31/25 14:53, Alex Deucher wrote:
> On Fri, Oct 31, 2025 at 4:40 AM Christian König
> <christian.koenig@amd.com> wrote:
>>
>> On 10/27/25 23:02, Alex Deucher wrote:
>>> If we don't end up initializing the fences, free them when
>>> we free the job.
>>>
>>> v2: take a reference to the fences if we emit them
>>>
>>> Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
>>> Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com> (v1)
>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 ++
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 18 ++++++++++++++++++
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 ++
>>>  3 files changed, 22 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> index 39229ece83f83..0596114377600 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> @@ -302,6 +302,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>>>               return r;
>>>       }
>>>       *f = &af->base;
>>> +     /* get a ref for the job */
>>> +     dma_fence_get(*f);
>>
>> I think it would be better to set the fence inside the job to NULL as soon as it is consumed/initialized.
> 
> We need the pointer for the job timed out handling.

I don't think that is true. During a timeout we should have job->s_fence->parent for the HW fence.

But even when we go down that route here, you only grab a reference to the hw_fence but not the hw_vm_fence.

That looks broken to me.

Christian.

> 
> Alex
> 
>>
>>>
>>>       if (ring->funcs->insert_end)
>>>               ring->funcs->insert_end(ring);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>> index 55c7e104d5ca0..dc970f5fe601b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>> @@ -295,6 +295,15 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
>>>
>>>       amdgpu_sync_free(&job->explicit_sync);
>>>
>>> +     if (job->hw_fence->base.ops)
>>> +             dma_fence_put(&job->hw_fence->base);
>>> +     else
>>> +             kfree(job->hw_fence);
>>> +     if (job->hw_vm_fence->base.ops)
>>> +             dma_fence_put(&job->hw_vm_fence->base);
>>> +     else
>>> +             kfree(job->hw_vm_fence);
>>> +
>>
>> This way that here can just be a kfree(..).
>>
>> Regards,
>> Christian.
>>
>>>       kfree(job);
>>>  }
>>>
>>> @@ -324,6 +333,15 @@ void amdgpu_job_free(struct amdgpu_job *job)
>>>       if (job->gang_submit != &job->base.s_fence->scheduled)
>>>               dma_fence_put(job->gang_submit);
>>>
>>> +     if (job->hw_fence->base.ops)
>>> +             dma_fence_put(&job->hw_fence->base);
>>> +     else
>>> +             kfree(job->hw_fence);
>>> +     if (job->hw_vm_fence->base.ops)
>>> +             dma_fence_put(&job->hw_vm_fence->base);
>>> +     else
>>> +             kfree(job->hw_vm_fence);
>>> +
>>>       kfree(job);
>>>  }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index db66b4232de02..f8c67840f446f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -845,6 +845,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>>               if (r)
>>>                       return r;
>>>               fence = &job->hw_vm_fence->base;
>>> +             /* get a ref for the job */
>>> +             dma_fence_get(fence);
>>>       }
>>>
>>>       if (vm_flush_needed) {
>>


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/amdgpu: fix possible fence leaks from job structure
  2025-10-31 14:01     ` Christian König
@ 2025-10-31 14:05       ` Alex Deucher
  2025-10-31 15:28       ` Alex Deucher
  1 sibling, 0 replies; 15+ messages in thread
From: Alex Deucher @ 2025-10-31 14:05 UTC (permalink / raw)
  To: Christian König; +Cc: Alex Deucher, amd-gfx, Jesse Zhang

On Fri, Oct 31, 2025 at 10:01 AM Christian König
<christian.koenig@amd.com> wrote:
>
> On 10/31/25 14:53, Alex Deucher wrote:
> > On Fri, Oct 31, 2025 at 4:40 AM Christian König
> > <christian.koenig@amd.com> wrote:
> >>
> >> On 10/27/25 23:02, Alex Deucher wrote:
> >>> If we don't end up initializing the fences, free them when
> >>> we free the job.
> >>>
> >>> v2: take a reference to the fences if we emit them
> >>>
> >>> Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
> >>> Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com> (v1)
> >>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> >>> ---
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 ++
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 18 ++++++++++++++++++
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 ++
> >>>  3 files changed, 22 insertions(+)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> >>> index 39229ece83f83..0596114377600 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> >>> @@ -302,6 +302,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
> >>>               return r;
> >>>       }
> >>>       *f = &af->base;
> >>> +     /* get a ref for the job */
> >>> +     dma_fence_get(*f);
> >>
> >> I think it would be better to set the fence inside the job to NULL as soon as it is consumed/initialized.
> >
> > We need the pointer for the job timed out handling.
>
> I don't think that is true. During a timeout we should have job->s_fence->parent for the HW fence.
>
> But even when we go down that route here, you only grab a reference to the hw_fence but not the hw_vm_fence.
>
> That looks broken to me.

I also grab a reference to the vm fence.  See the last hunk of the patch below.

Alex

>
> Christian.
>
> >
> > Alex
> >
> >>
> >>>
> >>>       if (ring->funcs->insert_end)
> >>>               ring->funcs->insert_end(ring);
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> >>> index 55c7e104d5ca0..dc970f5fe601b 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> >>> @@ -295,6 +295,15 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
> >>>
> >>>       amdgpu_sync_free(&job->explicit_sync);
> >>>
> >>> +     if (job->hw_fence->base.ops)
> >>> +             dma_fence_put(&job->hw_fence->base);
> >>> +     else
> >>> +             kfree(job->hw_fence);
> >>> +     if (job->hw_vm_fence->base.ops)
> >>> +             dma_fence_put(&job->hw_vm_fence->base);
> >>> +     else
> >>> +             kfree(job->hw_vm_fence);
> >>> +
> >>
> >> This way that here can just be a kfree(..).
> >>
> >> Regards,
> >> Christian.
> >>
> >>>       kfree(job);
> >>>  }
> >>>
> >>> @@ -324,6 +333,15 @@ void amdgpu_job_free(struct amdgpu_job *job)
> >>>       if (job->gang_submit != &job->base.s_fence->scheduled)
> >>>               dma_fence_put(job->gang_submit);
> >>>
> >>> +     if (job->hw_fence->base.ops)
> >>> +             dma_fence_put(&job->hw_fence->base);
> >>> +     else
> >>> +             kfree(job->hw_fence);
> >>> +     if (job->hw_vm_fence->base.ops)
> >>> +             dma_fence_put(&job->hw_vm_fence->base);
> >>> +     else
> >>> +             kfree(job->hw_vm_fence);
> >>> +
> >>>       kfree(job);
> >>>  }
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>> index db66b4232de02..f8c67840f446f 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>> @@ -845,6 +845,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
> >>>               if (r)
> >>>                       return r;
> >>>               fence = &job->hw_vm_fence->base;
> >>> +             /* get a ref for the job */
> >>> +             dma_fence_get(fence);
> >>>       }
> >>>
> >>>       if (vm_flush_needed) {
> >>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/amdgpu: fix possible fence leaks from job structure
  2025-10-31 14:01     ` Christian König
  2025-10-31 14:05       ` Alex Deucher
@ 2025-10-31 15:28       ` Alex Deucher
  2025-11-03 10:51         ` Christian König
  1 sibling, 1 reply; 15+ messages in thread
From: Alex Deucher @ 2025-10-31 15:28 UTC (permalink / raw)
  To: Christian König; +Cc: Alex Deucher, amd-gfx, Jesse Zhang

On Fri, Oct 31, 2025 at 10:01 AM Christian König
<christian.koenig@amd.com> wrote:
>
> On 10/31/25 14:53, Alex Deucher wrote:
> > On Fri, Oct 31, 2025 at 4:40 AM Christian König
> > <christian.koenig@amd.com> wrote:
> >>
> >> On 10/27/25 23:02, Alex Deucher wrote:
> >>> If we don't end up initializing the fences, free them when
> >>> we free the job.
> >>>
> >>> v2: take a reference to the fences if we emit them
> >>>
> >>> Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
> >>> Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com> (v1)
> >>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> >>> ---
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 ++
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 18 ++++++++++++++++++
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 ++
> >>>  3 files changed, 22 insertions(+)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> >>> index 39229ece83f83..0596114377600 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> >>> @@ -302,6 +302,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
> >>>               return r;
> >>>       }
> >>>       *f = &af->base;
> >>> +     /* get a ref for the job */
> >>> +     dma_fence_get(*f);
> >>
> >> I think it would be better to set the fence inside the job to NULL as soon as it is consumed/initialized.
> >
> > We need the pointer for the job timed out handling.
>
> I don't think that is true. During a timeout we should have job->s_fence->parent for the HW fence.

We also need to keep it around for job_submit_direct() so we can free
the IBs used for that.

Alex

>
> But even when we go down that route here, you only grab a reference to the hw_fence but not the hw_vm_fence.
>
> That looks broken to me.
>
> Christian.
>
> >
> > Alex
> >
> >>
> >>>
> >>>       if (ring->funcs->insert_end)
> >>>               ring->funcs->insert_end(ring);
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> >>> index 55c7e104d5ca0..dc970f5fe601b 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> >>> @@ -295,6 +295,15 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
> >>>
> >>>       amdgpu_sync_free(&job->explicit_sync);
> >>>
> >>> +     if (job->hw_fence->base.ops)
> >>> +             dma_fence_put(&job->hw_fence->base);
> >>> +     else
> >>> +             kfree(job->hw_fence);
> >>> +     if (job->hw_vm_fence->base.ops)
> >>> +             dma_fence_put(&job->hw_vm_fence->base);
> >>> +     else
> >>> +             kfree(job->hw_vm_fence);
> >>> +
> >>
> >> This way that here can just be a kfree(..).
> >>
> >> Regards,
> >> Christian.
> >>
> >>>       kfree(job);
> >>>  }
> >>>
> >>> @@ -324,6 +333,15 @@ void amdgpu_job_free(struct amdgpu_job *job)
> >>>       if (job->gang_submit != &job->base.s_fence->scheduled)
> >>>               dma_fence_put(job->gang_submit);
> >>>
> >>> +     if (job->hw_fence->base.ops)
> >>> +             dma_fence_put(&job->hw_fence->base);
> >>> +     else
> >>> +             kfree(job->hw_fence);
> >>> +     if (job->hw_vm_fence->base.ops)
> >>> +             dma_fence_put(&job->hw_vm_fence->base);
> >>> +     else
> >>> +             kfree(job->hw_vm_fence);
> >>> +
> >>>       kfree(job);
> >>>  }
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>> index db66b4232de02..f8c67840f446f 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>> @@ -845,6 +845,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
> >>>               if (r)
> >>>                       return r;
> >>>               fence = &job->hw_vm_fence->base;
> >>> +             /* get a ref for the job */
> >>> +             dma_fence_get(fence);
> >>>       }
> >>>
> >>>       if (vm_flush_needed) {
> >>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH] drm/amdgpu: fix possible fence leaks from job structure
@ 2025-10-31 17:43 Alex Deucher
  2025-11-04 13:36 ` Alex Deucher
  2025-11-04 13:50 ` Christian König
  0 siblings, 2 replies; 15+ messages in thread
From: Alex Deucher @ 2025-10-31 17:43 UTC (permalink / raw)
  To: amd-gfx, christian.koenig; +Cc: Alex Deucher, Jesse Zhang

If we don't end up initializing the fences, free them when
we free the job.  We can't set the hw_fence to NULL after
emitting it because we need it in the cleanup path for the
submit direct case.

v2: take a reference to the fences if we emit them
v3: handle non-job fence in error paths

Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com> (v1)
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  | 19 +++++++++++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 18 ++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 ++
 3 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 39229ece83f83..586a58facca10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -176,18 +176,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 
 	if (!ring->sched.ready) {
 		dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
-		return -EINVAL;
+		r = -EINVAL;
+		goto free_fence;
 	}
 
 	if (vm && !job->vmid) {
 		dev_err(adev->dev, "VM IB without ID\n");
-		return -EINVAL;
+		r = -EINVAL;
+		goto free_fence;
 	}
 
 	if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) &&
 	    (!ring->funcs->secure_submission_supported)) {
 		dev_err(adev->dev, "secure submissions not supported on ring <%s>\n", ring->name);
-		return -EINVAL;
+		r = -EINVAL;
+		goto free_fence;
 	}
 
 	alloc_size = ring->funcs->emit_frame_size + num_ibs *
@@ -196,7 +199,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 	r = amdgpu_ring_alloc(ring, alloc_size);
 	if (r) {
 		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
-		return r;
+		goto free_fence;
 	}
 
 	need_ctx_switch = ring->current_ctx != fence_ctx;
@@ -302,6 +305,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 		return r;
 	}
 	*f = &af->base;
+	/* get a ref for the job */
+	if (job)
+		dma_fence_get(*f);
 
 	if (ring->funcs->insert_end)
 		ring->funcs->insert_end(ring);
@@ -328,6 +334,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 	amdgpu_ring_commit(ring);
 
 	return 0;
+
+free_fence:
+	if (!job)
+		kfree(af);
+	return r;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index fd6aade7ee9e3..efa3281145f6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -293,6 +293,15 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
 
 	amdgpu_sync_free(&job->explicit_sync);
 
+	if (job->hw_fence->base.ops)
+		dma_fence_put(&job->hw_fence->base);
+	else
+		kfree(job->hw_fence);
+	if (job->hw_vm_fence->base.ops)
+		dma_fence_put(&job->hw_vm_fence->base);
+	else
+		kfree(job->hw_vm_fence);
+
 	kfree(job);
 }
 
@@ -322,6 +331,15 @@ void amdgpu_job_free(struct amdgpu_job *job)
 	if (job->gang_submit != &job->base.s_fence->scheduled)
 		dma_fence_put(job->gang_submit);
 
+	if (job->hw_fence->base.ops)
+		dma_fence_put(&job->hw_fence->base);
+	else
+		kfree(job->hw_fence);
+	if (job->hw_vm_fence->base.ops)
+		dma_fence_put(&job->hw_vm_fence->base);
+	else
+		kfree(job->hw_vm_fence);
+
 	kfree(job);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c3dfb949a9b87..82e897cd5feac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -849,6 +849,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 		if (r)
 			return r;
 		fence = &job->hw_vm_fence->base;
+		/* get a ref for the job */
+		dma_fence_get(fence);
 	}
 
 	if (vm_flush_needed) {
-- 
2.51.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/amdgpu: fix possible fence leaks from job structure
  2025-10-31 15:28       ` Alex Deucher
@ 2025-11-03 10:51         ` Christian König
  2025-11-03 14:14           ` Alex Deucher
  0 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2025-11-03 10:51 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Alex Deucher, amd-gfx, Jesse Zhang

On 10/31/25 16:28, Alex Deucher wrote:
> On Fri, Oct 31, 2025 at 10:01 AM Christian König
> <christian.koenig@amd.com> wrote:
>>
>> On 10/31/25 14:53, Alex Deucher wrote:
>>> On Fri, Oct 31, 2025 at 4:40 AM Christian König
>>> <christian.koenig@amd.com> wrote:
>>>>
>>>> On 10/27/25 23:02, Alex Deucher wrote:
>>>>> If we don't end up initializing the fences, free them when
>>>>> we free the job.
>>>>>
>>>>> v2: take a reference to the fences if we emit them
>>>>>
>>>>> Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
>>>>> Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com> (v1)
>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>>> ---
>>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 ++
>>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 18 ++++++++++++++++++
>>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 ++
>>>>>  3 files changed, 22 insertions(+)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>>>> index 39229ece83f83..0596114377600 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>>>> @@ -302,6 +302,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>>>>>               return r;
>>>>>       }
>>>>>       *f = &af->base;
>>>>> +     /* get a ref for the job */
>>>>> +     dma_fence_get(*f);
>>>>
>>>> I think it would be better to set the fence inside the job to NULL as soon as it is consumed/initialized.
>>>
>>> We need the pointer for the job timed out handling.
>>
>> I don't think that is true. During a timeout we should have job->s_fence->parent for the HW fence.
> 
> We also need to keep it around for job_submit_direct() so we can free
> the IBs used for that.

Good point, but that handling here is really not straightforward.

Anyway feel free to add my rb for now, but we need to revisit that at some point.

Regards,
Christian.

> 
> Alex
> 
>>
>> But even when we go down that route here, you only grab a reference to the hw_fence but not the hw_vm_fence.
>>
>> That looks broken to me.
>>
>> Christian.
>>
>>>
>>> Alex
>>>
>>>>
>>>>>
>>>>>       if (ring->funcs->insert_end)
>>>>>               ring->funcs->insert_end(ring);
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>>>> index 55c7e104d5ca0..dc970f5fe601b 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>>>> @@ -295,6 +295,15 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
>>>>>
>>>>>       amdgpu_sync_free(&job->explicit_sync);
>>>>>
>>>>> +     if (job->hw_fence->base.ops)
>>>>> +             dma_fence_put(&job->hw_fence->base);
>>>>> +     else
>>>>> +             kfree(job->hw_fence);
>>>>> +     if (job->hw_vm_fence->base.ops)
>>>>> +             dma_fence_put(&job->hw_vm_fence->base);
>>>>> +     else
>>>>> +             kfree(job->hw_vm_fence);
>>>>> +
>>>>
>>>> This way that here can just be a kfree(..).
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>>       kfree(job);
>>>>>  }
>>>>>
>>>>> @@ -324,6 +333,15 @@ void amdgpu_job_free(struct amdgpu_job *job)
>>>>>       if (job->gang_submit != &job->base.s_fence->scheduled)
>>>>>               dma_fence_put(job->gang_submit);
>>>>>
>>>>> +     if (job->hw_fence->base.ops)
>>>>> +             dma_fence_put(&job->hw_fence->base);
>>>>> +     else
>>>>> +             kfree(job->hw_fence);
>>>>> +     if (job->hw_vm_fence->base.ops)
>>>>> +             dma_fence_put(&job->hw_vm_fence->base);
>>>>> +     else
>>>>> +             kfree(job->hw_vm_fence);
>>>>> +
>>>>>       kfree(job);
>>>>>  }
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> index db66b4232de02..f8c67840f446f 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> @@ -845,6 +845,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>>>>>               if (r)
>>>>>                       return r;
>>>>>               fence = &job->hw_vm_fence->base;
>>>>> +             /* get a ref for the job */
>>>>> +             dma_fence_get(fence);
>>>>>       }
>>>>>
>>>>>       if (vm_flush_needed) {
>>>>
>>


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/amdgpu: fix possible fence leaks from job structure
  2025-11-03 10:51         ` Christian König
@ 2025-11-03 14:14           ` Alex Deucher
  0 siblings, 0 replies; 15+ messages in thread
From: Alex Deucher @ 2025-11-03 14:14 UTC (permalink / raw)
  To: Christian König; +Cc: Alex Deucher, amd-gfx, Jesse Zhang

On Mon, Nov 3, 2025 at 5:51 AM Christian König <christian.koenig@amd.com> wrote:
>
> On 10/31/25 16:28, Alex Deucher wrote:
> > On Fri, Oct 31, 2025 at 10:01 AM Christian König
> > <christian.koenig@amd.com> wrote:
> >>
> >> On 10/31/25 14:53, Alex Deucher wrote:
> >>> On Fri, Oct 31, 2025 at 4:40 AM Christian König
> >>> <christian.koenig@amd.com> wrote:
> >>>>
> >>>> On 10/27/25 23:02, Alex Deucher wrote:
> >>>>> If we don't end up initializing the fences, free them when
> >>>>> we free the job.
> >>>>>
> >>>>> v2: take a reference to the fences if we emit them
> >>>>>
> >>>>> Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
> >>>>> Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com> (v1)
> >>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> >>>>> ---
> >>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 ++
> >>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 18 ++++++++++++++++++
> >>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 ++
> >>>>>  3 files changed, 22 insertions(+)
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> >>>>> index 39229ece83f83..0596114377600 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> >>>>> @@ -302,6 +302,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
> >>>>>               return r;
> >>>>>       }
> >>>>>       *f = &af->base;
> >>>>> +     /* get a ref for the job */
> >>>>> +     dma_fence_get(*f);
> >>>>
> >>>> I think it would be better to set the fence inside the job to NULL as soon as it is consumed/initialized.
> >>>
> >>> We need the pointer for the job timed out handling.
> >>
> >> I don't think that is true. During a timeout we should have job->s_fence->parent for the HW fence.
> >
> > We also need to keep it around for job_submit_direct() so we can free
> > the IBs used for that.
>
> Good point, but that handling here is really not straightforward.
>
> Anyway feel free to add my rb for now, but we need to revisit that at some point.

Thanks.  I found a leak of the non-job fence.  Please see the latest
revision of the patch.

Alex

>
> Regards,
> Christian.
>
> >
> > Alex
> >
> >>
> >> But even when we go down that route here, you only grab a reference to the hw_fence but not the hw_vm_fence.
> >>
> >> That looks broken to me.
> >>
> >> Christian.
> >>
> >>>
> >>> Alex
> >>>
> >>>>
> >>>>>
> >>>>>       if (ring->funcs->insert_end)
> >>>>>               ring->funcs->insert_end(ring);
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> >>>>> index 55c7e104d5ca0..dc970f5fe601b 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> >>>>> @@ -295,6 +295,15 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
> >>>>>
> >>>>>       amdgpu_sync_free(&job->explicit_sync);
> >>>>>
> >>>>> +     if (job->hw_fence->base.ops)
> >>>>> +             dma_fence_put(&job->hw_fence->base);
> >>>>> +     else
> >>>>> +             kfree(job->hw_fence);
> >>>>> +     if (job->hw_vm_fence->base.ops)
> >>>>> +             dma_fence_put(&job->hw_vm_fence->base);
> >>>>> +     else
> >>>>> +             kfree(job->hw_vm_fence);
> >>>>> +
> >>>>
> >>>> This way that here can just be a kfree(..).
> >>>>
> >>>> Regards,
> >>>> Christian.
> >>>>
> >>>>>       kfree(job);
> >>>>>  }
> >>>>>
> >>>>> @@ -324,6 +333,15 @@ void amdgpu_job_free(struct amdgpu_job *job)
> >>>>>       if (job->gang_submit != &job->base.s_fence->scheduled)
> >>>>>               dma_fence_put(job->gang_submit);
> >>>>>
> >>>>> +     if (job->hw_fence->base.ops)
> >>>>> +             dma_fence_put(&job->hw_fence->base);
> >>>>> +     else
> >>>>> +             kfree(job->hw_fence);
> >>>>> +     if (job->hw_vm_fence->base.ops)
> >>>>> +             dma_fence_put(&job->hw_vm_fence->base);
> >>>>> +     else
> >>>>> +             kfree(job->hw_vm_fence);
> >>>>> +
> >>>>>       kfree(job);
> >>>>>  }
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>> index db66b4232de02..f8c67840f446f 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>> @@ -845,6 +845,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
> >>>>>               if (r)
> >>>>>                       return r;
> >>>>>               fence = &job->hw_vm_fence->base;
> >>>>> +             /* get a ref for the job */
> >>>>> +             dma_fence_get(fence);
> >>>>>       }
> >>>>>
> >>>>>       if (vm_flush_needed) {
> >>>>
> >>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/amdgpu: fix possible fence leaks from job structure
  2025-10-31 17:43 Alex Deucher
@ 2025-11-04 13:36 ` Alex Deucher
  2025-11-04 13:50 ` Christian König
  1 sibling, 0 replies; 15+ messages in thread
From: Alex Deucher @ 2025-11-04 13:36 UTC (permalink / raw)
  To: Alex Deucher; +Cc: amd-gfx, christian.koenig, Jesse Zhang

Ping?

On Fri, Oct 31, 2025 at 3:01 PM Alex Deucher <alexander.deucher@amd.com> wrote:
>
> If we don't end up initializing the fences, free them when
> we free the job.  We can't set the hw_fence to NULL after
> emitting it because we need it in the cleanup path for the
> submit direct case.
>
> v2: take a reference to the fences if we emit them
> v3: handle non-job fence in error paths
>
> Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
> Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com> (v1)
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  | 19 +++++++++++++++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 18 ++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 ++
>  3 files changed, 35 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 39229ece83f83..586a58facca10 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -176,18 +176,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>
>         if (!ring->sched.ready) {
>                 dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
> -               return -EINVAL;
> +               r = -EINVAL;
> +               goto free_fence;
>         }
>
>         if (vm && !job->vmid) {
>                 dev_err(adev->dev, "VM IB without ID\n");
> -               return -EINVAL;
> +               r = -EINVAL;
> +               goto free_fence;
>         }
>
>         if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) &&
>             (!ring->funcs->secure_submission_supported)) {
>                 dev_err(adev->dev, "secure submissions not supported on ring <%s>\n", ring->name);
> -               return -EINVAL;
> +               r = -EINVAL;
> +               goto free_fence;
>         }
>
>         alloc_size = ring->funcs->emit_frame_size + num_ibs *
> @@ -196,7 +199,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>         r = amdgpu_ring_alloc(ring, alloc_size);
>         if (r) {
>                 dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
> -               return r;
> +               goto free_fence;
>         }
>
>         need_ctx_switch = ring->current_ctx != fence_ctx;
> @@ -302,6 +305,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>                 return r;
>         }
>         *f = &af->base;
> +       /* get a ref for the job */
> +       if (job)
> +               dma_fence_get(*f);
>
>         if (ring->funcs->insert_end)
>                 ring->funcs->insert_end(ring);
> @@ -328,6 +334,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>         amdgpu_ring_commit(ring);
>
>         return 0;
> +
> +free_fence:
> +       if (!job)
> +               kfree(af);
> +       return r;
>  }
>
>  /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index fd6aade7ee9e3..efa3281145f6c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -293,6 +293,15 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
>
>         amdgpu_sync_free(&job->explicit_sync);
>
> +       if (job->hw_fence->base.ops)
> +               dma_fence_put(&job->hw_fence->base);
> +       else
> +               kfree(job->hw_fence);
> +       if (job->hw_vm_fence->base.ops)
> +               dma_fence_put(&job->hw_vm_fence->base);
> +       else
> +               kfree(job->hw_vm_fence);
> +
>         kfree(job);
>  }
>
> @@ -322,6 +331,15 @@ void amdgpu_job_free(struct amdgpu_job *job)
>         if (job->gang_submit != &job->base.s_fence->scheduled)
>                 dma_fence_put(job->gang_submit);
>
> +       if (job->hw_fence->base.ops)
> +               dma_fence_put(&job->hw_fence->base);
> +       else
> +               kfree(job->hw_fence);
> +       if (job->hw_vm_fence->base.ops)
> +               dma_fence_put(&job->hw_vm_fence->base);
> +       else
> +               kfree(job->hw_vm_fence);
> +
>         kfree(job);
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index c3dfb949a9b87..82e897cd5feac 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -849,6 +849,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>                 if (r)
>                         return r;
>                 fence = &job->hw_vm_fence->base;
> +               /* get a ref for the job */
> +               dma_fence_get(fence);
>         }
>
>         if (vm_flush_needed) {
> --
> 2.51.1
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/amdgpu: fix possible fence leaks from job structure
  2025-10-31 17:43 Alex Deucher
  2025-11-04 13:36 ` Alex Deucher
@ 2025-11-04 13:50 ` Christian König
  1 sibling, 0 replies; 15+ messages in thread
From: Christian König @ 2025-11-04 13:50 UTC (permalink / raw)
  To: Alex Deucher, amd-gfx; +Cc: Jesse Zhang

On 10/31/25 18:43, Alex Deucher wrote:
> If we don't end up initializing the fences, free them when
> we free the job.  We can't set the hw_fence to NULL after
> emitting it because we need it in the cleanup path for the
> submit direct case.
> 
> v2: take a reference to the fences if we emit them
> v3: handle non-job fence in error paths
> 
> Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
> Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com> (v1)
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  | 19 +++++++++++++++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 18 ++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 ++
>  3 files changed, 35 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 39229ece83f83..586a58facca10 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -176,18 +176,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>  
>  	if (!ring->sched.ready) {
>  		dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
> -		return -EINVAL;
> +		r = -EINVAL;
> +		goto free_fence;
>  	}
>  
>  	if (vm && !job->vmid) {
>  		dev_err(adev->dev, "VM IB without ID\n");
> -		return -EINVAL;
> +		r = -EINVAL;
> +		goto free_fence;
>  	}
>  
>  	if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) &&
>  	    (!ring->funcs->secure_submission_supported)) {
>  		dev_err(adev->dev, "secure submissions not supported on ring <%s>\n", ring->name);
> -		return -EINVAL;
> +		r = -EINVAL;
> +		goto free_fence;
>  	}
>  
>  	alloc_size = ring->funcs->emit_frame_size + num_ibs *
> @@ -196,7 +199,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>  	r = amdgpu_ring_alloc(ring, alloc_size);
>  	if (r) {
>  		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
> -		return r;
> +		goto free_fence;
>  	}
>  
>  	need_ctx_switch = ring->current_ctx != fence_ctx;
> @@ -302,6 +305,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>  		return r;
>  	}
>  	*f = &af->base;
> +	/* get a ref for the job */
> +	if (job)
> +		dma_fence_get(*f);
>  
>  	if (ring->funcs->insert_end)
>  		ring->funcs->insert_end(ring);
> @@ -328,6 +334,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>  	amdgpu_ring_commit(ring);
>  
>  	return 0;
> +
> +free_fence:
> +	if (!job)
> +		kfree(af);
> +	return r;
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index fd6aade7ee9e3..efa3281145f6c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -293,6 +293,15 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
>  
>  	amdgpu_sync_free(&job->explicit_sync);
>  
> +	if (job->hw_fence->base.ops)
> +		dma_fence_put(&job->hw_fence->base);
> +	else
> +		kfree(job->hw_fence);
> +	if (job->hw_vm_fence->base.ops)
> +		dma_fence_put(&job->hw_vm_fence->base);
> +	else
> +		kfree(job->hw_vm_fence);
> +
>  	kfree(job);
>  }
>  
> @@ -322,6 +331,15 @@ void amdgpu_job_free(struct amdgpu_job *job)
>  	if (job->gang_submit != &job->base.s_fence->scheduled)
>  		dma_fence_put(job->gang_submit);
>  
> +	if (job->hw_fence->base.ops)
> +		dma_fence_put(&job->hw_fence->base);
> +	else
> +		kfree(job->hw_fence);
> +	if (job->hw_vm_fence->base.ops)
> +		dma_fence_put(&job->hw_vm_fence->base);
> +	else
> +		kfree(job->hw_vm_fence);
> +
>  	kfree(job);
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index c3dfb949a9b87..82e897cd5feac 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -849,6 +849,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>  		if (r)
>  			return r;
>  		fence = &job->hw_vm_fence->base;
> +		/* get a ref for the job */
> +		dma_fence_get(fence);
>  	}
>  
>  	if (vm_flush_needed) {


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2025-11-04 13:50 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-27 22:02 [PATCH] drm/amdgpu: fix possible fence leaks from job structure Alex Deucher
2025-10-30 12:51 ` Alex Deucher
2025-10-31  8:40 ` Christian König
2025-10-31 13:53   ` Alex Deucher
2025-10-31 14:01     ` Christian König
2025-10-31 14:05       ` Alex Deucher
2025-10-31 15:28       ` Alex Deucher
2025-11-03 10:51         ` Christian König
2025-11-03 14:14           ` Alex Deucher
  -- strict thread matches above, loose matches on Subject: below --
2025-10-31 17:43 Alex Deucher
2025-11-04 13:36 ` Alex Deucher
2025-11-04 13:50 ` Christian König
2025-10-22 21:20 Alex Deucher
2025-10-23 19:02 ` Alex Deucher
2025-10-24  9:48 ` Zhang, Jesse(Jie)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox