AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: "Khatri, Sunil" <sukhatri@amd.com>
To: "Christian König" <ckoenig.leichtzumerken@gmail.com>,
	tursulin@ursulin.net, Alexander.Deucher@amd.com,
	Prike.Liang@amd.com, Yogesh.Mohanmarimuthu@amd.com,
	SRINIVASAN.SHANMUGAM@amd.com, Sunil.Khatri@amd.com,
	amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 04/11] drm/amdgpu: completely rework eviction fence handling
Date: Wed, 11 Mar 2026 17:57:01 +0530	[thread overview]
Message-ID: <8cae20bf-4ff5-4a03-8137-36096197364f@amd.com> (raw)
In-Reply-To: <20260310191327.2279-4-christian.koenig@amd.com>


On 11-03-2026 12:43 am, Christian König wrote:
> Well that was broken on multiple levels.
>
> First of all a lot of checks where placed at incorrect locations, especially if
where -> were
> the resume worker should run or not.
>
> Then a bunch of code was just mid-layering because of incorrect assignment who
> should do what.
>
> And finally comments explaining what happens instead of why.
>
> Just re-write it from scratch, that should at least fix some of the hangs we
> are seeing.
>
> Use RCU for the eviction fence pointer in the manager, the spinlock usage was
> mostly incorrect as well. Then finally remove all the nonsense checks and
> actually add them in the correct locations.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |   4 +-
>   .../drm/amd/amdgpu/amdgpu_eviction_fence.c    | 213 ++++++------------
>   .../drm/amd/amdgpu/amdgpu_eviction_fence.h    |  54 ++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c       |   5 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c     |  29 ++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h     |   2 +-
>   7 files changed, 112 insertions(+), 205 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 03814a23eb54..67b8c33d5ee3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2952,9 +2952,9 @@ static int amdgpu_drm_release(struct inode *inode, struct file *filp)
>   	int idx;
>   
>   	if (fpriv && drm_dev_enter(dev, &idx)) {
> -		fpriv->evf_mgr.fd_closing = true;
> -		amdgpu_eviction_fence_destroy(&fpriv->evf_mgr);
> +		amdgpu_evf_mgr_shutdown(&fpriv->evf_mgr);
>   		amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
> +		amdgpu_evf_mgr_fini(&fpriv->evf_mgr);
>   		drm_dev_exit(idx);
>   	}
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
> index 23d7d0b0d625..8fe9f91f9551 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
> @@ -25,9 +25,6 @@
>   #include <drm/drm_exec.h>
>   #include "amdgpu.h"
>   
> -#define work_to_evf_mgr(w, name) container_of(w, struct amdgpu_eviction_fence_mgr, name)
> -#define evf_mgr_to_fpriv(e) container_of(e, struct amdgpu_fpriv, evf_mgr)
> -
>   static const char *
>   amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence)
>   {
> @@ -43,102 +40,14 @@ amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f)
>   	return ef->timeline_name;
>   }
>   
> -int
> -amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
> -				    struct drm_exec *exec)
> -{
> -	struct amdgpu_eviction_fence *old_ef, *new_ef;
> -	struct drm_gem_object *obj;
> -	unsigned long index;
> -	int ret;
> -
> -	if (evf_mgr->ev_fence &&
> -	    !dma_fence_is_signaled(&evf_mgr->ev_fence->base))
> -		return 0;
> -	/*
> -	 * Steps to replace eviction fence:
> -	 * * lock all objects in exec (caller)
> -	 * * create a new eviction fence
> -	 * * update new eviction fence in evf_mgr
> -	 * * attach the new eviction fence to BOs
> -	 * * release the old fence
> -	 * * unlock the objects (caller)
> -	 */
> -	new_ef = amdgpu_eviction_fence_create(evf_mgr);
> -	if (!new_ef) {
> -		DRM_ERROR("Failed to create new eviction fence\n");
> -		return -ENOMEM;
> -	}
> -
> -	/* Update the eviction fence now */
> -	spin_lock(&evf_mgr->ev_fence_lock);
> -	old_ef = evf_mgr->ev_fence;
> -	evf_mgr->ev_fence = new_ef;
> -	spin_unlock(&evf_mgr->ev_fence_lock);
> -
> -	/* Attach the new fence */
> -	drm_exec_for_each_locked_object(exec, index, obj) {
> -		struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> -
> -		if (!bo)
> -			continue;
> -		ret = amdgpu_eviction_fence_attach(evf_mgr, bo);
> -		if (ret) {
> -			DRM_ERROR("Failed to attch new eviction fence\n");
> -			goto free_err;
> -		}
> -	}
> -
> -	/* Free old fence */
> -	if (old_ef)
> -		dma_fence_put(&old_ef->base);
> -	return 0;
> -
> -free_err:
> -	kfree(new_ef);
> -	return ret;
> -}
> -
> -static void
> -amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
> -{
> -	struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work);
> -	struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr);
> -	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
> -	struct amdgpu_eviction_fence *ev_fence;
> -
> -	mutex_lock(&uq_mgr->userq_mutex);
> -	spin_lock(&evf_mgr->ev_fence_lock);
> -	ev_fence = evf_mgr->ev_fence;
> -	if (ev_fence)
> -		dma_fence_get(&ev_fence->base);
> -	else
> -		goto unlock;
> -	spin_unlock(&evf_mgr->ev_fence_lock);
> -
> -	amdgpu_userq_evict(uq_mgr, ev_fence);
> -
> -	mutex_unlock(&uq_mgr->userq_mutex);
> -	dma_fence_put(&ev_fence->base);
> -	return;
> -
> -unlock:
> -	spin_unlock(&evf_mgr->ev_fence_lock);
> -	mutex_unlock(&uq_mgr->userq_mutex);
> -}
> -
>   static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f)
>   {
>   	struct amdgpu_eviction_fence_mgr *evf_mgr;
>   	struct amdgpu_eviction_fence *ev_fence;
>   
> -	if (!f)
> -		return true;
Isn't there still a possibility of the fence already being signaled, or of f being NULL here?
> -
>   	ev_fence = to_ev_fence(f);
>   	evf_mgr = ev_fence->evf_mgr;
> -
> -	schedule_delayed_work(&evf_mgr->suspend_work, 0);
> +	schedule_work(&evf_mgr->suspend_work);

We can avoid the local evf_mgr variable and directly use ev_fence->evf_mgr, as it 
is only used once.

Regards
Sunil Khatri

>   	return true;
>   }
>   
> @@ -148,22 +57,52 @@ static const struct dma_fence_ops amdgpu_eviction_fence_ops = {
>   	.enable_signaling = amdgpu_eviction_fence_enable_signaling,
>   };
>   
> -void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
> -				  struct amdgpu_eviction_fence *ev_fence)
> +static void
> +amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
>   {
> -	spin_lock(&evf_mgr->ev_fence_lock);
> -	dma_fence_signal(&ev_fence->base);
> -	spin_unlock(&evf_mgr->ev_fence_lock);
> +	struct amdgpu_eviction_fence_mgr *evf_mgr =
> +		container_of(work, struct amdgpu_eviction_fence_mgr,
> +			     suspend_work);
> +	struct amdgpu_fpriv *fpriv =
> +		container_of(evf_mgr, struct amdgpu_fpriv, evf_mgr);
> +	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
> +	struct dma_fence *ev_fence;
> +
> +	mutex_lock(&uq_mgr->userq_mutex);
> +	ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr);
> +	amdgpu_userq_evict(uq_mgr, !evf_mgr->shutdown);
> +
> +	/*
> +	 * Signaling the eviction fence must be done while holding the
> +	 * userq_mutex. Otherwise we won't resume the queues before issuing the
> +	 * next fence.
> +	 */
> +	dma_fence_signal(ev_fence);
> +	dma_fence_put(ev_fence);
> +	mutex_unlock(&uq_mgr->userq_mutex);
> +}
> +
> +void amdgpu_evf_mgr_attach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
> +				 struct amdgpu_bo *bo)
> +{
> +	struct dma_fence *ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr);
> +	struct dma_resv *resv = bo->tbo.base.resv;
> +
> +	dma_resv_add_fence(resv, ev_fence, DMA_RESV_USAGE_BOOKKEEP);
> +	dma_fence_put(ev_fence);
>   }
>   
> -struct amdgpu_eviction_fence *
> -amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr)
> +int amdgpu_evf_mgr_rearm(struct amdgpu_eviction_fence_mgr *evf_mgr,
> +			 struct drm_exec *exec)
>   {
>   	struct amdgpu_eviction_fence *ev_fence;
> +	struct drm_gem_object *obj;
> +	unsigned long index;
>   
> +	/* Create and initialize a new eviction fence */
>   	ev_fence = kzalloc(sizeof(*ev_fence), GFP_KERNEL);
>   	if (!ev_fence)
> -		return NULL;
> +		return -ENOMEM;
>   
>   	ev_fence->evf_mgr = evf_mgr;
>   	get_task_comm(ev_fence->timeline_name, current);
> @@ -171,56 +110,22 @@ amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr)
>   	dma_fence_init64(&ev_fence->base, &amdgpu_eviction_fence_ops,
>   			 &ev_fence->lock, evf_mgr->ev_fence_ctx,
>   			 atomic_inc_return(&evf_mgr->ev_fence_seq));
> -	return ev_fence;
> -}
> -
> -void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr)
> -{
> -	struct amdgpu_eviction_fence *ev_fence;
> -
> -	/* Wait for any pending work to execute */
> -	flush_delayed_work(&evf_mgr->suspend_work);
> -
> -	spin_lock(&evf_mgr->ev_fence_lock);
> -	ev_fence = evf_mgr->ev_fence;
> -	spin_unlock(&evf_mgr->ev_fence_lock);
> -
> -	if (!ev_fence)
> -		return;
> -
> -	dma_fence_wait(&ev_fence->base, false);
>   
> -	/* Last unref of ev_fence */
> -	dma_fence_put(&ev_fence->base);
> -}
> -
> -int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
> -				 struct amdgpu_bo *bo)
> -{
> -	struct amdgpu_eviction_fence *ev_fence;
> -	struct dma_resv *resv = bo->tbo.base.resv;
> -	int ret;
> +	/* Remember it for newly added BOs */
> +	dma_fence_put(evf_mgr->ev_fence);
> +	evf_mgr->ev_fence = &ev_fence->base;
>   
> -	if (!resv)
> -		return 0;
> +	/* And add it to all existing BOs */
> +	drm_exec_for_each_locked_object(exec, index, obj) {
> +		struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>   
> -	ret = dma_resv_reserve_fences(resv, 1);
> -	if (ret) {
> -		DRM_DEBUG_DRIVER("Failed to resv fence space\n");
> -		return ret;
> +		amdgpu_evf_mgr_attach_fence(evf_mgr, bo);
>   	}
> -
> -	spin_lock(&evf_mgr->ev_fence_lock);
> -	ev_fence = evf_mgr->ev_fence;
> -	if (ev_fence)
> -		dma_resv_add_fence(resv, &ev_fence->base, DMA_RESV_USAGE_BOOKKEEP);
> -	spin_unlock(&evf_mgr->ev_fence_lock);
> -
>   	return 0;
>   }
>   
> -void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
> -				  struct amdgpu_bo *bo)
> +void amdgpu_evf_mgr_detach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
> +				 struct amdgpu_bo *bo)
>   {
>   	struct dma_fence *stub = dma_fence_get_stub();
>   
> @@ -229,13 +134,25 @@ void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
>   	dma_fence_put(stub);
>   }
>   
> -int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
> +void amdgpu_evf_mgr_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
>   {
> -	/* This needs to be done one time per open */
>   	atomic_set(&evf_mgr->ev_fence_seq, 0);
>   	evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1);
> -	spin_lock_init(&evf_mgr->ev_fence_lock);
> +	evf_mgr->ev_fence = dma_fence_get_stub();
>   
> -	INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker);
> -	return 0;
> +	INIT_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker);
> +}
> +
> +void amdgpu_evf_mgr_shutdown(struct amdgpu_eviction_fence_mgr *evf_mgr)
> +{
> +	evf_mgr->shutdown = true;
> +	flush_work(&evf_mgr->suspend_work);
> +}
> +
> +void amdgpu_evf_mgr_fini(struct amdgpu_eviction_fence_mgr *evf_mgr)
> +{
> +	dma_fence_wait(rcu_dereference_protected(evf_mgr->ev_fence, true),
> +		       false);
> +	flush_work(&evf_mgr->suspend_work);
> +	dma_fence_put(evf_mgr->ev_fence);
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
> index fcd867b7147d..527de3a23583 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
> @@ -25,6 +25,8 @@
>   #ifndef AMDGPU_EV_FENCE_H_
>   #define AMDGPU_EV_FENCE_H_
>   
> +#include <linux/dma-fence.h>
> +
>   struct amdgpu_eviction_fence {
>   	struct dma_fence base;
>   	spinlock_t	 lock;
> @@ -35,35 +37,35 @@ struct amdgpu_eviction_fence {
>   struct amdgpu_eviction_fence_mgr {
>   	u64			ev_fence_ctx;
>   	atomic_t		ev_fence_seq;
> -	spinlock_t		ev_fence_lock;
> -	struct amdgpu_eviction_fence *ev_fence;
> -	struct delayed_work	suspend_work;
> -	uint8_t fd_closing;
> -};
> -
> -/* Eviction fence helper functions */
> -struct amdgpu_eviction_fence *
> -amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr);
>   
> -void
> -amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr);
> -
> -int
> -amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
> -			     struct amdgpu_bo *bo);
> +	/*
> +	 * Only updated while holding the VM resv lock.
> +	 * Only signaled while holding the userq mutex.
> +	 */
> +	struct dma_fence __rcu	*ev_fence;
> +	struct work_struct	suspend_work;
> +	bool			shutdown;
> +};
>   
> -void
> -amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
> -			     struct amdgpu_bo *bo);
> +static inline struct dma_fence *
> +amdgpu_evf_mgr_get_fence(struct amdgpu_eviction_fence_mgr *evf_mgr)
> +{
> +	struct dma_fence *ev_fence;
>   
> -int
> -amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
> +	rcu_read_lock();
> +	ev_fence = dma_fence_get_rcu_safe(&evf_mgr->ev_fence);
> +	rcu_read_unlock();
> +	return ev_fence;
> +}
>   
> -void
> -amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
> -			     struct amdgpu_eviction_fence *ev_fence);
> +void amdgpu_evf_mgr_attach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
> +				 struct amdgpu_bo *bo);
> +int amdgpu_evf_mgr_rearm(struct amdgpu_eviction_fence_mgr *evf_mgr,
> +			 struct drm_exec *exec);
> +void amdgpu_evf_mgr_detach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
> +				 struct amdgpu_bo *bo);
> +void amdgpu_evf_mgr_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
> +void amdgpu_evf_mgr_shutdown(struct amdgpu_eviction_fence_mgr *evf_mgr);
> +void amdgpu_evf_mgr_fini(struct amdgpu_eviction_fence_mgr *evf_mgr);
>   
> -int
> -amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
> -				    struct drm_exec *exec);
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 5c90de58cc28..e28abfd04867 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -263,13 +263,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
>   	else
>   		++bo_va->ref_count;
>   
> -	/* attach gfx eviction fence */
> -	r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo);
> -	if (r) {
> -		DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n");
> -		amdgpu_bo_unreserve(abo);
> -		return r;
> -	}
> +	amdgpu_evf_mgr_attach_fence(&fpriv->evf_mgr, abo);
>   	drm_exec_fini(&exec);
>   
>   	/* Validate and add eviction fence to DMABuf imports with dynamic
> @@ -337,7 +331,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
>   	}
>   
>   	if (!amdgpu_vm_is_bo_always_valid(vm, bo))
> -		amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo);
> +		amdgpu_evf_mgr_detach_fence(&fpriv->evf_mgr, bo);
>   
>   	bo_va = amdgpu_vm_bo_find(vm, bo);
>   	if (!bo_va || --bo_va->ref_count)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index f69332eed051..f512b6ec6c53 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -1522,10 +1522,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
>   			 "Failed to init usermode queue manager (%d), use legacy workload submission only\n",
>   			 r);
>   
> -	r = amdgpu_eviction_fence_init(&fpriv->evf_mgr);
> -	if (r)
> -		goto error_vm;
> -
> +	amdgpu_evf_mgr_init(&fpriv->evf_mgr);
>   	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
>   
>   	file_priv->driver_priv = fpriv;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index 85adc53eb523..67ba46851c2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -472,17 +472,16 @@ void
>   amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
>   			     struct amdgpu_eviction_fence_mgr *evf_mgr)
>   {
> -	struct amdgpu_eviction_fence *ev_fence;
> +	struct dma_fence *ev_fence;
>   
>   retry:
>   	/* Flush any pending resume work to create ev_fence */
>   	flush_delayed_work(&uq_mgr->resume_work);
>   
>   	mutex_lock(&uq_mgr->userq_mutex);
> -	spin_lock(&evf_mgr->ev_fence_lock);
> -	ev_fence = evf_mgr->ev_fence;
> -	spin_unlock(&evf_mgr->ev_fence_lock);
> -	if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) {
> +	ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr);
> +	if (dma_fence_is_signaled(ev_fence)) {
> +		dma_fence_put(ev_fence);
>   		mutex_unlock(&uq_mgr->userq_mutex);
>   		/*
>   		 * Looks like there was no pending resume work,
> @@ -491,6 +490,7 @@ amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
>   		schedule_delayed_work(&uq_mgr->resume_work, 0);
>   		goto retry;
>   	}
> +	dma_fence_put(ev_fence);
>   }
>   
>   int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
> @@ -1196,7 +1196,7 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
>   		dma_fence_wait(bo_va->last_pt_update, false);
>   	dma_fence_wait(vm->last_update, false);
>   
> -	ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
> +	ret = amdgpu_evf_mgr_rearm(&fpriv->evf_mgr, &exec);
>   	if (ret)
>   		drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n");
>   
> @@ -1216,11 +1216,13 @@ static void amdgpu_userq_restore_worker(struct work_struct *work)
>   {
>   	struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work);
>   	struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
> +	struct dma_fence *ev_fence;
>   	int ret;
>   
> -	flush_delayed_work(&fpriv->evf_mgr.suspend_work);
> -
>   	mutex_lock(&uq_mgr->userq_mutex);
> +	ev_fence = amdgpu_evf_mgr_get_fence(&fpriv->evf_mgr);
> +	if (!dma_fence_is_signaled(ev_fence))
> +		goto unlock;
>   
>   	ret = amdgpu_userq_vm_validate(uq_mgr);
>   	if (ret) {
> @@ -1236,6 +1238,7 @@ static void amdgpu_userq_restore_worker(struct work_struct *work)
>   
>   unlock:
>   	mutex_unlock(&uq_mgr->userq_mutex);
> +	dma_fence_put(ev_fence);
>   }
>   
>   static int
> @@ -1311,11 +1314,8 @@ amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
>   }
>   
>   void
> -amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
> -		   struct amdgpu_eviction_fence *ev_fence)
> +amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, bool schedule_resume)
>   {
> -	struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
> -	struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;
>   	struct amdgpu_device *adev = uq_mgr->adev;
>   	int ret;
>   
> @@ -1328,10 +1328,7 @@ amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
>   	if (ret)
>   		dev_err(adev->dev, "Failed to evict userqueue\n");
>   
> -	/* Signal current eviction fence */
> -	amdgpu_eviction_fence_signal(evf_mgr, ev_fence);
> -
> -	if (!evf_mgr->fd_closing)
> +	if (schedule_resume)
>   		schedule_delayed_work(&uq_mgr->resume_work, 0);
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> index 54e1997b3cc0..82306d489064 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
> @@ -133,7 +133,7 @@ void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
>   				 struct amdgpu_userq_obj *userq_obj);
>   
>   void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
> -			struct amdgpu_eviction_fence *ev_fence);
> +			bool schedule_resume);
>   
>   void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr,
>   				  struct amdgpu_eviction_fence_mgr *evf_mgr);

  reply	other threads:[~2026-03-11 12:27 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-10 19:13 [PATCH 01/11] drm/amdgpu: revert to old status lock handling v4 Christian König
2026-03-10 19:13 ` [PATCH 02/11] drm/amdgpu: restructure VM state machine Christian König
2026-03-11  8:47   ` Khatri, Sunil
2026-03-12 12:49     ` Christian König
2026-03-12 10:07   ` Liang, Prike
2026-03-12 14:32   ` Tvrtko Ursulin
2026-03-16 13:44     ` Christian König
2026-03-16 14:26       ` Tvrtko Ursulin
2026-03-10 19:13 ` [PATCH 03/11] drm/amdgpu: fix amdgpu_userq_evict Christian König
2026-03-11  8:51   ` Khatri, Sunil
2026-03-13  7:25   ` Liang, Prike
2026-03-10 19:13 ` [PATCH 04/11] drm/amdgpu: completely rework eviction fence handling Christian König
2026-03-11 12:27   ` Khatri, Sunil [this message]
2026-03-13  8:00     ` Khatri, Sunil
2026-03-17  9:41     ` Christian König
2026-03-13  8:28   ` Liang, Prike
2026-03-17  9:57     ` Christian König
2026-03-17 11:21       ` Liang, Prike
2026-03-17 11:23         ` Christian König
2026-03-17 11:54           ` Liang, Prike
2026-03-10 19:13 ` [PATCH 05/11] drm/amdgpu: fix eviction fence and userq manager shutdown Christian König
2026-03-11 12:26   ` Khatri, Sunil
2026-03-13  9:35     ` Khatri, Sunil
2026-03-10 19:13 ` [PATCH 06/11] drm/amdgpu: fix adding eviction fence Christian König
2026-03-11 12:26   ` Khatri, Sunil
2026-03-10 19:13 ` [PATCH 07/11] drm/amdgpu: rework amdgpu_userq_wait_ioctl v3 Christian König
2026-03-12 16:34   ` Tvrtko Ursulin
2026-03-16 14:19     ` Christian König
2026-03-16 14:44       ` Tvrtko Ursulin
2026-03-17  7:05   ` Khatri, Sunil
2026-03-10 19:13 ` [PATCH 08/11] drm/amdgpu: make amdgpu_user_wait_ioctl more resilent v2 Christian König
2026-03-17  7:15   ` Khatri, Sunil
2026-03-10 19:13 ` [PATCH 09/11] drm/amdgpu: annotate eviction fence signaling path Christian König
2026-03-17  7:35   ` Khatri, Sunil
2026-03-10 19:13 ` [PATCH 10/11] drm/amdgpu: fix some more bug in amdgpu_gem_va_ioctl Christian König
2026-03-17  8:44   ` Khatri, Sunil
2026-03-17 11:08     ` Christian König
2026-03-10 19:13 ` [PATCH 11/11] drm/amdgpu: WIP sync amdgpu_ttm_fill_mem only to kernel fences Christian König
2026-03-17  8:59   ` Khatri, Sunil
2026-03-17 10:52     ` Christian König
2026-03-11  7:43 ` [PATCH 01/11] drm/amdgpu: revert to old status lock handling v4 Khatri, Sunil
2026-03-12  7:13 ` Liang, Prike

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8cae20bf-4ff5-4a03-8137-36096197364f@amd.com \
    --to=sukhatri@amd.com \
    --cc=Alexander.Deucher@amd.com \
    --cc=Prike.Liang@amd.com \
    --cc=SRINIVASAN.SHANMUGAM@amd.com \
    --cc=Sunil.Khatri@amd.com \
    --cc=Yogesh.Mohanmarimuthu@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=ckoenig.leichtzumerken@gmail.com \
    --cc=tursulin@ursulin.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox