AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: "Khatri, Sunil" <sukhatri@amd.com>
To: "Christian König" <ckoenig.leichtzumerken@gmail.com>,
	tursulin@ursulin.net, Alexander.Deucher@amd.com,
	Prike.Liang@amd.com, Yogesh.Mohanmarimuthu@amd.com,
	SRINIVASAN.SHANMUGAM@amd.com, Sunil.Khatri@amd.com,
	amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 04/11] drm/amdgpu: completely rework eviction fence handling
Date: Fri, 13 Mar 2026 13:30:00 +0530	[thread overview]
Message-ID: <afcb82ed-5c7a-4d13-960b-957e9a8cefb0@amd.com> (raw)
In-Reply-To: <8cae20bf-4ff5-4a03-8137-36096197364f@amd.com>

With some of the comments in my previous mail and with the fixes that I
sent before this patch, the results look good.
Reviewed-by: Sunil Khatri <sunil.khatri@amd.com>

On 11-03-2026 05:57 pm, Khatri, Sunil wrote:
>
> On 11-03-2026 12:43 am, Christian König wrote:
>> Well that was broken on multiple levels.
>>
>> First of all a lot of checks where placed at incorrect locations, 
>> especially if
> where -> were
>> the resume worker should run or not.
>>
>> Then a bunch of code was just mid-layering because of incorrect 
>> assignment who
>> should do what.
>>
>> And finally comments explaining what happens instead of why.
>>
>> Just re-write it from scratch, that should at least fix some of the 
>> hangs we
>> are seeing.
>>
>> Use RCU for the eviction fence pointer in the manager, the spinlock 
>> usage was
>> mostly incorrect as well. Then finally remove all the nonsense checks 
>> and
>> actually add them in the correct locations.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |   4 +-
>>   .../drm/amd/amdgpu/amdgpu_eviction_fence.c    | 213 ++++++------------
>>   .../drm/amd/amdgpu/amdgpu_eviction_fence.h    |  54 ++---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c       |   5 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c     |  29 ++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h     |   2 +-
>>   7 files changed, 112 insertions(+), 205 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 03814a23eb54..67b8c33d5ee3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -2952,9 +2952,9 @@ static int amdgpu_drm_release(struct inode 
>> *inode, struct file *filp)
>>       int idx;
>>         if (fpriv && drm_dev_enter(dev, &idx)) {
>> -        fpriv->evf_mgr.fd_closing = true;
>> -        amdgpu_eviction_fence_destroy(&fpriv->evf_mgr);
>> +        amdgpu_evf_mgr_shutdown(&fpriv->evf_mgr);
>>           amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
>> +        amdgpu_evf_mgr_fini(&fpriv->evf_mgr);
>>           drm_dev_exit(idx);
>>       }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
>> index 23d7d0b0d625..8fe9f91f9551 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
>> @@ -25,9 +25,6 @@
>>   #include <drm/drm_exec.h>
>>   #include "amdgpu.h"
>>   -#define work_to_evf_mgr(w, name) container_of(w, struct 
>> amdgpu_eviction_fence_mgr, name)
>> -#define evf_mgr_to_fpriv(e) container_of(e, struct amdgpu_fpriv, 
>> evf_mgr)
>> -
>>   static const char *
>>   amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence)
>>   {
>> @@ -43,102 +40,14 @@ amdgpu_eviction_fence_get_timeline_name(struct 
>> dma_fence *f)
>>       return ef->timeline_name;
>>   }
>>   -int
>> -amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr,
>> -                    struct drm_exec *exec)
>> -{
>> -    struct amdgpu_eviction_fence *old_ef, *new_ef;
>> -    struct drm_gem_object *obj;
>> -    unsigned long index;
>> -    int ret;
>> -
>> -    if (evf_mgr->ev_fence &&
>> - !dma_fence_is_signaled(&evf_mgr->ev_fence->base))
>> -        return 0;
>> -    /*
>> -     * Steps to replace eviction fence:
>> -     * * lock all objects in exec (caller)
>> -     * * create a new eviction fence
>> -     * * update new eviction fence in evf_mgr
>> -     * * attach the new eviction fence to BOs
>> -     * * release the old fence
>> -     * * unlock the objects (caller)
>> -     */
>> -    new_ef = amdgpu_eviction_fence_create(evf_mgr);
>> -    if (!new_ef) {
>> -        DRM_ERROR("Failed to create new eviction fence\n");
>> -        return -ENOMEM;
>> -    }
>> -
>> -    /* Update the eviction fence now */
>> -    spin_lock(&evf_mgr->ev_fence_lock);
>> -    old_ef = evf_mgr->ev_fence;
>> -    evf_mgr->ev_fence = new_ef;
>> -    spin_unlock(&evf_mgr->ev_fence_lock);
>> -
>> -    /* Attach the new fence */
>> -    drm_exec_for_each_locked_object(exec, index, obj) {
>> -        struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>> -
>> -        if (!bo)
>> -            continue;
>> -        ret = amdgpu_eviction_fence_attach(evf_mgr, bo);
>> -        if (ret) {
>> -            DRM_ERROR("Failed to attch new eviction fence\n");
>> -            goto free_err;
>> -        }
>> -    }
>> -
>> -    /* Free old fence */
>> -    if (old_ef)
>> -        dma_fence_put(&old_ef->base);
>> -    return 0;
>> -
>> -free_err:
>> -    kfree(new_ef);
>> -    return ret;
>> -}
>> -
>> -static void
>> -amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
>> -{
>> -    struct amdgpu_eviction_fence_mgr *evf_mgr = 
>> work_to_evf_mgr(work, suspend_work.work);
>> -    struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr);
>> -    struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
>> -    struct amdgpu_eviction_fence *ev_fence;
>> -
>> -    mutex_lock(&uq_mgr->userq_mutex);
>> -    spin_lock(&evf_mgr->ev_fence_lock);
>> -    ev_fence = evf_mgr->ev_fence;
>> -    if (ev_fence)
>> -        dma_fence_get(&ev_fence->base);
>> -    else
>> -        goto unlock;
>> -    spin_unlock(&evf_mgr->ev_fence_lock);
>> -
>> -    amdgpu_userq_evict(uq_mgr, ev_fence);
>> -
>> -    mutex_unlock(&uq_mgr->userq_mutex);
>> -    dma_fence_put(&ev_fence->base);
>> -    return;
>> -
>> -unlock:
>> -    spin_unlock(&evf_mgr->ev_fence_lock);
>> -    mutex_unlock(&uq_mgr->userq_mutex);
>> -}
>> -
>>   static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence 
>> *f)
>>   {
>>       struct amdgpu_eviction_fence_mgr *evf_mgr;
>>       struct amdgpu_eviction_fence *ev_fence;
>>   -    if (!f)
>> -        return true;
> Isn't there a possibility of the fence being signaled or f to be NULL?
>> -
>>       ev_fence = to_ev_fence(f);
>>       evf_mgr = ev_fence->evf_mgr;
>> -
>> -    schedule_delayed_work(&evf_mgr->suspend_work, 0);
>> +    schedule_work(&evf_mgr->suspend_work);
>
> We can avoid to use evf_mgr instead directly use ev_fence->evf_mgr as 
> it is only one time usage.
>
> Regards
> Sunil Khatri
>
>>       return true;
>>   }
>>   @@ -148,22 +57,52 @@ static const struct dma_fence_ops 
>> amdgpu_eviction_fence_ops = {
>>       .enable_signaling = amdgpu_eviction_fence_enable_signaling,
>>   };
>>   -void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr,
>> -                  struct amdgpu_eviction_fence *ev_fence)
>> +static void
>> +amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
>>   {
>> -    spin_lock(&evf_mgr->ev_fence_lock);
>> -    dma_fence_signal(&ev_fence->base);
>> -    spin_unlock(&evf_mgr->ev_fence_lock);
>> +    struct amdgpu_eviction_fence_mgr *evf_mgr =
>> +        container_of(work, struct amdgpu_eviction_fence_mgr,
>> +                 suspend_work);
>> +    struct amdgpu_fpriv *fpriv =
>> +        container_of(evf_mgr, struct amdgpu_fpriv, evf_mgr);
>> +    struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
>> +    struct dma_fence *ev_fence;
>> +
>> +    mutex_lock(&uq_mgr->userq_mutex);
>> +    ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr);
>> +    amdgpu_userq_evict(uq_mgr, !evf_mgr->shutdown);
>> +
>> +    /*
>> +     * Signaling the eviction fence must be done while holding the
>> +     * userq_mutex. Otherwise we won't resume the queues before 
>> issuing the
>> +     * next fence.
>> +     */
>> +    dma_fence_signal(ev_fence);
>> +    dma_fence_put(ev_fence);
>> +    mutex_unlock(&uq_mgr->userq_mutex);
>> +}
>> +
>> +void amdgpu_evf_mgr_attach_fence(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr,
>> +                 struct amdgpu_bo *bo)
>> +{
>> +    struct dma_fence *ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr);
>> +    struct dma_resv *resv = bo->tbo.base.resv;
>> +
>> +    dma_resv_add_fence(resv, ev_fence, DMA_RESV_USAGE_BOOKKEEP);
>> +    dma_fence_put(ev_fence);
>>   }
>>   -struct amdgpu_eviction_fence *
>> -amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr)
>> +int amdgpu_evf_mgr_rearm(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> +             struct drm_exec *exec)
>>   {
>>       struct amdgpu_eviction_fence *ev_fence;
>> +    struct drm_gem_object *obj;
>> +    unsigned long index;
>>   +    /* Create and initialize a new eviction fence */
>>       ev_fence = kzalloc(sizeof(*ev_fence), GFP_KERNEL);
>>       if (!ev_fence)
>> -        return NULL;
>> +        return -ENOMEM;
>>         ev_fence->evf_mgr = evf_mgr;
>>       get_task_comm(ev_fence->timeline_name, current);
>> @@ -171,56 +110,22 @@ amdgpu_eviction_fence_create(struct 
>> amdgpu_eviction_fence_mgr *evf_mgr)
>>       dma_fence_init64(&ev_fence->base, &amdgpu_eviction_fence_ops,
>>                &ev_fence->lock, evf_mgr->ev_fence_ctx,
>> atomic_inc_return(&evf_mgr->ev_fence_seq));
>> -    return ev_fence;
>> -}
>> -
>> -void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr)
>> -{
>> -    struct amdgpu_eviction_fence *ev_fence;
>> -
>> -    /* Wait for any pending work to execute */
>> -    flush_delayed_work(&evf_mgr->suspend_work);
>> -
>> -    spin_lock(&evf_mgr->ev_fence_lock);
>> -    ev_fence = evf_mgr->ev_fence;
>> -    spin_unlock(&evf_mgr->ev_fence_lock);
>> -
>> -    if (!ev_fence)
>> -        return;
>> -
>> -    dma_fence_wait(&ev_fence->base, false);
>>   -    /* Last unref of ev_fence */
>> -    dma_fence_put(&ev_fence->base);
>> -}
>> -
>> -int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr,
>> -                 struct amdgpu_bo *bo)
>> -{
>> -    struct amdgpu_eviction_fence *ev_fence;
>> -    struct dma_resv *resv = bo->tbo.base.resv;
>> -    int ret;
>> +    /* Remember it for newly added BOs */
>> +    dma_fence_put(evf_mgr->ev_fence);
>> +    evf_mgr->ev_fence = &ev_fence->base;
>>   -    if (!resv)
>> -        return 0;
>> +    /* And add it to all existing BOs */
>> +    drm_exec_for_each_locked_object(exec, index, obj) {
>> +        struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>>   -    ret = dma_resv_reserve_fences(resv, 1);
>> -    if (ret) {
>> -        DRM_DEBUG_DRIVER("Failed to resv fence space\n");
>> -        return ret;
>> +        amdgpu_evf_mgr_attach_fence(evf_mgr, bo);
>>       }
>> -
>> -    spin_lock(&evf_mgr->ev_fence_lock);
>> -    ev_fence = evf_mgr->ev_fence;
>> -    if (ev_fence)
>> -        dma_resv_add_fence(resv, &ev_fence->base, 
>> DMA_RESV_USAGE_BOOKKEEP);
>> -    spin_unlock(&evf_mgr->ev_fence_lock);
>> -
>>       return 0;
>>   }
>>   -void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr,
>> -                  struct amdgpu_bo *bo)
>> +void amdgpu_evf_mgr_detach_fence(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr,
>> +                 struct amdgpu_bo *bo)
>>   {
>>       struct dma_fence *stub = dma_fence_get_stub();
>>   @@ -229,13 +134,25 @@ void amdgpu_eviction_fence_detach(struct 
>> amdgpu_eviction_fence_mgr *evf_mgr,
>>       dma_fence_put(stub);
>>   }
>>   -int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr)
>> +void amdgpu_evf_mgr_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
>>   {
>> -    /* This needs to be done one time per open */
>>       atomic_set(&evf_mgr->ev_fence_seq, 0);
>>       evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1);
>> -    spin_lock_init(&evf_mgr->ev_fence_lock);
>> +    evf_mgr->ev_fence = dma_fence_get_stub();
>>   -    INIT_DELAYED_WORK(&evf_mgr->suspend_work, 
>> amdgpu_eviction_fence_suspend_worker);
>> -    return 0;
>> +    INIT_WORK(&evf_mgr->suspend_work, 
>> amdgpu_eviction_fence_suspend_worker);
>> +}
>> +
>> +void amdgpu_evf_mgr_shutdown(struct amdgpu_eviction_fence_mgr *evf_mgr)
>> +{
>> +    evf_mgr->shutdown = true;
>> +    flush_work(&evf_mgr->suspend_work);
>> +}
>> +
>> +void amdgpu_evf_mgr_fini(struct amdgpu_eviction_fence_mgr *evf_mgr)
>> +{
>> + dma_fence_wait(rcu_dereference_protected(evf_mgr->ev_fence, true),
>> +               false);
>> +    flush_work(&evf_mgr->suspend_work);
>> +    dma_fence_put(evf_mgr->ev_fence);
>>   }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
>> index fcd867b7147d..527de3a23583 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
>> @@ -25,6 +25,8 @@
>>   #ifndef AMDGPU_EV_FENCE_H_
>>   #define AMDGPU_EV_FENCE_H_
>>   +#include <linux/dma-fence.h>
>> +
>>   struct amdgpu_eviction_fence {
>>       struct dma_fence base;
>>       spinlock_t     lock;
>> @@ -35,35 +37,35 @@ struct amdgpu_eviction_fence {
>>   struct amdgpu_eviction_fence_mgr {
>>       u64            ev_fence_ctx;
>>       atomic_t        ev_fence_seq;
>> -    spinlock_t        ev_fence_lock;
>> -    struct amdgpu_eviction_fence *ev_fence;
>> -    struct delayed_work    suspend_work;
>> -    uint8_t fd_closing;
>> -};
>> -
>> -/* Eviction fence helper functions */
>> -struct amdgpu_eviction_fence *
>> -amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr);
>>   -void
>> -amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr);
>> -
>> -int
>> -amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> -                 struct amdgpu_bo *bo);
>> +    /*
>> +     * Only updated while holding the VM resv lock.
>> +     * Only signaled while holding the userq mutex.
>> +     */
>> +    struct dma_fence __rcu    *ev_fence;
>> +    struct work_struct    suspend_work;
>> +    bool            shutdown;
>> +};
>>   -void
>> -amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> -                 struct amdgpu_bo *bo);
>> +static inline struct dma_fence *
>> +amdgpu_evf_mgr_get_fence(struct amdgpu_eviction_fence_mgr *evf_mgr)
>> +{
>> +    struct dma_fence *ev_fence;
>>   -int
>> -amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
>> +    rcu_read_lock();
>> +    ev_fence = dma_fence_get_rcu_safe(&evf_mgr->ev_fence);
>> +    rcu_read_unlock();
>> +    return ev_fence;
>> +}
>>   -void
>> -amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> -                 struct amdgpu_eviction_fence *ev_fence);
>> +void amdgpu_evf_mgr_attach_fence(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr,
>> +                 struct amdgpu_bo *bo);
>> +int amdgpu_evf_mgr_rearm(struct amdgpu_eviction_fence_mgr *evf_mgr,
>> +             struct drm_exec *exec);
>> +void amdgpu_evf_mgr_detach_fence(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr,
>> +                 struct amdgpu_bo *bo);
>> +void amdgpu_evf_mgr_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
>> +void amdgpu_evf_mgr_shutdown(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr);
>> +void amdgpu_evf_mgr_fini(struct amdgpu_eviction_fence_mgr *evf_mgr);
>>   -int
>> -amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr 
>> *evf_mgr,
>> -                    struct drm_exec *exec);
>>   #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index 5c90de58cc28..e28abfd04867 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -263,13 +263,7 @@ static int amdgpu_gem_object_open(struct 
>> drm_gem_object *obj,
>>       else
>>           ++bo_va->ref_count;
>>   -    /* attach gfx eviction fence */
>> -    r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo);
>> -    if (r) {
>> -        DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n");
>> -        amdgpu_bo_unreserve(abo);
>> -        return r;
>> -    }
>> +    amdgpu_evf_mgr_attach_fence(&fpriv->evf_mgr, abo);
>>       drm_exec_fini(&exec);
>>         /* Validate and add eviction fence to DMABuf imports with 
>> dynamic
>> @@ -337,7 +331,7 @@ static void amdgpu_gem_object_close(struct 
>> drm_gem_object *obj,
>>       }
>>         if (!amdgpu_vm_is_bo_always_valid(vm, bo))
>> -        amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo);
>> +        amdgpu_evf_mgr_detach_fence(&fpriv->evf_mgr, bo);
>>         bo_va = amdgpu_vm_bo_find(vm, bo);
>>       if (!bo_va || --bo_va->ref_count)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index f69332eed051..f512b6ec6c53 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -1522,10 +1522,7 @@ int amdgpu_driver_open_kms(struct drm_device 
>> *dev, struct drm_file *file_priv)
>>                "Failed to init usermode queue manager (%d), use 
>> legacy workload submission only\n",
>>                r);
>>   -    r = amdgpu_eviction_fence_init(&fpriv->evf_mgr);
>> -    if (r)
>> -        goto error_vm;
>> -
>> +    amdgpu_evf_mgr_init(&fpriv->evf_mgr);
>>       amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
>>         file_priv->driver_priv = fpriv;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>> index 85adc53eb523..67ba46851c2b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>> @@ -472,17 +472,16 @@ void
>>   amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
>>                    struct amdgpu_eviction_fence_mgr *evf_mgr)
>>   {
>> -    struct amdgpu_eviction_fence *ev_fence;
>> +    struct dma_fence *ev_fence;
>>     retry:
>>       /* Flush any pending resume work to create ev_fence */
>>       flush_delayed_work(&uq_mgr->resume_work);
>>         mutex_lock(&uq_mgr->userq_mutex);
>> -    spin_lock(&evf_mgr->ev_fence_lock);
>> -    ev_fence = evf_mgr->ev_fence;
>> -    spin_unlock(&evf_mgr->ev_fence_lock);
>> -    if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) {
>> +    ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr);
>> +    if (dma_fence_is_signaled(ev_fence)) {
>> +        dma_fence_put(ev_fence);
>>           mutex_unlock(&uq_mgr->userq_mutex);
>>           /*
>>            * Looks like there was no pending resume work,
>> @@ -491,6 +490,7 @@ amdgpu_userq_ensure_ev_fence(struct 
>> amdgpu_userq_mgr *uq_mgr,
>>           schedule_delayed_work(&uq_mgr->resume_work, 0);
>>           goto retry;
>>       }
>> +    dma_fence_put(ev_fence);
>>   }
>>     int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
>> @@ -1196,7 +1196,7 @@ amdgpu_userq_vm_validate(struct 
>> amdgpu_userq_mgr *uq_mgr)
>>           dma_fence_wait(bo_va->last_pt_update, false);
>>       dma_fence_wait(vm->last_update, false);
>>   -    ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, 
>> &exec);
>> +    ret = amdgpu_evf_mgr_rearm(&fpriv->evf_mgr, &exec);
>>       if (ret)
>>           drm_file_err(uq_mgr->file, "Failed to replace eviction 
>> fence\n");
>>   @@ -1216,11 +1216,13 @@ static void 
>> amdgpu_userq_restore_worker(struct work_struct *work)
>>   {
>>       struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, 
>> resume_work.work);
>>       struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
>> +    struct dma_fence *ev_fence;
>>       int ret;
>>   -    flush_delayed_work(&fpriv->evf_mgr.suspend_work);
>> -
>>       mutex_lock(&uq_mgr->userq_mutex);
>> +    ev_fence = amdgpu_evf_mgr_get_fence(&fpriv->evf_mgr);
>> +    if (!dma_fence_is_signaled(ev_fence))
>> +        goto unlock;
>>         ret = amdgpu_userq_vm_validate(uq_mgr);
>>       if (ret) {
>> @@ -1236,6 +1238,7 @@ static void amdgpu_userq_restore_worker(struct 
>> work_struct *work)
>>     unlock:
>>       mutex_unlock(&uq_mgr->userq_mutex);
>> +    dma_fence_put(ev_fence);
>>   }
>>     static int
>> @@ -1311,11 +1314,8 @@ amdgpu_userq_wait_for_signal(struct 
>> amdgpu_userq_mgr *uq_mgr)
>>   }
>>     void
>> -amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
>> -           struct amdgpu_eviction_fence *ev_fence)
>> +amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, bool 
>> schedule_resume)
>>   {
>> -    struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
>> -    struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;
>>       struct amdgpu_device *adev = uq_mgr->adev;
>>       int ret;
>>   @@ -1328,10 +1328,7 @@ amdgpu_userq_evict(struct amdgpu_userq_mgr 
>> *uq_mgr,
>>       if (ret)
>>           dev_err(adev->dev, "Failed to evict userqueue\n");
>>   -    /* Signal current eviction fence */
>> -    amdgpu_eviction_fence_signal(evf_mgr, ev_fence);
>> -
>> -    if (!evf_mgr->fd_closing)
>> +    if (schedule_resume)
>>           schedule_delayed_work(&uq_mgr->resume_work, 0);
>>   }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>> index 54e1997b3cc0..82306d489064 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>> @@ -133,7 +133,7 @@ void amdgpu_userq_destroy_object(struct 
>> amdgpu_userq_mgr *uq_mgr,
>>                    struct amdgpu_userq_obj *userq_obj);
>>     void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
>> -            struct amdgpu_eviction_fence *ev_fence);
>> +            bool schedule_resume);
>>     void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr 
>> *userq_mgr,
>>                     struct amdgpu_eviction_fence_mgr *evf_mgr);

  reply	other threads:[~2026-03-13  8:00 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-10 19:13 [PATCH 01/11] drm/amdgpu: revert to old status lock handling v4 Christian König
2026-03-10 19:13 ` [PATCH 02/11] drm/amdgpu: restructure VM state machine Christian König
2026-03-11  8:47   ` Khatri, Sunil
2026-03-12 12:49     ` Christian König
2026-03-12 10:07   ` Liang, Prike
2026-03-12 14:32   ` Tvrtko Ursulin
2026-03-16 13:44     ` Christian König
2026-03-16 14:26       ` Tvrtko Ursulin
2026-03-10 19:13 ` [PATCH 03/11] drm/amdgpu: fix amdgpu_userq_evict Christian König
2026-03-11  8:51   ` Khatri, Sunil
2026-03-13  7:25   ` Liang, Prike
2026-03-10 19:13 ` [PATCH 04/11] drm/amdgpu: completely rework eviction fence handling Christian König
2026-03-11 12:27   ` Khatri, Sunil
2026-03-13  8:00     ` Khatri, Sunil [this message]
2026-03-17  9:41     ` Christian König
2026-03-13  8:28   ` Liang, Prike
2026-03-17  9:57     ` Christian König
2026-03-17 11:21       ` Liang, Prike
2026-03-17 11:23         ` Christian König
2026-03-17 11:54           ` Liang, Prike
2026-03-10 19:13 ` [PATCH 05/11] drm/amdgpu: fix eviction fence and userq manager shutdown Christian König
2026-03-11 12:26   ` Khatri, Sunil
2026-03-13  9:35     ` Khatri, Sunil
2026-03-10 19:13 ` [PATCH 06/11] drm/amdgpu: fix adding eviction fence Christian König
2026-03-11 12:26   ` Khatri, Sunil
2026-03-10 19:13 ` [PATCH 07/11] drm/amdgpu: rework amdgpu_userq_wait_ioctl v3 Christian König
2026-03-12 16:34   ` Tvrtko Ursulin
2026-03-16 14:19     ` Christian König
2026-03-16 14:44       ` Tvrtko Ursulin
2026-03-17  7:05   ` Khatri, Sunil
2026-03-10 19:13 ` [PATCH 08/11] drm/amdgpu: make amdgpu_user_wait_ioctl more resilent v2 Christian König
2026-03-17  7:15   ` Khatri, Sunil
2026-03-10 19:13 ` [PATCH 09/11] drm/amdgpu: annotate eviction fence signaling path Christian König
2026-03-17  7:35   ` Khatri, Sunil
2026-03-10 19:13 ` [PATCH 10/11] drm/amdgpu: fix some more bug in amdgpu_gem_va_ioctl Christian König
2026-03-17  8:44   ` Khatri, Sunil
2026-03-17 11:08     ` Christian König
2026-03-10 19:13 ` [PATCH 11/11] drm/amdgpu: WIP sync amdgpu_ttm_fill_mem only to kernel fences Christian König
2026-03-17  8:59   ` Khatri, Sunil
2026-03-17 10:52     ` Christian König
2026-03-11  7:43 ` [PATCH 01/11] drm/amdgpu: revert to old status lock handling v4 Khatri, Sunil
2026-03-12  7:13 ` Liang, Prike

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=afcb82ed-5c7a-4d13-960b-957e9a8cefb0@amd.com \
    --to=sukhatri@amd.com \
    --cc=Alexander.Deucher@amd.com \
    --cc=Prike.Liang@amd.com \
    --cc=SRINIVASAN.SHANMUGAM@amd.com \
    --cc=Sunil.Khatri@amd.com \
    --cc=Yogesh.Mohanmarimuthu@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=ckoenig.leichtzumerken@gmail.com \
    --cc=tursulin@ursulin.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox