AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Felix Kuehling <felix.kuehling@amd.com>
To: "Christian König" <ckoenig.leichtzumerken@gmail.com>,
	phasta@mailbox.org, alexdeucher@gmail.com,
	simona.vetter@ffwll.ch, tursulin@ursulin.net
Cc: dri-devel@lists.freedesktop.org, amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 14/15] drm/amdgpu: independence for the amdkfd_fence!
Date: Thu, 30 Oct 2025 16:04:48 -0400	[thread overview]
Message-ID: <dee83b0a-1464-464e-a2a1-0d7d958d4289@amd.com> (raw)
In-Reply-To: <741496aa-2154-4939-9d3a-27ea6eff2fea@gmail.com>


On 2025-10-30 11:07, Christian König wrote:
> On 10/18/25 00:22, Felix Kuehling wrote:
>> On 2025-10-13 09:48, Christian König wrote:
>>> This should allow amdkfd_fences to outlive the amdgpu module.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  6 ++++
>>>    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c  | 36 +++++++------------
>>>    drivers/gpu/drm/amd/amdkfd/kfd_process.c      |  7 ++--
>>>    drivers/gpu/drm/amd/amdkfd/kfd_svm.c          |  4 +--
>>>    4 files changed, 24 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> index 9e120c934cc1..35c59c784b7b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> @@ -196,6 +196,7 @@ int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
>>>    #endif
>>>    #if IS_ENABLED(CONFIG_HSA_AMD)
>>>    bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
>>> +void amdkfd_fence_signal(struct dma_fence *f);
>>>    struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
>>>    void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo);
>>>    int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
>>> @@ -210,6 +211,11 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
>>>        return false;
>>>    }
>>>    +static inline
>>> +void amdkfd_fence_signal(struct dma_fence *f)
>>> +{
>>> +}
>>> +
>>>    static inline
>>>    struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
>>>    {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
>>> index 09c919f72b6c..69bca4536326 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
>>> @@ -127,29 +127,9 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
>>>            if (!svm_range_schedule_evict_svm_bo(fence))
>>>                return true;
>>>        }
>>> -    return false;
>>> -}
>>> -
>>> -/**
>>> - * amdkfd_fence_release - callback that fence can be freed
>>> - *
>>> - * @f: dma_fence
>>> - *
>>> - * This function is called when the reference count becomes zero.
>>> - * Drops the mm_struct reference and RCU schedules freeing up the fence.
>>> - */
>>> -static void amdkfd_fence_release(struct dma_fence *f)
>>> -{
>>> -    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
>>> -
>>> -    /* Unconditionally signal the fence. The process is getting
>>> -     * terminated.
>>> -     */
>>> -    if (WARN_ON(!fence))
>>> -        return; /* Not an amdgpu_amdkfd_fence */
>>> -
>>>        mmdrop(fence->mm);
>>> -    kfree_rcu(f, rcu);
>>> +    fence->mm = NULL;
>>> +    return false;
>>>    }
>>>      /**
>>> @@ -174,9 +154,19 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
>>>        return false;
>>>    }
>>>    +void amdkfd_fence_signal(struct dma_fence *f)
>>> +{
>>> +    struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
>>> +
>>> +    if (fence) {
>>> +        mmdrop(fence->mm);
>>> +        fence->mm = NULL;
>> Isn't fence->mm already NULL here if it was dropped in amdkfd_fence_enable_signaling?
> It looked like there are some use cases which signal the fence without going through amdkfd_fence_enable_signaling.
>
> E.g. kfd_process_wq_release which is most likely used on process tear down.

I see. Could there be race conditions here, if enable_signaling happens 
concurrently and we end up calling mmdrop twice?

Regards,
   Felix


>
> Regards,
> Christian.
>
>> Regards,
>>    Felix
>>
>>
>>> +    }
>>> +    dma_fence_signal(f);
>>> +}
>>> +
>>>    static const struct dma_fence_ops amdkfd_fence_ops = {
>>>        .get_driver_name = amdkfd_fence_get_driver_name,
>>>        .get_timeline_name = amdkfd_fence_get_timeline_name,
>>>        .enable_signaling = amdkfd_fence_enable_signaling,
>>> -    .release = amdkfd_fence_release,
>>>    };
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> index ddfe30c13e9d..779d7701bac9 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> @@ -1177,7 +1177,7 @@ static void kfd_process_wq_release(struct work_struct *work)
>>>        synchronize_rcu();
>>>        ef = rcu_access_pointer(p->ef);
>>>        if (ef)
>>> -        dma_fence_signal(ef);
>>> +        amdkfd_fence_signal(ef);
>>>          kfd_process_remove_sysfs(p);
>>>        kfd_debugfs_remove_process(p);
>>> @@ -1986,7 +1986,6 @@ kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node,
>>>    static int signal_eviction_fence(struct kfd_process *p)
>>>    {
>>>        struct dma_fence *ef;
>>> -    int ret;
>>>          rcu_read_lock();
>>>        ef = dma_fence_get_rcu_safe(&p->ef);
>>> @@ -1994,10 +1993,10 @@ static int signal_eviction_fence(struct kfd_process *p)
>>>        if (!ef)
>>>            return -EINVAL;
>>>    -    ret = dma_fence_signal(ef);
>>> +    amdkfd_fence_signal(ef);
>>>        dma_fence_put(ef);
>>>    -    return ret;
>>> +    return 0;
>>>    }
>>>      static void evict_process_worker(struct work_struct *work)
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> index 91609dd5730f..01ce2d853602 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> @@ -428,7 +428,7 @@ static void svm_range_bo_release(struct kref *kref)
>>>          if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
>>>            /* We're not in the eviction worker. Signal the fence. */
>>> -        dma_fence_signal(&svm_bo->eviction_fence->base);
>>> +        amdkfd_fence_signal(&svm_bo->eviction_fence->base);
>>>        dma_fence_put(&svm_bo->eviction_fence->base);
>>>        amdgpu_bo_unref(&svm_bo->bo);
>>>        kfree(svm_bo);
>>> @@ -3628,7 +3628,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
>>>        mmap_read_unlock(mm);
>>>        mmput(mm);
>>>    -    dma_fence_signal(&svm_bo->eviction_fence->base);
>>> +    amdkfd_fence_signal(&svm_bo->eviction_fence->base);
>>>          /* This is the last reference to svm_bo, after svm_range_vram_node_free
>>>         * has been called in svm_migrate_vram_to_ram

  reply	other threads:[~2025-10-30 20:05 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-13 13:48 Independence for dma_fences! Christian König
2025-10-13 13:48 ` [PATCH 01/15] dma-buf: cleanup dma_fence_describe Christian König
2025-10-14 14:37   ` Tvrtko Ursulin
2025-10-23  3:45     ` Matthew Brost
2025-10-13 13:48 ` [PATCH 02/15] dma-buf: rework stub fence initialisation Christian König
2025-10-14 15:03   ` Tvrtko Ursulin
2025-10-24  7:29   ` Tvrtko Ursulin
2025-10-13 13:48 ` [PATCH 03/15] dma-buf: protected fence ops by RCU Christian König
2025-10-16 18:04   ` Tvrtko Ursulin
2025-10-31 10:35   ` Tvrtko Ursulin
2025-10-13 13:48 ` [PATCH 04/15] dma-buf: detach fence ops on signal Christian König
2025-10-16  8:56   ` Tvrtko Ursulin
2025-10-16 15:57     ` Tvrtko Ursulin
2025-10-23  4:23       ` Matthew Brost
2025-10-23  4:44         ` Matthew Brost
2025-10-30 13:52       ` Christian König
2025-10-31 10:31         ` Tvrtko Ursulin
2025-10-17  9:14   ` Philipp Stanner
2025-10-30 15:05     ` Christian König
2025-10-13 13:48 ` [PATCH 05/15] dma-buf: inline spinlock for fence protection Christian König
2025-10-16  9:26   ` Tvrtko Ursulin
2025-11-03 13:07     ` Philipp Stanner
2025-10-23 18:09   ` Matthew Brost
2025-10-30 15:14     ` Christian König
2025-10-13 13:48 ` [PATCH 06/15] dma-buf: use inline lock for the stub fence Christian König
2025-10-13 13:48 ` [PATCH 07/15] dma-buf: use inline lock for the dma-fence-array Christian König
2025-10-13 13:48 ` [PATCH 08/15] dma-buf: use inline lock for the dma-fence-chain Christian König
2025-10-13 13:48 ` [PATCH 09/15] drm/sched: use inline locks for the drm-sched-fence Christian König
2025-10-13 13:48 ` [PATCH 10/15] drm/amdgpu: fix KFD eviction fence enable_signaling path Christian König
2025-10-13 13:48 ` [PATCH 11/15] drm/amdgpu: independence for the amdgpu_fence! Christian König
2025-10-13 13:48 ` [PATCH 12/15] drm/amdgpu: independence for the amdgpu_eviction_fence! Christian König
2025-10-13 13:48 ` [PATCH 13/15] drm/amdgpu: independence for the amdgpu_vm_tlb_fence! Christian König
2025-10-13 13:48 ` [PATCH 14/15] drm/amdgpu: independence for the amdkfd_fence! Christian König
2025-10-17 22:22   ` Felix Kuehling
2025-10-30 15:07     ` Christian König
2025-10-30 20:04       ` Felix Kuehling [this message]
2025-10-13 13:48 ` [PATCH 15/15] drm/amdgpu: independence for the amdgpu_userq__fence! Christian König
2025-10-13 14:54 ` Independence for dma_fences! Philipp Stanner
2025-10-14 15:54   ` Christian König
2025-10-17  8:32     ` Philipp Stanner
2025-10-28 14:06       ` Christian König
2025-10-29 20:53         ` Matthew Brost
2025-10-30 10:59           ` Christian König
2025-10-31 17:44             ` Matthew Brost
2025-11-03 11:43               ` Christian König
2025-11-03 19:32                 ` Matthew Brost
2025-10-15  0:51 ` Dave Airlie

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=dee83b0a-1464-464e-a2a1-0d7d958d4289@amd.com \
    --to=felix.kuehling@amd.com \
    --cc=alexdeucher@gmail.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=ckoenig.leichtzumerken@gmail.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=phasta@mailbox.org \
    --cc=simona.vetter@ffwll.ch \
    --cc=tursulin@ursulin.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox