From: "Kuehling, Felix" <felix.kuehling@amd.com>
To: "Christian König" <ckoenig.leichtzumerken@gmail.com>,
phasta@mailbox.org, alexdeucher@gmail.com,
simona.vetter@ffwll.ch, tursulin@ursulin.net, airlied@gmail.com,
matthew.brost@intel.com
Cc: dri-devel@lists.freedesktop.org, amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 15/20] drm/amdgpu: independence for the amdkfd_fence!
Date: Fri, 31 Oct 2025 10:34:14 -0400 [thread overview]
Message-ID: <70dbb58d-fa41-476a-be2b-8d98da3eaf3b@amd.com> (raw)
In-Reply-To: <20251031134442.113648-16-christian.koenig@amd.com>
[-- Attachment #1: Type: text/plain, Size: 6117 bytes --]
On 2025-10-31 09:16, Christian König wrote:
> This should allow amdkfd_fences to outlive the amdgpu module.
>
> Signed-off-by: Christian König<christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 6 ++++
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 36 +++++++------------
> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 7 ++--
> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 +--
> 4 files changed, 24 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 9e120c934cc1..35c59c784b7b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -196,6 +196,7 @@ int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
> #endif
> #if IS_ENABLED(CONFIG_HSA_AMD)
> bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
> +void amdkfd_fence_signal(struct dma_fence *f);
> struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
> void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo);
> int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
> @@ -210,6 +211,11 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
> return false;
> }
>
> +static inline
> +void amdkfd_fence_signal(struct dma_fence *f)
> +{
> +}
> +
> static inline
> struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
> {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
> index 09c919f72b6c..69bca4536326 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
> @@ -127,29 +127,9 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
> if (!svm_range_schedule_evict_svm_bo(fence))
> return true;
> }
> - return false;
> -}
> -
> -/**
> - * amdkfd_fence_release - callback that fence can be freed
> - *
> - * @f: dma_fence
> - *
> - * This function is called when the reference count becomes zero.
> - * Drops the mm_struct reference and RCU schedules freeing up the fence.
> - */
> -static void amdkfd_fence_release(struct dma_fence *f)
> -{
> - struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
> -
> - /* Unconditionally signal the fence. The process is getting
> - * terminated.
> - */
> - if (WARN_ON(!fence))
> - return; /* Not an amdgpu_amdkfd_fence */
> -
> mmdrop(fence->mm);
> - kfree_rcu(f, rcu);
> + fence->mm = NULL;
> + return false;
> }
>
> /**
> @@ -174,9 +154,19 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
> return false;
> }
>
> +void amdkfd_fence_signal(struct dma_fence *f)
> +{
> + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
> +
> + if (fence) {
> + mmdrop(fence->mm);
> + fence->mm = NULL;
> + }
> + dma_fence_signal(f);
> +}
> +
I'm still concerned about possible race conditions between
amdkfd_fence_signal and amdkfd_fence_enable_signaling. I think the
latter is always called with the fence->lock held. So this could be
fixed by taking the fence->lock in amdkfd_fence_signal:
/*
 * Drop the fence's mm reference (if it still holds one) and signal the
 * fence, all under the fence lock.
 *
 * Holding f->lock is meant to serialize this against
 * amdkfd_fence_enable_signaling(), which is believed to always run with
 * the fence lock held and which also drops and clears fence->mm.
 */
void amdkfd_fence_signal(struct dma_fence *f)
{
	struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
	unsigned long flags;

	/* spin_lock_irqsave() is a macro: pass flags itself, not &flags. */
	spin_lock_irqsave(f->lock, flags);
	/* mmdrop() has no NULL check, so skip it once mm was already dropped. */
	if (fence && fence->mm) {
		mmdrop(fence->mm);
		fence->mm = NULL;
	}
	/* The lock is already held, so use the _locked signal variant. */
	dma_fence_signal_locked(f);
	spin_unlock_irqrestore(f->lock, flags);
}
Also note that you need to NULL-check fence->mm (here and in
enable_signaling) because mmdrop() doesn't check for NULL itself.
Regards,
Felix
> static const struct dma_fence_ops amdkfd_fence_ops = {
> .get_driver_name = amdkfd_fence_get_driver_name,
> .get_timeline_name = amdkfd_fence_get_timeline_name,
> .enable_signaling = amdkfd_fence_enable_signaling,
> - .release = amdkfd_fence_release,
> };
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index ddfe30c13e9d..779d7701bac9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -1177,7 +1177,7 @@ static void kfd_process_wq_release(struct work_struct *work)
> synchronize_rcu();
> ef = rcu_access_pointer(p->ef);
> if (ef)
> - dma_fence_signal(ef);
> + amdkfd_fence_signal(ef);
>
> kfd_process_remove_sysfs(p);
> kfd_debugfs_remove_process(p);
> @@ -1986,7 +1986,6 @@ kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node,
> static int signal_eviction_fence(struct kfd_process *p)
> {
> struct dma_fence *ef;
> - int ret;
>
> rcu_read_lock();
> ef = dma_fence_get_rcu_safe(&p->ef);
> @@ -1994,10 +1993,10 @@ static int signal_eviction_fence(struct kfd_process *p)
> if (!ef)
> return -EINVAL;
>
> - ret = dma_fence_signal(ef);
> + amdkfd_fence_signal(ef);
> dma_fence_put(ef);
>
> - return ret;
> + return 0;
> }
>
> static void evict_process_worker(struct work_struct *work)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 9d72411c3379..5d62d231a865 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -428,7 +428,7 @@ static void svm_range_bo_release(struct kref *kref)
>
> if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
> /* We're not in the eviction worker. Signal the fence. */
> - dma_fence_signal(&svm_bo->eviction_fence->base);
> + amdkfd_fence_signal(&svm_bo->eviction_fence->base);
> dma_fence_put(&svm_bo->eviction_fence->base);
> amdgpu_bo_unref(&svm_bo->bo);
> kfree(svm_bo);
> @@ -3622,7 +3622,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
> mmap_read_unlock(mm);
> mmput(mm);
>
> - dma_fence_signal(&svm_bo->eviction_fence->base);
> + amdkfd_fence_signal(&svm_bo->eviction_fence->base);
>
> /* This is the last reference to svm_bo, after svm_range_vram_node_free
> * has been called in svm_migrate_vram_to_ram
[-- Attachment #2: Type: text/html, Size: 6571 bytes --]
next prev parent reply other threads:[~2025-10-31 14:34 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-31 13:16 Independence for dma_fences! v2 Christian König
2025-10-31 13:16 ` [PATCH 01/20] dma-buf: cleanup dma_fence_describe v2 Christian König
2025-10-31 14:04 ` Tvrtko Ursulin
2025-10-31 13:16 ` [PATCH 02/20] dma-buf: rework stub fence initialisation v2 Christian König
2025-10-31 14:05 ` Tvrtko Ursulin
2025-11-04 15:01 ` Tvrtko Ursulin
2025-11-06 13:16 ` Christian König
2025-10-31 13:16 ` [PATCH 03/20] dma-buf: protected fence ops by RCU v2 Christian König
2025-10-31 14:29 ` Tvrtko Ursulin
2025-11-06 13:14 ` Christian König
2025-11-07 11:09 ` Tvrtko Ursulin
2025-10-31 13:16 ` [PATCH 04/20] dma-buf: detach fence ops on signal Christian König
2025-11-07 11:04 ` Philipp Stanner
2025-10-31 13:16 ` [PATCH 05/20] dma-buf: inline spinlock for fence protection Christian König
2025-11-07 11:59 ` Philipp Stanner
2025-10-31 13:16 ` [PATCH 06/20] dma-buf: use inline lock for the stub fence Christian König
2025-11-04 15:05 ` Tvrtko Ursulin
2025-10-31 13:16 ` [PATCH 07/20] dma-buf: use inline lock for the dma-fence-array Christian König
2025-11-05 8:50 ` Tvrtko Ursulin
2025-11-07 12:04 ` Philipp Stanner
2025-11-12 13:53 ` Christian König
2025-11-12 14:00 ` Philipp Stanner
2025-10-31 13:16 ` [PATCH 08/20] dma-buf: use inline lock for the dma-fence-chain Christian König
2025-11-04 15:08 ` Tvrtko Ursulin
2025-10-31 13:16 ` [PATCH 09/20] drm/sched: use inline locks for the drm-sched-fence Christian König
2025-11-04 15:12 ` Tvrtko Ursulin
2025-11-06 13:23 ` Christian König
2025-11-06 13:45 ` Tvrtko Ursulin
2025-11-07 8:33 ` Philipp Stanner
2025-11-12 13:58 ` Christian König
2025-10-31 13:16 ` [PATCH 10/20] drm/amdgpu: clean up and unify hw fence handling Christian König
2025-11-04 15:14 ` Tvrtko Ursulin
2025-10-31 13:16 ` [PATCH 11/20] drm/amdgpu: fix KFD eviction fence enable_signaling path Christian König
2025-11-04 16:28 ` Philipp Stanner
2025-11-06 13:43 ` Christian König
2025-11-06 16:37 ` Kuehling, Felix
2025-11-06 16:46 ` Christian König
2025-11-06 17:07 ` Kuehling, Felix
2025-11-06 17:09 ` Christian König
2025-11-06 17:25 ` Kuehling, Felix
2025-11-13 14:37 ` Christian König
2025-11-13 17:46 ` Kuehling, Felix
2025-10-31 13:16 ` [PATCH 12/20] drm/amdgpu: independence for the amdgpu_fence! Christian König
2025-10-31 13:16 ` [PATCH 13/20] drm/amdgpu: independence for the amdgpu_eviction_fence! Christian König
2025-11-04 15:45 ` Tvrtko Ursulin
2025-10-31 13:16 ` [PATCH 14/20] drm/amdgpu: independence for the amdgpu_vm_tlb_fence! Christian König
2025-11-04 15:45 ` Tvrtko Ursulin
2025-10-31 13:16 ` [PATCH 15/20] drm/amdgpu: independence for the amdkfd_fence! Christian König
2025-10-31 14:34 ` Kuehling, Felix [this message]
2025-10-31 13:16 ` [PATCH 16/20] drm/amdgpu: independence for the amdgpu_userq__fence! Christian König
2025-11-04 15:59 ` Tvrtko Ursulin
2025-10-31 13:16 ` [PATCH 17/20] drm/xe: Disconnect the low hanging fences from Xe module Christian König
2025-10-31 13:16 ` [PATCH 18/20] drm/xe: Drop HW fence slab Christian König
2025-10-31 13:16 ` [PATCH 19/20] drm/xe: Promote xe_hw_fence_irq to an ref counted object Christian König
2025-10-31 13:16 ` [PATCH 20/20] drm/xe: Finish disconnect HW fences from module Christian König
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=70dbb58d-fa41-476a-be2b-8d98da3eaf3b@amd.com \
--to=felix.kuehling@amd.com \
--cc=airlied@gmail.com \
--cc=alexdeucher@gmail.com \
--cc=amd-gfx@lists.freedesktop.org \
--cc=ckoenig.leichtzumerken@gmail.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=matthew.brost@intel.com \
--cc=phasta@mailbox.org \
--cc=simona.vetter@ffwll.ch \
--cc=tursulin@ursulin.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox