From: "Christian König" <christian.koenig@amd.com>
To: Tvrtko Ursulin <tursulin@ursulin.net>,
Alexander.Deucher@amd.com, Prike.Liang@amd.com,
Yogesh.Mohanmarimuthu@amd.com, SRINIVASAN.SHANMUGAM@amd.com,
Sunil.Khatri@amd.com, amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 07/11] drm/amdgpu: rework amdgpu_userq_wait_ioctl v3
Date: Mon, 16 Mar 2026 15:19:55 +0100 [thread overview]
Message-ID: <649ffc71-0d29-48c0-b621-38da72a041a3@amd.com> (raw)
In-Reply-To: <4f60cc9f-b4df-480c-9914-fa0c7f8d224c@ursulin.net>
On 3/12/26 17:34, Tvrtko Ursulin wrote:
>> + /* Retrieve timeline fences */
>> + num_points = wait_info->num_syncobj_timeline_handles;
>> + for (i = 0; i < num_points; i++) {
>> + r = drm_syncobj_find_fence(filp, timeline_handles[i],
>> + timeline_points[i],
>> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
>> + &fence);
>> + if (r)
>> + goto free_fences;
>> +
>> + dma_fence_unwrap_for_each(f, &iter, fence) {
>> + if (num_fences >= wait_info->num_fences) {
>> + r = -EINVAL;
>
> dma_fence_put(fence);
Fixed.
>
> Also, maybe -EAGAIN?
>
> Or even consider dma_fence_dedup_array() and only bail out if it couldn't compact it.
I've replaced the error with a fallback in the next patch anyway.
>
>> + goto free_fences;
>> + }
>> - /* Array of fences */
>> - fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL);
>> - if (!fences) {
>> - r = -ENOMEM;
>> - goto free_fence_info;
>> + fences[num_fences++] = dma_fence_get(f);
>> }
>> - /* Retrieve GEM read objects fence */
>> - for (i = 0; i < num_read_bo_handles; i++) {
>> - struct dma_resv_iter resv_cursor;
>> - struct dma_fence *fence;
>> + dma_fence_put(fence);
>> + }
>> +
>> + /* Retrieve boolean fences */
>> + num_syncobj = wait_info->num_syncobj_handles;
>> + for (i = 0; i < num_syncobj; i++) {
>> + struct dma_fence *fence;
>> - dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
>> - DMA_RESV_USAGE_READ, fence) {
>> - if (num_fences >= wait_info->num_fences) {
>> - r = -EINVAL;
>
> Same as above.
Those doesn't hold an extra reference, so dma_fence_put would underflow the refcount.
>> + if (!userq_fence) {
>> /*
>> - * We need to make sure the user queue release their reference
>> - * to the fence drivers at some point before queue destruction.
>> - * Otherwise, we would gather those references until we don't
>> - * have any more space left and crash.
>> + * Just waiting on other driver fences should
>> + * be good for now
>> */
>> - r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
>> - xa_limit_32b, GFP_KERNEL);
>
> Does it need to put potentially the same fence_drv multiple times into the same xarray?
That would be quite unlikely. We have one fence_drv for each fence context and de-duplicate them above.
It could only happen if you call the wait_ioctl multiple times, but I don't see an use case for that.
Thanks,
Christian.
>
>> + r = dma_fence_wait(fences[i], true);
>> if (r)
>> - goto free_fences;
>> + goto put_waitq;
>> +
>> + continue;
>> + }
>> - amdgpu_userq_fence_driver_get(fence_drv);
>> + fence_drv = userq_fence->fence_drv;
>> + /*
>> + * We need to make sure the user queue release their reference
>> + * to the fence drivers at some point before queue destruction.
>> + * Otherwise, we would gather those references until we don't
>> + * have any more space left and crash.
>> + */
>> + r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
>> + xa_limit_32b, GFP_KERNEL);
>> + if (r)
>> + goto put_waitq;
>> - /* Store drm syncobj's gpu va address and value */
>> - fence_info[cnt].va = fence_drv->va;
>> - fence_info[cnt].value = fences[i]->seqno;
>> + amdgpu_userq_fence_driver_get(fence_drv);
>> - dma_fence_put(fences[i]);
>> - /* Increment the actual userq fence count */
>> - cnt++;
>> - }
>> + /* Store drm syncobj's gpu va address and value */
>> + fence_info[cnt].va = fence_drv->va;
>> + fence_info[cnt].value = fences[i]->seqno;
>> - wait_info->num_fences = cnt;
>> - /* Copy userq fence info to user space */
>> - if (copy_to_user(u64_to_user_ptr(wait_info->out_fences),
>> - fence_info, wait_info->num_fences * sizeof(*fence_info))) {
>> - r = -EFAULT;
>> - goto free_fences;
>> - }
>> + /* Increment the actual userq fence count */
>> + cnt++;
>> }
>> + wait_info->num_fences = cnt;
>> +
>> + /* Copy userq fence info to user space */
>> + if (copy_to_user(u64_to_user_ptr(wait_info->out_fences),
>> + fence_info, cnt * sizeof(*fence_info)))
>> + r = -EFAULT;
>> + else
>> + r = 0;
>> +
>> +put_waitq:
>> + amdgpu_userq_put(waitq);
>> free_fences:
>> - if (fences) {
>> - while (num_fences-- > 0)
>> - dma_fence_put(fences[num_fences]);
>> - kfree(fences);
>> - }
>> + while (num_fences--)
>> + dma_fence_put(fences[num_fences]);
>> + kfree(fences);
>> +
>> free_fence_info:
>> kfree(fence_info);
>> -exec_fini:
>> + return r;
>> +
>> +error_unlock:
>> drm_exec_fini(&exec);
>> -put_gobj_write:
>> - for (i = 0; i < num_write_bo_handles; i++)
>> - drm_gem_object_put(gobj_write[i]);
>> - kfree(gobj_write);
>> + goto free_fences;
>> +}
>> +
>> +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
>> + struct drm_file *filp)
>> +{
>> + int num_points, num_syncobj, num_read_bo_handles, num_write_bo_handles;
>> + u32 *syncobj_handles, *timeline_points, *timeline_handles;
>> + struct drm_amdgpu_userq_wait *wait_info = data;
>> + struct drm_gem_object **gobj_write;
>> + struct drm_gem_object **gobj_read;
>> + void __user *ptr;
>> + int r;
>> +
>> + if (!amdgpu_userq_enabled(dev))
>> + return -ENOTSUPP;
>> +
>> + if (wait_info->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES ||
>> + wait_info->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES)
>> + return -EINVAL;
>> +
>> + num_syncobj = wait_info->num_syncobj_handles;
>> + ptr = u64_to_user_ptr(wait_info->syncobj_handles);
>> + syncobj_handles = memdup_array_user(ptr, num_syncobj, sizeof(u32));
>> + if (IS_ERR(syncobj_handles))
>> + return PTR_ERR(syncobj_handles);
>> +
>> + num_points = wait_info->num_syncobj_timeline_handles;
>> + ptr = u64_to_user_ptr(wait_info->syncobj_timeline_handles);
>> + timeline_handles = memdup_array_user(ptr, num_points, sizeof(u32));
>> + if (IS_ERR(timeline_handles)) {
>> + r = PTR_ERR(timeline_handles);
>> + goto free_syncobj_handles;
>> + }
>> +
>> + ptr = u64_to_user_ptr(wait_info->syncobj_timeline_points);
>> + timeline_points = memdup_array_user(ptr, num_points, sizeof(u32));
>> + if (IS_ERR(timeline_points)) {
>> + r = PTR_ERR(timeline_points);
>> + goto free_timeline_handles;
>> + }
>> +
>> + num_read_bo_handles = wait_info->num_bo_read_handles;
>> + ptr = u64_to_user_ptr(wait_info->bo_read_handles),
>> + r = drm_gem_objects_lookup(filp, ptr, num_read_bo_handles, &gobj_read);
>> + if (r)
>> + goto free_timeline_points;
>> +
>> + num_write_bo_handles = wait_info->num_bo_write_handles;
>> + ptr = u64_to_user_ptr(wait_info->bo_write_handles),
>> + r = drm_gem_objects_lookup(filp, ptr, num_write_bo_handles,
>> + &gobj_write);
>> + if (r)
>> + goto put_gobj_read;
>> +
>> + /*
>> + * Passing num_fences = 0 means that userspace doesn't want to
>> + * retrieve userq_fence_info. If num_fences = 0 we skip filling
>> + * userq_fence_info and return the actual number of fences on
>> + * args->num_fences.
>> + */
>> + if (!wait_info->num_fences) {
>> + r = amdgpu_userq_wait_count_fences(filp, wait_info,
>> + syncobj_handles,
>> + timeline_points,
>> + timeline_handles,
>> + gobj_write,
>> + gobj_read);
>> + } else {
>> + r = amdgpu_userq_wait_return_fence_info(filp, wait_info,
>> + syncobj_handles,
>> + timeline_points,
>> + timeline_handles,
>> + gobj_write,
>> + gobj_read);
>> + }
>> +
>> + while (num_write_bo_handles--)
>> + drm_gem_object_put(gobj_write[num_write_bo_handles]);
>> + kvfree(gobj_write);
>> +
>> put_gobj_read:
>> - for (i = 0; i < num_read_bo_handles; i++)
>> - drm_gem_object_put(gobj_read[i]);
>> - kfree(gobj_read);
>> + while (num_read_bo_handles--)
>> + drm_gem_object_put(gobj_read[num_read_bo_handles]);
>> + kvfree(gobj_read);
>> +
>> free_timeline_points:
>> kfree(timeline_points);
>> free_timeline_handles:
>> kfree(timeline_handles);
>> free_syncobj_handles:
>> kfree(syncobj_handles);
>> -
>> - if (waitq)
>> - amdgpu_userq_put(waitq);
>> -
>> return r;
>> }
>
> The rest looks good. In my RFC I found a way to not duplicate the various fence walks between the counting and waiting, but yours works as well.
>
> Regards,
>
> Tvrtko
>
next prev parent reply other threads:[~2026-03-16 14:20 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-10 19:13 [PATCH 01/11] drm/amdgpu: revert to old status lock handling v4 Christian König
2026-03-10 19:13 ` [PATCH 02/11] drm/amdgpu: restructure VM state machine Christian König
2026-03-11 8:47 ` Khatri, Sunil
2026-03-12 12:49 ` Christian König
2026-03-12 10:07 ` Liang, Prike
2026-03-12 14:32 ` Tvrtko Ursulin
2026-03-16 13:44 ` Christian König
2026-03-16 14:26 ` Tvrtko Ursulin
2026-03-10 19:13 ` [PATCH 03/11] drm/amdgpu: fix amdgpu_userq_evict Christian König
2026-03-11 8:51 ` Khatri, Sunil
2026-03-13 7:25 ` Liang, Prike
2026-03-10 19:13 ` [PATCH 04/11] drm/amdgpu: completely rework eviction fence handling Christian König
2026-03-11 12:27 ` Khatri, Sunil
2026-03-13 8:00 ` Khatri, Sunil
2026-03-17 9:41 ` Christian König
2026-03-13 8:28 ` Liang, Prike
2026-03-17 9:57 ` Christian König
2026-03-17 11:21 ` Liang, Prike
2026-03-17 11:23 ` Christian König
2026-03-17 11:54 ` Liang, Prike
2026-03-10 19:13 ` [PATCH 05/11] drm/amdgpu: fix eviction fence and userq manager shutdown Christian König
2026-03-11 12:26 ` Khatri, Sunil
2026-03-13 9:35 ` Khatri, Sunil
2026-03-10 19:13 ` [PATCH 06/11] drm/amdgpu: fix adding eviction fence Christian König
2026-03-11 12:26 ` Khatri, Sunil
2026-03-10 19:13 ` [PATCH 07/11] drm/amdgpu: rework amdgpu_userq_wait_ioctl v3 Christian König
2026-03-12 16:34 ` Tvrtko Ursulin
2026-03-16 14:19 ` Christian König [this message]
2026-03-16 14:44 ` Tvrtko Ursulin
2026-03-17 7:05 ` Khatri, Sunil
2026-03-10 19:13 ` [PATCH 08/11] drm/amdgpu: make amdgpu_user_wait_ioctl more resilent v2 Christian König
2026-03-17 7:15 ` Khatri, Sunil
2026-03-10 19:13 ` [PATCH 09/11] drm/amdgpu: annotate eviction fence signaling path Christian König
2026-03-17 7:35 ` Khatri, Sunil
2026-03-10 19:13 ` [PATCH 10/11] drm/amdgpu: fix some more bug in amdgpu_gem_va_ioctl Christian König
2026-03-17 8:44 ` Khatri, Sunil
2026-03-17 11:08 ` Christian König
2026-03-10 19:13 ` [PATCH 11/11] drm/amdgpu: WIP sync amdgpu_ttm_fill_mem only to kernel fences Christian König
2026-03-17 8:59 ` Khatri, Sunil
2026-03-17 10:52 ` Christian König
2026-03-11 7:43 ` [PATCH 01/11] drm/amdgpu: revert to old status lock handling v4 Khatri, Sunil
2026-03-12 7:13 ` Liang, Prike
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=649ffc71-0d29-48c0-b621-38da72a041a3@amd.com \
--to=christian.koenig@amd.com \
--cc=Alexander.Deucher@amd.com \
--cc=Prike.Liang@amd.com \
--cc=SRINIVASAN.SHANMUGAM@amd.com \
--cc=Sunil.Khatri@amd.com \
--cc=Yogesh.Mohanmarimuthu@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
--cc=tursulin@ursulin.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.