From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: Matthew Brost <matthew.brost@intel.com>, intel-xe@lists.freedesktop.org
Subject: Re: [PATCH v5 1/6] drm/xe: Enforce correct user fence signaling order using drm_syncobjs
Date: Thu, 30 Oct 2025 08:58:53 +0100 [thread overview]
Message-ID: <f647296ae0d8150745852b439da169529aa47e4d.camel@linux.intel.com> (raw)
In-Reply-To: <20251029205719.2746501-2-matthew.brost@intel.com>
On Wed, 2025-10-29 at 13:57 -0700, Matthew Brost wrote:
> Prevent application hangs caused by out-of-order fence signaling when
> user fences are attached. Use drm_syncobj (via dma-fence-chain) to
> guarantee that each user fence signals in order, regardless of the
> signaling order of the attached fences. Ensure user fence writebacks
> to
> user space occur in the correct sequence.
Perhaps describe exactly what the signalling order is supposed to be?
Like calling order per exec_queue etc. (strictly I guess it's the
grabbing order of a certain lock, like the vm resv?)
Do we need a Fixes: for this?
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>
> Signed-of-by: Matthew Brost <matthew.brost@intel.com>
> ---
> drivers/gpu/drm/xe/xe_exec.c | 3 +-
> drivers/gpu/drm/xe/xe_exec_queue.c | 13 +++++++
> drivers/gpu/drm/xe/xe_exec_queue_types.h | 7 ++++
> drivers/gpu/drm/xe/xe_oa.c | 45 ++++++++++++++++------
> --
> drivers/gpu/drm/xe/xe_oa_types.h | 8 +++++
> drivers/gpu/drm/xe/xe_sync.c | 17 +++++++--
> drivers/gpu/drm/xe/xe_sync.h | 3 ++
> drivers/gpu/drm/xe/xe_sync_types.h | 3 ++
> drivers/gpu/drm/xe/xe_vm.c | 4 +++
> 9 files changed, 85 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_exec.c
> b/drivers/gpu/drm/xe/xe_exec.c
> index 0dc27476832b..f7b9d8715053 100644
> --- a/drivers/gpu/drm/xe/xe_exec.c
> +++ b/drivers/gpu/drm/xe/xe_exec.c
> @@ -166,7 +166,8 @@ int xe_exec_ioctl(struct drm_device *dev, void
> *data, struct drm_file *file)
>
> for (num_syncs = 0; num_syncs < args->num_syncs;
> num_syncs++) {
> err = xe_sync_entry_parse(xe, xef,
> &syncs[num_syncs],
> - &syncs_user[num_syncs],
> SYNC_PARSE_FLAG_EXEC |
> + &syncs_user[num_syncs],
> NULL, 0,
> + SYNC_PARSE_FLAG_EXEC |
> (xe_vm_in_lr_mode(vm) ?
> SYNC_PARSE_FLAG_LR_MODE :
> 0));
> if (err)
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c
> b/drivers/gpu/drm/xe/xe_exec_queue.c
> index 90cbc95f8e2e..6e168efbac65 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -10,6 +10,7 @@
> #include <drm/drm_device.h>
> #include <drm/drm_drv.h>
> #include <drm/drm_file.h>
> +#include <drm/drm_syncobj.h>
> #include <uapi/drm/xe_drm.h>
>
> #include "xe_dep_scheduler.h"
> @@ -345,6 +346,7 @@ struct xe_exec_queue
> *xe_exec_queue_create_bind(struct xe_device *xe,
> struct xe_gt *gt = tile->primary_gt;
> struct xe_exec_queue *q;
> struct xe_vm *migrate_vm;
> + int err;
>
> migrate_vm = xe_migrate_get_vm(tile->migrate);
> if (xe->info.has_usm) {
> @@ -368,6 +370,14 @@ struct xe_exec_queue
> *xe_exec_queue_create_bind(struct xe_device *xe,
> }
> xe_vm_put(migrate_vm);
>
> + err = drm_syncobj_create(&q->ufence_syncobj,
> + DRM_SYNCOBJ_CREATE_SIGNALED,
> + NULL);
> + if (err) {
> + xe_exec_queue_put(q);
> + return ERR_PTR(err);
> + }
> +
> return q;
> }
> ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);
> @@ -377,6 +387,9 @@ void xe_exec_queue_destroy(struct kref *ref)
> struct xe_exec_queue *q = container_of(ref, struct
> xe_exec_queue, refcount);
> struct xe_exec_queue *eq, *next;
>
> + if (q->ufence_syncobj)
> + drm_syncobj_put(q->ufence_syncobj);
> +
> if (xe_exec_queue_uses_pxp(q))
> xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
>
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> index 282505fa1377..838266c3914b 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> @@ -15,6 +15,7 @@
> #include "xe_hw_fence_types.h"
> #include "xe_lrc_types.h"
>
> +struct drm_syncobj;
> struct xe_execlist_exec_queue;
> struct xe_gt;
> struct xe_guc_exec_queue;
> @@ -155,6 +156,12 @@ struct xe_exec_queue {
> struct list_head link;
> } pxp;
>
> + /** @ufence_syncobj: User fence syncobj */
> + struct drm_syncobj *ufence_syncobj;
> +
> + /** @ufence_timeline_value: User fence timeline value */
> + u64 ufence_timeline_value;
> +
> /** @ops: submission backend exec queue operations */
> const struct xe_exec_queue_ops *ops;
>
> diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> index f901ba52b403..7a13a7bd99a6 100644
> --- a/drivers/gpu/drm/xe/xe_oa.c
> +++ b/drivers/gpu/drm/xe/xe_oa.c
> @@ -10,6 +10,7 @@
>
> #include <drm/drm_drv.h>
> #include <drm/drm_managed.h>
> +#include <drm/drm_syncobj.h>
> #include <uapi/drm/xe_drm.h>
>
> #include <generated/xe_wa_oob.h>
> @@ -1390,7 +1391,9 @@ static int xe_oa_user_extensions(struct xe_oa
> *oa, enum xe_oa_user_extn_from fro
> return 0;
> }
>
> -static int xe_oa_parse_syncs(struct xe_oa *oa, struct
> xe_oa_open_param *param)
> +static int xe_oa_parse_syncs(struct xe_oa *oa,
> + struct xe_oa_stream *stream,
> + struct xe_oa_open_param *param)
> {
> int ret, num_syncs, num_ufence = 0;
>
> @@ -1410,7 +1413,9 @@ static int xe_oa_parse_syncs(struct xe_oa *oa,
> struct xe_oa_open_param *param)
>
> for (num_syncs = 0; num_syncs < param->num_syncs;
> num_syncs++) {
> ret = xe_sync_entry_parse(oa->xe, param->xef,
> ¶m->syncs[num_syncs],
> - ¶m-
> >syncs_user[num_syncs], 0);
> + ¶m-
> >syncs_user[num_syncs],
> + stream->ufence_syncobj,
> + ++stream-
> >ufence_timeline_value, 0);
> if (ret)
> goto err_syncs;
>
> @@ -1540,7 +1545,7 @@ static long xe_oa_config_locked(struct
> xe_oa_stream *stream, u64 arg)
> return -ENODEV;
>
> param.xef = stream->xef;
> - err = xe_oa_parse_syncs(stream->oa, ¶m);
> + err = xe_oa_parse_syncs(stream->oa, stream, ¶m);
> if (err)
> goto err_config_put;
>
> @@ -1636,6 +1641,7 @@ static void xe_oa_destroy_locked(struct
> xe_oa_stream *stream)
> if (stream->exec_q)
> xe_exec_queue_put(stream->exec_q);
>
> + drm_syncobj_put(stream->ufence_syncobj);
> kfree(stream);
> }
>
> @@ -1827,6 +1833,7 @@ static int
> xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
> struct xe_oa_open_param
> *param)
> {
> struct xe_oa_stream *stream;
> + struct drm_syncobj *ufence_syncobj;
> int stream_fd;
> int ret;
>
> @@ -1837,17 +1844,31 @@ static int
> xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
> goto exit;
> }
>
> + ret = drm_syncobj_create(&ufence_syncobj,
> DRM_SYNCOBJ_CREATE_SIGNALED,
> + NULL);
> + if (ret)
> + goto exit;
> +
> stream = kzalloc(sizeof(*stream), GFP_KERNEL);
> if (!stream) {
> ret = -ENOMEM;
> - goto exit;
> + goto err_syncobj;
> }
> -
> + stream->ufence_syncobj = ufence_syncobj;
> stream->oa = oa;
> - ret = xe_oa_stream_init(stream, param);
> +
> + ret = xe_oa_parse_syncs(oa, stream, param);
> if (ret)
> goto err_free;
>
> + ret = xe_oa_stream_init(stream, param);
> + if (ret) {
> + while (param->num_syncs--)
> + xe_sync_entry_cleanup(¶m->syncs[param-
> >num_syncs]);
> + kfree(param->syncs);
> + goto err_free;
> + }
> +
> if (!param->disabled) {
> ret = xe_oa_enable_locked(stream);
> if (ret)
> @@ -1871,6 +1892,8 @@ static int
> xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
> xe_oa_stream_destroy(stream);
> err_free:
> kfree(stream);
> +err_syncobj:
> + drm_syncobj_put(ufence_syncobj);
> exit:
> return ret;
> }
> @@ -2084,22 +2107,14 @@ int xe_oa_stream_open_ioctl(struct drm_device
> *dev, u64 data, struct drm_file *f
> goto err_exec_q;
> }
>
> - ret = xe_oa_parse_syncs(oa, ¶m);
> - if (ret)
> - goto err_exec_q;
> -
> mutex_lock(¶m.hwe->gt->oa.gt_lock);
> ret = xe_oa_stream_open_ioctl_locked(oa, ¶m);
> mutex_unlock(¶m.hwe->gt->oa.gt_lock);
> if (ret < 0)
> - goto err_sync_cleanup;
> + goto err_exec_q;
>
> return ret;
>
> -err_sync_cleanup:
> - while (param.num_syncs--)
> -
> xe_sync_entry_cleanup(¶m.syncs[param.num_syncs]);
> - kfree(param.syncs);
> err_exec_q:
> if (param.exec_q)
> xe_exec_queue_put(param.exec_q);
> diff --git a/drivers/gpu/drm/xe/xe_oa_types.h
> b/drivers/gpu/drm/xe/xe_oa_types.h
> index 2628f78c4e8d..daf701b5d48b 100644
> --- a/drivers/gpu/drm/xe/xe_oa_types.h
> +++ b/drivers/gpu/drm/xe/xe_oa_types.h
> @@ -15,6 +15,8 @@
> #include "regs/xe_reg_defs.h"
> #include "xe_hw_engine_types.h"
>
> +struct drm_syncobj;
> +
> #define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M
>
> enum xe_oa_report_header {
> @@ -248,6 +250,12 @@ struct xe_oa_stream {
> /** @xef: xe_file with which the stream was opened */
> struct xe_file *xef;
>
> + /** @ufence_syncobj: User fence syncobj */
> + struct drm_syncobj *ufence_syncobj;
> +
> + /** @ufence_timeline_value: User fence timeline value */
> + u64 ufence_timeline_value;
> +
> /** @last_fence: fence to use in stream destroy when needed
> */
> struct dma_fence *last_fence;
>
> diff --git a/drivers/gpu/drm/xe/xe_sync.c
> b/drivers/gpu/drm/xe/xe_sync.c
> index 82872a51f098..d48ab7b32ca5 100644
> --- a/drivers/gpu/drm/xe/xe_sync.c
> +++ b/drivers/gpu/drm/xe/xe_sync.c
> @@ -113,6 +113,8 @@ static void user_fence_cb(struct dma_fence
> *fence, struct dma_fence_cb *cb)
> int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
> struct xe_sync_entry *sync,
> struct drm_xe_sync __user *sync_user,
> + struct drm_syncobj *ufence_syncobj,
> + u64 ufence_timeline_value,
> unsigned int flags)
> {
> struct drm_xe_sync sync_in;
> @@ -192,10 +194,15 @@ int xe_sync_entry_parse(struct xe_device *xe,
> struct xe_file *xef,
> if (exec) {
> sync->addr = sync_in.addr;
> } else {
> + sync->ufence_timeline_value =
> ufence_timeline_value;
> sync->ufence = user_fence_create(xe,
> sync_in.addr,
>
> sync_in.timeline_value);
> if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence)))
> return PTR_ERR(sync->ufence);
> + sync->ufence_chain_fence =
> dma_fence_chain_alloc();
> + if (!sync->ufence_chain_fence)
> + return -ENOMEM;
> + sync->ufence_syncobj = ufence_syncobj;
> }
>
> break;
> @@ -239,7 +246,12 @@ void xe_sync_entry_signal(struct xe_sync_entry
> *sync, struct dma_fence *fence)
> } else if (sync->ufence) {
> int err;
>
> - dma_fence_get(fence);
> + drm_syncobj_add_point(sync->ufence_syncobj,
> + sync->ufence_chain_fence,
> + fence, sync-
> >ufence_timeline_value);
> + sync->ufence_chain_fence = NULL;
> +
> + fence = drm_syncobj_fence_get(sync->ufence_syncobj);
> user_fence_get(sync->ufence);
> err = dma_fence_add_callback(fence, &sync->ufence-
> >cb,
> user_fence_cb);
> @@ -259,7 +271,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry
> *sync)
> drm_syncobj_put(sync->syncobj);
> dma_fence_put(sync->fence);
> dma_fence_chain_free(sync->chain_fence);
> - if (sync->ufence)
> + dma_fence_chain_free(sync->ufence_chain_fence);
> + if (!IS_ERR_OR_NULL(sync->ufence))
> user_fence_put(sync->ufence);
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_sync.h
> b/drivers/gpu/drm/xe/xe_sync.h
> index 256ffc1e54dc..51f2d803e977 100644
> --- a/drivers/gpu/drm/xe/xe_sync.h
> +++ b/drivers/gpu/drm/xe/xe_sync.h
> @@ -8,6 +8,7 @@
>
> #include "xe_sync_types.h"
>
> +struct drm_syncobj;
> struct xe_device;
> struct xe_exec_queue;
> struct xe_file;
> @@ -21,6 +22,8 @@ struct xe_vm;
> int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
> struct xe_sync_entry *sync,
> struct drm_xe_sync __user *sync_user,
> + struct drm_syncobj *ufence_syncobj,
> + u64 ufence_timeline_value,
> unsigned int flags);
> int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
> struct xe_sched_job *job);
> diff --git a/drivers/gpu/drm/xe/xe_sync_types.h
> b/drivers/gpu/drm/xe/xe_sync_types.h
> index 30ac3f51993b..b88f1833e28c 100644
> --- a/drivers/gpu/drm/xe/xe_sync_types.h
> +++ b/drivers/gpu/drm/xe/xe_sync_types.h
> @@ -18,9 +18,12 @@ struct xe_sync_entry {
> struct drm_syncobj *syncobj;
> struct dma_fence *fence;
> struct dma_fence_chain *chain_fence;
> + struct dma_fence_chain *ufence_chain_fence;
> + struct drm_syncobj *ufence_syncobj;
> struct xe_user_fence *ufence;
> u64 addr;
> u64 timeline_value;
> + u64 ufence_timeline_value;
> u32 type;
> u32 flags;
> };
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 10d77666a425..4058c476aa2d 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -3633,8 +3633,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
> void *data, struct drm_file *file)
>
> syncs_user = u64_to_user_ptr(args->syncs);
> for (num_syncs = 0; num_syncs < args->num_syncs;
> num_syncs++) {
> + struct xe_exec_queue *__q = q ?: vm->q[0];
> +
> err = xe_sync_entry_parse(xe, xef,
> &syncs[num_syncs],
> &syncs_user[num_syncs],
> + __q->ufence_syncobj,
> + ++__q-
> >ufence_timeline_value,
> (xe_vm_in_lr_mode(vm) ?
> SYNC_PARSE_FLAG_LR_MODE :
> 0) |
> (!args->num_binds ?
next prev parent reply other threads:[~2025-10-30 7:58 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-29 20:57 [PATCH v5 0/6] Fix serialization on burst of unbinds - v2 Matthew Brost
2025-10-29 20:57 ` [PATCH v5 1/6] drm/xe: Enforce correct user fence signaling order using drm_syncobjs Matthew Brost
2025-10-30 7:58 ` Thomas Hellström [this message]
2025-10-30 12:54 ` Matthew Brost
2025-10-29 20:57 ` [PATCH v5 2/6] drm/xe: Attach last fence to TLB invalidation job queues Matthew Brost
2025-10-30 8:24 ` Thomas Hellström
2025-10-29 20:57 ` [PATCH v5 3/6] drm/xe: Decouple bind queue last fence from TLB invalidations Matthew Brost
2025-10-30 9:52 ` Thomas Hellström
2025-10-29 20:57 ` [PATCH v5 4/6] drm/xe: Skip TLB invalidation waits in page fault binds Matthew Brost
2025-11-03 15:19 ` Thomas Hellström
2025-10-29 20:57 ` [PATCH v5 5/6] drm/xe: Disallow input fences on zero batch execs and zero binds Matthew Brost
2025-11-03 15:21 ` Thomas Hellström
2025-11-03 15:22 ` Thomas Hellström
2025-10-29 20:57 ` [PATCH v5 6/6] drm/xe: Remove last fence dependency check from binds Matthew Brost
2025-10-30 8:43 ` Thomas Hellström
2025-11-03 15:24 ` Thomas Hellström
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=f647296ae0d8150745852b439da169529aa47e4d.camel@linux.intel.com \
--to=thomas.hellstrom@linux.intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=matthew.brost@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.