From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: Matthew Brost <matthew.brost@intel.com>, intel-xe@lists.freedesktop.org
Subject: Re: [PATCH v5 1/6] drm/xe: Enforce correct user fence signaling order using drm_syncobjs
Date: Thu, 30 Oct 2025 08:58:53 +0100 [thread overview]
Message-ID: <f647296ae0d8150745852b439da169529aa47e4d.camel@linux.intel.com> (raw)
In-Reply-To: <20251029205719.2746501-2-matthew.brost@intel.com>
On Wed, 2025-10-29 at 13:57 -0700, Matthew Brost wrote:
> Prevent application hangs caused by out-of-order fence signaling when
> user fences are attached. Use drm_syncobj (via dma-fence-chain) to
> guarantee that each user fence signals in order, regardless of the
> signaling order of the attached fences. Ensure user fence writebacks
> to
> user space occur in the correct sequence.
Perhaps describe exactly what the signalling order is supposed to be?
Like calling order per exec_queue etc. (strictly I guess it's the
grabbing order of a certain lock, like the vm resv?)
Do we need a Fixes: for this?
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>
> Signed-of-by: Matthew Brost <matthew.brost@intel.com>
> ---
> drivers/gpu/drm/xe/xe_exec.c | 3 +-
> drivers/gpu/drm/xe/xe_exec_queue.c | 13 +++++++
> drivers/gpu/drm/xe/xe_exec_queue_types.h | 7 ++++
> drivers/gpu/drm/xe/xe_oa.c | 45 ++++++++++++++++------
> --
> drivers/gpu/drm/xe/xe_oa_types.h | 8 +++++
> drivers/gpu/drm/xe/xe_sync.c | 17 +++++++--
> drivers/gpu/drm/xe/xe_sync.h | 3 ++
> drivers/gpu/drm/xe/xe_sync_types.h | 3 ++
> drivers/gpu/drm/xe/xe_vm.c | 4 +++
> 9 files changed, 85 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_exec.c
> b/drivers/gpu/drm/xe/xe_exec.c
> index 0dc27476832b..f7b9d8715053 100644
> --- a/drivers/gpu/drm/xe/xe_exec.c
> +++ b/drivers/gpu/drm/xe/xe_exec.c
> @@ -166,7 +166,8 @@ int xe_exec_ioctl(struct drm_device *dev, void
> *data, struct drm_file *file)
>
> for (num_syncs = 0; num_syncs < args->num_syncs;
> num_syncs++) {
> err = xe_sync_entry_parse(xe, xef,
> &syncs[num_syncs],
> - &syncs_user[num_syncs],
> SYNC_PARSE_FLAG_EXEC |
> + &syncs_user[num_syncs],
> NULL, 0,
> + SYNC_PARSE_FLAG_EXEC |
> (xe_vm_in_lr_mode(vm) ?
> SYNC_PARSE_FLAG_LR_MODE :
> 0));
> if (err)
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c
> b/drivers/gpu/drm/xe/xe_exec_queue.c
> index 90cbc95f8e2e..6e168efbac65 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -10,6 +10,7 @@
> #include <drm/drm_device.h>
> #include <drm/drm_drv.h>
> #include <drm/drm_file.h>
> +#include <drm/drm_syncobj.h>
> #include <uapi/drm/xe_drm.h>
>
> #include "xe_dep_scheduler.h"
> @@ -345,6 +346,7 @@ struct xe_exec_queue
> *xe_exec_queue_create_bind(struct xe_device *xe,
> struct xe_gt *gt = tile->primary_gt;
> struct xe_exec_queue *q;
> struct xe_vm *migrate_vm;
> + int err;
>
> migrate_vm = xe_migrate_get_vm(tile->migrate);
> if (xe->info.has_usm) {
> @@ -368,6 +370,14 @@ struct xe_exec_queue
> *xe_exec_queue_create_bind(struct xe_device *xe,
> }
> xe_vm_put(migrate_vm);
>
> + err = drm_syncobj_create(&q->ufence_syncobj,
> + DRM_SYNCOBJ_CREATE_SIGNALED,
> + NULL);
> + if (err) {
> + xe_exec_queue_put(q);
> + return ERR_PTR(err);
> + }
> +
> return q;
> }
> ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);
> @@ -377,6 +387,9 @@ void xe_exec_queue_destroy(struct kref *ref)
> struct xe_exec_queue *q = container_of(ref, struct
> xe_exec_queue, refcount);
> struct xe_exec_queue *eq, *next;
>
> + if (q->ufence_syncobj)
> + drm_syncobj_put(q->ufence_syncobj);
> +
> if (xe_exec_queue_uses_pxp(q))
> xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
>
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> index 282505fa1377..838266c3914b 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> @@ -15,6 +15,7 @@
> #include "xe_hw_fence_types.h"
> #include "xe_lrc_types.h"
>
> +struct drm_syncobj;
> struct xe_execlist_exec_queue;
> struct xe_gt;
> struct xe_guc_exec_queue;
> @@ -155,6 +156,12 @@ struct xe_exec_queue {
> struct list_head link;
> } pxp;
>
> + /** @ufence_syncobj: User fence syncobj */
> + struct drm_syncobj *ufence_syncobj;
> +
> + /** @ufence_timeline_value: User fence timeline value */
> + u64 ufence_timeline_value;
> +
> /** @ops: submission backend exec queue operations */
> const struct xe_exec_queue_ops *ops;
>
> diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> index f901ba52b403..7a13a7bd99a6 100644
> --- a/drivers/gpu/drm/xe/xe_oa.c
> +++ b/drivers/gpu/drm/xe/xe_oa.c
> @@ -10,6 +10,7 @@
>
> #include <drm/drm_drv.h>
> #include <drm/drm_managed.h>
> +#include <drm/drm_syncobj.h>
> #include <uapi/drm/xe_drm.h>
>
> #include <generated/xe_wa_oob.h>
> @@ -1390,7 +1391,9 @@ static int xe_oa_user_extensions(struct xe_oa
> *oa, enum xe_oa_user_extn_from fro
> return 0;
> }
>
> -static int xe_oa_parse_syncs(struct xe_oa *oa, struct
> xe_oa_open_param *param)
> +static int xe_oa_parse_syncs(struct xe_oa *oa,
> + struct xe_oa_stream *stream,
> + struct xe_oa_open_param *param)
> {
> int ret, num_syncs, num_ufence = 0;
>
> @@ -1410,7 +1413,9 @@ static int xe_oa_parse_syncs(struct xe_oa *oa,
> struct xe_oa_open_param *param)
>
> for (num_syncs = 0; num_syncs < param->num_syncs;
> num_syncs++) {
> ret = xe_sync_entry_parse(oa->xe, param->xef,
> ¶m->syncs[num_syncs],
> - ¶m-
> >syncs_user[num_syncs], 0);
> + ¶m-
> >syncs_user[num_syncs],
> + stream->ufence_syncobj,
> + ++stream-
> >ufence_timeline_value, 0);
> if (ret)
> goto err_syncs;
>
> @@ -1540,7 +1545,7 @@ static long xe_oa_config_locked(struct
> xe_oa_stream *stream, u64 arg)
> return -ENODEV;
>
> param.xef = stream->xef;
> - err = xe_oa_parse_syncs(stream->oa, ¶m);
> + err = xe_oa_parse_syncs(stream->oa, stream, ¶m);
> if (err)
> goto err_config_put;
>
> @@ -1636,6 +1641,7 @@ static void xe_oa_destroy_locked(struct
> xe_oa_stream *stream)
> if (stream->exec_q)
> xe_exec_queue_put(stream->exec_q);
>
> + drm_syncobj_put(stream->ufence_syncobj);
> kfree(stream);
> }
>
> @@ -1827,6 +1833,7 @@ static int
> xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
> struct xe_oa_open_param
> *param)
> {
> struct xe_oa_stream *stream;
> + struct drm_syncobj *ufence_syncobj;
> int stream_fd;
> int ret;
>
> @@ -1837,17 +1844,31 @@ static int
> xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
> goto exit;
> }
>
> + ret = drm_syncobj_create(&ufence_syncobj,
> DRM_SYNCOBJ_CREATE_SIGNALED,
> + NULL);
> + if (ret)
> + goto exit;
> +
> stream = kzalloc(sizeof(*stream), GFP_KERNEL);
> if (!stream) {
> ret = -ENOMEM;
> - goto exit;
> + goto err_syncobj;
> }
> -
> + stream->ufence_syncobj = ufence_syncobj;
> stream->oa = oa;
> - ret = xe_oa_stream_init(stream, param);
> +
> + ret = xe_oa_parse_syncs(oa, stream, param);
> if (ret)
> goto err_free;
>
> + ret = xe_oa_stream_init(stream, param);
> + if (ret) {
> + while (param->num_syncs--)
> + xe_sync_entry_cleanup(¶m->syncs[param-
> >num_syncs]);
> + kfree(param->syncs);
> + goto err_free;
> + }
> +
> if (!param->disabled) {
> ret = xe_oa_enable_locked(stream);
> if (ret)
> @@ -1871,6 +1892,8 @@ static int
> xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
> xe_oa_stream_destroy(stream);
> err_free:
> kfree(stream);
> +err_syncobj:
> + drm_syncobj_put(ufence_syncobj);
> exit:
> return ret;
> }
> @@ -2084,22 +2107,14 @@ int xe_oa_stream_open_ioctl(struct drm_device
> *dev, u64 data, struct drm_file *f
> goto err_exec_q;
> }
>
> - ret = xe_oa_parse_syncs(oa, ¶m);
> - if (ret)
> - goto err_exec_q;
> -
> mutex_lock(¶m.hwe->gt->oa.gt_lock);
> ret = xe_oa_stream_open_ioctl_locked(oa, ¶m);
> mutex_unlock(¶m.hwe->gt->oa.gt_lock);
> if (ret < 0)
> - goto err_sync_cleanup;
> + goto err_exec_q;
>
> return ret;
>
> -err_sync_cleanup:
> - while (param.num_syncs--)
> -
> xe_sync_entry_cleanup(¶m.syncs[param.num_syncs]);
> - kfree(param.syncs);
> err_exec_q:
> if (param.exec_q)
> xe_exec_queue_put(param.exec_q);
> diff --git a/drivers/gpu/drm/xe/xe_oa_types.h
> b/drivers/gpu/drm/xe/xe_oa_types.h
> index 2628f78c4e8d..daf701b5d48b 100644
> --- a/drivers/gpu/drm/xe/xe_oa_types.h
> +++ b/drivers/gpu/drm/xe/xe_oa_types.h
> @@ -15,6 +15,8 @@
> #include "regs/xe_reg_defs.h"
> #include "xe_hw_engine_types.h"
>
> +struct drm_syncobj;
> +
> #define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M
>
> enum xe_oa_report_header {
> @@ -248,6 +250,12 @@ struct xe_oa_stream {
> /** @xef: xe_file with which the stream was opened */
> struct xe_file *xef;
>
> + /** @ufence_syncobj: User fence syncobj */
> + struct drm_syncobj *ufence_syncobj;
> +
> + /** @ufence_timeline_value: User fence timeline value */
> + u64 ufence_timeline_value;
> +
> /** @last_fence: fence to use in stream destroy when needed
> */
> struct dma_fence *last_fence;
>
> diff --git a/drivers/gpu/drm/xe/xe_sync.c
> b/drivers/gpu/drm/xe/xe_sync.c
> index 82872a51f098..d48ab7b32ca5 100644
> --- a/drivers/gpu/drm/xe/xe_sync.c
> +++ b/drivers/gpu/drm/xe/xe_sync.c
> @@ -113,6 +113,8 @@ static void user_fence_cb(struct dma_fence
> *fence, struct dma_fence_cb *cb)
> int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
> struct xe_sync_entry *sync,
> struct drm_xe_sync __user *sync_user,
> + struct drm_syncobj *ufence_syncobj,
> + u64 ufence_timeline_value,
> unsigned int flags)
> {
> struct drm_xe_sync sync_in;
> @@ -192,10 +194,15 @@ int xe_sync_entry_parse(struct xe_device *xe,
> struct xe_file *xef,
> if (exec) {
> sync->addr = sync_in.addr;
> } else {
> + sync->ufence_timeline_value =
> ufence_timeline_value;
> sync->ufence = user_fence_create(xe,
> sync_in.addr,
>
> sync_in.timeline_value);
> if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence)))
> return PTR_ERR(sync->ufence);
> + sync->ufence_chain_fence =
> dma_fence_chain_alloc();
> + if (!sync->ufence_chain_fence)
> + return -ENOMEM;
> + sync->ufence_syncobj = ufence_syncobj;
> }
>
> break;
> @@ -239,7 +246,12 @@ void xe_sync_entry_signal(struct xe_sync_entry
> *sync, struct dma_fence *fence)
> } else if (sync->ufence) {
> int err;
>
> - dma_fence_get(fence);
> + drm_syncobj_add_point(sync->ufence_syncobj,
> + sync->ufence_chain_fence,
> + fence, sync-
> >ufence_timeline_value);
> + sync->ufence_chain_fence = NULL;
> +
> + fence = drm_syncobj_fence_get(sync->ufence_syncobj);
> user_fence_get(sync->ufence);
> err = dma_fence_add_callback(fence, &sync->ufence-
> >cb,
> user_fence_cb);
> @@ -259,7 +271,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry
> *sync)
> drm_syncobj_put(sync->syncobj);
> dma_fence_put(sync->fence);
> dma_fence_chain_free(sync->chain_fence);
> - if (sync->ufence)
> + dma_fence_chain_free(sync->ufence_chain_fence);
> + if (!IS_ERR_OR_NULL(sync->ufence))
> user_fence_put(sync->ufence);
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_sync.h
> b/drivers/gpu/drm/xe/xe_sync.h
> index 256ffc1e54dc..51f2d803e977 100644
> --- a/drivers/gpu/drm/xe/xe_sync.h
> +++ b/drivers/gpu/drm/xe/xe_sync.h
> @@ -8,6 +8,7 @@
>
> #include "xe_sync_types.h"
>
> +struct drm_syncobj;
> struct xe_device;
> struct xe_exec_queue;
> struct xe_file;
> @@ -21,6 +22,8 @@ struct xe_vm;
> int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
> struct xe_sync_entry *sync,
> struct drm_xe_sync __user *sync_user,
> + struct drm_syncobj *ufence_syncobj,
> + u64 ufence_timeline_value,
> unsigned int flags);
> int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
> struct xe_sched_job *job);
> diff --git a/drivers/gpu/drm/xe/xe_sync_types.h
> b/drivers/gpu/drm/xe/xe_sync_types.h
> index 30ac3f51993b..b88f1833e28c 100644
> --- a/drivers/gpu/drm/xe/xe_sync_types.h
> +++ b/drivers/gpu/drm/xe/xe_sync_types.h
> @@ -18,9 +18,12 @@ struct xe_sync_entry {
> struct drm_syncobj *syncobj;
> struct dma_fence *fence;
> struct dma_fence_chain *chain_fence;
> + struct dma_fence_chain *ufence_chain_fence;
> + struct drm_syncobj *ufence_syncobj;
> struct xe_user_fence *ufence;
> u64 addr;
> u64 timeline_value;
> + u64 ufence_timeline_value;
> u32 type;
> u32 flags;
> };
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 10d77666a425..4058c476aa2d 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -3633,8 +3633,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
> void *data, struct drm_file *file)
>
> syncs_user = u64_to_user_ptr(args->syncs);
> for (num_syncs = 0; num_syncs < args->num_syncs;
> num_syncs++) {
> + struct xe_exec_queue *__q = q ?: vm->q[0];
> +
> err = xe_sync_entry_parse(xe, xef,
> &syncs[num_syncs],
> &syncs_user[num_syncs],
> + __q->ufence_syncobj,
> + ++__q-
> >ufence_timeline_value,
> (xe_vm_in_lr_mode(vm) ?
> SYNC_PARSE_FLAG_LR_MODE :
> 0) |
> (!args->num_binds ?
next prev parent reply other threads:[~2025-10-30 7:58 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-29 20:57 [PATCH v5 0/6] Fix serialization on burst of unbinds - v2 Matthew Brost
2025-10-29 20:57 ` [PATCH v5 1/6] drm/xe: Enforce correct user fence signaling order using drm_syncobjs Matthew Brost
2025-10-30 7:58 ` Thomas Hellström [this message]
2025-10-30 12:54 ` Matthew Brost
2025-10-29 20:57 ` [PATCH v5 2/6] drm/xe: Attach last fence to TLB invalidation job queues Matthew Brost
2025-10-30 8:24 ` Thomas Hellström
2025-10-29 20:57 ` [PATCH v5 3/6] drm/xe: Decouple bind queue last fence from TLB invalidations Matthew Brost
2025-10-30 9:52 ` Thomas Hellström
2025-10-29 20:57 ` [PATCH v5 4/6] drm/xe: Skip TLB invalidation waits in page fault binds Matthew Brost
2025-11-03 15:19 ` Thomas Hellström
2025-10-29 20:57 ` [PATCH v5 5/6] drm/xe: Disallow input fences on zero batch execs and zero binds Matthew Brost
2025-11-03 15:21 ` Thomas Hellström
2025-11-03 15:22 ` Thomas Hellström
2025-10-29 20:57 ` [PATCH v5 6/6] drm/xe: Remove last fence dependency check from binds Matthew Brost
2025-10-30 8:43 ` Thomas Hellström
2025-11-03 15:24 ` Thomas Hellström
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=f647296ae0d8150745852b439da169529aa47e4d.camel@linux.intel.com \
--to=thomas.hellstrom@linux.intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=matthew.brost@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox