Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Brost <matthew.brost@intel.com>
To: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
Cc: <intel-xe@lists.freedesktop.org>
Subject: Re: [PATCH v5 1/6] drm/xe: Enforce correct user fence signaling order using drm_syncobjs
Date: Thu, 30 Oct 2025 05:54:02 -0700	[thread overview]
Message-ID: <aQNf6qkmh8WSkFbo@lstrano-desk.jf.intel.com> (raw)
In-Reply-To: <f647296ae0d8150745852b439da169529aa47e4d.camel@linux.intel.com>

On Thu, Oct 30, 2025 at 08:58:53AM +0100, Thomas Hellström wrote:
> On Wed, 2025-10-29 at 13:57 -0700, Matthew Brost wrote:
> > Prevent application hangs caused by out-of-order fence signaling when
> > user fences are attached. Use drm_syncobj (via dma-fence-chain) to
> > guarantee that each user fence signals in order, regardless of the
> > signaling order of the attached fences. Ensure user fence writebacks
> > to
> > user space occur in the correct sequence.
> 
> Perhaps describe exactly what the signalling order is supposed to be?
> Like calling order per exec_queue etc. (strictly I guess it's the
> grabbing order of a certain lock, like the vm resv?)
> 
> Do we need a Fixes: for this?

Yes, we probably should add a fixes tag. It seems possible to hit, but
by chance of timing we haven't hit this yet. Will add ahead of merge.

Matt

> Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> 
> > 
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_exec.c             |  3 +-
> >  drivers/gpu/drm/xe/xe_exec_queue.c       | 13 +++++++
> >  drivers/gpu/drm/xe/xe_exec_queue_types.h |  7 ++++
> >  drivers/gpu/drm/xe/xe_oa.c               | 45 ++++++++++++++++------
> > --
> >  drivers/gpu/drm/xe/xe_oa_types.h         |  8 +++++
> >  drivers/gpu/drm/xe/xe_sync.c             | 17 +++++++--
> >  drivers/gpu/drm/xe/xe_sync.h             |  3 ++
> >  drivers/gpu/drm/xe/xe_sync_types.h       |  3 ++
> >  drivers/gpu/drm/xe/xe_vm.c               |  4 +++
> >  9 files changed, 85 insertions(+), 18 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_exec.c
> > b/drivers/gpu/drm/xe/xe_exec.c
> > index 0dc27476832b..f7b9d8715053 100644
> > --- a/drivers/gpu/drm/xe/xe_exec.c
> > +++ b/drivers/gpu/drm/xe/xe_exec.c
> > @@ -166,7 +166,8 @@ int xe_exec_ioctl(struct drm_device *dev, void
> > *data, struct drm_file *file)
> >  
> >  	for (num_syncs = 0; num_syncs < args->num_syncs;
> > num_syncs++) {
> >  		err = xe_sync_entry_parse(xe, xef,
> > &syncs[num_syncs],
> > -					  &syncs_user[num_syncs],
> > SYNC_PARSE_FLAG_EXEC |
> > +					  &syncs_user[num_syncs],
> > NULL, 0,
> > +					  SYNC_PARSE_FLAG_EXEC |
> >  					  (xe_vm_in_lr_mode(vm) ?
> >  					   SYNC_PARSE_FLAG_LR_MODE :
> > 0));
> >  		if (err)
> > diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c
> > b/drivers/gpu/drm/xe/xe_exec_queue.c
> > index 90cbc95f8e2e..6e168efbac65 100644
> > --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> > +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> > @@ -10,6 +10,7 @@
> >  #include <drm/drm_device.h>
> >  #include <drm/drm_drv.h>
> >  #include <drm/drm_file.h>
> > +#include <drm/drm_syncobj.h>
> >  #include <uapi/drm/xe_drm.h>
> >  
> >  #include "xe_dep_scheduler.h"
> > @@ -345,6 +346,7 @@ struct xe_exec_queue
> > *xe_exec_queue_create_bind(struct xe_device *xe,
> >  	struct xe_gt *gt = tile->primary_gt;
> >  	struct xe_exec_queue *q;
> >  	struct xe_vm *migrate_vm;
> > +	int err;
> >  
> >  	migrate_vm = xe_migrate_get_vm(tile->migrate);
> >  	if (xe->info.has_usm) {
> > @@ -368,6 +370,14 @@ struct xe_exec_queue
> > *xe_exec_queue_create_bind(struct xe_device *xe,
> >  	}
> >  	xe_vm_put(migrate_vm);
> >  
> > +	err = drm_syncobj_create(&q->ufence_syncobj,
> > +				 DRM_SYNCOBJ_CREATE_SIGNALED,
> > +				 NULL);
> > +	if (err) {
> > +		xe_exec_queue_put(q);
> > +		return ERR_PTR(err);
> > +	}
> > +
> >  	return q;
> >  }
> >  ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);
> > @@ -377,6 +387,9 @@ void xe_exec_queue_destroy(struct kref *ref)
> >  	struct xe_exec_queue *q = container_of(ref, struct
> > xe_exec_queue, refcount);
> >  	struct xe_exec_queue *eq, *next;
> >  
> > +	if (q->ufence_syncobj)
> > +		drm_syncobj_put(q->ufence_syncobj);
> > +
> >  	if (xe_exec_queue_uses_pxp(q))
> >  		xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
> >  
> > diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > index 282505fa1377..838266c3914b 100644
> > --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > @@ -15,6 +15,7 @@
> >  #include "xe_hw_fence_types.h"
> >  #include "xe_lrc_types.h"
> >  
> > +struct drm_syncobj;
> >  struct xe_execlist_exec_queue;
> >  struct xe_gt;
> >  struct xe_guc_exec_queue;
> > @@ -155,6 +156,12 @@ struct xe_exec_queue {
> >  		struct list_head link;
> >  	} pxp;
> >  
> > +	/** @ufence_syncobj: User fence syncobj */
> > +	struct drm_syncobj *ufence_syncobj;
> > +
> > +	/** @ufence_timeline_value: User fence timeline value */
> > +	u64 ufence_timeline_value;
> > +
> >  	/** @ops: submission backend exec queue operations */
> >  	const struct xe_exec_queue_ops *ops;
> >  
> > diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> > index f901ba52b403..7a13a7bd99a6 100644
> > --- a/drivers/gpu/drm/xe/xe_oa.c
> > +++ b/drivers/gpu/drm/xe/xe_oa.c
> > @@ -10,6 +10,7 @@
> >  
> >  #include <drm/drm_drv.h>
> >  #include <drm/drm_managed.h>
> > +#include <drm/drm_syncobj.h>
> >  #include <uapi/drm/xe_drm.h>
> >  
> >  #include <generated/xe_wa_oob.h>
> > @@ -1390,7 +1391,9 @@ static int xe_oa_user_extensions(struct xe_oa
> > *oa, enum xe_oa_user_extn_from fro
> >  	return 0;
> >  }
> >  
> > -static int xe_oa_parse_syncs(struct xe_oa *oa, struct
> > xe_oa_open_param *param)
> > +static int xe_oa_parse_syncs(struct xe_oa *oa,
> > +			     struct xe_oa_stream *stream,
> > +			     struct xe_oa_open_param *param)
> >  {
> >  	int ret, num_syncs, num_ufence = 0;
> >  
> > @@ -1410,7 +1413,9 @@ static int xe_oa_parse_syncs(struct xe_oa *oa,
> > struct xe_oa_open_param *param)
> >  
> >  	for (num_syncs = 0; num_syncs < param->num_syncs;
> > num_syncs++) {
> >  		ret = xe_sync_entry_parse(oa->xe, param->xef,
> > &param->syncs[num_syncs],
> > -					  &param-
> > >syncs_user[num_syncs], 0);
> > +					  &param-
> > >syncs_user[num_syncs],
> > +					  stream->ufence_syncobj,
> > +					  ++stream-
> > >ufence_timeline_value, 0);
> >  		if (ret)
> >  			goto err_syncs;
> >  
> > @@ -1540,7 +1545,7 @@ static long xe_oa_config_locked(struct
> > xe_oa_stream *stream, u64 arg)
> >  		return -ENODEV;
> >  
> >  	param.xef = stream->xef;
> > -	err = xe_oa_parse_syncs(stream->oa, &param);
> > +	err = xe_oa_parse_syncs(stream->oa, stream, &param);
> >  	if (err)
> >  		goto err_config_put;
> >  
> > @@ -1636,6 +1641,7 @@ static void xe_oa_destroy_locked(struct
> > xe_oa_stream *stream)
> >  	if (stream->exec_q)
> >  		xe_exec_queue_put(stream->exec_q);
> >  
> > +	drm_syncobj_put(stream->ufence_syncobj);
> >  	kfree(stream);
> >  }
> >  
> > @@ -1827,6 +1833,7 @@ static int
> > xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
> >  					  struct xe_oa_open_param
> > *param)
> >  {
> >  	struct xe_oa_stream *stream;
> > +	struct drm_syncobj *ufence_syncobj;
> >  	int stream_fd;
> >  	int ret;
> >  
> > @@ -1837,17 +1844,31 @@ static int
> > xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
> >  		goto exit;
> >  	}
> >  
> > +	ret = drm_syncobj_create(&ufence_syncobj,
> > DRM_SYNCOBJ_CREATE_SIGNALED,
> > +				 NULL);
> > +	if (ret)
> > +		goto exit;
> > +
> >  	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
> >  	if (!stream) {
> >  		ret = -ENOMEM;
> > -		goto exit;
> > +		goto err_syncobj;
> >  	}
> > -
> > +	stream->ufence_syncobj = ufence_syncobj;
> >  	stream->oa = oa;
> > -	ret = xe_oa_stream_init(stream, param);
> > +
> > +	ret = xe_oa_parse_syncs(oa, stream, param);
> >  	if (ret)
> >  		goto err_free;
> >  
> > +	ret = xe_oa_stream_init(stream, param);
> > +	if (ret) {
> > +		while (param->num_syncs--)
> > +			xe_sync_entry_cleanup(&param->syncs[param-
> > >num_syncs]);
> > +		kfree(param->syncs);
> > +		goto err_free;
> > +	}
> > +
> >  	if (!param->disabled) {
> >  		ret = xe_oa_enable_locked(stream);
> >  		if (ret)
> > @@ -1871,6 +1892,8 @@ static int
> > xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
> >  	xe_oa_stream_destroy(stream);
> >  err_free:
> >  	kfree(stream);
> > +err_syncobj:
> > +	drm_syncobj_put(ufence_syncobj);
> >  exit:
> >  	return ret;
> >  }
> > @@ -2084,22 +2107,14 @@ int xe_oa_stream_open_ioctl(struct drm_device
> > *dev, u64 data, struct drm_file *f
> >  		goto err_exec_q;
> >  	}
> >  
> > -	ret = xe_oa_parse_syncs(oa, &param);
> > -	if (ret)
> > -		goto err_exec_q;
> > -
> >  	mutex_lock(&param.hwe->gt->oa.gt_lock);
> >  	ret = xe_oa_stream_open_ioctl_locked(oa, &param);
> >  	mutex_unlock(&param.hwe->gt->oa.gt_lock);
> >  	if (ret < 0)
> > -		goto err_sync_cleanup;
> > +		goto err_exec_q;
> >  
> >  	return ret;
> >  
> > -err_sync_cleanup:
> > -	while (param.num_syncs--)
> > -
> > 		xe_sync_entry_cleanup(&param.syncs[param.num_syncs]);
> > -	kfree(param.syncs);
> >  err_exec_q:
> >  	if (param.exec_q)
> >  		xe_exec_queue_put(param.exec_q);
> > diff --git a/drivers/gpu/drm/xe/xe_oa_types.h
> > b/drivers/gpu/drm/xe/xe_oa_types.h
> > index 2628f78c4e8d..daf701b5d48b 100644
> > --- a/drivers/gpu/drm/xe/xe_oa_types.h
> > +++ b/drivers/gpu/drm/xe/xe_oa_types.h
> > @@ -15,6 +15,8 @@
> >  #include "regs/xe_reg_defs.h"
> >  #include "xe_hw_engine_types.h"
> >  
> > +struct drm_syncobj;
> > +
> >  #define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M
> >  
> >  enum xe_oa_report_header {
> > @@ -248,6 +250,12 @@ struct xe_oa_stream {
> >  	/** @xef: xe_file with which the stream was opened */
> >  	struct xe_file *xef;
> >  
> > +	/** @ufence_syncobj: User fence syncobj */
> > +	struct drm_syncobj *ufence_syncobj;
> > +
> > +	/** @ufence_timeline_value: User fence timeline value */
> > +	u64 ufence_timeline_value;
> > +
> >  	/** @last_fence: fence to use in stream destroy when needed
> > */
> >  	struct dma_fence *last_fence;
> >  
> > diff --git a/drivers/gpu/drm/xe/xe_sync.c
> > b/drivers/gpu/drm/xe/xe_sync.c
> > index 82872a51f098..d48ab7b32ca5 100644
> > --- a/drivers/gpu/drm/xe/xe_sync.c
> > +++ b/drivers/gpu/drm/xe/xe_sync.c
> > @@ -113,6 +113,8 @@ static void user_fence_cb(struct dma_fence
> > *fence, struct dma_fence_cb *cb)
> >  int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
> >  			struct xe_sync_entry *sync,
> >  			struct drm_xe_sync __user *sync_user,
> > +			struct drm_syncobj *ufence_syncobj,
> > +			u64 ufence_timeline_value,
> >  			unsigned int flags)
> >  {
> >  	struct drm_xe_sync sync_in;
> > @@ -192,10 +194,15 @@ int xe_sync_entry_parse(struct xe_device *xe,
> > struct xe_file *xef,
> >  		if (exec) {
> >  			sync->addr = sync_in.addr;
> >  		} else {
> > +			sync->ufence_timeline_value =
> > ufence_timeline_value;
> >  			sync->ufence = user_fence_create(xe,
> > sync_in.addr,
> >  							
> > sync_in.timeline_value);
> >  			if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence)))
> >  				return PTR_ERR(sync->ufence);
> > +			sync->ufence_chain_fence =
> > dma_fence_chain_alloc();
> > +			if (!sync->ufence_chain_fence)
> > +				return -ENOMEM;
> > +			sync->ufence_syncobj = ufence_syncobj;
> >  		}
> >  
> >  		break;
> > @@ -239,7 +246,12 @@ void xe_sync_entry_signal(struct xe_sync_entry
> > *sync, struct dma_fence *fence)
> >  	} else if (sync->ufence) {
> >  		int err;
> >  
> > -		dma_fence_get(fence);
> > +		drm_syncobj_add_point(sync->ufence_syncobj,
> > +				      sync->ufence_chain_fence,
> > +				      fence, sync-
> > >ufence_timeline_value);
> > +		sync->ufence_chain_fence = NULL;
> > +
> > +		fence = drm_syncobj_fence_get(sync->ufence_syncobj);
> >  		user_fence_get(sync->ufence);
> >  		err = dma_fence_add_callback(fence, &sync->ufence-
> > >cb,
> >  					     user_fence_cb);
> > @@ -259,7 +271,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry
> > *sync)
> >  		drm_syncobj_put(sync->syncobj);
> >  	dma_fence_put(sync->fence);
> >  	dma_fence_chain_free(sync->chain_fence);
> > -	if (sync->ufence)
> > +	dma_fence_chain_free(sync->ufence_chain_fence);
> > +	if (!IS_ERR_OR_NULL(sync->ufence))
> >  		user_fence_put(sync->ufence);
> >  }
> >  
> > diff --git a/drivers/gpu/drm/xe/xe_sync.h
> > b/drivers/gpu/drm/xe/xe_sync.h
> > index 256ffc1e54dc..51f2d803e977 100644
> > --- a/drivers/gpu/drm/xe/xe_sync.h
> > +++ b/drivers/gpu/drm/xe/xe_sync.h
> > @@ -8,6 +8,7 @@
> >  
> >  #include "xe_sync_types.h"
> >  
> > +struct drm_syncobj;
> >  struct xe_device;
> >  struct xe_exec_queue;
> >  struct xe_file;
> > @@ -21,6 +22,8 @@ struct xe_vm;
> >  int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
> >  			struct xe_sync_entry *sync,
> >  			struct drm_xe_sync __user *sync_user,
> > +			struct drm_syncobj *ufence_syncobj,
> > +			u64 ufence_timeline_value,
> >  			unsigned int flags);
> >  int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
> >  			   struct xe_sched_job *job);
> > diff --git a/drivers/gpu/drm/xe/xe_sync_types.h
> > b/drivers/gpu/drm/xe/xe_sync_types.h
> > index 30ac3f51993b..b88f1833e28c 100644
> > --- a/drivers/gpu/drm/xe/xe_sync_types.h
> > +++ b/drivers/gpu/drm/xe/xe_sync_types.h
> > @@ -18,9 +18,12 @@ struct xe_sync_entry {
> >  	struct drm_syncobj *syncobj;
> >  	struct dma_fence *fence;
> >  	struct dma_fence_chain *chain_fence;
> > +	struct dma_fence_chain *ufence_chain_fence;
> > +	struct drm_syncobj *ufence_syncobj;
> >  	struct xe_user_fence *ufence;
> >  	u64 addr;
> >  	u64 timeline_value;
> > +	u64 ufence_timeline_value;
> >  	u32 type;
> >  	u32 flags;
> >  };
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 10d77666a425..4058c476aa2d 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -3633,8 +3633,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
> > void *data, struct drm_file *file)
> >  
> >  	syncs_user = u64_to_user_ptr(args->syncs);
> >  	for (num_syncs = 0; num_syncs < args->num_syncs;
> > num_syncs++) {
> > +		struct xe_exec_queue *__q = q ?: vm->q[0];
> > +
> >  		err = xe_sync_entry_parse(xe, xef,
> > &syncs[num_syncs],
> >  					  &syncs_user[num_syncs],
> > +					  __q->ufence_syncobj,
> > +					  ++__q-
> > >ufence_timeline_value,
> >  					  (xe_vm_in_lr_mode(vm) ?
> >  					   SYNC_PARSE_FLAG_LR_MODE :
> > 0) |
> >  					  (!args->num_binds ?
> 

  reply	other threads:[~2025-10-30 12:54 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-29 20:57 [PATCH v5 0/6] Fix serialization on burst of unbinds - v2 Matthew Brost
2025-10-29 20:57 ` [PATCH v5 1/6] drm/xe: Enforce correct user fence signaling order using drm_syncobjs Matthew Brost
2025-10-30  7:58   ` Thomas Hellström
2025-10-30 12:54     ` Matthew Brost [this message]
2025-10-29 20:57 ` [PATCH v5 2/6] drm/xe: Attach last fence to TLB invalidation job queues Matthew Brost
2025-10-30  8:24   ` Thomas Hellström
2025-10-29 20:57 ` [PATCH v5 3/6] drm/xe: Decouple bind queue last fence from TLB invalidations Matthew Brost
2025-10-30  9:52   ` Thomas Hellström
2025-10-29 20:57 ` [PATCH v5 4/6] drm/xe: Skip TLB invalidation waits in page fault binds Matthew Brost
2025-11-03 15:19   ` Thomas Hellström
2025-10-29 20:57 ` [PATCH v5 5/6] drm/xe: Disallow input fences on zero batch execs and zero binds Matthew Brost
2025-11-03 15:21   ` Thomas Hellström
2025-11-03 15:22     ` Thomas Hellström
2025-10-29 20:57 ` [PATCH v5 6/6] drm/xe: Remove last fence dependency check from binds Matthew Brost
2025-10-30  8:43   ` Thomas Hellström
2025-11-03 15:24   ` Thomas Hellström

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aQNf6qkmh8WSkFbo@lstrano-desk.jf.intel.com \
    --to=matthew.brost@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=thomas.hellstrom@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox