Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: Matthew Brost <matthew.brost@intel.com>, intel-xe@lists.freedesktop.org
Subject: Re: [PATCH v5 2/6] drm/xe: Attach last fence to TLB invalidation job queues
Date: Thu, 30 Oct 2025 09:24:59 +0100	[thread overview]
Message-ID: <cc6705b5f45780f9750ecd3fa3234522456be7d2.camel@linux.intel.com> (raw)
In-Reply-To: <20251029205719.2746501-3-matthew.brost@intel.com>

On Wed, 2025-10-29 at 13:57 -0700, Matthew Brost wrote:
> Add support for attaching the last fence to TLB invalidation job
> queues
> to address serialization issues during bursts of unbind jobs. Ensure
> that user fence signaling for a bind job reflects both the bind job
> itself and the last fences of all related TLB invalidations. Maintain
> submission order based solely on the state of the bind and TLB
> invalidation queues.
> 
> Introduce support functions for last fence attachment to TLB
> invalidation queues.
> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>

Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>

> 
> ---
> v3:
>  - Fix assert in xe_exec_queue_tlb_inval_last_fence_set (CI)
>  - Ensure migrate lock held for migrate queues (Testing)
> v5:
>  - Style nits (Thomas)
>  - Rewrite commit message (Thomas)
> ---
>  drivers/gpu/drm/xe/xe_exec_queue.c       | 103
> ++++++++++++++++++++++-
>  drivers/gpu/drm/xe/xe_exec_queue.h       |  21 +++++
>  drivers/gpu/drm/xe/xe_exec_queue_types.h |   5 ++
>  drivers/gpu/drm/xe/xe_migrate.c          |  14 +++
>  drivers/gpu/drm/xe/xe_migrate.h          |   8 ++
>  drivers/gpu/drm/xe/xe_vm.c               |   7 +-
>  6 files changed, 156 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c
> b/drivers/gpu/drm/xe/xe_exec_queue.c
> index 6e168efbac65..b7551592fe3f 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -386,6 +386,7 @@ void xe_exec_queue_destroy(struct kref *ref)
>  {
>  	struct xe_exec_queue *q = container_of(ref, struct
> xe_exec_queue, refcount);
>  	struct xe_exec_queue *eq, *next;
> +	int i;
>  
>  	if (q->ufence_syncobj)
>  		drm_syncobj_put(q->ufence_syncobj);
> @@ -394,6 +395,9 @@ void xe_exec_queue_destroy(struct kref *ref)
>  		xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
>  
>  	xe_exec_queue_last_fence_put_unlocked(q);
> +	for_each_tlb_inval(i)
> +		xe_exec_queue_tlb_inval_last_fence_put_unlocked(q,
> i);
> +
>  	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
>  		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
>  					 multi_gt_link)
> @@ -1011,7 +1015,9 @@ int xe_exec_queue_destroy_ioctl(struct
> drm_device *dev, void *data,
>  static void xe_exec_queue_last_fence_lockdep_assert(struct
> xe_exec_queue *q,
>  						    struct xe_vm
> *vm)
>  {
> -	if (q->flags & EXEC_QUEUE_FLAG_VM) {
> +	if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) {
> +		xe_migrate_job_lock_assert(q);
> +	} else if (q->flags & EXEC_QUEUE_FLAG_VM) {
>  		lockdep_assert_held(&vm->lock);
>  	} else {
>  		xe_vm_assert_held(vm);
> @@ -1110,6 +1116,7 @@ void xe_exec_queue_last_fence_set(struct
> xe_exec_queue *q, struct xe_vm *vm,
>  				  struct dma_fence *fence)
>  {
>  	xe_exec_queue_last_fence_lockdep_assert(q, vm);
> +	xe_assert(vm->xe, !dma_fence_is_container(fence));
>  
>  	xe_exec_queue_last_fence_put(q, vm);
>  	q->last_fence = dma_fence_get(fence);
> @@ -1138,6 +1145,100 @@ int xe_exec_queue_last_fence_test_dep(struct
> xe_exec_queue *q, struct xe_vm *vm)
>  	return err;
>  }
>  
> +/**
> + * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB
> invalidation fence
> + * @q: The exec queue
> + * @vm: The VM the engine does a bind for
> + * @type: Either primary or media GT
> + */
> +void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
> +					    struct xe_vm *vm,
> +					    unsigned int type)
> +{
> +	xe_exec_queue_last_fence_lockdep_assert(q, vm);
> +	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT
> ||
> +		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
> +
> +	xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type);
> +}
> +
> +/**
> + * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to
> last TLB
> + * invalidation fence unlocked
> + * @q: The exec queue
> + * @type: Either primary or media GT
> + *
> + * Only safe to be called from xe_exec_queue_destroy().
> + */
> +void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct
> xe_exec_queue *q,
> +						     unsigned int
> type)
> +{
> +	xe_assert(q->vm->xe, type ==
> XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
> +		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
> +
> +	dma_fence_put(q->tlb_inval[type].last_fence);
> +	q->tlb_inval[type].last_fence = NULL;
> +}
> +
> +/**
> + * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB
> invalidation
> + * @q: The exec queue
> + * @vm: The VM the engine does a bind for
> + * @type: Either primary or media GT
> + *
> + * Get last fence, takes a ref
> + *
> + * Returns: last fence if not signaled, dma fence stub if signaled
> + */
> +struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct
> xe_exec_queue *q,
> +							 struct
> xe_vm *vm,
> +							 unsigned
> int type)
> +{
> +	struct dma_fence *fence;
> +
> +	xe_exec_queue_last_fence_lockdep_assert(q, vm);
> +	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT
> ||
> +		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
> +	xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
> +				      EXEC_QUEUE_FLAG_MIGRATE));
> +
> +	if (q->tlb_inval[type].last_fence &&
> +	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> +		     &q->tlb_inval[type].last_fence->flags))
> +		xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
> +
> +	fence = q->tlb_inval[type].last_fence ?:
> dma_fence_get_stub();
> +	dma_fence_get(fence);
> +	return fence;
> +}
> +
> +/**
> + * xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB
> invalidation
> + * @q: The exec queue
> + * @vm: The VM the engine does a bind for
> + * @fence: The fence
> + * @type: Either primary or media GT
> + *
> + * Set the last fence for the tlb invalidation type on the queue.
> Increases
> + * reference count for fence, when closing queue
> + * xe_exec_queue_tlb_inval_last_fence_put should be called.
> + */
> +void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
> +					    struct xe_vm *vm,
> +					    struct dma_fence *fence,
> +					    unsigned int type)
> +{
> +	xe_exec_queue_last_fence_lockdep_assert(q, vm);
> +	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT
> ||
> +		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
> +	xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
> +				      EXEC_QUEUE_FLAG_MIGRATE));
> +	xe_assert(vm->xe, !dma_fence_is_container(fence));
> +
> +	xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
> +	q->tlb_inval[type].last_fence = dma_fence_get(fence);
> +}
> +
>  /**
>   * xe_exec_queue_contexts_hwsp_rebase - Re-compute GGTT references
>   * within all LRCs of a queue.
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h
> b/drivers/gpu/drm/xe/xe_exec_queue.h
> index a4dfbe858bda..1ba7354b33d1 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.h
> @@ -14,6 +14,10 @@ struct drm_file;
>  struct xe_device;
>  struct xe_file;
>  
> +#define for_each_tlb_inval(__i)	\
> +	for (__i = XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT; \
> +	     __i <= XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT; ++__i)
> +
>  struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe,
> struct xe_vm *vm,
>  					   u32 logical_mask, u16
> width,
>  					   struct xe_hw_engine
> *hw_engine, u32 flags,
> @@ -86,6 +90,23 @@ void xe_exec_queue_last_fence_set(struct
> xe_exec_queue *e, struct xe_vm *vm,
>  				  struct dma_fence *fence);
>  int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q,
>  				      struct xe_vm *vm);
> +
> +void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
> +					    struct xe_vm *vm,
> +					    unsigned int type);
> +
> +void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct
> xe_exec_queue *q,
> +						     unsigned int
> type);
> +
> +struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct
> xe_exec_queue *q,
> +							 struct
> xe_vm *vm,
> +							 unsigned
> int type);
> +
> +void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
> +					    struct xe_vm *vm,
> +					    struct dma_fence *fence,
> +					    unsigned int type);
> +
>  void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q);
>  
>  int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void
> *scratch);
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> index 838266c3914b..4ef61e803676 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> @@ -146,6 +146,11 @@ struct xe_exec_queue {
>  		 * dependency scheduler
>  		 */
>  		struct xe_dep_scheduler *dep_scheduler;
> +		/**
> +		 * @last_fence: last fence for tlb invalidation,
> protected by
> +		 * vm->lock in write mode
> +		 */
> +		struct dma_fence *last_fence;
>  	} tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_COUNT];
>  
>  	/** @pxp: PXP info tracking */
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c
> b/drivers/gpu/drm/xe/xe_migrate.c
> index 921c9c1ea41f..4567bc88a8ec 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -2333,6 +2333,20 @@ void xe_migrate_job_unlock(struct xe_migrate
> *m, struct xe_exec_queue *q)
>  		xe_vm_assert_held(q->vm);	/* User queues VM's
> should be locked */
>  }
>  
> +#if IS_ENABLED(CONFIG_PROVE_LOCKING)
> +/**
> + * xe_migrate_job_lock_assert() - Assert migrate job lock held of
> queue
> + * @q: Migrate queue
> + */
> +void xe_migrate_job_lock_assert(struct xe_exec_queue *q)
> +{
> +	struct xe_migrate *m = gt_to_tile(q->gt)->migrate;
> +
> +	xe_gt_assert(q->gt, q == m->q);
> +	lockdep_assert_held(&m->job_mutex);
> +}
> +#endif
> +
>  #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
>  #include "tests/xe_migrate.c"
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_migrate.h
> b/drivers/gpu/drm/xe/xe_migrate.h
> index 4fad324b6253..9b5791617f5e 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.h
> +++ b/drivers/gpu/drm/xe/xe_migrate.h
> @@ -152,6 +152,14 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
>  
>  void xe_migrate_wait(struct xe_migrate *m);
>  
> +#if IS_ENABLED(CONFIG_PROVE_LOCKING)
> +void xe_migrate_job_lock_assert(struct xe_exec_queue *q);
> +#else
> +static inline void xe_migrate_job_lock_assert(struct xe_exec_queue
> *q)
> +{
> +}
> +#endif
> +
>  void xe_migrate_job_lock(struct xe_migrate *m, struct xe_exec_queue
> *q);
>  void xe_migrate_job_unlock(struct xe_migrate *m, struct
> xe_exec_queue *q);
>  
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 4058c476aa2d..4241cc433dca 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -1731,8 +1731,13 @@ void xe_vm_close_and_put(struct xe_vm *vm)
>  
>  	down_write(&vm->lock);
>  	for_each_tile(tile, xe, id) {
> -		if (vm->q[id])
> +		if (vm->q[id]) {
> +			int i;
> +
>  			xe_exec_queue_last_fence_put(vm->q[id], vm);
> +			for_each_tlb_inval(i)
> +				xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i);
> +		}
>  	}
>  	up_write(&vm->lock);
>  


  reply	other threads:[~2025-10-30  8:25 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-29 20:57 [PATCH v5 0/6] Fix serialization on burst of unbinds - v2 Matthew Brost
2025-10-29 20:57 ` [PATCH v5 1/6] drm/xe: Enforce correct user fence signaling order using drm_syncobjs Matthew Brost
2025-10-30  7:58   ` Thomas Hellström
2025-10-30 12:54     ` Matthew Brost
2025-10-29 20:57 ` [PATCH v5 2/6] drm/xe: Attach last fence to TLB invalidation job queues Matthew Brost
2025-10-30  8:24   ` Thomas Hellström [this message]
2025-10-29 20:57 ` [PATCH v5 3/6] drm/xe: Decouple bind queue last fence from TLB invalidations Matthew Brost
2025-10-30  9:52   ` Thomas Hellström
2025-10-29 20:57 ` [PATCH v5 4/6] drm/xe: Skip TLB invalidation waits in page fault binds Matthew Brost
2025-11-03 15:19   ` Thomas Hellström
2025-10-29 20:57 ` [PATCH v5 5/6] drm/xe: Disallow input fences on zero batch execs and zero binds Matthew Brost
2025-11-03 15:21   ` Thomas Hellström
2025-11-03 15:22     ` Thomas Hellström
2025-10-29 20:57 ` [PATCH v5 6/6] drm/xe: Remove last fence dependency check from binds Matthew Brost
2025-10-30  8:43   ` Thomas Hellström
2025-11-03 15:24   ` Thomas Hellström

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cc6705b5f45780f9750ecd3fa3234522456be7d2.camel@linux.intel.com \
    --to=thomas.hellstrom@linux.intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.brost@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox