From: Matthew Brost <matthew.brost@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: stuart.summers@intel.com, arvind.yadav@intel.com,
	himal.prasad.ghimiray@intel.com,
	thomas.hellstrom@linux.intel.com, francois.dugast@intel.com
Subject: [PATCH v3 15/25] drm/xe: Make bind queues operate cross-tile
Date: Fri, 27 Feb 2026 17:34:51 -0800
Message-ID: <20260228013501.106680-16-matthew.brost@intel.com>
In-Reply-To: <20260228013501.106680-1-matthew.brost@intel.com>

Since bind jobs execute on the CPU rather than the GPU, maintaining a
per-tile bind queue no longer provides value. Convert the driver to use
a single bind queue shared across tiles. The primary change is routing
all GT TLB invalidations through this unified bind queue.
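
For illustration, TLB-invalidation state is now flattened into a single
array indexed by (tile, GT type). Below is a minimal standalone sketch of
that indexing, using hypothetical stand-in names for the driver's
XE_EXEC_QUEUE_TLB_INVAL_* constants and assuming two GT slots per tile
and two tiles, matching the new XE_EXEC_QUEUE_TLB_INVAL_COUNT:

	#include <stdbool.h>

	/* Hypothetical stand-ins mirroring the patch's constants */
	#define TLB_INVAL_PRIMARY_GT	0
	#define TLB_INVAL_MEDIA_GT	1
	#define GT_SLOTS_PER_TILE	(TLB_INVAL_MEDIA_GT + 1)
	#define MAX_TILES		2
	#define TLB_INVAL_COUNT		(GT_SLOTS_PER_TILE * MAX_TILES)

	/* Flattened slot for a (tile, GT-type) pair, cf. to_dep_scheduler() */
	static inline int tlb_inval_idx(int tile_id, bool is_media_gt)
	{
		return tile_id * GT_SLOTS_PER_TILE +
		       (is_media_gt ? TLB_INVAL_MEDIA_GT :
				      TLB_INVAL_PRIMARY_GT);
	}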

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue.c       | 142 +++++++++--------------
 drivers/gpu/drm/xe/xe_exec_queue.h       |  14 +--
 drivers/gpu/drm/xe/xe_exec_queue_types.h |  21 ++--
 drivers/gpu/drm/xe/xe_pt.c               |  22 ++--
 drivers/gpu/drm/xe/xe_sync.c             |  20 +---
 drivers/gpu/drm/xe/xe_tlb_inval_job.c    |  15 ++-
 drivers/gpu/drm/xe/xe_tlb_inval_job.h    |   2 +-
 drivers/gpu/drm/xe/xe_vm.c               |  65 +++++------
 drivers/gpu/drm/xe/xe_vm_types.h         |   2 +-
 9 files changed, 126 insertions(+), 177 deletions(-)
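
As a reviewer aid (not part of the patch), here is a sketch of the
per-tile slot filter that ops_execute() applies when collecting last
fences, reusing the hypothetical names from the sketch in the commit
message and assuming XE_MAX_GT_PER_TILE equals the two GT slots reserved
per tile:

	/* True iff flattened slot i belongs to tile tile_id */
	static inline bool tlb_inval_slot_on_tile(int i, int tile_id)
	{
		return i >= tile_id * GT_SLOTS_PER_TILE &&
		       i < (tile_id + 1) * GT_SLOTS_PER_TILE;
	}

With this filter, the fence-collection loop only touches the slots owned
by the tile whose migrate job lock is currently held.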

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index b3f700a9d425..0201b8159e63 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -142,9 +142,8 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q)
 {
 	int i;
 
-	for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i)
-		if (q->tlb_inval[i].dep_scheduler)
-			xe_dep_scheduler_fini(q->tlb_inval[i].dep_scheduler);
+	for_each_tlb_inval(q, i)
+		xe_dep_scheduler_fini(q->tlb_inval[i].dep_scheduler);
 
 	if (xe_exec_queue_uses_pxp(q))
 		xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
@@ -166,31 +165,34 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q)
 
 static int alloc_dep_schedulers(struct xe_device *xe, struct xe_exec_queue *q)
 {
-	struct xe_tile *tile = gt_to_tile(q->gt);
-	int i;
+	struct xe_tile *tile;
+	int i = 0, j;
+	u8 id;
 
-	for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) {
-		struct xe_dep_scheduler *dep_scheduler;
-		struct xe_gt *gt;
-		struct workqueue_struct *wq;
+	for_each_tile(tile, xe, id) {
+		for (j = 0; j < (XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT + 1); ++j, ++i) {
+			struct xe_dep_scheduler *dep_scheduler;
+			struct xe_gt *gt;
+			struct workqueue_struct *wq;
 
-		if (i == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT)
-			gt = tile->primary_gt;
-		else
-			gt = tile->media_gt;
+			if (j == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT)
+				gt = tile->primary_gt;
+			else
+				gt = tile->media_gt;
 
-		if (!gt)
-			continue;
+			if (!gt)
+				continue;
 
-		wq = gt->tlb_inval.job_wq;
+			wq = gt->tlb_inval.job_wq;
 
 #define MAX_TLB_INVAL_JOBS	16	/* Picking a reasonable value */
-		dep_scheduler = xe_dep_scheduler_create(xe, wq, q->name,
-							MAX_TLB_INVAL_JOBS);
-		if (IS_ERR(dep_scheduler))
-			return PTR_ERR(dep_scheduler);
+			dep_scheduler = xe_dep_scheduler_create(xe, wq, q->name,
+								MAX_TLB_INVAL_JOBS);
+			if (IS_ERR(dep_scheduler))
+				return PTR_ERR(dep_scheduler);
 
-		q->tlb_inval[i].dep_scheduler = dep_scheduler;
+			q->tlb_inval[i].dep_scheduler = dep_scheduler;
+		}
 	}
 #undef MAX_TLB_INVAL_JOBS
 
@@ -227,7 +229,6 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
 	q->ops = gt->exec_queue_ops;
 	INIT_LIST_HEAD(&q->lr.link);
 	INIT_LIST_HEAD(&q->vm_exec_queue_link);
-	INIT_LIST_HEAD(&q->multi_gt_link);
 	INIT_LIST_HEAD(&q->hw_engine_group_link);
 	INIT_LIST_HEAD(&q->pxp.link);
 	spin_lock_init(&q->multi_queue.lock);
@@ -536,7 +537,6 @@ ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);
 void xe_exec_queue_destroy(struct kref *ref)
 {
 	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
-	struct xe_exec_queue *eq, *next;
 	int i;
 
 	xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0);
@@ -548,15 +548,9 @@ void xe_exec_queue_destroy(struct kref *ref)
 		xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
 
 	xe_exec_queue_last_fence_put_unlocked(q);
-	for_each_tlb_inval(i)
+	for_each_tlb_inval(q, i)
 		xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, i);
 
-	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
-		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
-					 multi_gt_link)
-			xe_exec_queue_put(eq);
-	}
-
 	if (q->user_vm) {
 		xe_vm_put(q->user_vm);
 		q->user_vm = NULL;
@@ -1159,7 +1153,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 		u64_to_user_ptr(args->instances);
 	struct xe_hw_engine *hwe;
 	struct xe_vm *vm;
-	struct xe_tile *tile;
 	struct xe_exec_queue *q = NULL;
 	u32 logical_mask;
 	u32 flags = 0;
@@ -1208,31 +1201,16 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 			return -ENOENT;
 		}
 
-		for_each_tile(tile, xe, id) {
-			struct xe_exec_queue *new;
-
-			flags |= EXEC_QUEUE_FLAG_VM;
-			if (id)
-				flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD;
-
-			new = xe_exec_queue_create_bind(xe, tile, vm, flags,
-							args->extensions);
-			if (IS_ERR(new)) {
-				up_read(&vm->lock);
-				xe_vm_put(vm);
-				err = PTR_ERR(new);
-				if (q)
-					goto put_exec_queue;
-				return err;
-			}
-			if (id == 0)
-				q = new;
-			else
-				list_add_tail(&new->multi_gt_list,
-					      &q->multi_gt_link);
-		}
+		flags |= EXEC_QUEUE_FLAG_VM;
+
+		q = xe_exec_queue_create_bind(xe, xe_device_get_root_tile(xe),
+					      vm, flags, args->extensions);
 		up_read(&vm->lock);
 		xe_vm_put(vm);
+		if (IS_ERR(q)) {
+			err = PTR_ERR(q);
+			return err;
+		}
 	} else {
 		logical_mask = calc_validate_logical_mask(xe, eci,
 							  args->width,
@@ -1436,14 +1414,6 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
  */
 void xe_exec_queue_kill(struct xe_exec_queue *q)
 {
-	struct xe_exec_queue *eq = q, *next;
-
-	list_for_each_entry_safe(eq, next, &eq->multi_gt_list,
-				 multi_gt_link) {
-		q->ops->kill(eq);
-		xe_vm_remove_compute_exec_queue(q->vm, eq);
-	}
-
 	q->ops->kill(q);
 	xe_vm_remove_compute_exec_queue(q->vm, q);
 }
@@ -1594,42 +1564,40 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
  * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence
  * @q: The exec queue
  * @vm: The VM the engine does a bind for
- * @type: Either primary or media GT
+ * @idx: Index of tlb invalidation
  */
 void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
 					    struct xe_vm *vm,
-					    unsigned int type)
+					    unsigned int idx)
 {
 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
-	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
-		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+	xe_assert(vm->xe, idx < XE_EXEC_QUEUE_TLB_INVAL_COUNT);
 
-	xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type);
+	xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, idx);
 }
 
 /**
  * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB
  * invalidation fence unlocked
  * @q: The exec queue
- * @type: Either primary or media GT
+ * @idx: Index of tlb invalidation
  *
  * Only safe to be called from xe_exec_queue_destroy().
  */
 void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
-						     unsigned int type)
+						     unsigned int idx)
 {
-	xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
-		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+	xe_assert(q->vm->xe, idx < XE_EXEC_QUEUE_TLB_INVAL_COUNT);
 
-	dma_fence_put(q->tlb_inval[type].last_fence);
-	q->tlb_inval[type].last_fence = NULL;
+	dma_fence_put(q->tlb_inval[idx].last_fence);
+	q->tlb_inval[idx].last_fence = NULL;
 }
 
 /**
  * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation
  * @q: The exec queue
  * @vm: The VM the engine does a bind for
- * @type: Either primary or media GT
+ * @idx: Index of tlb invalidation
  *
  * Get last fence, takes a ref
  *
@@ -1637,22 +1605,21 @@ void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
  */
 struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q,
 							 struct xe_vm *vm,
-							 unsigned int type)
+							 unsigned int idx)
 {
 	struct dma_fence *fence;
 
 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
-	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
-		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+	xe_assert(vm->xe, idx < XE_EXEC_QUEUE_TLB_INVAL_COUNT);
 	xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
 				      EXEC_QUEUE_FLAG_MIGRATE));
 
-	if (q->tlb_inval[type].last_fence &&
+	if (q->tlb_inval[idx].last_fence &&
 	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
-		     &q->tlb_inval[type].last_fence->flags))
-		xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
+		     &q->tlb_inval[idx].last_fence->flags))
+		xe_exec_queue_tlb_inval_last_fence_put(q, vm, idx);
 
-	fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub();
+	fence = q->tlb_inval[idx].last_fence ?: dma_fence_get_stub();
 	dma_fence_get(fence);
 	return fence;
 }
@@ -1662,26 +1629,25 @@ struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q
  * @q: The exec queue
  * @vm: The VM the engine does a bind for
  * @fence: The fence
- * @type: Either primary or media GT
+ * @idx: Index of tlb invalidation
  *
- * Set the last fence for the tlb invalidation type on the queue. Increases
+ * Set the last fence for the tlb invalidation client on the queue. Increases
  * reference count for fence, when closing queue
  * xe_exec_queue_tlb_inval_last_fence_put should be called.
  */
 void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
 					    struct xe_vm *vm,
 					    struct dma_fence *fence,
-					    unsigned int type)
+					    unsigned int idx)
 {
 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
-	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
-		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+	xe_assert(vm->xe, idx < XE_EXEC_QUEUE_TLB_INVAL_COUNT);
 	xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
 				      EXEC_QUEUE_FLAG_MIGRATE));
 	xe_assert(vm->xe, !dma_fence_is_container(fence));
 
-	xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
-	q->tlb_inval[type].last_fence = dma_fence_get(fence);
+	xe_exec_queue_tlb_inval_last_fence_put(q, vm, idx);
+	q->tlb_inval[idx].last_fence = dma_fence_get(fence);
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
index a82d99bd77bc..b5aabab388c1 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -14,9 +14,9 @@ struct drm_file;
 struct xe_device;
 struct xe_file;
 
-#define for_each_tlb_inval(__i)	\
-	for (__i = XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT; \
-	     __i <= XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT; ++__i)
+#define for_each_tlb_inval(__q, __i)	\
+	for (__i = 0; __i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++__i)	\
+		for_each_if((__q)->tlb_inval[__i].dep_scheduler)
 
 struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
 					   u32 logical_mask, u16 width,
@@ -141,19 +141,19 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm,
 
 void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
 					    struct xe_vm *vm,
-					    unsigned int type);
+					    unsigned int idx);
 
 void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
-						     unsigned int type);
+						     unsigned int idx);
 
 struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q,
 							 struct xe_vm *vm,
-							 unsigned int type);
+							 unsigned int idx);
 
 void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
 					    struct xe_vm *vm,
 					    struct dma_fence *fence,
-					    unsigned int type);
+					    unsigned int idx);
 
 void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q);
 
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index a1f3938f4173..d2a25db0a835 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -126,14 +126,12 @@ struct xe_exec_queue {
 #define EXEC_QUEUE_FLAG_PERMANENT		BIT(1)
 /* for VM jobs. Caller needs to hold rpm ref when creating queue with this flag */
 #define EXEC_QUEUE_FLAG_VM			BIT(2)
-/* child of VM queue for multi-tile VM jobs */
-#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(3)
 /* kernel exec_queue only, set priority to highest level */
-#define EXEC_QUEUE_FLAG_HIGH_PRIORITY		BIT(4)
+#define EXEC_QUEUE_FLAG_HIGH_PRIORITY		BIT(3)
 /* flag to indicate low latency hint to guc */
-#define EXEC_QUEUE_FLAG_LOW_LATENCY		BIT(5)
+#define EXEC_QUEUE_FLAG_LOW_LATENCY		BIT(4)
 /* for migration (kernel copy, clear, bind) jobs */
-#define EXEC_QUEUE_FLAG_MIGRATE			BIT(6)
+#define EXEC_QUEUE_FLAG_MIGRATE			BIT(5)
 
 	/**
 	 * @flags: flags for this exec queue, should statically setup aside from ban
@@ -141,13 +139,6 @@ struct xe_exec_queue {
 	 */
 	unsigned long flags;
 
-	union {
-		/** @multi_gt_list: list head for VM bind engines if multi-GT */
-		struct list_head multi_gt_list;
-		/** @multi_gt_link: link for VM bind engines if multi-GT */
-		struct list_head multi_gt_link;
-	};
-
 	union {
 		/** @execlist: execlist backend specific state for exec queue */
 		struct xe_execlist_exec_queue *execlist;
@@ -202,7 +193,8 @@ struct xe_exec_queue {
 
 #define XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT	0
 #define XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT	1
-#define XE_EXEC_QUEUE_TLB_INVAL_COUNT		(XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT  + 1)
+#define XE_EXEC_QUEUE_TLB_INVAL_COUNT	\
+	((XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT + 1) * 2)
 
 	/** @tlb_inval: TLB invalidations exec queue state */
 	struct {
@@ -213,7 +205,8 @@ struct xe_exec_queue {
 		struct xe_dep_scheduler *dep_scheduler;
 		/**
 		 * @last_fence: last fence for tlb invalidation, protected by
-		 * vm->lock in write mode
+		 * vm->lock in write mode for user queues, protected by
+		 * tile->m->lock for migration queues
 		 */
 		struct dma_fence *last_fence;
 	} tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_COUNT];
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index ed7cb34c958c..032947a10806 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -2510,12 +2510,18 @@ static const struct xe_migrate_pt_update_ops svm_userptr_migrate_ops;
 #endif
 
 static struct xe_dep_scheduler *to_dep_scheduler(struct xe_exec_queue *q,
-						 struct xe_gt *gt)
+						 struct xe_tile *tile,
+						 struct xe_gt *gt,
+						 unsigned int *type)
 {
+	int tile_ofs = tile->id * (XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT + 1);
+
 	if (xe_gt_is_media_type(gt))
-		return q->tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT].dep_scheduler;
+		*type = tile_ofs + XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT;
+	else
+		*type = tile_ofs + XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT;
 
-	return q->tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT].dep_scheduler;
+	return q->tlb_inval[*type].dep_scheduler;
 }
 
 /**
@@ -2540,6 +2546,7 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
 	struct xe_tlb_inval_job *ijob = NULL, *mjob = NULL;
 	struct xe_range_fence *rfence;
 	struct xe_vma_op *op;
+	unsigned int type;
 	int err = 0, i;
 	struct xe_migrate_pt_update update = {
 		.ops = pt_update_ops->needs_svm_lock ?
@@ -2566,13 +2573,13 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
 
 	if (pt_update_ops->needs_invalidation) {
 		struct xe_dep_scheduler *dep_scheduler =
-			to_dep_scheduler(q, tile->primary_gt);
+			to_dep_scheduler(q, tile, tile->primary_gt, &type);
 
 		ijob = xe_tlb_inval_job_create(q, &tile->primary_gt->tlb_inval,
 					       dep_scheduler, vm,
 					       pt_update_ops->start,
 					       pt_update_ops->last,
-					       XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+					       type);
 		if (IS_ERR(ijob)) {
 			err = PTR_ERR(ijob);
 			goto kill_vm_tile1;
@@ -2591,14 +2598,15 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
 		}
 
 		if (tile->media_gt) {
-			dep_scheduler = to_dep_scheduler(q, tile->media_gt);
+			dep_scheduler = to_dep_scheduler(q, tile,
+							 tile->media_gt, &type);
 
 			mjob = xe_tlb_inval_job_create(q,
 						       &tile->media_gt->tlb_inval,
 						       dep_scheduler, vm,
 						       pt_update_ops->start,
 						       pt_update_ops->last,
-						       XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT);
+						       type);
 			if (IS_ERR(mjob)) {
 				err = PTR_ERR(mjob);
 				goto free_ijob;
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 24d6d9af20d6..8a0de78395f1 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -345,15 +345,9 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
 			return ERR_PTR(-EOPNOTSUPP);
 
 	if (q->flags & EXEC_QUEUE_FLAG_VM) {
-		struct xe_exec_queue *__q;
-		struct xe_tile *tile;
-		u8 id;
-
-		for_each_tile(tile, vm->xe, id) {
+		num_fence++;
+		for_each_tlb_inval(q, i)
 			num_fence++;
-			for_each_tlb_inval(i)
-				num_fence++;
-		}
 
 		fences = kmalloc_objs(*fences, num_fence);
 		if (!fences)
@@ -361,17 +355,9 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
 
 		fences[current_fence++] =
 			xe_exec_queue_last_fence_get(q, vm);
-		for_each_tlb_inval(i)
+		for_each_tlb_inval(q, i)
 			fences[current_fence++] =
 				xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
-		list_for_each_entry(__q, &q->multi_gt_list,
-				    multi_gt_link) {
-			fences[current_fence++] =
-				xe_exec_queue_last_fence_get(__q, vm);
-			for_each_tlb_inval(i)
-				fences[current_fence++] =
-					xe_exec_queue_tlb_inval_last_fence_get(__q, vm, i);
-		}
 
 		xe_assert(vm->xe, current_fence == num_fence);
 		cf = dma_fence_array_create(num_fence, fences,
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
index 04d21015cd5d..81f560068d3c 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_job.c
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
@@ -39,8 +39,8 @@ struct xe_tlb_inval_job {
 	u64 start;
 	/** @end: End address to invalidate */
 	u64 end;
-	/** @type: GT type */
-	int type;
+	/** @idx: Index of tlb invalidation */
+	int idx;
 	/** @fence_armed: Fence has been armed */
 	bool fence_armed;
 };
@@ -87,7 +87,7 @@ static const struct xe_dep_job_ops dep_job_ops = {
  * @vm: VM which TLB invalidation is being issued for
  * @start: Start address to invalidate
  * @end: End address to invalidate
- * @type: GT type
+ * @idx: Index of tlb invalidation
  *
  * Create a TLB invalidation job and initialize internal fields. The caller is
  * responsible for releasing the creation reference.
@@ -97,7 +97,7 @@ static const struct xe_dep_job_ops dep_job_ops = {
 struct xe_tlb_inval_job *
 xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
 			struct xe_dep_scheduler *dep_scheduler,
-			struct xe_vm *vm, u64 start, u64 end, int type)
+			struct xe_vm *vm, u64 start, u64 end, int idx)
 {
 	struct xe_tlb_inval_job *job;
 	struct drm_sched_entity *entity =
@@ -105,8 +105,7 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
 	struct xe_tlb_inval_fence *ifence;
 	int err;
 
-	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
-		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+	xe_assert(vm->xe, idx < XE_EXEC_QUEUE_TLB_INVAL_COUNT);
 
 	job = kmalloc_obj(*job);
 	if (!job)
@@ -120,7 +119,7 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
 	job->fence_armed = false;
 	xe_page_reclaim_list_init(&job->prl);
 	job->dep.ops = &dep_job_ops;
-	job->type = type;
+	job->idx = idx;
 	kref_init(&job->refcount);
 	xe_exec_queue_get(q);	/* Pairs with put in xe_tlb_inval_job_destroy */
 	xe_vm_get(vm);		/* Pairs with put in xe_tlb_inval_job_destroy */
@@ -280,7 +279,7 @@ struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job,
 	/* Let the upper layers fish this out */
 	xe_exec_queue_tlb_inval_last_fence_set(job->q, job->vm,
 					       &job->dep.drm.s_fence->finished,
-					       job->type);
+					       job->idx);
 
 	xe_migrate_job_unlock(m, job->q);
 
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.h b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
index 03d6e21cd611..2a4478f529e6 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_job.h
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
@@ -20,7 +20,7 @@ struct xe_vm;
 struct xe_tlb_inval_job *
 xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
 			struct xe_dep_scheduler *dep_scheduler,
-			struct xe_vm *vm, u64 start, u64 end, int type);
+			struct xe_vm *vm, u64 start, u64 end, int idx);
 
 void xe_tlb_inval_job_add_page_reclaim(struct xe_tlb_inval_job *job,
 				       struct xe_page_reclaim_list *prl);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 4ddfdd6a3c2a..52212b51caa8 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1657,7 +1657,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
 			struct xe_exec_queue *q;
 			u32 create_flags = EXEC_QUEUE_FLAG_VM;
 
-			if (!vm->pt_root[id])
+			if (!vm->pt_root[id] || vm->q)
 				continue;
 
 			if (!xef) /* Not from userspace */
@@ -1668,7 +1668,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
 				err = PTR_ERR(q);
 				goto err_close;
 			}
-			vm->q[id] = q;
+			vm->q = q;
 		}
 	}
 
@@ -1775,24 +1775,18 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	if (xe_vm_in_fault_mode(vm))
 		xe_svm_close(vm);
 
-	down_write(&vm->lock);
-	for_each_tile(tile, xe, id) {
-		if (vm->q[id]) {
-			int i;
+	if (vm->q) {
+		int i;
 
-			xe_exec_queue_last_fence_put(vm->q[id], vm);
-			for_each_tlb_inval(i)
-				xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i);
-		}
-	}
-	up_write(&vm->lock);
+		down_write(&vm->lock);
+		xe_exec_queue_last_fence_put(vm->q, vm);
+		for_each_tlb_inval(vm->q, i)
+			xe_exec_queue_tlb_inval_last_fence_put(vm->q, vm, i);
+		up_write(&vm->lock);
 
-	for_each_tile(tile, xe, id) {
-		if (vm->q[id]) {
-			xe_exec_queue_kill(vm->q[id]);
-			xe_exec_queue_put(vm->q[id]);
-			vm->q[id] = NULL;
-		}
+		xe_exec_queue_kill(vm->q);
+		xe_exec_queue_put(vm->q);
+		vm->q = NULL;
 	}
 
 	down_write(&vm->lock);
@@ -1924,7 +1918,7 @@ u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
 static struct xe_exec_queue *
 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 {
-	return q ? q : vm->q[0];
+	return q ? q : vm->q;
 }
 
 static struct xe_user_fence *
@@ -3159,13 +3153,10 @@ static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
 		if (vops->pt_update_ops[id].q)
 			continue;
 
-		if (q) {
+		if (q)
 			vops->pt_update_ops[id].q = q;
-			if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
-				q = list_next_entry(q, multi_gt_list);
-		} else {
-			vops->pt_update_ops[id].q = vm->q[id];
-		}
+		else
+			vops->pt_update_ops[id].q = vm->q;
 	}
 
 	return number_tiles;
@@ -3185,15 +3176,15 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
 	if (number_tiles == 0)
 		return ERR_PTR(-ENODATA);
 
-	for_each_tile(tile, vm->xe, id) {
+	for_each_tile(tile, vm->xe, id)
 		++n_fence;
 
-		if (!(vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT))
-			for_each_tlb_inval(i)
-				++n_fence;
+	if (!(vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT)) {
+		for_each_tlb_inval(vops->pt_update_ops[0].q, i)
+			++n_fence;
 	}
 
-	fences = kmalloc_objs(*fences, n_fence);
+	fences = kcalloc(n_fence, sizeof(*fences), GFP_KERNEL);
 	if (!fences) {
 		fence = ERR_PTR(-ENOMEM);
 		goto err_trace;
@@ -3235,9 +3226,15 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
 			continue;
 
 		xe_migrate_job_lock(tile->migrate, q);
-		for_each_tlb_inval(i)
-			fences[current_fence++] =
-				xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
+		for_each_tlb_inval(q, i) {
+			if (i >= (tile->id + 1) * XE_MAX_GT_PER_TILE ||
+			    i < tile->id * XE_MAX_GT_PER_TILE)
+				continue;
+
+			fences[current_fence++] = fence ?
+				xe_exec_queue_tlb_inval_last_fence_get(q, vm, i) :
+				dma_fence_get_stub();
+		}
 		xe_migrate_job_unlock(tile->migrate, q);
 	}
 
@@ -3746,7 +3743,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 
 	syncs_user = u64_to_user_ptr(args->syncs);
 	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
-		struct xe_exec_queue *__q = q ?: vm->q[0];
+		struct xe_exec_queue *__q = q ?: vm->q;
 
 		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
 					  &syncs_user[num_syncs],
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 1f6f7e30e751..2c173550346a 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -209,7 +209,7 @@ struct xe_vm {
 	struct xe_device *xe;
 
 	/* exec queue used for (un)binding vma's */
-	struct xe_exec_queue *q[XE_MAX_TILES_PER_DEVICE];
+	struct xe_exec_queue *q;
 
 	/** @lru_bulk_move: Bulk LRU move list for this VM's BOs */
 	struct ttm_lru_bulk_move lru_bulk_move;
-- 
2.34.1


Thread overview: 63+ messages
2026-02-28  1:34 [PATCH v3 00/25] CPU binds and ULLS on migration queue Matthew Brost
2026-02-28  1:34 ` [PATCH v3 01/25] drm/xe: Drop struct xe_migrate_pt_update argument from populate/clear vfuns Matthew Brost
2026-03-05 14:17   ` Francois Dugast
2026-02-28  1:34 ` [PATCH v3 02/25] drm/xe: Add xe_migrate_update_pgtables_cpu_execute helper Matthew Brost
2026-03-05 14:39   ` Francois Dugast
2026-02-28  1:34 ` [PATCH v3 03/25] drm/xe: Decouple exec queue idle check from LRC Matthew Brost
2026-03-02 20:50   ` Summers, Stuart
2026-03-02 21:02     ` Matthew Brost
2026-03-03 21:26       ` Summers, Stuart
2026-03-03 22:42         ` Matthew Brost
2026-03-03 22:54           ` Summers, Stuart
2026-02-28  1:34 ` [PATCH v3 04/25] drm/xe: Add job count to GuC exec queue snapshot Matthew Brost
2026-03-02 20:50   ` Summers, Stuart
2026-02-28  1:34 ` [PATCH v3 05/25] drm/xe: Update xe_bo_put_deferred arguments to include writeback flag Matthew Brost
2026-04-01 12:20   ` Francois Dugast
2026-04-01 22:39     ` Matthew Brost
2026-02-28  1:34 ` [PATCH v3 06/25] drm/xe: Add XE_BO_FLAG_PUT_VM_ASYNC Matthew Brost
2026-04-01 12:22   ` Francois Dugast
2026-04-01 22:38     ` Matthew Brost
2026-02-28  1:34 ` [PATCH v3 07/25] drm/xe: Update scheduler job layer to support PT jobs Matthew Brost
2026-03-03 22:50   ` Summers, Stuart
2026-03-03 23:00     ` Matthew Brost
2026-02-28  1:34 ` [PATCH v3 08/25] drm/xe: Add helpers to access PT ops Matthew Brost
2026-04-07 15:22   ` Francois Dugast
2026-02-28  1:34 ` [PATCH v3 09/25] drm/xe: Add struct xe_pt_job_ops Matthew Brost
2026-03-03 23:26   ` Summers, Stuart
2026-03-03 23:28     ` Matthew Brost
2026-02-28  1:34 ` [PATCH v3 10/25] drm/xe: Update GuC submission backend to run PT jobs Matthew Brost
2026-03-03 23:28   ` Summers, Stuart
2026-03-04  0:26     ` Matthew Brost
2026-03-04 20:43       ` Summers, Stuart
2026-03-04 21:53         ` Matthew Brost
2026-03-05 20:24           ` Summers, Stuart
2026-02-28  1:34 ` [PATCH v3 11/25] drm/xe: Store level in struct xe_vm_pgtable_update Matthew Brost
2026-03-03 23:44   ` Summers, Stuart
2026-02-28  1:34 ` [PATCH v3 12/25] drm/xe: Don't use migrate exec queue for page fault binds Matthew Brost
2026-02-28  1:34 ` [PATCH v3 13/25] drm/xe: Enable CPU binds for jobs Matthew Brost
2026-02-28  1:34 ` [PATCH v3 14/25] drm/xe: Remove unused arguments from xe_migrate_pt_update_ops Matthew Brost
2026-02-28  1:34 ` Matthew Brost [this message]
2026-02-28  1:34 ` [PATCH v3 16/25] drm/xe: Add CPU bind layer Matthew Brost
2026-02-28  1:34 ` [PATCH v3 17/25] drm/xe: Add device flag to enable PT mirroring across tiles Matthew Brost
2026-02-28  1:34 ` [PATCH v3 18/25] drm/xe: Add xe_hw_engine_write_ring_tail Matthew Brost
2026-02-28  1:34 ` [PATCH v3 19/25] drm/xe: Add ULLS support to LRC Matthew Brost
2026-03-05 20:21   ` Francois Dugast
2026-02-28  1:34 ` [PATCH v3 20/25] drm/xe: Add ULLS migration job support to migration layer Matthew Brost
2026-03-05 23:34   ` Summers, Stuart
2026-03-09 23:11     ` Matthew Brost
2026-02-28  1:34 ` [PATCH v3 21/25] drm/xe: Add MI_SEMAPHORE_WAIT instruction defs Matthew Brost
2026-02-28  1:34 ` [PATCH v3 22/25] drm/xe: Add ULLS migration job support to ring ops Matthew Brost
2026-02-28  1:34 ` [PATCH v3 23/25] drm/xe: Add ULLS migration job support to GuC submission Matthew Brost
2026-02-28  1:35 ` [PATCH v3 24/25] drm/xe: Enter ULLS for migration jobs upon page fault or SVM prefetch Matthew Brost
2026-02-28  1:35 ` [PATCH v3 25/25] drm/xe: Add modparam to enable / disable ULLS on migrate queue Matthew Brost
2026-03-05 22:59   ` Summers, Stuart
2026-04-01 22:44     ` Matthew Brost
2026-02-28  1:43 ` ✗ CI.checkpatch: warning for CPU binds and ULLS on migration queue (rev3) Patchwork
2026-02-28  1:44 ` ✓ CI.KUnit: success " Patchwork
2026-02-28  2:32 ` ✓ Xe.CI.BAT: " Patchwork
2026-02-28 13:59 ` ✗ Xe.CI.FULL: failure " Patchwork
2026-03-02 17:54   ` Summers, Stuart
2026-03-02 18:13     ` Matthew Brost
2026-03-05 22:56 ` [PATCH v3 00/25] CPU binds and ULLS on migration queue Summers, Stuart
2026-03-10 22:17   ` Matthew Brost
2026-03-20 15:31 ` Thomas Hellström
