From: Matthew Brost <matthew.brost@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: thomas.hellstrom@linux.intel.com
Subject: [PATCH v7 2/6] drm/xe: Attach last fence to TLB invalidation job queues
Date: Fri, 31 Oct 2025 16:40:46 -0700
Message-ID: <20251031234050.3043507-3-matthew.brost@intel.com>
In-Reply-To: <20251031234050.3043507-1-matthew.brost@intel.com>
Add support for attaching the last fence to TLB invalidation job queues
to address serialization issues during bursts of unbind jobs. Ensure
that user fence signaling for a bind job reflects both the bind job
itself and the last fences of all related TLB invalidations. Maintain
submission order based solely on the state of the bind and TLB
invalidation queues.
Introduce support functions for last fence attachment to TLB
invalidation queues.
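
As a rough illustration (a sketch only, not code from this series), the fence
a bind's user fence waits on could be built from the bind job fence plus the
per-GT TLB invalidation last fences. The composite construction and the
example_* helper below are assumptions; only for_each_tlb_inval() and
xe_exec_queue_tlb_inval_last_fence_get() are introduced by this patch:

#include <linux/dma-fence-array.h>

/*
 * Sketch only: compose the fence a bind's user fence waits on from the
 * bind job fence plus the per-GT TLB invalidation last fences. Caller is
 * assumed to hold the locks the lockdep asserts below require (vm->lock,
 * or the migrate job lock for migrate queues).
 */
static struct dma_fence *
example_compose_user_fence(struct xe_exec_queue *q, struct xe_vm *vm,
			   struct dma_fence *bind_fence)
{
	struct dma_fence **fences;
	struct dma_fence_array *array;
	int i, n = 0;

	fences = kmalloc_array(XE_EXEC_QUEUE_TLB_INVAL_COUNT + 1,
			       sizeof(*fences), GFP_KERNEL);
	if (!fences)
		return ERR_PTR(-ENOMEM);

	fences[n++] = dma_fence_get(bind_fence);
	for_each_tlb_inval(i) {
		/* Takes a reference; stub fence if already signaled */
		fences[n++] = xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
	}

	/* dma_fence_array_create() takes ownership of fences[] and the refs */
	array = dma_fence_array_create(n, fences, dma_fence_context_alloc(1),
				       1, false);
	if (!array) {
		while (n--)
			dma_fence_put(fences[n]);
		kfree(fences);
		return ERR_PTR(-ENOMEM);
	}

	return &array->base;
}

On success nothing further needs to be dropped by the caller building the
composite, since dma_fence_array_create() owns the array and the references.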
v3:
- Fix assert in xe_exec_queue_tlb_inval_last_fence_set (CI)
- Ensure migrate lock held for migrate queues (Testing)
v5:
- Style nits (Thomas)
- Rewrite commit message (Thomas)
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/xe/xe_exec_queue.c | 103 ++++++++++++++++++++++-
drivers/gpu/drm/xe/xe_exec_queue.h | 21 +++++
drivers/gpu/drm/xe/xe_exec_queue_types.h | 5 ++
drivers/gpu/drm/xe/xe_migrate.c | 14 +++
drivers/gpu/drm/xe/xe_migrate.h | 8 ++
drivers/gpu/drm/xe/xe_vm.c | 7 +-
6 files changed, 156 insertions(+), 2 deletions(-)
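
Before the diff proper, a minimal locking sketch (assumptions only, not part
of the diff below) of how a caller might record a TLB invalidation fence as a
queue's last fence, mirroring the lock selection in
xe_exec_queue_last_fence_lockdep_assert(): the migrate job lock for migrate
queues, vm->lock otherwise.

/*
 * Sketch only: store @fence as the last TLB invalidation fence for @type.
 * The example_* name and call sites are assumptions; the set helper and
 * lock/unlock functions are the ones used in the diff below.
 */
static void example_record_tlb_inval_fence(struct xe_migrate *m,
					   struct xe_exec_queue *q,
					   struct xe_vm *vm,
					   struct dma_fence *fence,
					   unsigned int type)
{
	if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) {
		/* Migrate queues serialize via the migrate job lock */
		xe_migrate_job_lock(m, q);
		xe_exec_queue_tlb_inval_last_fence_set(q, vm, fence, type);
		xe_migrate_job_unlock(m, q);
	} else {
		/* VM bind queues rely on vm->lock being held by the caller */
		lockdep_assert_held(&vm->lock);
		xe_exec_queue_tlb_inval_last_fence_set(q, vm, fence, type);
	}
}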
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index e3414b337569..aada199faf71 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -387,6 +387,7 @@ void xe_exec_queue_destroy(struct kref *ref)
{
struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
struct xe_exec_queue *eq, *next;
+ int i;
xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0);
@@ -397,6 +398,9 @@ void xe_exec_queue_destroy(struct kref *ref)
xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
xe_exec_queue_last_fence_put_unlocked(q);
+ for_each_tlb_inval(i)
+ xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, i);
+
if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
list_for_each_entry_safe(eq, next, &q->multi_gt_list,
multi_gt_link)
@@ -1014,7 +1018,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
struct xe_vm *vm)
{
- if (q->flags & EXEC_QUEUE_FLAG_VM) {
+ if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) {
+ xe_migrate_job_lock_assert(q);
+ } else if (q->flags & EXEC_QUEUE_FLAG_VM) {
lockdep_assert_held(&vm->lock);
} else {
xe_vm_assert_held(vm);
@@ -1113,6 +1119,7 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
struct dma_fence *fence)
{
xe_exec_queue_last_fence_lockdep_assert(q, vm);
+ xe_assert(vm->xe, !dma_fence_is_container(fence));
xe_exec_queue_last_fence_put(q, vm);
q->last_fence = dma_fence_get(fence);
@@ -1141,6 +1148,100 @@ int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm)
return err;
}
+/**
+ * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind for
+ * @type: Either primary or media GT
+ */
+void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ unsigned int type)
+{
+ xe_exec_queue_last_fence_lockdep_assert(q, vm);
+ xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+
+ xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type);
+}
+
+/**
+ * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB
+ * invalidation fence unlocked
+ * @q: The exec queue
+ * @type: Either primary or media GT
+ *
+ * Only safe to be called from xe_exec_queue_destroy().
+ */
+void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
+ unsigned int type)
+{
+ xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+
+ dma_fence_put(q->tlb_inval[type].last_fence);
+ q->tlb_inval[type].last_fence = NULL;
+}
+
+/**
+ * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind for
+ * @type: Either primary or media GT
+ *
+ * Get last fence, takes a ref
+ *
+ * Returns: last fence if not signaled, dma fence stub if signaled
+ */
+struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ unsigned int type)
+{
+ struct dma_fence *fence;
+
+ xe_exec_queue_last_fence_lockdep_assert(q, vm);
+ xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+ xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
+ EXEC_QUEUE_FLAG_MIGRATE));
+
+ if (q->tlb_inval[type].last_fence &&
+ test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &q->tlb_inval[type].last_fence->flags))
+ xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
+
+ fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub();
+ dma_fence_get(fence);
+ return fence;
+}
+
+/**
+ * xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB invalidation
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind for
+ * @fence: The fence
+ * @type: Either primary or media GT
+ *
+ * Set the last fence for the TLB invalidation type on the queue. Takes a
+ * reference on @fence; xe_exec_queue_tlb_inval_last_fence_put() should be
+ * called when closing the queue.
+ */
+void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ struct dma_fence *fence,
+ unsigned int type)
+{
+ xe_exec_queue_last_fence_lockdep_assert(q, vm);
+ xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+ xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
+ EXEC_QUEUE_FLAG_MIGRATE));
+ xe_assert(vm->xe, !dma_fence_is_container(fence));
+
+ xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
+ q->tlb_inval[type].last_fence = dma_fence_get(fence);
+}
+
/**
* xe_exec_queue_contexts_hwsp_rebase - Re-compute GGTT references
* within all LRCs of a queue.
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
index a4dfbe858bda..1ba7354b33d1 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -14,6 +14,10 @@ struct drm_file;
struct xe_device;
struct xe_file;
+#define for_each_tlb_inval(__i) \
+ for (__i = XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT; \
+ __i <= XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT; ++__i)
+
struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
u32 logical_mask, u16 width,
struct xe_hw_engine *hw_engine, u32 flags,
@@ -86,6 +90,23 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm,
struct dma_fence *fence);
int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q,
struct xe_vm *vm);
+
+void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ unsigned int type);
+
+void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
+ unsigned int type);
+
+struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ unsigned int type);
+
+void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ struct dma_fence *fence,
+ unsigned int type);
+
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q);
int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch);
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 9bda1148e17b..771ffe35cd0c 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -146,6 +146,11 @@ struct xe_exec_queue {
* dependency scheduler
*/
struct xe_dep_scheduler *dep_scheduler;
+ /**
+ * @last_fence: last fence for TLB invalidation, protected by vm->lock in
+ * write mode (or the migrate job lock for migrate queues)
+ */
+ struct dma_fence *last_fence;
} tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_COUNT];
/** @pxp: PXP info tracking */
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 56a5804726e9..5003e3c4dd17 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -2333,6 +2333,20 @@ void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q)
xe_vm_assert_held(q->vm); /* User queues VM's should be locked */
}
+#if IS_ENABLED(CONFIG_PROVE_LOCKING)
+/**
+ * xe_migrate_job_lock_assert() - Assert that the queue's migrate job lock is held
+ * @q: Migrate queue
+ */
+void xe_migrate_job_lock_assert(struct xe_exec_queue *q)
+{
+ struct xe_migrate *m = gt_to_tile(q->gt)->migrate;
+
+ xe_gt_assert(q->gt, q == m->q);
+ lockdep_assert_held(&m->job_mutex);
+}
+#endif
+
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
#include "tests/xe_migrate.c"
#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index 4fad324b6253..9b5791617f5e 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -152,6 +152,14 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
void xe_migrate_wait(struct xe_migrate *m);
+#if IS_ENABLED(CONFIG_PROVE_LOCKING)
+void xe_migrate_job_lock_assert(struct xe_exec_queue *q);
+#else
+static inline void xe_migrate_job_lock_assert(struct xe_exec_queue *q)
+{
+}
+#endif
+
void xe_migrate_job_lock(struct xe_migrate *m, struct xe_exec_queue *q);
void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 6c67ab88a7c0..7343f34757d2 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1731,8 +1731,13 @@ void xe_vm_close_and_put(struct xe_vm *vm)
down_write(&vm->lock);
for_each_tile(tile, xe, id) {
- if (vm->q[id])
+ if (vm->q[id]) {
+ int i;
+
xe_exec_queue_last_fence_put(vm->q[id], vm);
+ for_each_tlb_inval(i)
+ xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i);
+ }
}
up_write(&vm->lock);
--
2.34.1