Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/xe: LR mode TLB coherence for userptr, rebind, and exec ordering
@ 2026-05-13 10:16 tilak.tirumalesh.tangudu
  2026-05-13 10:38 ` ✗ CI.checkpatch: warning for " Patchwork
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: tilak.tirumalesh.tangudu @ 2026-05-13 10:16 UTC (permalink / raw)
  To: tilak.tirumalesh.tangudu, matthew.d.roper, thomas.hellstrom,
	daniele.ceraolospurio, matthew.auld, stuart.summers, intel-xe

From: Tangudu Tilak Tirumalesh <tilak.tirumalesh.tangudu@intel.com>

LR mode currently relies on context-switch based TLB coherence while
per-batch invalidation is disabled. This is unsafe when hardware
auto-tail update advances execution, allowing newly fetched work
to run before the expected implicit coherence point at
context-switch or re-enable.

1. On the LR path, add a dependency on q->lr.pfence so newly
submitted exec jobs wait for outstanding preempt/rebind work
to fully complete. This avoids out-of-order execution where
a new batch could run with stale TLB/PTE state.

2. For LR rebinds in non-fault mode, force explicit invalidation
instead of relying on implicit context re-enable behavior.
This avoids stale TLB entries surviving across rebind.

3. Include LR mode in the initial userptr invalidation condition so
LR queues always perform the required TLB invalidation. This avoids
cases where LR execution continues with stale userptr translations
after a mapping change.

Assisted-by: GitHub Copilot:claude-sonnet-4.6
Signed-off-by: Tangudu Tilak Tirumalesh <tilak.tirumalesh.tangudu@intel.com>
---
 drivers/gpu/drm/xe/xe_exec.c      | 11 +++++++++++
 drivers/gpu/drm/xe/xe_pt.c        | 18 ++++++++++++------
 drivers/gpu/drm/xe/xe_sched_job.c |  6 ++++++
 drivers/gpu/drm/xe/xe_sched_job.h |  2 ++
 drivers/gpu/drm/xe/xe_userptr.c   |  2 +-
 5 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index e05dabfcd43c..2abf851ef7a1 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -301,6 +301,17 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			goto err_put_job;
 	}
 
+	/* LR jobs wait for preempt/rebind to complete, but only when a
+	 * preemption/rebind is actually in-flight.
+	 */
+	if (xe_vm_in_lr_mode(vm) && q->lr.pfence &&
+	    test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &q->lr.pfence->flags)) {
+		err = xe_sched_job_add_fence_dep(job,
+						 dma_fence_get(q->lr.pfence));
+		if (err)
+			goto err_put_job;
+	}
+
 	for (i = 0; i < num_syncs && !err; i++)
 		err = xe_sync_entry_add_deps(&syncs[i], job);
 	if (err)
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 2669ff5ee747..0281dc9ca07f 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -2006,9 +2006,10 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
 
 		/*
 		 * If rebind, we have to invalidate TLB on !LR vms to invalidate
-		 * cached PTEs point to freed memory. On LR vms this is done
-		 * automatically when the context is re-enabled by the rebind worker,
-		 * or in fault mode it was invalidated on PTE zapping.
+		 * cached PTEs point to freed memory. On LR vms in non-fault mode,
+		 * implicit context re-enable is not a reliable coherence point,
+		 * so force explicit invalidation here. In fault mode the TLB
+		 * was already invalidated during PTE zapping.
 		 *
 		 * If !rebind, and scratch enabled VMs, there is a chance the scratch
 		 * PTE is already cached in the TLB so it needs to be invalidated.
@@ -2019,9 +2020,14 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
 		if ((!pt_op->rebind && xe_vm_has_scratch(vm) &&
 		     xe_vm_in_lr_mode(vm)))
 			pt_update_ops->needs_invalidation = true;
-		else if (pt_op->rebind && !xe_vm_in_lr_mode(vm))
-			/* We bump also if batch_invalidate_tlb is true */
-			vm->tlb_flush_seqno++;
+		else if (pt_op->rebind) {
+			if (xe_vm_in_lr_mode(vm) && !xe_vm_in_fault_mode(vm))
+				/* Force explicit invalidation for LR rebinds */
+				pt_update_ops->needs_invalidation = true;
+			else if (!xe_vm_in_lr_mode(vm))
+				/* We bump also if batch_invalidate_tlb is true */
+				vm->tlb_flush_seqno++;
+		}
 
 		vma->tile_staged |= BIT(tile->id);
 		pt_op->vma = vma;
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index ae5b38b2a884..2f6c84c662d5 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -353,6 +353,12 @@ xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot,
 		drm_printf(p, "batch_addr[%u]: 0x%016llx\n", i, snapshot->batch_addr[i]);
 }
 
+int xe_sched_job_add_fence_dep(struct xe_sched_job *job,
+			       struct dma_fence *fence)
+{
+	return drm_sched_job_add_dependency(&job->drm, fence);
+}
+
 int xe_sched_job_add_deps(struct xe_sched_job *job, struct dma_resv *resv,
 			  enum dma_resv_usage usage)
 {
diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h
index 1c1cb44216c3..771fab005175 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.h
+++ b/drivers/gpu/drm/xe/xe_sched_job.h
@@ -89,6 +89,8 @@ struct xe_sched_job_snapshot *xe_sched_job_snapshot_capture(struct xe_sched_job
 void xe_sched_job_snapshot_free(struct xe_sched_job_snapshot *snapshot);
 void xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot, struct drm_printer *p);
 
+int xe_sched_job_add_fence_dep(struct xe_sched_job *job,
+			       struct dma_fence *fence);
 int xe_sched_job_add_deps(struct xe_sched_job *job, struct dma_resv *resv,
 			  enum dma_resv_usage usage);
 
diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c
index 6761005c0b90..9c6fd1004c7c 100644
--- a/drivers/gpu/drm/xe/xe_userptr.c
+++ b/drivers/gpu/drm/xe/xe_userptr.c
@@ -102,7 +102,7 @@ xe_vma_userptr_do_inval(struct xe_vm *vm, struct xe_userptr_vma *uvma, bool is_d
 				    false, MAX_SCHEDULE_TIMEOUT);
 	XE_WARN_ON(err <= 0);
 
-	if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
+	if ((xe_vm_in_lr_mode(vm) || xe_vm_in_fault_mode(vm)) && userptr->initial_bind) {
 		if (!userptr->finish_inuse) {
 			/*
 			 * Defer the TLB wait to an extra pass so the caller
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2026-05-14  9:46 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-13 10:16 [PATCH] drm/xe: LR mode TLB coherence for userptr, rebind, and exec ordering tilak.tirumalesh.tangudu
2026-05-13 10:38 ` ✗ CI.checkpatch: warning for " Patchwork
2026-05-13 10:40 ` ✓ CI.KUnit: success " Patchwork
2026-05-13 11:33 ` ✓ Xe.CI.BAT: " Patchwork
2026-05-13 11:42 ` [PATCH] " Hellstrom, Thomas
2026-05-14  9:46 ` ✗ Xe.CI.FULL: failure for " Patchwork

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox