From: Matthew Brost <matthew.brost@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: stuart.summers@intel.com, lucas.demarchi@intel.com,
matthew.d.roper@intel.com
Subject: [PATCH v2 11/12] drm/xe: Add context-based invalidation to GuC TLB invalidation backend
Date: Tue, 4 Nov 2025 11:56:15 -0800 [thread overview]
Message-ID: <20251104195616.3339137-12-matthew.brost@intel.com> (raw)
In-Reply-To: <20251104195616.3339137-1-matthew.brost@intel.com>
Introduce context-based invalidation support to the GuC TLB invalidation
backend. This is implemented by iterating over each exec queue per GT
within a VM, skipping inactive queues, and issuing a context-based (GuC
ID) H2G TLB invalidation. All H2G messages, except the final one, are
sent with an invalid seqno, which the G2H handler drops to ensure the
TLB invalidation fence is only signaled once all H2G messages are
completed.
A watermark mechanism is also added to switch between context-based TLB
invalidations and full device-wide invalidations, as the return on
investment for context-based invalidation diminishes when many exec
queues are mapped.
v2:
- Fix checkpatch warnings
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/gpu/drm/xe/xe_device_types.h | 2 +
drivers/gpu/drm/xe/xe_guc_tlb_inval.c | 122 +++++++++++++++++++++++++-
drivers/gpu/drm/xe/xe_pci.c | 1 +
drivers/gpu/drm/xe/xe_pci_types.h | 1 +
4 files changed, 124 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 145951dd95c9..ca285f4bce11 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -316,6 +316,8 @@ struct xe_device {
u8 has_mem_copy_instr:1;
/** @info.has_pxp: Device has PXP support */
u8 has_pxp:1;
+ /** @info.has_ctx_tlb_inval: Has context based TLB invalidations */
+ u8 has_ctx_tlb_inval:1;
/** @info.has_range_tlb_inval: Has range based TLB invalidations */
u8 has_range_tlb_inval:1;
/** @info.has_sriov: Supports SR-IOV */
diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
index 6978ee8edf2e..1baaf577cded 100644
--- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
@@ -6,14 +6,17 @@
#include "abi/guc_actions_abi.h"
#include "xe_device.h"
+#include "xe_exec_queue_types.h"
#include "xe_gt_stats.h"
#include "xe_gt_types.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
+#include "xe_guc_exec_queue_types.h"
#include "xe_guc_tlb_inval.h"
#include "xe_force_wake.h"
#include "xe_mmio.h"
#include "xe_tlb_inval.h"
+#include "xe_vm.h"
#include "regs/xe_guc_regs.h"
@@ -136,10 +139,16 @@ static int send_tlb_inval_ppgtt(struct xe_guc *guc, u32 seqno, u64 start,
{
#define MAX_TLB_INVALIDATION_LEN 7
struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_device *xe = guc_to_xe(guc);
u32 action[MAX_TLB_INVALIDATION_LEN];
u64 length = end - start;
int len = 0;
+ xe_gt_assert(gt, (type == XE_GUC_TLB_INVAL_PAGE_SELECTIVE &&
+ !xe->info.has_ctx_tlb_inval) ||
+ (type == XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX &&
+ xe->info.has_ctx_tlb_inval));
+
action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
action[len++] = seqno;
if (!gt_to_xe(gt)->info.has_range_tlb_inval ||
@@ -176,6 +185,100 @@ static int send_tlb_inval_asid_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
}
+static bool queue_mapped_in_guc(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+ return q->gt == guc_to_gt(guc);
+}
+
+static int send_tlb_inval_ctx_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
+ u64 start, u64 end, u32 asid)
+{
+ struct xe_guc *guc = tlb_inval->private;
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_exec_queue *q, *last_q = NULL;
+ struct xe_vm *vm;
+ int err = 0;
+
+ lockdep_assert_held(&tlb_inval->seqno_lock);
+
+ if (xe->info.force_execlist)
+ return -ECANCELED;
+
+ vm = xe_device_asid_to_vm(xe, asid);
+ if (IS_ERR(vm))
+ return PTR_ERR(vm);
+
+ down_read(&vm->exec_queues.lock);
+
+ /*
+ * XXX: Randomly picking a threshold for now. This will need to be
+ * tuned based on expected UMD queue counts and performance profiling.
+ */
+#define EXEC_QUEUE_COUNT_FULL_THRESHOLD 8
+ if (vm->exec_queues.count[guc_to_gt(guc)->info.id] >=
+ EXEC_QUEUE_COUNT_FULL_THRESHOLD) {
+ u32 action[] = {
+ XE_GUC_ACTION_TLB_INVALIDATION,
+ seqno,
+ MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL),
+ };
+
+ err = send_tlb_inval(guc, action, ARRAY_SIZE(action));
+ goto err_unlock;
+ }
+#undef EXEC_QUEUE_COUNT_FULL_THRESHOLD
+
+ list_for_each_entry_reverse(q, &vm->exec_queues.list,
+ vm_exec_queue_link)
+ if (queue_mapped_in_guc(guc, q) && q->ops->active(q)) {
+ last_q = q;
+ break;
+ }
+
+ if (!last_q) {
+ /*
+ * We can't break fence ordering for TLB invalidation jobs; if
+ * TLB invalidations are in flight, issue a dummy invalidation to
+ * maintain ordering. Nor can we safely move the seqno_recv when
+ * returning -ECANCELED while TLB invalidations are in flight. Use
+ * a GGTT invalidation as the dummy invalidation, given ASID
+ * invalidations are unsupported here.
+ */
+ if (xe_tlb_inval_idle(tlb_inval))
+ err = -ECANCELED;
+ else
+ err = send_tlb_inval_ggtt(tlb_inval, seqno);
+ goto err_unlock;
+ }
+
+ list_for_each_entry(q, &vm->exec_queues.list, vm_exec_queue_link) {
+ int __seqno = last_q == q ? seqno :
+ TLB_INVALIDATION_SEQNO_INVALID;
+ u32 type = XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX;
+
+ /*
+ * XXX: Technically we can race here and the queue can become
+ * inactive, which is not ideal. The TLB invalidation will timeout in
+ * this case unless we get GuC support to convert to NOP...
+ */
+ if (!queue_mapped_in_guc(guc, q) || !q->ops->active(q))
+ continue;
+
+ xe_assert(xe, q->vm == vm);
+
+ err = send_tlb_inval_ppgtt(guc, __seqno, start, end,
+ q->guc->id, type);
+ if (err)
+ goto err_unlock;
+ }
+
+err_unlock:
+ up_read(&vm->exec_queues.lock);
+ xe_vm_put(vm);
+
+ return err;
+}
+
static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval)
{
struct xe_guc *guc = tlb_inval->private;
@@ -203,7 +306,7 @@ static long tlb_inval_timeout_delay(struct xe_tlb_inval *tlb_inval)
return hw_tlb_timeout + 2 * delay;
}
-static const struct xe_tlb_inval_ops guc_tlb_inval_ops = {
+static const struct xe_tlb_inval_ops guc_tlb_inval_asid_ops = {
.all = send_tlb_inval_all,
.ggtt = send_tlb_inval_ggtt,
.ppgtt = send_tlb_inval_asid_ppgtt,
@@ -212,6 +315,15 @@ static const struct xe_tlb_inval_ops guc_tlb_inval_ops = {
.timeout_delay = tlb_inval_timeout_delay,
};
+static const struct xe_tlb_inval_ops guc_tlb_inval_ctx_ops = {
+ .ggtt = send_tlb_inval_ggtt,
+ .all = send_tlb_inval_all,
+ .ppgtt = send_tlb_inval_ctx_ppgtt,
+ .initialized = tlb_inval_initialized,
+ .flush = tlb_inval_flush,
+ .timeout_delay = tlb_inval_timeout_delay,
+};
+
/**
* xe_guc_tlb_inval_init_early() - Init GuC TLB invalidation early
* @guc: GuC object
@@ -223,8 +335,14 @@ static const struct xe_tlb_inval_ops guc_tlb_inval_ops = {
void xe_guc_tlb_inval_init_early(struct xe_guc *guc,
struct xe_tlb_inval *tlb_inval)
{
+ struct xe_device *xe = guc_to_xe(guc);
+
tlb_inval->private = guc;
- tlb_inval->ops = &guc_tlb_inval_ops;
+
+ if (xe->info.has_ctx_tlb_inval)
+ tlb_inval->ops = &guc_tlb_inval_ctx_ops;
+ else
+ tlb_inval->ops = &guc_tlb_inval_asid_ops;
}
/**
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 1959de3f7a27..9a11066c7d4a 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -863,6 +863,7 @@ static int xe_info_init(struct xe_device *xe,
xe->info.has_device_atomics_on_smem = 1;
xe->info.has_range_tlb_inval = graphics_desc->has_range_tlb_inval;
+ xe->info.has_ctx_tlb_inval = graphics_desc->has_ctx_tlb_inval;
xe->info.has_usm = graphics_desc->has_usm;
xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index 9892c063a9c5..c08857c06c7e 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -63,6 +63,7 @@ struct xe_graphics_desc {
u8 has_atomic_enable_pte_bit:1;
u8 has_indirect_ring_state:1;
u8 has_range_tlb_inval:1;
+ u8 has_ctx_tlb_inval:1;
u8 has_usm:1;
u8 has_64bit_timestamp:1;
};
--
2.34.1
next prev parent reply other threads:[~2025-11-04 19:56 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-04 19:56 [PATCH v2 00/12] Context based TLB invalidations Matthew Brost
2025-11-04 19:56 ` [PATCH v2 01/12] drm/xe: Add normalize_invalidation_range Matthew Brost
2025-11-06 20:03 ` Summers, Stuart
2025-11-04 19:56 ` [PATCH v2 02/12] drm/xe: Make usm.asid_to_vm allocation use GFP_NOWAIT Matthew Brost
2025-11-06 22:08 ` Summers, Stuart
2025-11-06 22:13 ` Summers, Stuart
2025-11-04 19:56 ` [PATCH v2 03/12] drm/xe: Add xe_device_asid_to_vm helper Matthew Brost
2025-12-11 22:07 ` Matt Atwood
2025-11-04 19:56 ` [PATCH v2 04/12] drm/xe: Add vm to exec queues association Matthew Brost
2025-11-06 22:15 ` Summers, Stuart
2025-12-12 21:03 ` Summers, Stuart
2025-12-12 21:24 ` Matthew Brost
2025-12-12 21:37 ` Summers, Stuart
2025-11-04 19:56 ` [PATCH v2 05/12] drm/xe: Taint TLB invalidation seqno lock with GFP_KERNEL Matthew Brost
2025-12-11 22:35 ` Matt Atwood
2025-11-04 19:56 ` [PATCH v2 06/12] drm/xe: Do not forward invalid TLB invalidation seqnos to upper layers Matthew Brost
2025-11-06 22:05 ` Summers, Stuart
2025-11-04 19:56 ` [PATCH v2 07/12] drm/xe: Rename send_tlb_inval_ppgtt to send_tlb_inval_asid_ppgtt Matthew Brost
2025-11-06 20:22 ` Summers, Stuart
2025-11-04 19:56 ` [PATCH v2 08/12] drm/xe: Add send_tlb_inval_ppgtt helper Matthew Brost
2025-11-06 20:25 ` Summers, Stuart
2025-11-04 19:56 ` [PATCH v2 09/12] drm/xe: Add xe_tlb_inval_idle helper Matthew Brost
2025-11-10 18:48 ` Summers, Stuart
2025-12-12 22:00 ` Summers, Stuart
2025-11-04 19:56 ` [PATCH v2 10/12] drm/xe: Add exec queue active vfunc Matthew Brost
2025-11-04 19:56 ` Matthew Brost [this message]
2025-11-06 21:50 ` [PATCH v2 11/12] drm/xe: Add context-based invalidation to GuC TLB invalidation backend Summers, Stuart
2025-11-07 7:01 ` Matthew Brost
2025-11-10 19:29 ` Summers, Stuart
2025-11-11 1:01 ` Matthew Brost
2025-12-12 22:30 ` Summers, Stuart
2025-11-04 19:56 ` [PATCH v2 12/12] drm/xe: Enable context TLB invalidations for CI Matthew Brost
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251104195616.3339137-12-matthew.brost@intel.com \
--to=matthew.brost@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=lucas.demarchi@intel.com \
--cc=matthew.d.roper@intel.com \
--cc=stuart.summers@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox