public inbox for intel-xe@lists.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH] drm/xe: Add min and max context TLB invalidation sizes
@ 2026-03-19 21:05 Stuart Summers
  2026-03-19 21:11 ` Summers, Stuart
                   ` (4 more replies)
  0 siblings, 5 replies; 15+ messages in thread
From: Stuart Summers @ 2026-03-19 21:05 UTC (permalink / raw)
  Cc: matthew.brost, niranjana.vishwanathapura, intel-xe,
	Stuart Summers

Allow platform-defined TLB invalidation min and max lengths.

This gives finer-grained control over which invalidations we
decide to send to GuC. The min size is essentially a round-up.
The max allows us to switch to a full invalidation.

The expectation here is that GuC will translate the full
invalidation in this instance into a series of per context
invalidations. These are then issued with no H2G or G2H
messages and therefore should be quicker than splitting
the invalidations from the KMD in max size chunks and sending
separately.

v2: Add proper defaults for min/max if not set in the device
    structures

Signed-off-by: Stuart Summers <stuart.summers@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h  |  4 ++++
 drivers/gpu/drm/xe/xe_guc_tlb_inval.c | 20 +++++++-------------
 drivers/gpu/drm/xe/xe_pci.c           |  3 +++
 drivers/gpu/drm/xe/xe_pci_types.h     |  2 ++
 4 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 615218d775b1..0c4168fe2ffb 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -137,6 +137,10 @@ struct xe_device {
 		u8 vm_max_level;
 		/** @info.va_bits: Maximum bits of a virtual address */
 		u8 va_bits;
+		/** @info.min_tlb_inval_size: Minimum size of context based TLB invalidations */
+		u64 min_tlb_inval_size;
+		/** @info.max_tlb_inval_size: Maximum size of context based TLB invalidations */
+		u64 max_tlb_inval_size;
 
 		/*
 		 * Keep all flags below alphabetically sorted
diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
index eb40528976ca..7512f889a97a 100644
--- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
@@ -133,12 +133,12 @@ static int send_page_reclaim(struct xe_guc *guc, u32 seqno,
 
 static u64 normalize_invalidation_range(struct xe_gt *gt, u64 *start, u64 *end)
 {
+	struct xe_device *xe = gt_to_xe(gt);
 	u64 orig_start = *start;
 	u64 length = *end - *start;
 	u64 align;
 
-	if (length < SZ_4K)
-		length = SZ_4K;
+	length = max_t(u64, xe->info.min_tlb_inval_size, length);
 
 	align = roundup_pow_of_two(length);
 	*start = ALIGN_DOWN(*start, align);
@@ -163,13 +163,6 @@ static u64 normalize_invalidation_range(struct xe_gt *gt, u64 *start, u64 *end)
 	return length;
 }
 
-/*
- * Ensure that roundup_pow_of_two(length) doesn't overflow.
- * Note that roundup_pow_of_two() operates on unsigned long,
- * not on u64.
- */
-#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX))
-
 static int send_tlb_inval_ppgtt(struct xe_guc *guc, u32 seqno, u64 start,
 				u64 end, u32 id, u32 type,
 				struct drm_suballoc *prl_sa)
@@ -178,9 +171,12 @@ static int send_tlb_inval_ppgtt(struct xe_guc *guc, u32 seqno, u64 start,
 	struct xe_gt *gt = guc_to_gt(guc);
 	struct xe_device *xe = guc_to_xe(guc);
 	u32 action[MAX_TLB_INVALIDATION_LEN];
-	u64 length = end - start;
+	u64 normalize_len;
 	int len = 0, err;
 
+	normalize_len = normalize_invalidation_range(gt, &start,
+						     &end);
+
 	xe_gt_assert(gt, (type == XE_GUC_TLB_INVAL_PAGE_SELECTIVE &&
 			  !xe->info.has_ctx_tlb_inval) ||
 		     (type == XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX &&
@@ -189,11 +185,9 @@ static int send_tlb_inval_ppgtt(struct xe_guc *guc, u32 seqno, u64 start,
 	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
 	action[len++] = !prl_sa ? seqno : TLB_INVALIDATION_SEQNO_INVALID;
 	if (!gt_to_xe(gt)->info.has_range_tlb_inval ||
-	    length > MAX_RANGE_TLB_INVALIDATION_LENGTH) {
+	    normalize_len > xe->info.max_tlb_inval_size) {
 		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
 	} else {
-		u64 normalize_len = normalize_invalidation_range(gt, &start,
-								 &end);
 		bool need_flush = !prl_sa &&
 			seqno != TLB_INVALIDATION_SEQNO_INVALID;
 
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 189e2a1c29f9..5e02f9ab625b 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -743,6 +743,9 @@ static int xe_info_init_early(struct xe_device *xe,
 	xe->info.vm_max_level = desc->vm_max_level;
 	xe->info.vram_flags = desc->vram_flags;
 
+	xe->info.min_tlb_inval_size = desc->min_tlb_inval_size ?: SZ_4K;
+	xe->info.max_tlb_inval_size = desc->max_tlb_inval_size ?: SZ_1G;
+
 	xe->info.is_dgfx = desc->is_dgfx;
 	xe->info.has_cached_pt = desc->has_cached_pt;
 	xe->info.has_fan_control = desc->has_fan_control;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index 8eee4fb1c57c..cd9d3ad96fe0 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -34,6 +34,8 @@ struct xe_device_desc {
 	u8 va_bits;
 	u8 vm_max_level;
 	u8 vram_flags;
+	u64 min_tlb_inval_size;
+	u64 max_tlb_inval_size;
 
 	u8 require_force_probe:1;
 	u8 is_dgfx:1;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH] drm/xe: Add min and max context TLB invalidation sizes
@ 2026-03-20 20:46 Stuart Summers
  2026-03-20 20:49 ` Summers, Stuart
  2026-03-23 17:23 ` Matthew Brost
  0 siblings, 2 replies; 15+ messages in thread
From: Stuart Summers @ 2026-03-20 20:46 UTC (permalink / raw)
  Cc: intel-xe, matthew.brost, niranjana.vishwanathapura,
	jonathan.cavitt, Stuart Summers

Allow platform-defined TLB invalidation min and max lengths.

This gives finer-grained control over which invalidations we
decide to send to GuC. The min size is essentially a round-up.
The max allows us to switch to a full invalidation.

The expectation here is that GuC will translate the full
invalidation in this instance into a series of per context
invalidations. These are then issued with no H2G or G2H
messages and therefore should be quicker than splitting
the invalidations from the KMD in max size chunks and sending
separately.

v2: Add proper defaults for min/max if not set in the device
    structures
v3: Add coverage for pow-of-2 out of bounds cases

Signed-off-by: Stuart Summers <stuart.summers@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h  |  4 +++
 drivers/gpu/drm/xe/xe_guc_tlb_inval.c | 39 +++++++++++++++++----------
 drivers/gpu/drm/xe/xe_pci.c           |  3 +++
 drivers/gpu/drm/xe/xe_pci_types.h     |  2 ++
 4 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 615218d775b1..0c4168fe2ffb 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -137,6 +137,10 @@ struct xe_device {
 		u8 vm_max_level;
 		/** @info.va_bits: Maximum bits of a virtual address */
 		u8 va_bits;
+		/** @info.min_tlb_inval_size: Minimum size of context based TLB invalidations */
+		u64 min_tlb_inval_size;
+		/** @info.max_tlb_inval_size: Maximum size of context based TLB invalidations */
+		u64 max_tlb_inval_size;
 
 		/*
 		 * Keep all flags below alphabetically sorted
diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
index ced58f46f846..e9e0be94ceef 100644
--- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
@@ -115,14 +115,23 @@ static int send_page_reclaim(struct xe_guc *guc, u32 seqno,
 			      G2H_LEN_DW_PAGE_RECLAMATION, 1);
 }
 
+/*
+ * Ensure that roundup_pow_of_two(length) doesn't overflow.
+ * Note that roundup_pow_of_two() operates on unsigned long,
+ * not on u64.
+ */
+#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX))
+
 static u64 normalize_invalidation_range(struct xe_gt *gt, u64 *start, u64 *end)
 {
+	struct xe_device *xe = gt_to_xe(gt);
 	u64 orig_start = *start;
 	u64 length = *end - *start;
 	u64 align;
 
-	if (length < SZ_4K)
-		length = SZ_4K;
+	xe_gt_assert(gt, length <= MAX_RANGE_TLB_INVALIDATION_LENGTH);
+
+	length = max_t(u64, xe->info.min_tlb_inval_size, length);
 
 	align = roundup_pow_of_two(length);
 	*start = ALIGN_DOWN(*start, align);
@@ -147,13 +156,6 @@ static u64 normalize_invalidation_range(struct xe_gt *gt, u64 *start, u64 *end)
 	return length;
 }
 
-/*
- * Ensure that roundup_pow_of_two(length) doesn't overflow.
- * Note that roundup_pow_of_two() operates on unsigned long,
- * not on u64.
- */
-#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX))
-
 static int send_tlb_inval_ppgtt(struct xe_guc *guc, u32 seqno, u64 start,
 				u64 end, u32 id, u32 type,
 				struct drm_suballoc *prl_sa)
@@ -162,8 +164,20 @@ static int send_tlb_inval_ppgtt(struct xe_guc *guc, u32 seqno, u64 start,
 	struct xe_gt *gt = guc_to_gt(guc);
 	struct xe_device *xe = guc_to_xe(guc);
 	u32 action[MAX_TLB_INVALIDATION_LEN];
-	u64 length = end - start;
+	u64 normalize_len, length = end - start;
 	int len = 0, err;
+	bool do_full_inval = false;
+
+	if (!xe->info.has_range_tlb_inval ||
+	    length > MAX_RANGE_TLB_INVALIDATION_LENGTH) {
+		do_full_inval = true;
+	} else {
+		normalize_len = normalize_invalidation_range(gt, &start,
+							     &end);
+
+		if (normalize_len > xe->info.max_tlb_inval_size)
+			do_full_inval = true;
+	}
 
 	xe_gt_assert(gt, (type == XE_GUC_TLB_INVAL_PAGE_SELECTIVE &&
 			  !xe->info.has_ctx_tlb_inval) ||
@@ -172,12 +186,9 @@ static int send_tlb_inval_ppgtt(struct xe_guc *guc, u32 seqno, u64 start,
 
 	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
 	action[len++] = !prl_sa ? seqno : TLB_INVALIDATION_SEQNO_INVALID;
-	if (!gt_to_xe(gt)->info.has_range_tlb_inval ||
-	    length > MAX_RANGE_TLB_INVALIDATION_LENGTH) {
+	if (do_full_inval) {
 		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
 	} else {
-		u64 normalize_len = normalize_invalidation_range(gt, &start,
-								 &end);
 		bool need_flush = !prl_sa &&
 			seqno != TLB_INVALIDATION_SEQNO_INVALID;
 
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 189e2a1c29f9..5e02f9ab625b 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -743,6 +743,9 @@ static int xe_info_init_early(struct xe_device *xe,
 	xe->info.vm_max_level = desc->vm_max_level;
 	xe->info.vram_flags = desc->vram_flags;
 
+	xe->info.min_tlb_inval_size = desc->min_tlb_inval_size ?: SZ_4K;
+	xe->info.max_tlb_inval_size = desc->max_tlb_inval_size ?: SZ_1G;
+
 	xe->info.is_dgfx = desc->is_dgfx;
 	xe->info.has_cached_pt = desc->has_cached_pt;
 	xe->info.has_fan_control = desc->has_fan_control;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index 8eee4fb1c57c..cd9d3ad96fe0 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -34,6 +34,8 @@ struct xe_device_desc {
 	u8 va_bits;
 	u8 vm_max_level;
 	u8 vram_flags;
+	u64 min_tlb_inval_size;
+	u64 max_tlb_inval_size;
 
 	u8 require_force_probe:1;
 	u8 is_dgfx:1;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH] drm/xe: Add min and max context TLB invalidation sizes
@ 2026-03-17 19:50 Stuart Summers
  0 siblings, 0 replies; 15+ messages in thread
From: Stuart Summers @ 2026-03-17 19:50 UTC (permalink / raw)
  Cc: matthew.brost, intel-xe, Stuart Summers

Allow platform-defined TLB invalidation min and max lengths.

This gives finer-grained control over which invalidations we
decide to send to GuC. The min size is essentially a round-up.
The max allows us to switch to a full invalidation.

The expectation here is that GuC will translate the full
invalidation in this instance into a series of per context
invalidations. These are then issued with no H2G or G2H
messages and therefore should be quicker than splitting
the invalidations from the KMD in max size chunks and sending
separately.

Signed-off-by: Stuart Summers <stuart.summers@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h  |  4 ++++
 drivers/gpu/drm/xe/xe_guc_tlb_inval.c | 14 ++++++++------
 drivers/gpu/drm/xe/xe_pci.c           |  2 ++
 drivers/gpu/drm/xe/xe_pci_types.h     |  2 ++
 4 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 615218d775b1..0c4168fe2ffb 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -137,6 +137,10 @@ struct xe_device {
 		u8 vm_max_level;
 		/** @info.va_bits: Maximum bits of a virtual address */
 		u8 va_bits;
+		/** @info.min_tlb_inval_size: Minimum size of context based TLB invalidations */
+		u64 min_tlb_inval_size;
+		/** @info.max_tlb_inval_size: Maximum size of context based TLB invalidations */
+		u64 max_tlb_inval_size;
 
 		/*
 		 * Keep all flags below alphabetically sorted
diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
index ced58f46f846..256759b826bc 100644
--- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
@@ -117,12 +117,12 @@ static int send_page_reclaim(struct xe_guc *guc, u32 seqno,
 
 static u64 normalize_invalidation_range(struct xe_gt *gt, u64 *start, u64 *end)
 {
+	struct xe_device *xe = gt_to_xe(gt);
 	u64 orig_start = *start;
 	u64 length = *end - *start;
 	u64 align;
 
-	if (length < SZ_4K)
-		length = SZ_4K;
+	length = max_t(u64, xe->info.min_tlb_inval_size, length);
 
 	align = roundup_pow_of_two(length);
 	*start = ALIGN_DOWN(*start, align);
@@ -162,9 +162,12 @@ static int send_tlb_inval_ppgtt(struct xe_guc *guc, u32 seqno, u64 start,
 	struct xe_gt *gt = guc_to_gt(guc);
 	struct xe_device *xe = guc_to_xe(guc);
 	u32 action[MAX_TLB_INVALIDATION_LEN];
-	u64 length = end - start;
+	u64 normalize_len, length = end - start;
 	int len = 0, err;
 
+	normalize_len = normalize_invalidation_range(gt, &start,
+						     &end);
+
 	xe_gt_assert(gt, (type == XE_GUC_TLB_INVAL_PAGE_SELECTIVE &&
 			  !xe->info.has_ctx_tlb_inval) ||
 		     (type == XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX &&
@@ -173,11 +176,10 @@ static int send_tlb_inval_ppgtt(struct xe_guc *guc, u32 seqno, u64 start,
 	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
 	action[len++] = !prl_sa ? seqno : TLB_INVALIDATION_SEQNO_INVALID;
 	if (!gt_to_xe(gt)->info.has_range_tlb_inval ||
-	    length > MAX_RANGE_TLB_INVALIDATION_LENGTH) {
+	    length > MAX_RANGE_TLB_INVALIDATION_LENGTH ||
+	    normalize_len > xe->info.max_tlb_inval_size) {
 		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
 	} else {
-		u64 normalize_len = normalize_invalidation_range(gt, &start,
-								 &end);
 		bool need_flush = !prl_sa &&
 			seqno != TLB_INVALIDATION_SEQNO_INVALID;
 
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 189e2a1c29f9..12569367034b 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -742,6 +742,8 @@ static int xe_info_init_early(struct xe_device *xe,
 	xe->info.va_bits = desc->va_bits;
 	xe->info.vm_max_level = desc->vm_max_level;
 	xe->info.vram_flags = desc->vram_flags;
+	xe->info.min_tlb_inval_size = desc->min_tlb_inval_size;
+	xe->info.max_tlb_inval_size = desc->max_tlb_inval_size;
 
 	xe->info.is_dgfx = desc->is_dgfx;
 	xe->info.has_cached_pt = desc->has_cached_pt;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index 8eee4fb1c57c..cd9d3ad96fe0 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -34,6 +34,8 @@ struct xe_device_desc {
 	u8 va_bits;
 	u8 vm_max_level;
 	u8 vram_flags;
+	u64 min_tlb_inval_size;
+	u64 max_tlb_inval_size;
 
 	u8 require_force_probe:1;
 	u8 is_dgfx:1;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2026-03-23 19:23 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-19 21:05 [PATCH] drm/xe: Add min and max context TLB invalidation sizes Stuart Summers
2026-03-19 21:11 ` Summers, Stuart
2026-03-19 21:36   ` Cavitt, Jonathan
2026-03-19 21:51     ` Summers, Stuart
2026-03-19 21:12 ` ✓ CI.KUnit: success for drm/xe: Add min and max context TLB invalidation sizes (rev2) Patchwork
2026-03-19 22:18 ` ✓ Xe.CI.BAT: " Patchwork
2026-03-20 20:05 ` ✗ Xe.CI.FULL: failure " Patchwork
2026-03-22  5:56 ` [PATCH] drm/xe: Add min and max context TLB invalidation sizes Matthew Brost
2026-03-23 19:22   ` Summers, Stuart
  -- strict thread matches above, loose matches on Subject: below --
2026-03-20 20:46 Stuart Summers
2026-03-20 20:49 ` Summers, Stuart
2026-03-20 20:59   ` Cavitt, Jonathan
2026-03-23 17:23 ` Matthew Brost
2026-03-23 19:18   ` Summers, Stuart
2026-03-17 19:50 Stuart Summers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox