public inbox for intel-xe@lists.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH] drm/xe/bo: Cache vram_region_gpu_offset in struct xe_bo
@ 2026-03-29 22:22 Yuri Martins
  2026-03-29 22:24 ` ✗ LGCI.VerificationFailed: failure for " Patchwork
  2026-03-30  8:02 ` [PATCH] " Thomas Hellström
  0 siblings, 2 replies; 5+ messages in thread
From: Yuri Martins @ 2026-03-29 22:22 UTC (permalink / raw)
  To: intel-xe
  Cc: Matthew Brost, Thomas Hellström, Rodrigo Vivi, David Airlie,
	Simona Vetter, Matthew Auld, Matt Roper, dri-devel, linux-kernel

The vram_region_gpu_offset() helper chases pointers through TTM resource
manager structures on every call. An XXX comment in the codebase noted
this was a problem in the VM bind hot path and suggested caching the
value with a recalculation on BO move.

Add a vram_gpu_offset field to struct xe_bo that is updated in
xe_bo_move() at the 'out' label, which is the convergence point for all
move paths. Provide xe_bo_vram_gpu_offset() as a static inline accessor.

Convert five callers that always operate on the BO's current resource:
  - xe_pt.c: xe_pt_stage_bind() (VM bind hot path)
  - xe_bo.c: __xe_bo_addr()
  - xe_ggtt.c: xe_ggtt_map_bo()
  - xe_lmtt.c: lmtt_insert_bo()
  - xe_migrate.c: xe_migrate_access_memory()

Two callers in xe_migrate.c (pte_update_size and emit_pte) are
intentionally not converted because they receive a ttm_resource that
may refer to a move destination rather than the BO's current placement.

Replace the XXX comment on vram_region_gpu_offset() with proper kernel
doc that directs callers to prefer the cached accessor.

Add a KUnit live test (xe_bo_vram_gpu_offset_kunit) that validates cache
consistency across BO creation in VRAM, eviction to system memory, and
restoration back to VRAM.

Tested on ASUS Zenbook S14 (Intel Core Ultra 7 258V) with xe_live_test
module on Fedora.

Signed-off-by: Yuri Martins <yurimartins2004@hotmail.com>
---
 drivers/gpu/drm/xe/tests/xe_bo.c | 117 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_bo.c       |  17 +++--
 drivers/gpu/drm/xe/xe_bo.h       |  15 ++++
 drivers/gpu/drm/xe/xe_bo_types.h |   6 ++
 drivers/gpu/drm/xe/xe_ggtt.c     |   2 +-
 drivers/gpu/drm/xe/xe_lmtt.c     |   2 +-
 drivers/gpu/drm/xe/xe_migrate.c  |   2 +-
 drivers/gpu/drm/xe/xe_pt.c       |   2 +-
 8 files changed, 155 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 49c95ed67d7e..ed65e25c1b11 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -603,9 +603,126 @@ static void xe_bo_shrink_kunit(struct kunit *test)
 	shrink_test_run_device(xe);
 }
 
+/*
+ * Test that bo->vram_gpu_offset is kept in sync with the value computed
+ * by vram_region_gpu_offset() across BO creation, eviction to system
+ * memory, and restoration back to VRAM.
+ */
+static void vram_gpu_offset_test_run_tile(struct xe_device *xe,
+					  struct xe_tile *tile,
+					  struct kunit *test)
+{
+	struct xe_bo *bo;
+	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
+	struct drm_exec *exec = XE_VALIDATION_OPT_OUT;
+	u64 expected;
+	long timeout;
+	int ret;
+
+	kunit_info(test, "Testing vram_gpu_offset cache on tile %u\n", tile->id);
+
+	bo = xe_bo_create_user(xe, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
+			       bo_flags, exec);
+	if (IS_ERR(bo)) {
+		KUNIT_FAIL(test, "Failed to create bo: %pe\n", bo);
+		return;
+	}
+
+	xe_bo_lock(bo, false);
+
+	/* After creation the BO should be in VRAM on DGFX. */
+	ret = xe_bo_validate(bo, NULL, false, exec);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to validate bo: %d\n", ret);
+		goto out_unlock;
+	}
+
+	/* Wait for any async clears. */
+	timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+					DMA_RESV_USAGE_KERNEL, false, 5 * HZ);
+	if (timeout <= 0) {
+		KUNIT_FAIL(test, "Timeout waiting for bo after validate.\n");
+		goto out_unlock;
+	}
+
+	expected = vram_region_gpu_offset(bo->ttm.resource);
+	KUNIT_EXPECT_EQ(test, xe_bo_vram_gpu_offset(bo), expected);
+
+	if (xe_bo_is_vram(bo))
+		KUNIT_EXPECT_NE(test, xe_bo_vram_gpu_offset(bo), 0);
+
+	/* Evict to system memory — cache must become 0. */
+	ret = xe_bo_evict(bo, exec);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to evict bo: %d\n", ret);
+		goto out_unlock;
+	}
+
+	timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+					DMA_RESV_USAGE_KERNEL, false, 5 * HZ);
+	if (timeout <= 0) {
+		KUNIT_FAIL(test, "Timeout waiting for bo after evict.\n");
+		goto out_unlock;
+	}
+
+	expected = vram_region_gpu_offset(bo->ttm.resource);
+	KUNIT_EXPECT_EQ(test, xe_bo_vram_gpu_offset(bo), expected);
+	KUNIT_EXPECT_EQ(test, xe_bo_vram_gpu_offset(bo), (u64)0);
+
+	/* Restore back to VRAM — cache must be updated again. */
+	ret = xe_bo_validate(bo, NULL, false, exec);
+	if (ret) {
+		KUNIT_FAIL(test, "Failed to validate bo back to vram: %d\n", ret);
+		goto out_unlock;
+	}
+
+	timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+					DMA_RESV_USAGE_KERNEL, false, 5 * HZ);
+	if (timeout <= 0) {
+		KUNIT_FAIL(test, "Timeout waiting for bo after restore.\n");
+		goto out_unlock;
+	}
+
+	expected = vram_region_gpu_offset(bo->ttm.resource);
+	KUNIT_EXPECT_EQ(test, xe_bo_vram_gpu_offset(bo), expected);
+
+	if (xe_bo_is_vram(bo))
+		KUNIT_EXPECT_NE(test, xe_bo_vram_gpu_offset(bo), 0);
+
+out_unlock:
+	xe_bo_unlock(bo);
+	xe_bo_put(bo);
+}
+
+static int vram_gpu_offset_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = kunit_get_current_test();
+	struct xe_tile *tile;
+	int id;
+
+	if (!IS_DGFX(xe)) {
+		kunit_skip(test, "non-discrete device\n");
+		return 0;
+	}
+
+	guard(xe_pm_runtime)(xe);
+	for_each_tile(tile, xe, id)
+		vram_gpu_offset_test_run_tile(xe, tile, test);
+
+	return 0;
+}
+
+static void xe_bo_vram_gpu_offset_kunit(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+
+	vram_gpu_offset_test_run_device(xe);
+}
+
 static struct kunit_case xe_bo_tests[] = {
 	KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
 	KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
+	KUNIT_CASE_PARAM(xe_bo_vram_gpu_offset_kunit, xe_pci_live_device_gen_param),
 	{}
 };
 
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index a7c2dc7f224c..30c4fe326827 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1164,6 +1164,9 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		ret = xe_sriov_vf_ccs_attach_bo(bo);
 
 out:
+	bo->vram_gpu_offset = ttm_bo->resource ?
+		vram_region_gpu_offset(ttm_bo->resource) : 0;
+
 	if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
 	    ttm_bo->ttm) {
 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
@@ -2908,9 +2911,15 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str
 	return 0;
 }
 
-/*
- * XXX: This is in the VM bind data path, likely should calculate this once and
- * store, with a recalculation if the BO is moved.
+/**
+ * vram_region_gpu_offset - Compute GPU offset for a TTM resource's memory region
+ * @res: The TTM resource.
+ *
+ * Computes the GPU-visible offset for @res based on its current memory type.
+ * Callers that always operate on a BO's current resource should prefer
+ * xe_bo_vram_gpu_offset() which returns a cached value.
+ *
+ * Return: The GPU-visible offset, or 0 for system/TT memory.
  */
 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
 {
@@ -3173,7 +3182,7 @@ dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
 
 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
 			     page_size, &cur);
-		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
+		return cur.start + offset + xe_bo_vram_gpu_offset(bo);
 	}
 }
 
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 68dea7d25a6b..d911f7327e8b 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -342,6 +342,21 @@ bool xe_bo_is_vm_bound(struct xe_bo *bo);
 bool xe_bo_has_single_placement(struct xe_bo *bo);
 uint64_t vram_region_gpu_offset(struct ttm_resource *res);
 
+/**
+ * xe_bo_vram_gpu_offset - Return cached GPU offset for BO's memory region
+ * @bo: The buffer object.
+ *
+ * Returns the GPU offset for the BO's current memory region. This value
+ * is cached on the BO and updated whenever the BO is moved, avoiding
+ * repeated pointer chasing through TTM resource manager structures.
+ *
+ * Return: The GPU-visible offset, or 0 for system/TT memory.
+ */
+static inline u64 xe_bo_vram_gpu_offset(struct xe_bo *bo)
+{
+	return bo->vram_gpu_offset;
+}
+
 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
 
 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *ctc,
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index ff8317bfc1ae..622a1ec8231e 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -109,6 +109,12 @@ struct xe_bo {
 	 */
 	u64 min_align;
 
+	/**
+	 * @vram_gpu_offset: Cached GPU offset for BO's current memory
+	 * region, updated on move. Protected by the BO's dma-resv lock.
+	 */
+	u64 vram_gpu_offset;
+
 	/**
 	 * @madv_purgeable: user space advise on BO purgeability, protected
 	 * by BO's dma-resv lock.
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index a848d1a41b9b..a24ae3e0e553 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -705,7 +705,7 @@ static void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node,
 						   pte | xe_res_dma(&cur));
 	} else {
 		/* Prepend GPU offset */
-		pte |= vram_region_gpu_offset(bo->ttm.resource);
+		pte |= xe_bo_vram_gpu_offset(bo);
 
 		for (xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur);
 		     cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE))
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index 0c726eda9390..f9257ba1728b 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -492,7 +492,7 @@ static void lmtt_insert_bo(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo
 	lmtt_assert(lmtt, IS_ALIGNED(xe_bo_size(bo), page_size));
 	lmtt_assert(lmtt, xe_bo_is_vram(bo));
 
-	vram_offset = vram_region_gpu_offset(bo->ttm.resource);
+	vram_offset = xe_bo_vram_gpu_offset(bo);
 	xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur);
 	while (cur.remaining) {
 		addr = xe_res_dma(&cur);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index fc918b4fba54..bdaea4979e89 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -2499,7 +2499,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
 
 	do {
 		struct dma_fence *__fence;
-		u64 vram_addr = vram_region_gpu_offset(bo->ttm.resource) +
+		u64 vram_addr = xe_bo_vram_gpu_offset(bo) +
 			cursor.start;
 		int current_bytes;
 		u32 pitch;
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 8e5f4f0dea3f..7cfce1a92a1a 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -782,7 +782,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 	}
 
 	xe_walk.default_vram_pte |= XE_PPGTT_PTE_DM;
-	xe_walk.dma_offset = (bo && !is_purged) ? vram_region_gpu_offset(bo->ttm.resource) : 0;
+	xe_walk.dma_offset = (bo && !is_purged) ? xe_bo_vram_gpu_offset(bo) : 0;
 	if (!range)
 		xe_bo_assert_held(bo);
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-04-01  3:52 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-29 22:22 [PATCH] drm/xe/bo: Cache vram_region_gpu_offset in struct xe_bo Yuri Martins
2026-03-29 22:24 ` ✗ LGCI.VerificationFailed: failure for " Patchwork
2026-03-30  8:02 ` [PATCH] " Thomas Hellström
2026-04-01  2:53   ` Yuri Martins
2026-04-01  3:52     ` Matthew Brost

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox