* [PATCH] drm/xe/bo: Cache vram_region_gpu_offset in struct xe_bo
@ 2026-03-29 22:22 Yuri Martins
2026-03-29 22:24 ` ✗ LGCI.VerificationFailed: failure for " Patchwork
2026-03-30 8:02 ` [PATCH] " Thomas Hellström
0 siblings, 2 replies; 5+ messages in thread
From: Yuri Martins @ 2026-03-29 22:22 UTC (permalink / raw)
To: intel-xe
Cc: Matthew Brost, Thomas Hellström, Rodrigo Vivi, David Airlie,
Simona Vetter, Matthew Auld, Matt Roper, dri-devel, linux-kernel
The vram_region_gpu_offset() helper chases pointers through TTM resource
manager structures on every call. An XXX comment in the codebase noted
this was a problem in the VM bind hot path and suggested caching the
value with a recalculation on BO move.
Add a vram_gpu_offset field to struct xe_bo that is updated in
xe_bo_move() at the 'out' label, which is the convergence point for all
move paths. Provide xe_bo_vram_gpu_offset() as a static inline accessor.
Convert five callers that always operate on the BO's current resource:
- xe_pt.c: xe_pt_stage_bind() (VM bind hot path)
- xe_bo.c: __xe_bo_addr()
- xe_ggtt.c: xe_ggtt_map_bo()
- xe_lmtt.c: lmtt_insert_bo()
- xe_migrate.c: xe_migrate_access_memory()
Two callers in xe_migrate.c (pte_update_size and emit_pte) are
intentionally not converted because they receive a ttm_resource that
may refer to a move destination rather than the BO's current placement.
Replace the XXX comment on vram_region_gpu_offset() with proper kernel
doc that directs callers to prefer the cached accessor.
Add a KUnit live test (xe_bo_vram_gpu_offset_kunit) that validates cache
consistency across BO creation in VRAM, eviction to system memory, and
restoration back to VRAM.
Tested on ASUS Zenbook S14 (Intel Core Ultra 7 258V) with xe_live_test
module on Fedora.
Signed-off-by: Yuri Martins <yurimartins2004@hotmail.com>
---
drivers/gpu/drm/xe/tests/xe_bo.c | 117 +++++++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_bo.c | 17 +++--
drivers/gpu/drm/xe/xe_bo.h | 15 ++++
drivers/gpu/drm/xe/xe_bo_types.h | 6 ++
drivers/gpu/drm/xe/xe_ggtt.c | 2 +-
drivers/gpu/drm/xe/xe_lmtt.c | 2 +-
drivers/gpu/drm/xe/xe_migrate.c | 2 +-
drivers/gpu/drm/xe/xe_pt.c | 2 +-
8 files changed, 155 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 49c95ed67d7e..ed65e25c1b11 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -603,9 +603,126 @@ static void xe_bo_shrink_kunit(struct kunit *test)
shrink_test_run_device(xe);
}
+/*
+ * Test that bo->vram_gpu_offset is kept in sync with the value computed
+ * by vram_region_gpu_offset() across BO creation, eviction to system
+ * memory, and restoration back to VRAM.
+ */
+static void vram_gpu_offset_test_run_tile(struct xe_device *xe,
+ struct xe_tile *tile,
+ struct kunit *test)
+{
+ struct xe_bo *bo;
+ unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
+ struct drm_exec *exec = XE_VALIDATION_OPT_OUT;
+ u64 expected;
+ long timeout;
+ int ret;
+
+ kunit_info(test, "Testing vram_gpu_offset cache on tile %u\n", tile->id);
+
+ bo = xe_bo_create_user(xe, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
+ bo_flags, exec);
+ if (IS_ERR(bo)) {
+ KUNIT_FAIL(test, "Failed to create bo: %pe\n", bo);
+ return;
+ }
+
+ xe_bo_lock(bo, false);
+
+ /* After creation the BO should be in VRAM on DGFX. */
+ ret = xe_bo_validate(bo, NULL, false, exec);
+ if (ret) {
+ KUNIT_FAIL(test, "Failed to validate bo: %d\n", ret);
+ goto out_unlock;
+ }
+
+ /* Wait for any async clears. */
+ timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+ DMA_RESV_USAGE_KERNEL, false, 5 * HZ);
+ if (timeout <= 0) {
+ KUNIT_FAIL(test, "Timeout waiting for bo after validate.\n");
+ goto out_unlock;
+ }
+
+ expected = vram_region_gpu_offset(bo->ttm.resource);
+ KUNIT_EXPECT_EQ(test, xe_bo_vram_gpu_offset(bo), expected);
+
+ if (xe_bo_is_vram(bo))
+ KUNIT_EXPECT_NE(test, xe_bo_vram_gpu_offset(bo), 0);
+
+ /* Evict to system memory — cache must become 0. */
+ ret = xe_bo_evict(bo, exec);
+ if (ret) {
+ KUNIT_FAIL(test, "Failed to evict bo: %d\n", ret);
+ goto out_unlock;
+ }
+
+ timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+ DMA_RESV_USAGE_KERNEL, false, 5 * HZ);
+ if (timeout <= 0) {
+ KUNIT_FAIL(test, "Timeout waiting for bo after evict.\n");
+ goto out_unlock;
+ }
+
+ expected = vram_region_gpu_offset(bo->ttm.resource);
+ KUNIT_EXPECT_EQ(test, xe_bo_vram_gpu_offset(bo), expected);
+ KUNIT_EXPECT_EQ(test, xe_bo_vram_gpu_offset(bo), (u64)0);
+
+ /* Restore back to VRAM — cache must be updated again. */
+ ret = xe_bo_validate(bo, NULL, false, exec);
+ if (ret) {
+ KUNIT_FAIL(test, "Failed to validate bo back to vram: %d\n", ret);
+ goto out_unlock;
+ }
+
+ timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+ DMA_RESV_USAGE_KERNEL, false, 5 * HZ);
+ if (timeout <= 0) {
+ KUNIT_FAIL(test, "Timeout waiting for bo after restore.\n");
+ goto out_unlock;
+ }
+
+ expected = vram_region_gpu_offset(bo->ttm.resource);
+ KUNIT_EXPECT_EQ(test, xe_bo_vram_gpu_offset(bo), expected);
+
+ if (xe_bo_is_vram(bo))
+ KUNIT_EXPECT_NE(test, xe_bo_vram_gpu_offset(bo), 0);
+
+out_unlock:
+ xe_bo_unlock(bo);
+ xe_bo_put(bo);
+}
+
+static int vram_gpu_offset_test_run_device(struct xe_device *xe)
+{
+ struct kunit *test = kunit_get_current_test();
+ struct xe_tile *tile;
+ int id;
+
+ if (!IS_DGFX(xe)) {
+ kunit_skip(test, "non-discrete device\n");
+ return 0;
+ }
+
+ guard(xe_pm_runtime)(xe);
+ for_each_tile(tile, xe, id)
+ vram_gpu_offset_test_run_tile(xe, tile, test);
+
+ return 0;
+}
+
+static void xe_bo_vram_gpu_offset_kunit(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+
+ vram_gpu_offset_test_run_device(xe);
+}
+
static struct kunit_case xe_bo_tests[] = {
KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
+ KUNIT_CASE_PARAM(xe_bo_vram_gpu_offset_kunit, xe_pci_live_device_gen_param),
{}
};
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index a7c2dc7f224c..30c4fe326827 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1164,6 +1164,9 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
ret = xe_sriov_vf_ccs_attach_bo(bo);
out:
+ bo->vram_gpu_offset = ttm_bo->resource ?
+ vram_region_gpu_offset(ttm_bo->resource) : 0;
+
if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
ttm_bo->ttm) {
long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
@@ -2908,9 +2911,15 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str
return 0;
}
-/*
- * XXX: This is in the VM bind data path, likely should calculate this once and
- * store, with a recalculation if the BO is moved.
+/**
+ * vram_region_gpu_offset - Compute GPU offset for a TTM resource's memory region
+ * @res: The TTM resource.
+ *
+ * Computes the GPU-visible offset for @res based on its current memory type.
+ * Callers that always operate on a BO's current resource should prefer
+ * xe_bo_vram_gpu_offset() which returns a cached value.
+ *
+ * Return: The GPU-visible offset, or 0 for system/TT memory.
*/
uint64_t vram_region_gpu_offset(struct ttm_resource *res)
{
@@ -3173,7 +3182,7 @@ dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
page_size, &cur);
- return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
+ return cur.start + offset + xe_bo_vram_gpu_offset(bo);
}
}
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 68dea7d25a6b..d911f7327e8b 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -342,6 +342,21 @@ bool xe_bo_is_vm_bound(struct xe_bo *bo);
bool xe_bo_has_single_placement(struct xe_bo *bo);
uint64_t vram_region_gpu_offset(struct ttm_resource *res);
+/**
+ * xe_bo_vram_gpu_offset - Return cached GPU offset for BO's memory region
+ * @bo: The buffer object.
+ *
+ * Returns the GPU offset for the BO's current memory region. This value
+ * is cached on the BO and updated whenever the BO is moved, avoiding
+ * repeated pointer chasing through TTM resource manager structures.
+ *
+ * Return: The GPU-visible offset, or 0 for system/TT memory.
+ */
+static inline u64 xe_bo_vram_gpu_offset(struct xe_bo *bo)
+{
+ return bo->vram_gpu_offset;
+}
+
bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *ctc,
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index ff8317bfc1ae..622a1ec8231e 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -109,6 +109,12 @@ struct xe_bo {
*/
u64 min_align;
+ /**
+ * @vram_gpu_offset: Cached GPU offset for BO's current memory
+ * region, updated on move. Protected by the BO's dma-resv lock.
+ */
+ u64 vram_gpu_offset;
+
/**
* @madv_purgeable: user space advise on BO purgeability, protected
* by BO's dma-resv lock.
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index a848d1a41b9b..a24ae3e0e553 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -705,7 +705,7 @@ static void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node,
pte | xe_res_dma(&cur));
} else {
/* Prepend GPU offset */
- pte |= vram_region_gpu_offset(bo->ttm.resource);
+ pte |= xe_bo_vram_gpu_offset(bo);
for (xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur);
cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE))
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index 0c726eda9390..f9257ba1728b 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -492,7 +492,7 @@ static void lmtt_insert_bo(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo
lmtt_assert(lmtt, IS_ALIGNED(xe_bo_size(bo), page_size));
lmtt_assert(lmtt, xe_bo_is_vram(bo));
- vram_offset = vram_region_gpu_offset(bo->ttm.resource);
+ vram_offset = xe_bo_vram_gpu_offset(bo);
xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur);
while (cur.remaining) {
addr = xe_res_dma(&cur);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index fc918b4fba54..bdaea4979e89 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -2499,7 +2499,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
do {
struct dma_fence *__fence;
- u64 vram_addr = vram_region_gpu_offset(bo->ttm.resource) +
+ u64 vram_addr = xe_bo_vram_gpu_offset(bo) +
cursor.start;
int current_bytes;
u32 pitch;
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 8e5f4f0dea3f..7cfce1a92a1a 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -782,7 +782,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
}
xe_walk.default_vram_pte |= XE_PPGTT_PTE_DM;
- xe_walk.dma_offset = (bo && !is_purged) ? vram_region_gpu_offset(bo->ttm.resource) : 0;
+ xe_walk.dma_offset = (bo && !is_purged) ? xe_bo_vram_gpu_offset(bo) : 0;
if (!range)
xe_bo_assert_held(bo);
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread* ✗ LGCI.VerificationFailed: failure for drm/xe/bo: Cache vram_region_gpu_offset in struct xe_bo
2026-03-29 22:22 [PATCH] drm/xe/bo: Cache vram_region_gpu_offset in struct xe_bo Yuri Martins
@ 2026-03-29 22:24 ` Patchwork
2026-03-30 8:02 ` [PATCH] " Thomas Hellström
1 sibling, 0 replies; 5+ messages in thread
From: Patchwork @ 2026-03-29 22:24 UTC (permalink / raw)
To: Yuri Martins; +Cc: intel-xe
== Series Details ==
Series: drm/xe/bo: Cache vram_region_gpu_offset in struct xe_bo
URL : https://patchwork.freedesktop.org/series/164048/
State : failure
== Summary ==
Address 'yurimartins2004@hotmail.com' is not on the allowlist, which prevents CI from being triggered for this patch.
If you want Intel GFX CI to accept this address, please contact the script maintainers at i915-ci-infra@lists.freedesktop.org.
Exception occurred during validation, bailing out!
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] drm/xe/bo: Cache vram_region_gpu_offset in struct xe_bo
2026-03-29 22:22 [PATCH] drm/xe/bo: Cache vram_region_gpu_offset in struct xe_bo Yuri Martins
2026-03-29 22:24 ` ✗ LGCI.VerificationFailed: failure for " Patchwork
@ 2026-03-30 8:02 ` Thomas Hellström
2026-04-01 2:53 ` Yuri Martins
1 sibling, 1 reply; 5+ messages in thread
From: Thomas Hellström @ 2026-03-30 8:02 UTC (permalink / raw)
To: Yuri Martins, intel-xe
Cc: Matthew Brost, Rodrigo Vivi, David Airlie, Simona Vetter,
Matthew Auld, Matt Roper, dri-devel, linux-kernel
On Sun, 2026-03-29 at 19:22 -0300, Yuri Martins wrote:
> The vram_region_gpu_offset() helper chases pointers through TTM
> resource
> manager structures on every call. An XXX comment in the codebase
> noted
> this was a problem in the VM bind hot path and suggested caching the
> value with a recalculation on BO move.
>
> Add a vram_gpu_offset field to struct xe_bo that is updated in
> xe_bo_move() at the 'out' label, which is the convergence point for
> all
> move paths. Provide xe_bo_vram_gpu_offset() as a static inline
> accessor.
>
> Convert five callers that always operate on the BO's current
> resource:
> - xe_pt.c: xe_pt_stage_bind() (VM bind hot path)
> - xe_bo.c: __xe_bo_addr()
> - xe_ggtt.c: xe_ggtt_map_bo()
> - xe_lmtt.c: lmtt_insert_bo()
> - xe_migrate.c: xe_migrate_access_memory()
>
> Two callers in xe_migrate.c (pte_update_size and emit_pte) are
> intentionally not converted because they receive a ttm_resource that
> may refer to a move destination rather than the BO's current
> placement.
>
> Replace the XXX comment on vram_region_gpu_offset() with proper
> kernel
> doc that directs callers to prefer the cached accessor.
>
> Add a KUnit live test (xe_bo_vram_gpu_offset_kunit) that validates
> cache
> consistency across BO creation in VRAM, eviction to system memory,
> and
> restoration back to VRAM.
>
> Tested on ASUS Zenbook S14 (Intel Core Ultra 7 258V) with
> xe_live_test
> module on Fedora.
>
> Signed-off-by: Yuri Martins <yurimartins2004@hotmail.com>
This adds a pretty large amount of code and some fragility without a
clear proof of benefit. I think improvements like this needs some real
performance increase numbers.
> ---
> drivers/gpu/drm/xe/tests/xe_bo.c | 117
> +++++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_bo.c | 17 +++--
> drivers/gpu/drm/xe/xe_bo.h | 15 ++++
> drivers/gpu/drm/xe/xe_bo_types.h | 6 ++
> drivers/gpu/drm/xe/xe_ggtt.c | 2 +-
> drivers/gpu/drm/xe/xe_lmtt.c | 2 +-
> drivers/gpu/drm/xe/xe_migrate.c | 2 +-
> drivers/gpu/drm/xe/xe_pt.c | 2 +-
> 8 files changed, 155 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c
> b/drivers/gpu/drm/xe/tests/xe_bo.c
> index 49c95ed67d7e..ed65e25c1b11 100644
> --- a/drivers/gpu/drm/xe/tests/xe_bo.c
> +++ b/drivers/gpu/drm/xe/tests/xe_bo.c
> @@ -603,9 +603,126 @@ static void xe_bo_shrink_kunit(struct kunit
> *test)
> shrink_test_run_device(xe);
> }
>
> +/*
> + * Test that bo->vram_gpu_offset is kept in sync with the value
> computed
> + * by vram_region_gpu_offset() across BO creation, eviction to
> system
> + * memory, and restoration back to VRAM.
> + */
> +static void vram_gpu_offset_test_run_tile(struct xe_device *xe,
> + struct xe_tile *tile,
> + struct kunit *test)
> +{
> + struct xe_bo *bo;
> + unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
> + struct drm_exec *exec = XE_VALIDATION_OPT_OUT;
> + u64 expected;
> + long timeout;
> + int ret;
> +
> + kunit_info(test, "Testing vram_gpu_offset cache on tile
> %u\n", tile->id);
> +
> + bo = xe_bo_create_user(xe, NULL, SZ_1M,
> DRM_XE_GEM_CPU_CACHING_WC,
> + bo_flags, exec);
> + if (IS_ERR(bo)) {
> + KUNIT_FAIL(test, "Failed to create bo: %pe\n", bo);
> + return;
> + }
> +
> + xe_bo_lock(bo, false);
> +
> + /* After creation the BO should be in VRAM on DGFX. */
> + ret = xe_bo_validate(bo, NULL, false, exec);
> + if (ret) {
> + KUNIT_FAIL(test, "Failed to validate bo: %d\n",
> ret);
> + goto out_unlock;
> + }
> +
> + /* Wait for any async clears. */
> + timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
> + DMA_RESV_USAGE_KERNEL,
> false, 5 * HZ);
> + if (timeout <= 0) {
> + KUNIT_FAIL(test, "Timeout waiting for bo after
> validate.\n");
> + goto out_unlock;
> + }
> +
> + expected = vram_region_gpu_offset(bo->ttm.resource);
> + KUNIT_EXPECT_EQ(test, xe_bo_vram_gpu_offset(bo), expected);
> +
> + if (xe_bo_is_vram(bo))
> + KUNIT_EXPECT_NE(test, xe_bo_vram_gpu_offset(bo), 0);
> +
> + /* Evict to system memory — cache must become 0. */
> + ret = xe_bo_evict(bo, exec);
> + if (ret) {
> + KUNIT_FAIL(test, "Failed to evict bo: %d\n", ret);
> + goto out_unlock;
> + }
> +
> + timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
> + DMA_RESV_USAGE_KERNEL,
> false, 5 * HZ);
> + if (timeout <= 0) {
> + KUNIT_FAIL(test, "Timeout waiting for bo after
> evict.\n");
> + goto out_unlock;
> + }
> +
> + expected = vram_region_gpu_offset(bo->ttm.resource);
> + KUNIT_EXPECT_EQ(test, xe_bo_vram_gpu_offset(bo), expected);
> + KUNIT_EXPECT_EQ(test, xe_bo_vram_gpu_offset(bo), (u64)0);
> +
> + /* Restore back to VRAM — cache must be updated again. */
> + ret = xe_bo_validate(bo, NULL, false, exec);
> + if (ret) {
> + KUNIT_FAIL(test, "Failed to validate bo back to
> vram: %d\n", ret);
> + goto out_unlock;
> + }
> +
> + timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
> + DMA_RESV_USAGE_KERNEL,
> false, 5 * HZ);
> + if (timeout <= 0) {
> + KUNIT_FAIL(test, "Timeout waiting for bo after
> restore.\n");
> + goto out_unlock;
> + }
> +
> + expected = vram_region_gpu_offset(bo->ttm.resource);
> + KUNIT_EXPECT_EQ(test, xe_bo_vram_gpu_offset(bo), expected);
> +
> + if (xe_bo_is_vram(bo))
> + KUNIT_EXPECT_NE(test, xe_bo_vram_gpu_offset(bo), 0);
> +
> +out_unlock:
> + xe_bo_unlock(bo);
> + xe_bo_put(bo);
> +}
> +
> +static int vram_gpu_offset_test_run_device(struct xe_device *xe)
> +{
> + struct kunit *test = kunit_get_current_test();
> + struct xe_tile *tile;
> + int id;
> +
> + if (!IS_DGFX(xe)) {
> + kunit_skip(test, "non-discrete device\n");
> + return 0;
> + }
> +
> + guard(xe_pm_runtime)(xe);
> + for_each_tile(tile, xe, id)
> + vram_gpu_offset_test_run_tile(xe, tile, test);
> +
> + return 0;
> +}
> +
> +static void xe_bo_vram_gpu_offset_kunit(struct kunit *test)
> +{
> + struct xe_device *xe = test->priv;
> +
> + vram_gpu_offset_test_run_device(xe);
> +}
> +
> static struct kunit_case xe_bo_tests[] = {
> KUNIT_CASE_PARAM(xe_ccs_migrate_kunit,
> xe_pci_live_device_gen_param),
> KUNIT_CASE_PARAM(xe_bo_evict_kunit,
> xe_pci_live_device_gen_param),
> + KUNIT_CASE_PARAM(xe_bo_vram_gpu_offset_kunit,
> xe_pci_live_device_gen_param),
> {}
> };
>
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index a7c2dc7f224c..30c4fe326827 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -1164,6 +1164,9 @@ static int xe_bo_move(struct ttm_buffer_object
> *ttm_bo, bool evict,
> ret = xe_sriov_vf_ccs_attach_bo(bo);
>
> out:
> + bo->vram_gpu_offset = ttm_bo->resource ?
> + vram_region_gpu_offset(ttm_bo->resource) : 0;
> +
Please avoid recalculating data like this in "move". In general we
invalidate mappings and cached data in "move_notify" and recalculate on
first use.
> if ((!ttm_bo->resource || ttm_bo->resource->mem_type ==
> XE_PL_SYSTEM) &&
> ttm_bo->ttm) {
> long timeout = dma_resv_wait_timeout(ttm_bo-
> >base.resv,
> @@ -2908,9 +2911,15 @@ int xe_managed_bo_reinit_in_vram(struct
> xe_device *xe, struct xe_tile *tile, str
> return 0;
> }
>
> -/*
> - * XXX: This is in the VM bind data path, likely should calculate
> this once and
> - * store, with a recalculation if the BO is moved.
> +/**
> + * vram_region_gpu_offset - Compute GPU offset for a TTM resource's
> memory region
> + * @res: The TTM resource.
> + *
> + * Computes the GPU-visible offset for @res based on its current
> memory type.
> + * Callers that always operate on a BO's current resource should
> prefer
> + * xe_bo_vram_gpu_offset() which returns a cached value.
> + *
> + * Return: The GPU-visible offset, or 0 for system/TT memory.
> */
> uint64_t vram_region_gpu_offset(struct ttm_resource *res)
> {
> @@ -3173,7 +3182,7 @@ dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64
> offset, size_t page_size)
>
> xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
> page_size, &cur);
> - return cur.start + offset +
> vram_region_gpu_offset(bo->ttm.resource);
> + return cur.start + offset +
> xe_bo_vram_gpu_offset(bo);
> }
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index 68dea7d25a6b..d911f7327e8b 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -342,6 +342,21 @@ bool xe_bo_is_vm_bound(struct xe_bo *bo);
> bool xe_bo_has_single_placement(struct xe_bo *bo);
> uint64_t vram_region_gpu_offset(struct ttm_resource *res);
>
> +/**
> + * xe_bo_vram_gpu_offset - Return cached GPU offset for BO's memory
> region
Prefer function() format.
> + * @bo: The buffer object.
> + *
> + * Returns the GPU offset for the BO's current memory region. This
> value
> + * is cached on the BO and updated whenever the BO is moved,
> avoiding
> + * repeated pointer chasing through TTM resource manager structures.
Just comment what the function does. No need to re-justify here.
> + *
> + * Return: The GPU-visible offset, or 0 for system/TT memory.
> + */
> +static inline u64 xe_bo_vram_gpu_offset(struct xe_bo *bo)
> +{
Here you'd
if (unlikely(bo->vram_gpu_offset == SOME_INVALID_VALUE))
recalculate();
> + return bo->vram_gpu_offset;
> +}
> +
Thanks,
Thomas
> bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
>
> int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct
> ttm_operation_ctx *ctc,
> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h
> b/drivers/gpu/drm/xe/xe_bo_types.h
> index ff8317bfc1ae..622a1ec8231e 100644
> --- a/drivers/gpu/drm/xe/xe_bo_types.h
> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
> @@ -109,6 +109,12 @@ struct xe_bo {
> */
> u64 min_align;
>
> + /**
> + * @vram_gpu_offset: Cached GPU offset for BO's current
> memory
> + * region, updated on move. Protected by the BO's dma-resv
> lock.
> + */
> + u64 vram_gpu_offset;
> +
> /**
> * @madv_purgeable: user space advise on BO purgeability,
> protected
> * by BO's dma-resv lock.
> diff --git a/drivers/gpu/drm/xe/xe_ggtt.c
> b/drivers/gpu/drm/xe/xe_ggtt.c
> index a848d1a41b9b..a24ae3e0e553 100644
> --- a/drivers/gpu/drm/xe/xe_ggtt.c
> +++ b/drivers/gpu/drm/xe/xe_ggtt.c
> @@ -705,7 +705,7 @@ static void xe_ggtt_map_bo(struct xe_ggtt *ggtt,
> struct xe_ggtt_node *node,
> pte |
> xe_res_dma(&cur));
> } else {
> /* Prepend GPU offset */
> - pte |= vram_region_gpu_offset(bo->ttm.resource);
> + pte |= xe_bo_vram_gpu_offset(bo);
>
> for (xe_res_first(bo->ttm.resource, 0,
> xe_bo_size(bo), &cur);
> cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE))
> diff --git a/drivers/gpu/drm/xe/xe_lmtt.c
> b/drivers/gpu/drm/xe/xe_lmtt.c
> index 0c726eda9390..f9257ba1728b 100644
> --- a/drivers/gpu/drm/xe/xe_lmtt.c
> +++ b/drivers/gpu/drm/xe/xe_lmtt.c
> @@ -492,7 +492,7 @@ static void lmtt_insert_bo(struct xe_lmtt *lmtt,
> unsigned int vfid, struct xe_bo
> lmtt_assert(lmtt, IS_ALIGNED(xe_bo_size(bo), page_size));
> lmtt_assert(lmtt, xe_bo_is_vram(bo));
>
> - vram_offset = vram_region_gpu_offset(bo->ttm.resource);
> + vram_offset = xe_bo_vram_gpu_offset(bo);
> xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur);
> while (cur.remaining) {
> addr = xe_res_dma(&cur);
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c
> b/drivers/gpu/drm/xe/xe_migrate.c
> index fc918b4fba54..bdaea4979e89 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -2499,7 +2499,7 @@ int xe_migrate_access_memory(struct xe_migrate
> *m, struct xe_bo *bo,
>
> do {
> struct dma_fence *__fence;
> - u64 vram_addr = vram_region_gpu_offset(bo-
> >ttm.resource) +
> + u64 vram_addr = xe_bo_vram_gpu_offset(bo) +
> cursor.start;
> int current_bytes;
> u32 pitch;
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index 8e5f4f0dea3f..7cfce1a92a1a 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -782,7 +782,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct
> xe_vma *vma,
> }
>
> xe_walk.default_vram_pte |= XE_PPGTT_PTE_DM;
> - xe_walk.dma_offset = (bo && !is_purged) ?
> vram_region_gpu_offset(bo->ttm.resource) : 0;
> + xe_walk.dma_offset = (bo && !is_purged) ?
> xe_bo_vram_gpu_offset(bo) : 0;
> if (!range)
> xe_bo_assert_held(bo);
>
^ permalink raw reply [flat|nested] 5+ messages in thread* RE: [PATCH] drm/xe/bo: Cache vram_region_gpu_offset in struct xe_bo
2026-03-30 8:02 ` [PATCH] " Thomas Hellström
@ 2026-04-01 2:53 ` Yuri Martins
2026-04-01 3:52 ` Matthew Brost
0 siblings, 1 reply; 5+ messages in thread
From: Yuri Martins @ 2026-04-01 2:53 UTC (permalink / raw)
To: Thomas Hellström, intel-xe@lists.freedesktop.org
Cc: Matthew Brost, Rodrigo Vivi, David Airlie, Simona Vetter,
Matthew Auld, Matt Roper, dri-devel@lists.freedesktop.org,
linux-kernel@vger.kernel.org
Hi Thomas,
Thanks for the review and the detailed feedback on the move_notify pattern.
You're right to ask for performance data, I don't have any. My hardware
(Core Ultra 7 258V) is integrated-only, so vram_region_gpu_offset() returns
0 and the path this targets was never exercised. I should have realized
that before submitting.
Withdrawing this patch.
Thanks,
Yuri
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] drm/xe/bo: Cache vram_region_gpu_offset in struct xe_bo
2026-04-01 2:53 ` Yuri Martins
@ 2026-04-01 3:52 ` Matthew Brost
0 siblings, 0 replies; 5+ messages in thread
From: Matthew Brost @ 2026-04-01 3:52 UTC (permalink / raw)
To: Yuri Martins
Cc: Thomas Hellström, intel-xe@lists.freedesktop.org,
Rodrigo Vivi, David Airlie, Simona Vetter, Matthew Auld,
Matt Roper, dri-devel@lists.freedesktop.org,
linux-kernel@vger.kernel.org
On Wed, Apr 01, 2026 at 02:53:17AM +0000, Yuri Martins wrote:
> Hi Thomas,
>
> Thanks for the review and the detailed feedback on the move_notify pattern.
>
> You're right to ask for performance data, I don't have any. My hardware
> (Core Ultra 7 258V) is integrated-only, so vram_region_gpu_offset() returns
> 0 and the path this targets was never exercised. I should have realized
> that before submitting.
All good — we should probably just delete this XXX, as it was an early
comment from me back when I still had the i915 micro-optimization
mindset. I agree with Thomas that a change like this has little to no
impact, given that binds (where this code is typically used) are orders
of magnitude slower than clearing or moving memory. Plus, binds really
only end up in the critical path during page faults — and even there,
we’re usually moving memory first, so a little pointer chasing isn’t
going to show up.
As someone who has done quite a bit of perf work, here are the areas we
should focus on cleaning up:
- Time-complexity reduction (e.g., if we can go from O(N²) to O(N log N),
etc.)
- Reduce unnecessary context switches (e.g., don’t call queue_work()
blindly when it has nothing to do)
- Memory placement improvements (e.g., move CPU-read buffers to system
memory; move GPU-read buffers to VRAM)
- Use the hardware correctly (e.g., reduce GPU context switches for
common kernel operations, etc.)
Matt
>
> Withdrawing this patch.
>
> Thanks,
> Yuri
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2026-04-01 3:52 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-29 22:22 [PATCH] drm/xe/bo: Cache vram_region_gpu_offset in struct xe_bo Yuri Martins
2026-03-29 22:24 ` ✗ LGCI.VerificationFailed: failure for " Patchwork
2026-03-30 8:02 ` [PATCH] " Thomas Hellström
2026-04-01 2:53 ` Yuri Martins
2026-04-01 3:52 ` Matthew Brost
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox