From: Matthew Auld <matthew.auld@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: Stuart Summers <stuart.summers@intel.com>,
Matthew Brost <matthew.brost@intel.com>
Subject: [PATCH v2] drm/xe: rework PDE PAT index selection
Date: Fri, 8 Aug 2025 11:34:56 +0100 [thread overview]
Message-ID: <20250808103455.462424-2-matthew.auld@intel.com> (raw)
For non-leaf paging structures we end up selecting a random index
between [0, 3], depending on the first user if the page-table is shared,
since non-leaf structures only have two bits in the HW for encoding the
PAT index, and here we are just passing along the full user provided
index, which can be an index as large as ~31 on xe2+. The user provided
index is meant for the leaf node, which maps the actual BO pages where
we have more PAT bits, and not the non-leaf nodes which are only mapping
other paging structures, and so only needs a minimal PAT index range.
Also the chosen index might need to consider how the driver mapped the
paging structures on the host side, like wc vs wb, which is separate
from the user provided index.
With that move the PDE PAT index selection under driver control. For now
just use a coherent index on platforms with page-tables that are cached
on host side, and incoherent otherwise. Using a coherent index could
potentially be expensive, and would be overkill if we know the page-table
is always uncached on host side.
v2 (Stuart):
- Add some documentation and split into separate helper.
BSpec: 59510
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Stuart Summers <stuart.summers@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Stuart Summers <stuart.summers@intel.com>
---
drivers/gpu/drm/xe/xe_migrate.c | 10 ++++------
drivers/gpu/drm/xe/xe_pt.c | 4 ++--
drivers/gpu/drm/xe/xe_pt_types.h | 3 +--
drivers/gpu/drm/xe/xe_vm.c | 34 +++++++++++++++++++++++++++-----
4 files changed, 36 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 4effe10d815b..9da56c2e0eaa 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -152,8 +152,7 @@ static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm,
for (pos = dpa_base; pos < vram_limit;
pos += SZ_1G, ofs += 8) {
if (pos + SZ_1G >= vram_limit) {
- entry = vm->pt_ops->pde_encode_bo(bo, pt_2m_ofs,
- pat_index);
+ entry = vm->pt_ops->pde_encode_bo(bo, pt_2m_ofs);
xe_map_wr(xe, &bo->vmap, ofs, u64, entry);
flags = vm->pt_ops->pte_encode_addr(xe, 0,
@@ -207,7 +206,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
/* PT30 & PT31 reserved for 2M identity map */
pt29_ofs = xe_bo_size(bo) - 3 * XE_PAGE_SIZE;
- entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs, pat_index);
+ entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs);
xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
map_ofs = (num_entries - num_setup) * XE_PAGE_SIZE;
@@ -275,15 +274,14 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
flags = XE_PDE_64K;
entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (u64)(level - 1) *
- XE_PAGE_SIZE, pat_index);
+ XE_PAGE_SIZE);
xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
entry | flags);
}
/* Write PDE's that point to our BO. */
for (i = 0; i < map_ofs / PAGE_SIZE; i++) {
- entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE,
- pat_index);
+ entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE);
xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
(i + 1) * 8, u64, entry);
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 330cc0f54a3f..f3a39e734a90 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -69,7 +69,7 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
if (level > MAX_HUGEPTE_LEVEL)
return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo,
- 0, pat_index);
+ 0);
return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) |
XE_PTE_NULL;
@@ -616,7 +616,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
xe_child->is_compact = true;
}
- pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags;
+ pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0) | flags;
ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child,
pte);
}
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
index 69eab6f37cfe..17cdd7c7e9f5 100644
--- a/drivers/gpu/drm/xe/xe_pt_types.h
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -45,8 +45,7 @@ struct xe_pt_ops {
u64 (*pte_encode_addr)(struct xe_device *xe, u64 addr,
u16 pat_index,
u32 pt_level, bool devmem, u64 flags);
- u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset,
- u16 pat_index);
+ u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset);
};
struct xe_pt_entry {
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 432ea325677d..dbe6ac889f4e 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1512,14 +1512,39 @@ static u64 pte_encode_ps(u32 pt_level)
return 0;
}
-static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
- const u16 pat_index)
+static u16 pde_pat_index(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+ u16 pat_index;
+
+ /*
+ * We only have two bits to encode the PAT index in non-leaf nodes, but
+ * these only point to other paging structures so we only need a minimal
+ * selection of options. The user PAT index is only for encoding leaf
+ * nodes, where we have use of more bits to do the encoding. The
+ * non-leaf nodes are instead under driver control so the chosen index
+ * here should be distict from the user PAT index. Also the
+ * corresponding coherency of the PAT index should be tied to the
+ * allocation type of the page table (or at least we should pick
+ * something which is always safe).
+ */
+ if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached)
+ pat_index = xe->pat.idx[XE_CACHE_WB];
+ else
+ pat_index = xe->pat.idx[XE_CACHE_NONE];
+
+ xe_assert(xe, pat_index <= 3);
+
+ return pat_index;
+}
+
+static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset)
{
u64 pde;
pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
- pde |= pde_encode_pat_index(pat_index);
+ pde |= pde_encode_pat_index(pde_pat_index(bo));
return pde;
}
@@ -2024,8 +2049,7 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
{
- return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
- tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
+ return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
}
static struct xe_exec_queue *
--
2.50.1
next reply other threads:[~2025-08-08 10:35 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-08 10:34 Matthew Auld [this message]
2025-08-08 11:47 ` ✓ CI.KUnit: success for drm/xe: rework PDE PAT index selection (rev2) Patchwork
2025-08-08 12:20 ` ✓ Xe.CI.BAT: " Patchwork
2025-08-08 13:33 ` ✗ Xe.CI.Full: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250808103455.462424-2-matthew.auld@intel.com \
--to=matthew.auld@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=matthew.brost@intel.com \
--cc=stuart.summers@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.