* [PATCH v4 01/10] drm/xe: Do not forward invalid TLB invalidation seqnos to upper layers
2025-12-09 20:08 [PATCH v4 00/10] Page Reclamation Support for Xe3p Platforms Brian Nguyen
@ 2025-12-09 20:08 ` Brian Nguyen
2025-12-09 20:08 ` [PATCH v4 02/10] drm/xe/xe_tlb_inval: Modify fence interface to support PPC flush Brian Nguyen
` (8 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Brian Nguyen @ 2025-12-09 20:08 UTC (permalink / raw)
To: intel-xe; +Cc: tejas.upadhyay, matthew.brost, shuicheng.lin, stuart.summers
From: Matthew Brost <matthew.brost@intel.com>
Certain TLB invalidation operations send multiple H2G messages per seqno
with only the final H2G containing the valid seqno - the others carry an
invalid seqno. The G2H handler drops these invalid seqnos to avoid
prematurely signaling a TLB invalidation fence.
With TLB_INVALIDATION_SEQNO_INVALID used to indicate in-progress
multi-step TLB invalidations, reset the TDR to ensure the timeout
won't trigger prematurely while G2H actions are still ongoing.
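As a rough sketch of the sender side (send_h2g is a hypothetical helper,
not part of this patch), a multi-step invalidation looks like:
	/* Intermediate H2Gs carry the invalid seqno */
	for (i = 0; i < num_actions - 1; i++)
		send_h2g(guc, TLB_INVALIDATION_SEQNO_INVALID, actions[i]);
	/* Only the final H2G carries the fence's real seqno */
	send_h2g(guc, seqno, actions[num_actions - 1]);
Each intermediate G2H done reply then only resets the TDR, while the
final one signals the fence.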
v2: Remove lock from xe_tlb_inval_reset_timeout. (Matthew B)
v3: Squash with dependent patch from Matthew Brost's series.
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/gpu/drm/xe/xe_tlb_inval.c | 20 ++++++++++++++++++++
drivers/gpu/drm/xe/xe_tlb_inval_types.h | 1 +
2 files changed, 21 insertions(+)
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c
index 918a59e686ea..a122fbb9fc4a 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_tlb_inval.c
@@ -199,6 +199,20 @@ void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval)
mutex_unlock(&tlb_inval->seqno_lock);
}
+/**
+ * xe_tlb_inval_reset_timeout() - Reset TLB inval fence timeout
+ * @tlb_inval: TLB invalidation client
+ *
+ * Reset the TLB invalidation timeout timer.
+ */
+static void xe_tlb_inval_reset_timeout(struct xe_tlb_inval *tlb_inval)
+{
+ lockdep_assert_held(&tlb_inval->pending_lock);
+
+ mod_delayed_work(system_wq, &tlb_inval->fence_tdr,
+ tlb_inval->ops->timeout_delay(tlb_inval));
+}
+
static bool xe_tlb_inval_seqno_past(struct xe_tlb_inval *tlb_inval, int seqno)
{
int seqno_recv = READ_ONCE(tlb_inval->seqno_recv);
@@ -360,6 +374,12 @@ void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno)
* process_g2h_msg().
*/
spin_lock_irqsave(&tlb_inval->pending_lock, flags);
+ if (seqno == TLB_INVALIDATION_SEQNO_INVALID) {
+ xe_tlb_inval_reset_timeout(tlb_inval);
+ spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
+ return;
+ }
+
if (xe_tlb_inval_seqno_past(tlb_inval, seqno)) {
spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
return;
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_types.h b/drivers/gpu/drm/xe/xe_tlb_inval_types.h
index 8f8b060e9005..7a6967ce3b76 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_types.h
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_types.h
@@ -80,6 +80,7 @@ struct xe_tlb_inval {
const struct xe_tlb_inval_ops *ops;
/** @tlb_inval.seqno: TLB invalidation seqno, protected by CT lock */
#define TLB_INVALIDATION_SEQNO_MAX 0x100000
+#define TLB_INVALIDATION_SEQNO_INVALID TLB_INVALIDATION_SEQNO_MAX
int seqno;
/** @tlb_invalidation.seqno_lock: protects @tlb_invalidation.seqno */
struct mutex seqno_lock;
--
2.52.0
* [PATCH v4 02/10] drm/xe/xe_tlb_inval: Modify fence interface to support PPC flush
2025-12-09 20:08 [PATCH v4 00/10] Page Reclamation Support for Xe3p Platforms Brian Nguyen
2025-12-09 20:08 ` [PATCH v4 01/10] drm/xe: Do not forward invalid TLB invalidation seqnos to upper layers Brian Nguyen
@ 2025-12-09 20:08 ` Brian Nguyen
2025-12-09 20:08 ` [PATCH v4 03/10] drm/xe: Add page reclamation info to device info Brian Nguyen
` (7 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Brian Nguyen @ 2025-12-09 20:08 UTC (permalink / raw)
To: intel-xe; +Cc: tejas.upadhyay, matthew.brost, shuicheng.lin, stuart.summers
Allow tlb_invalidation to control when the driver wants to flush the
Private Physical Cache (PPC) as part of the TLB invalidation process.
The default behavior is still to always flush the PPC, but the driver
now has the option to disable it.
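For example, a caller can now opt out of the full PPC flush (a sketch;
the page reclaim path added later in this series does exactly this):
	/* Default: full PPC flush, equivalent to MAKE_INVAL_OP(type) */
	op = MAKE_INVAL_OP_FLUSH(XE_GUC_TLB_INVAL_PAGE_SELECTIVE, true);
	/* Skip the full PPC flush, e.g. when a selective flush follows */
	op = MAKE_INVAL_OP_FLUSH(XE_GUC_TLB_INVAL_PAGE_SELECTIVE, false);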
v2:
- Revise commit/kernel doc descriptions. (Shuicheng)
- Remove unused function. (Shuicheng)
- Remove bool flush_cache parameter from fence,
and various function inputs. (Matthew B)
Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Cc: Shuicheng Lin <shuicheng.lin@intel.com>
---
drivers/gpu/drm/xe/xe_guc_tlb_inval.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
index 848d3493df10..37ac943cb10f 100644
--- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
@@ -34,9 +34,12 @@ static int send_tlb_inval(struct xe_guc *guc, const u32 *action, int len)
G2H_LEN_DW_TLB_INVALIDATE, 1);
}
-#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
+#define MAKE_INVAL_OP_FLUSH(type, flush_cache) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
- XE_GUC_TLB_INVAL_FLUSH_CACHE)
+ (flush_cache ? \
+ XE_GUC_TLB_INVAL_FLUSH_CACHE : 0))
+
+#define MAKE_INVAL_OP(type) MAKE_INVAL_OP_FLUSH(type, true)
static int send_tlb_inval_all(struct xe_tlb_inval *tlb_inval, u32 seqno)
{
@@ -152,7 +155,7 @@ static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
ilog2(SZ_2M) + 1)));
xe_gt_assert(gt, IS_ALIGNED(start, length));
- action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
+ action[len++] = MAKE_INVAL_OP_FLUSH(XE_GUC_TLB_INVAL_PAGE_SELECTIVE, true);
action[len++] = asid;
action[len++] = lower_32_bits(start);
action[len++] = upper_32_bits(start);
--
2.52.0
* [PATCH v4 03/10] drm/xe: Add page reclamation info to device info
2025-12-09 20:08 [PATCH v4 00/10] Page Reclamation Support for Xe3p Platforms Brian Nguyen
2025-12-09 20:08 ` [PATCH v4 01/10] drm/xe: Do not forward invalid TLB invalidation seqnos to upper layers Brian Nguyen
2025-12-09 20:08 ` [PATCH v4 02/10] drm/xe/xe_tlb_inval: Modify fence interface to support PPC flush Brian Nguyen
@ 2025-12-09 20:08 ` Brian Nguyen
2025-12-09 20:08 ` [PATCH v4 04/10] drm/xe/guc: Add page reclamation interface to GuC Brian Nguyen
` (6 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Brian Nguyen @ 2025-12-09 20:08 UTC (permalink / raw)
To: intel-xe
Cc: tejas.upadhyay, matthew.brost, shuicheng.lin, stuart.summers,
Oak Zeng
From: Oak Zeng <oak.zeng@intel.com>
Starting from Xe3p, HW adds a feature assisting range-based page
reclamation. Introduce a bit in device info to indicate whether the
device has this capability.
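Later patches in the series gate PRL allocation on this flag, e.g. in
the unbind prepare path:
	if (xe->info.has_page_reclaim_hw_assist)
		xe_page_reclaim_list_alloc_entries(&pt_update_ops->prl);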
Signed-off-by: Oak Zeng <oak.zeng@intel.com>
Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
Reviewed-by: Shuicheng Lin <shuicheng.lin@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/gpu/drm/xe/xe_device_types.h | 2 ++
drivers/gpu/drm/xe/xe_pci.c | 1 +
drivers/gpu/drm/xe/xe_pci_types.h | 1 +
3 files changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 9de73353223f..3836c5ed1c72 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -315,6 +315,8 @@ struct xe_device {
u8 has_mem_copy_instr:1;
/** @info.has_mert: Device has standalone MERT */
u8 has_mert:1;
+ /** @info.has_page_reclaim_hw_assist: Device supports page reclamation feature */
+ u8 has_page_reclaim_hw_assist:1;
/** @info.has_pxp: Device has PXP support */
u8 has_pxp:1;
/** @info.has_range_tlb_inval: Has range based TLB invalidations */
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 04fcbacda12a..0e9d23c20f20 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -681,6 +681,7 @@ static int xe_info_init_early(struct xe_device *xe,
xe->info.has_late_bind = desc->has_late_bind;
xe->info.has_llc = desc->has_llc;
xe->info.has_mert = desc->has_mert;
+ xe->info.has_page_reclaim_hw_assist = desc->has_page_reclaim_hw_assist;
xe->info.has_pxp = desc->has_pxp;
xe->info.has_sriov = xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev)) &&
desc->has_sriov;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index f19f35359696..96feae80b705 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -49,6 +49,7 @@ struct xe_device_desc {
u8 has_mbx_power_limits:1;
u8 has_mem_copy_instr:1;
u8 has_mert:1;
+ u8 has_page_reclaim_hw_assist:1;
u8 has_pxp:1;
u8 has_sriov:1;
u8 needs_scratch:1;
--
2.52.0
* [PATCH v4 04/10] drm/xe/guc: Add page reclamation interface to GuC
2025-12-09 20:08 [PATCH v4 00/10] Page Reclamation Support for Xe3p Platforms Brian Nguyen
` (2 preceding siblings ...)
2025-12-09 20:08 ` [PATCH v4 03/10] drm/xe: Add page reclamation info to device info Brian Nguyen
@ 2025-12-09 20:08 ` Brian Nguyen
2025-12-09 20:08 ` [PATCH v4 05/10] drm/xe: Create page reclaim list on unbind Brian Nguyen
` (5 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Brian Nguyen @ 2025-12-09 20:08 UTC (permalink / raw)
To: intel-xe; +Cc: tejas.upadhyay, matthew.brost, shuicheng.lin, stuart.summers
Add page reclamation related changes to the GuC interface, handlers,
and senders.
Currently TLB invalidations perform a full PPC flush in order to
prevent stale memory accesses to noncoherent system memory. Page
reclamation is an extension of the typical TLB invalidation workflow
that allows the full PPC flush to be disabled in favor of selective PPC
flushing. Selective flushing is driven by a list of pages whose
addresses are passed to the GuC at the time of the action.
Page reclamation interfaces require at least GuC FW version 70.31.0.
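For reference, the H2G action wired up later in the series (patch 8) is
a 4-DWord message carrying the seqno and the 64-bit GGTT address of the
page list (prl_gpu_addr below):
	u32 action[] = {
		XE_GUC_ACTION_PAGE_RECLAMATION,
		seqno,
		lower_32_bits(prl_gpu_addr),
		upper_32_bits(prl_gpu_addr),
	};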
v2:
- Moved send_page_reclaim to first patch usage.
- Add comments explaining shared done handler. (Matthew B)
- Add FW version fallback to disable page reclaim
on older versions. (Matthew B, Shuicheng)
Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Shuicheng Lin <shuicheng.lin@intel.com>
---
drivers/gpu/drm/xe/abi/guc_actions_abi.h | 2 ++
drivers/gpu/drm/xe/xe_guc.c | 4 ++++
drivers/gpu/drm/xe/xe_guc_ct.c | 17 +++++++++++++++++
drivers/gpu/drm/xe/xe_guc_fwif.h | 1 +
4 files changed, 24 insertions(+)
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
index 47756e4674a1..11de3bdf69b5 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -151,6 +151,8 @@ enum xe_guc_action {
XE_GUC_ACTION_TLB_INVALIDATION = 0x7000,
XE_GUC_ACTION_TLB_INVALIDATION_DONE = 0x7001,
XE_GUC_ACTION_TLB_INVALIDATION_ALL = 0x7002,
+ XE_GUC_ACTION_PAGE_RECLAMATION = 0x7003,
+ XE_GUC_ACTION_PAGE_RECLAMATION_DONE = 0x7004,
XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002,
XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index f0407bab9a0c..7daae3294665 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -767,6 +767,10 @@ int xe_guc_init(struct xe_guc *guc)
if (!xe_uc_fw_is_enabled(&guc->fw))
return 0;
+ /* Disable page reclaim if GuC FW does not support */
+ if (GUC_FIRMWARE_VER(guc) < MAKE_GUC_VER(70, 31, 0))
+ xe->info.has_page_reclaim_hw_assist = false;
+
if (IS_SRIOV_VF(xe)) {
ret = xe_guc_ct_init(&guc->ct);
if (ret)
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 648f0f523abb..1c2acdec151a 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -1405,6 +1405,7 @@ static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ case XE_GUC_ACTION_PAGE_RECLAMATION_DONE:
g2h_release_space(ct, len);
}
@@ -1591,6 +1592,15 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
ret = xe_guc_pagefault_handler(guc, payload, adj_len);
break;
case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ case XE_GUC_ACTION_PAGE_RECLAMATION_DONE:
+ /*
+ * Page reclamation is an extension of TLB invalidation. Both
+ * operations share the same seqno and fence. When either
+ * action completes, we need to signal the corresponding
+ * fence. Since the handling logic (lookup fence by seqno,
+ * fence signalling) is identical, we use the same handler
+ * for both G2H events.
+ */
ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len);
break;
case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF:
@@ -1756,6 +1766,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
switch (action) {
case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ case XE_GUC_ACTION_PAGE_RECLAMATION_DONE:
break; /* Process these in fast-path */
default:
return 0;
@@ -1792,6 +1803,12 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
ret = xe_guc_pagefault_handler(guc, payload, adj_len);
break;
case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ case XE_GUC_ACTION_PAGE_RECLAMATION_DONE:
+ /*
+ * Seqno and fence handling of page reclamation and TLB
+ * invalidation is identical, so we can use the same handler
+ * for both actions.
+ */
__g2h_release_space(ct, len);
ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len);
break;
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 7d93c2749485..5aa6fb249be9 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -16,6 +16,7 @@
#define G2H_LEN_DW_DEREGISTER_CONTEXT 3
#define G2H_LEN_DW_TLB_INVALIDATE 3
#define G2H_LEN_DW_G2G_NOTIFY_MIN 3
+#define G2H_LEN_DW_PAGE_RECLAMATION 3
#define GUC_ID_MAX 65535
#define GUC_ID_UNKNOWN 0xffffffff
--
2.52.0
* [PATCH v4 05/10] drm/xe: Create page reclaim list on unbind
2025-12-09 20:08 [PATCH v4 00/10] Page Reclamation Support for Xe3p Platforms Brian Nguyen
` (3 preceding siblings ...)
2025-12-09 20:08 ` [PATCH v4 04/10] drm/xe/guc: Add page reclamation interface to GuC Brian Nguyen
@ 2025-12-09 20:08 ` Brian Nguyen
2025-12-09 20:08 ` [PATCH v4 06/10] drm/xe: Suballocate BO for page reclaim Brian Nguyen
` (4 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Brian Nguyen @ 2025-12-09 20:08 UTC (permalink / raw)
To: intel-xe; +Cc: tejas.upadhyay, matthew.brost, shuicheng.lin, stuart.summers
The page reclaim list (PRL) is preparation work for the page reclaim
feature. The PRL is initially owned by pt_update_ops, and all other
page reclaim operations point back to this PRL. Its entries are
generated during the unbind page walk.
The PRL is restricted to a single 4K page, so 512 entries at most.
- Removed unused function. (Shuicheng)
- Compacted warning checking, update commit message,
spelling, etc. (Shuicheng, Matthew B)
- Fix kernel docs
- Moved PRL max entries overflow handling out from
generate_reclaim_entry to caller (Shuicheng)
- Add xe_page_reclaim_list_init for clarity. (Matthew B)
- Modify xe_guc_page_reclaim_entry to use macros
for greater flexibility. (Matthew B)
- Add fallback for PTE outside of page reclaim supported
4K, 64K, 2M pages (Matthew B)
- Invalidate PRL for early abort page walk.
- Removed page reclaim related variables from tlb fence
(Matthew Brost)
- Remove error handling in *alloc_entries failure. (Matthew B)
v3:
- Fix NULL pointer dereference check.
- Modify reclaim_entry to QW and bitfields accordingly. (Matthew B)
- Add vm_dbg prints for PRL generation and invalidation. (Matthew B)
v4:
- s/GENMASK/GENMASK_ULL && s/BIT/BIT_ULL (CI)
Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Shuicheng Lin <shuicheng.lin@intel.com>
---
drivers/gpu/drm/xe/Makefile | 1 +
drivers/gpu/drm/xe/regs/xe_gtt_defs.h | 1 +
drivers/gpu/drm/xe/xe_page_reclaim.c | 62 +++++++++++++
drivers/gpu/drm/xe/xe_page_reclaim.h | 72 +++++++++++++++
drivers/gpu/drm/xe/xe_pt.c | 123 +++++++++++++++++++++++++-
drivers/gpu/drm/xe/xe_pt_types.h | 5 ++
6 files changed, 263 insertions(+), 1 deletion(-)
create mode 100644 drivers/gpu/drm/xe/xe_page_reclaim.c
create mode 100644 drivers/gpu/drm/xe/xe_page_reclaim.h
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 6ecba27d85f7..7f08b4cd91d6 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -95,6 +95,7 @@ xe-y += xe_bb.o \
xe_oa.o \
xe_observation.o \
xe_pagefault.o \
+ xe_page_reclaim.o \
xe_pat.o \
xe_pci.o \
xe_pcode.o \
diff --git a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h
index 4389e5a76f89..4d83461e538b 100644
--- a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h
@@ -9,6 +9,7 @@
#define XELPG_GGTT_PTE_PAT0 BIT_ULL(52)
#define XELPG_GGTT_PTE_PAT1 BIT_ULL(53)
+#define XE_PTE_ADDR_MASK GENMASK_ULL(51, 12)
#define GGTT_PTE_VFID GENMASK_ULL(11, 2)
#define GUC_GGTT_TOP 0xFEE00000
diff --git a/drivers/gpu/drm/xe/xe_page_reclaim.c b/drivers/gpu/drm/xe/xe_page_reclaim.c
new file mode 100644
index 000000000000..63facea28213
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_page_reclaim.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include "xe_page_reclaim.h"
+
+#include "regs/xe_gt_regs.h"
+#include "xe_assert.h"
+#include "xe_macros.h"
+
+/**
+ * xe_page_reclaim_list_invalidate() - Mark a PRL as invalid
+ * @prl: Page reclaim list to reset
+ *
+ * Clears the entries pointer and marks the list as invalid so
+ * future use knows PRL is unusable. It is expected that the entries
+ * have already been released.
+ */
+void xe_page_reclaim_list_invalidate(struct xe_page_reclaim_list *prl)
+{
+ prl->entries = NULL;
+ prl->num_entries = XE_PAGE_RECLAIM_INVALID_LIST;
+}
+
+/**
+ * xe_page_reclaim_list_init() - Initialize a page reclaim list
+ * @prl: Page reclaim list to initialize
+ *
+ * Invalidates the list to prepare it for use.
+ */
+void xe_page_reclaim_list_init(struct xe_page_reclaim_list *prl)
+{
+ xe_page_reclaim_list_invalidate(prl);
+}
+
+/**
+ * xe_page_reclaim_list_alloc_entries() - Allocate page reclaim list entries
+ * @prl: Page reclaim list to allocate entries for
+ *
+ * Allocate one 4K page for the PRL entries; on failure, prl->entries remains NULL.
+ */
+int xe_page_reclaim_list_alloc_entries(struct xe_page_reclaim_list *prl)
+{
+ struct page *page;
+
+ if (XE_WARN_ON(prl->entries))
+ return 0;
+
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (page) {
+ prl->entries = page_address(page);
+ prl->num_entries = 0;
+ }
+
+ return page ? 0 : -ENOMEM;
+}
diff --git a/drivers/gpu/drm/xe/xe_page_reclaim.h b/drivers/gpu/drm/xe/xe_page_reclaim.h
new file mode 100644
index 000000000000..9255566d2066
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_page_reclaim.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_PAGE_RECLAIM_H_
+#define _XE_PAGE_RECLAIM_H_
+
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/bits.h>
+
+#define XE_PAGE_RECLAIM_MAX_ENTRIES 512
+#define XE_PAGE_RECLAIM_LIST_MAX_SIZE SZ_4K
+
+struct xe_guc_page_reclaim_entry {
+ u64 qw;
+/* valid reclaim entry bit */
+#define XE_PAGE_RECLAIM_VALID BIT_ULL(0)
+/*
+ * offset order of page size to be reclaimed
+ * page_size = 1 << (XE_PTE_SHIFT + reclamation_size)
+ */
+#define XE_PAGE_RECLAIM_SIZE GENMASK_ULL(6, 1)
+#define XE_PAGE_RECLAIM_RSVD_0 GENMASK_ULL(11, 7)
+/* lower 20 bits of the physical address */
+#define XE_PAGE_RECLAIM_ADDR_LO GENMASK_ULL(31, 12)
+/* upper 20 bits of the physical address */
+#define XE_PAGE_RECLAIM_ADDR_HI GENMASK_ULL(51, 32)
+#define XE_PAGE_RECLAIM_RSVD_1 GENMASK_ULL(63, 52)
+} __packed;
+
+struct xe_page_reclaim_list {
+ /** @entries: array of page reclaim entries, page allocated */
+ struct xe_guc_page_reclaim_entry *entries;
+ /** @num_entries: number of entries */
+ int num_entries;
+#define XE_PAGE_RECLAIM_INVALID_LIST -1
+};
+
+void xe_page_reclaim_list_invalidate(struct xe_page_reclaim_list *prl);
+void xe_page_reclaim_list_init(struct xe_page_reclaim_list *prl);
+int xe_page_reclaim_list_alloc_entries(struct xe_page_reclaim_list *prl);
+/**
+ * xe_page_reclaim_entries_get() - Increment the reference count of page reclaim entries.
+ * @entries: Pointer to the array of page reclaim entries.
+ *
+ * This function increments the reference count of the backing page.
+ */
+static inline void xe_page_reclaim_entries_get(struct xe_guc_page_reclaim_entry *entries)
+{
+ if (entries)
+ get_page(virt_to_page(entries));
+}
+
+/**
+ * xe_page_reclaim_entries_put() - Decrement the reference count of page reclaim entries.
+ * @entries: Pointer to the array of page reclaim entries.
+ *
+ * This function decrements the reference count of the backing page
+ * and frees it if the count reaches zero.
+ */
+static inline void xe_page_reclaim_entries_put(struct xe_guc_page_reclaim_entry *entries)
+{
+ if (entries)
+ put_page(virt_to_page(entries));
+}
+
+#endif /* _XE_PAGE_RECLAIM_H_ */
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 884127b4d97d..5f27d0a64013 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -12,6 +12,7 @@
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_migrate.h"
+#include "xe_page_reclaim.h"
#include "xe_pt_types.h"
#include "xe_pt_walk.h"
#include "xe_res_cursor.h"
@@ -1535,6 +1536,9 @@ struct xe_pt_stage_unbind_walk {
/** @modified_end: Walk range start, modified like @modified_start. */
u64 modified_end;
+ /** @prl: Backing pointer to page reclaim list in pt_update_ops */
+ struct xe_page_reclaim_list *prl;
+
/* Output */
/* @wupd: Structure to track the page-table updates we're building */
struct xe_walk_update wupd;
@@ -1572,6 +1576,66 @@ static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level,
return false;
}
+/* Huge 2MB leaf lives directly in a level-1 table and has no children */
+static bool is_2m_pte(struct xe_pt *pte)
+{
+ return pte->level == 1 && !pte->base.children;
+}
+
+/* page_size = 2^(reclamation_size + XE_PTE_SHIFT) */
+#define COMPUTE_RECLAIM_ADDRESS_MASK(page_size) \
+({ \
+ BUILD_BUG_ON(!__builtin_constant_p(page_size)); \
+ ilog2(page_size) - XE_PTE_SHIFT; \
+})
+
+static void generate_reclaim_entry(struct xe_tile *tile,
+ struct xe_page_reclaim_list *prl,
+ u64 pte, struct xe_pt *xe_child)
+{
+ struct xe_guc_page_reclaim_entry *reclaim_entries = prl->entries;
+ u64 phys_page = (pte & XE_PTE_ADDR_MASK) >> XE_PTE_SHIFT;
+ int num_entries = prl->num_entries;
+ u32 reclamation_size;
+
+ xe_tile_assert(tile, xe_child->level <= MAX_HUGEPTE_LEVEL);
+ xe_tile_assert(tile, reclaim_entries);
+ xe_tile_assert(tile, num_entries < XE_PAGE_RECLAIM_MAX_ENTRIES - 1);
+
+ if (num_entries == XE_PAGE_RECLAIM_INVALID_LIST)
+ return;
+
+ /*
+ * reclamation_size indicates the size of the page to be
+ * invalidated and flushed from non-coherent cache.
+ * Page size is computed as 2^(reclamation_size + XE_PTE_SHIFT) bytes.
+ * Only 4K, 64K (level 0), and 2M pages are supported by hardware for page reclaim.
+ */
+ if (xe_child->level == 0 && !(pte & XE_PTE_PS64)) {
+ reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_4K); /* reclamation_size = 0 */
+ } else if (xe_child->level == 0) {
+ reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_64K); /* reclamation_size = 4 */
+ } else if (is_2m_pte(xe_child)) {
+ reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_2M); /* reclamation_size = 9 */
+ } else {
+ prl->num_entries = XE_PAGE_RECLAIM_INVALID_LIST;
+ vm_dbg(&tile_to_xe(tile)->drm,
+ "PRL invalidate: unsupported PTE level=%u pte=%#llx\n",
+ xe_child->level, pte);
+ return;
+ }
+
+ reclaim_entries[num_entries].qw =
+ FIELD_PREP(XE_PAGE_RECLAIM_VALID, 1) |
+ FIELD_PREP(XE_PAGE_RECLAIM_SIZE, reclamation_size) |
+ FIELD_PREP(XE_PAGE_RECLAIM_ADDR_LO, phys_page) |
+ FIELD_PREP(XE_PAGE_RECLAIM_ADDR_HI, phys_page >> 20);
+ prl->num_entries++;
+ vm_dbg(&tile_to_xe(tile)->drm,
+ "PRL add entry: level=%u pte=%#llx reclamation_size=%u prl_idx=%d\n",
+ xe_child->level, pte, reclamation_size, num_entries);
+}
+
static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
unsigned int level, u64 addr, u64 next,
struct xe_ptw **child,
@@ -1579,11 +1643,45 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
struct xe_pt_walk *walk)
{
struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+ struct xe_pt_stage_unbind_walk *xe_walk =
+ container_of(walk, typeof(*xe_walk), base);
+ struct xe_device *xe = tile_to_xe(xe_walk->tile);
XE_WARN_ON(!*child);
XE_WARN_ON(!level);
+ /* Check for leaf node */
+ if (xe_walk->prl && xe_walk->prl->num_entries != XE_PAGE_RECLAIM_INVALID_LIST &&
+ !xe_child->base.children) {
+ struct iosys_map *leaf_map = &xe_child->bo->vmap;
+ pgoff_t first = xe_pt_offset(addr, 0, walk);
+ pgoff_t count = xe_pt_num_entries(addr, next, 0, walk);
+
+ for (pgoff_t i = 0; i < count; i++) {
+ u64 pte = xe_map_rd(xe, leaf_map, (first + i) * sizeof(u64), u64);
+
+ /* Account for NULL terminated entry on end (-1) */
+ if (xe_walk->prl->num_entries < XE_PAGE_RECLAIM_MAX_ENTRIES - 1) {
+ generate_reclaim_entry(xe_walk->tile, xe_walk->prl,
+ pte, xe_child);
+ } else {
+ /* overflow, mark as invalid */
+ xe_walk->prl->num_entries = XE_PAGE_RECLAIM_INVALID_LIST;
+ vm_dbg(&xe->drm,
+ "PRL invalidate: overflow while adding pte=%#llx",
+ pte);
+ break;
+ }
+ }
+ }
- xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk);
+ /* If aborting the page walk early, invalidate the PRL since PTEs may be dropped by the abort */
+ if (xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk) &&
+ xe_walk->prl && level > 1 && xe_child->base.children && xe_child->num_live != 0) {
+ xe_walk->prl->num_entries = XE_PAGE_RECLAIM_INVALID_LIST;
+ vm_dbg(&xe->drm,
+ "PRL invalidate: kill at level=%u addr=%#llx next=%#llx num_live=%u\n",
+ level, addr, next, xe_child->num_live);
+ }
return 0;
}
@@ -1654,6 +1752,8 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile,
{
u64 start = range ? xe_svm_range_start(range) : xe_vma_start(vma);
u64 end = range ? xe_svm_range_end(range) : xe_vma_end(vma);
+ struct xe_vm_pgtable_update_op *pt_update_op =
+ container_of(entries, struct xe_vm_pgtable_update_op, entries[0]);
struct xe_pt_stage_unbind_walk xe_walk = {
.base = {
.ops = &xe_pt_stage_unbind_ops,
@@ -1665,6 +1765,7 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile,
.modified_start = start,
.modified_end = end,
.wupd.entries = entries,
+ .prl = pt_update_op->prl,
};
struct xe_pt *pt = vm->pt_root[tile->id];
@@ -1897,6 +1998,7 @@ static int unbind_op_prepare(struct xe_tile *tile,
struct xe_vm_pgtable_update_ops *pt_update_ops,
struct xe_vma *vma)
{
+ struct xe_device *xe = tile_to_xe(tile);
u32 current_op = pt_update_ops->current_op;
struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
int err;
@@ -1914,6 +2016,11 @@ static int unbind_op_prepare(struct xe_tile *tile,
pt_op->vma = vma;
pt_op->bind = false;
pt_op->rebind = false;
+ /* Maintain one PRL in pt_update_ops that all other ops in the unbind reference */
+ if (xe->info.has_page_reclaim_hw_assist && !pt_update_ops->prl.entries)
+ xe_page_reclaim_list_alloc_entries(&pt_update_ops->prl);
+
+ pt_op->prl = (pt_update_ops->prl.entries) ? &pt_update_ops->prl : NULL;
err = vma_reserve_fences(tile_to_xe(tile), vma);
if (err)
@@ -1921,6 +2028,13 @@ static int unbind_op_prepare(struct xe_tile *tile,
pt_op->num_entries = xe_pt_stage_unbind(tile, xe_vma_vm(vma),
vma, NULL, pt_op->entries);
+ /* Free PRL if list declared as invalid */
+ if (pt_update_ops->prl.entries &&
+ pt_update_ops->prl.num_entries == XE_PAGE_RECLAIM_INVALID_LIST) {
+ xe_page_reclaim_entries_put(pt_update_ops->prl.entries);
+ xe_page_reclaim_list_invalidate(&pt_update_ops->prl);
+ pt_op->prl = NULL;
+ }
xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
pt_op->num_entries, false);
@@ -1979,6 +2093,7 @@ static int unbind_range_prepare(struct xe_vm *vm,
pt_op->vma = XE_INVALID_VMA;
pt_op->bind = false;
pt_op->rebind = false;
+ pt_op->prl = NULL;
pt_op->num_entries = xe_pt_stage_unbind(tile, vm, NULL, range,
pt_op->entries);
@@ -2096,6 +2211,7 @@ xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops)
init_llist_head(&pt_update_ops->deferred);
pt_update_ops->start = ~0x0ull;
pt_update_ops->last = 0x0ull;
+ xe_page_reclaim_list_init(&pt_update_ops->prl);
}
/**
@@ -2518,6 +2634,11 @@ void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops)
&vops->pt_update_ops[tile->id];
int i;
+ if (pt_update_ops->prl.entries) {
+ xe_page_reclaim_entries_put(pt_update_ops->prl.entries);
+ xe_page_reclaim_list_invalidate(&pt_update_ops->prl);
+ }
+
lockdep_assert_held(&vops->vm->lock);
xe_vm_assert_held(vops->vm);
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
index 881f01e14db8..88fabf8e2655 100644
--- a/drivers/gpu/drm/xe/xe_pt_types.h
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -8,6 +8,7 @@
#include <linux/types.h>
+#include "xe_page_reclaim.h"
#include "xe_pt_walk.h"
struct xe_bo;
@@ -79,6 +80,8 @@ struct xe_vm_pgtable_update_op {
struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
/** @vma: VMA for operation, operation not valid if NULL */
struct xe_vma *vma;
+ /** @prl: Backing pointer to page reclaim list of pt_update_ops */
+ struct xe_page_reclaim_list *prl;
/** @num_entries: number of entries for this update operation */
u32 num_entries;
/** @bind: is a bind */
@@ -95,6 +98,8 @@ struct xe_vm_pgtable_update_ops {
struct llist_head deferred;
/** @q: exec queue for PT operations */
struct xe_exec_queue *q;
+ /** @prl: embedded page reclaim list */
+ struct xe_page_reclaim_list prl;
/** @start: start address of ops */
u64 start;
/** @last: last address of ops */
--
2.52.0
* [PATCH v4 06/10] drm/xe: Suballocate BO for page reclaim
2025-12-09 20:08 [PATCH v4 00/10] Page Reclamation Support for Xe3p Platforms Brian Nguyen
` (4 preceding siblings ...)
2025-12-09 20:08 ` [PATCH v4 05/10] drm/xe: Create page reclaim list on unbind Brian Nguyen
@ 2025-12-09 20:08 ` Brian Nguyen
2025-12-09 20:08 ` [PATCH v4 07/10] drm/xe: Prep page reclaim in tlb inval job Brian Nguyen
` (3 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Brian Nguyen @ 2025-12-09 20:08 UTC (permalink / raw)
To: intel-xe; +Cc: tejas.upadhyay, matthew.brost, shuicheng.lin, stuart.summers
The page reclamation feature needs the PRL to be suballocated into a
GGTT-mapped BO. On allocation failure, fall back to the default TLB
invalidation with a full PPC flush.
The PRL's BO allocations are managed in a separate pool to ensure the
4K alignment required for a proper GGTT address.
Pass the BO into the TLB invalidation backend and modify the fence to
accommodate it.
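Note the copied buffer always ends with a zeroed terminator entry: e.g.
with num_entries = 3, prl_size = (3 + 1) * sizeof(struct
xe_guc_page_reclaim_entry) = 32 bytes, and the zeroed 4th entry (VALID
bit clear) marks the end of the list for the GuC.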
v2:
- Removed page reclaim related variables from TLB fence. (Matthew B)
- Allocate PRL bo size to num_entries. (Matthew B)
- Move PRL bo allocation to tlb_inval run_job. (Matthew B)
Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
Suggested-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/gpu/drm/xe/xe_device_types.h | 7 +++++
drivers/gpu/drm/xe/xe_page_reclaim.c | 39 +++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_page_reclaim.h | 6 +++++
drivers/gpu/drm/xe/xe_tile.c | 5 ++++
drivers/gpu/drm/xe/xe_tlb_inval_job.c | 9 +++++++
5 files changed, 66 insertions(+)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 3836c5ed1c72..155ea0800f1b 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -184,6 +184,13 @@ struct xe_tile {
* Media GT shares a pool with its primary GT.
*/
struct xe_sa_manager *kernel_bb_pool;
+
+ /**
+ * @mem.reclaim_pool: Pool for PRLs allocated.
+ *
+ * Only main GT has page reclaim list allocations.
+ */
+ struct xe_sa_manager *reclaim_pool;
} mem;
/** @sriov: tile level virtualization data */
diff --git a/drivers/gpu/drm/xe/xe_page_reclaim.c b/drivers/gpu/drm/xe/xe_page_reclaim.c
index 63facea28213..50961307a557 100644
--- a/drivers/gpu/drm/xe/xe_page_reclaim.c
+++ b/drivers/gpu/drm/xe/xe_page_reclaim.c
@@ -13,6 +13,45 @@
#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_macros.h"
+#include "xe_sa.h"
+#include "xe_tlb_inval_types.h"
+
+/**
+ * xe_page_reclaim_create_prl_bo() - Back a PRL with a suballocated GGTT BO
+ * @tlb_inval: TLB invalidation frontend associated with the request
+ * @prl: page reclaim list data that the BO copies from
+ * @fence: tlb invalidation fence that page reclaim action is paired to
+ *
+ * Suballocates a 4K BO out of the tile reclaim pool, copies the PRL CPU
+ * copy into the BO and queues the buffer for release when @fence signals.
+ *
+ * Return: struct drm_suballoc pointer on success or ERR_PTR on failure.
+ */
+struct drm_suballoc *xe_page_reclaim_create_prl_bo(struct xe_tlb_inval *tlb_inval,
+ struct xe_page_reclaim_list *prl,
+ struct xe_tlb_inval_fence *fence)
+{
+ struct xe_gt *gt = container_of(tlb_inval, struct xe_gt, tlb_inval);
+ struct xe_tile *tile = gt_to_tile(gt);
+ /* (+1) for NULL page_reclaim_entry to indicate end of list */
+ int prl_size = min(prl->num_entries + 1, XE_PAGE_RECLAIM_MAX_ENTRIES) *
+ sizeof(struct xe_guc_page_reclaim_entry);
+ struct drm_suballoc *prl_sa;
+
+ /* Maximum size of a PRL is one 4K page */
+ prl_sa = __xe_sa_bo_new(tile->mem.reclaim_pool,
+ prl_size, GFP_ATOMIC);
+ if (IS_ERR(prl_sa))
+ return prl_sa;
+
+ memcpy(xe_sa_bo_cpu_addr(prl_sa), prl->entries,
+ prl_size);
+ xe_sa_bo_flush_write(prl_sa);
+ /* Queue up sa_bo_free on tlb invalidation fence signal */
+ xe_sa_bo_free(prl_sa, &fence->base);
+
+ return prl_sa;
+}
/**
* xe_page_reclaim_list_invalidate() - Mark a PRL as invalid
diff --git a/drivers/gpu/drm/xe/xe_page_reclaim.h b/drivers/gpu/drm/xe/xe_page_reclaim.h
index 9255566d2066..b151f15e0b80 100644
--- a/drivers/gpu/drm/xe/xe_page_reclaim.h
+++ b/drivers/gpu/drm/xe/xe_page_reclaim.h
@@ -16,6 +16,9 @@
#define XE_PAGE_RECLAIM_MAX_ENTRIES 512
#define XE_PAGE_RECLAIM_LIST_MAX_SIZE SZ_4K
+struct xe_tlb_inval;
+struct xe_tlb_inval_fence;
+
struct xe_guc_page_reclaim_entry {
u64 qw;
/* valid reclaim entry bit */
@@ -41,6 +44,9 @@ struct xe_page_reclaim_list {
#define XE_PAGE_RECLAIM_INVALID_LIST -1
};
+struct drm_suballoc *xe_page_reclaim_create_prl_bo(struct xe_tlb_inval *tlb_inval,
+ struct xe_page_reclaim_list *prl,
+ struct xe_tlb_inval_fence *fence);
void xe_page_reclaim_list_invalidate(struct xe_page_reclaim_list *prl);
void xe_page_reclaim_list_init(struct xe_page_reclaim_list *prl);
int xe_page_reclaim_list_alloc_entries(struct xe_page_reclaim_list *prl);
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index 4f4f9a5c43af..63c060c2ea5c 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -209,6 +209,11 @@ int xe_tile_init(struct xe_tile *tile)
if (IS_ERR(tile->mem.kernel_bb_pool))
return PTR_ERR(tile->mem.kernel_bb_pool);
+ /* Optimistically anticipate at most 256 TLB fences with PRL */
+ tile->mem.reclaim_pool = xe_sa_bo_manager_init(tile, SZ_1M, XE_PAGE_RECLAIM_LIST_MAX_SIZE);
+ if (IS_ERR(tile->mem.reclaim_pool))
+ return PTR_ERR(tile->mem.reclaim_pool);
+
return 0;
}
void xe_tile_migrate_wait(struct xe_tile *tile)
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
index 1ae0dec2cf31..dbd3171fff12 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_job.c
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
@@ -24,6 +24,8 @@ struct xe_tlb_inval_job {
struct xe_exec_queue *q;
/** @vm: VM which TLB invalidation is being issued for */
struct xe_vm *vm;
+ /** @prl: Embedded copy of page reclaim list */
+ struct xe_page_reclaim_list prl;
/** @refcount: ref count of this job */
struct kref refcount;
/**
@@ -47,6 +49,13 @@ static struct dma_fence *xe_tlb_inval_job_run(struct xe_dep_job *dep_job)
container_of(dep_job, typeof(*job), dep);
struct xe_tlb_inval_fence *ifence =
container_of(job->fence, typeof(*ifence), base);
+ struct drm_suballoc *prl_sa = NULL;
+
+ if (job->prl.entries) {
+ prl_sa = xe_page_reclaim_create_prl_bo(job->tlb_inval, &job->prl, ifence);
+ if (IS_ERR(prl_sa))
+ prl_sa = NULL; /* Fall back to a full PPC flush by passing NULL */
+ }
xe_tlb_inval_range(job->tlb_inval, ifence, job->start,
job->end, job->vm->usm.asid);
--
2.52.0
* [PATCH v4 07/10] drm/xe: Prep page reclaim in tlb inval job
2025-12-09 20:08 [PATCH v4 00/10] Page Reclamation Support for Xe3p Platforms Brian Nguyen
` (5 preceding siblings ...)
2025-12-09 20:08 ` [PATCH v4 06/10] drm/xe: Suballocate BO for page reclaim Brian Nguyen
@ 2025-12-09 20:08 ` Brian Nguyen
2025-12-09 20:32 ` Matthew Brost
2025-12-09 20:08 ` [PATCH v4 08/10] drm/xe: Append page reclamation action to tlb inval Brian Nguyen
` (2 subsequent siblings)
9 siblings, 1 reply; 13+ messages in thread
From: Brian Nguyen @ 2025-12-09 20:08 UTC (permalink / raw)
To: intel-xe
Cc: tejas.upadhyay, matthew.brost, shuicheng.lin, stuart.summers,
Michal Wajdeczko
Use the page reclaim list as an indicator that a page reclaim action is
desired and pass it to the TLB inval fence to handle.
The job needs to maintain its own embedded copy to ensure the PRL's
lifetime extends until the job has run.
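The reference counting on the backing page then flows roughly as
follows (a sketch based on the hunks below):
	alloc_entries()                       /* ref = 1, pt_update_ops */
	xe_tlb_inval_job_add_page_reclaim()   /* ref = 2, job's copy */
	xe_page_reclaim_entries_put()         /* ref = 1, ops drop theirs */
	xe_tlb_inval_job_destroy()            /* ref = 0, page freed */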
v2:
- Use xe variant of WARN_ON (Michal)
v3:
- Add comments for PRL tile handling and flush behavior with media.
(Matthew Brost)
Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
---
drivers/gpu/drm/xe/xe_pt.c | 12 ++++++++++++
drivers/gpu/drm/xe/xe_tlb_inval_job.c | 26 ++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_tlb_inval_job.h | 4 ++++
3 files changed, 42 insertions(+)
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 5f27d0a64013..b774195e32e9 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -2509,6 +2509,18 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
goto kill_vm_tile1;
}
update.ijob = ijob;
+ /*
+ * Only add page reclaim for the primary GT. Media GT does not have
+ * any PPC to flush, so enabling the PPC flush bit for media is
+ * effectively a NOP and provides no performance benefit, nor does it
+ * interfere with the primary GT.
+ */
+ if (pt_update_ops->prl.num_entries != XE_PAGE_RECLAIM_INVALID_LIST) {
+ xe_tlb_inval_job_add_page_reclaim(ijob, &pt_update_ops->prl);
+ /* Release ref from alloc, job will now handle it */
+ xe_page_reclaim_entries_put(pt_update_ops->prl.entries);
+ pt_update_ops->prl.entries = NULL;
+ }
if (tile->media_gt) {
dep_scheduler = to_dep_scheduler(q, tile->media_gt);
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
index dbd3171fff12..40e689063a57 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_job.c
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
@@ -7,7 +7,9 @@
#include "xe_dep_job_types.h"
#include "xe_dep_scheduler.h"
#include "xe_exec_queue.h"
+#include "xe_gt_printk.h"
#include "xe_gt_types.h"
+#include "xe_page_reclaim.h"
#include "xe_tlb_inval.h"
#include "xe_tlb_inval_job.h"
#include "xe_migrate.h"
@@ -116,6 +118,7 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
job->start = start;
job->end = end;
job->fence_armed = false;
+ xe_page_reclaim_list_init(&job->prl);
job->dep.ops = &dep_job_ops;
job->type = type;
kref_init(&job->refcount);
@@ -149,6 +152,25 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
return ERR_PTR(err);
}
+/**
+ * xe_tlb_inval_job_add_page_reclaim() - Embed PRL into a TLB job
+ * @job: TLB invalidation job that may trigger reclamation
+ * @prl: Page reclaim list populated during unbind
+ *
+ * Copies @prl into the job and takes an extra reference to the entry page so
+ * ownership can transfer to the TLB fence when the job is pushed.
+ */
+void xe_tlb_inval_job_add_page_reclaim(struct xe_tlb_inval_job *job,
+ struct xe_page_reclaim_list *prl)
+{
+ struct xe_device *xe = gt_to_xe(job->q->gt);
+
+ xe_gt_WARN_ON(job->q->gt, !xe->info.has_page_reclaim_hw_assist);
+ job->prl = *prl;
+ /* Pair with put in job_destroy */
+ xe_page_reclaim_entries_get(job->prl.entries);
+}
+
static void xe_tlb_inval_job_destroy(struct kref *ref)
{
struct xe_tlb_inval_job *job = container_of(ref, typeof(*job),
@@ -159,6 +181,10 @@ static void xe_tlb_inval_job_destroy(struct kref *ref)
struct xe_device *xe = gt_to_xe(q->gt);
struct xe_vm *vm = job->vm;
+ /* BO creation retains a copy (if used), so no longer needed */
+ if (job->prl.entries)
+ xe_page_reclaim_entries_put(job->prl.entries);
+
if (!job->fence_armed)
kfree(ifence);
else
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.h b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
index 4d6df1a6c6ca..03d6e21cd611 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_job.h
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
@@ -12,6 +12,7 @@ struct dma_fence;
struct xe_dep_scheduler;
struct xe_exec_queue;
struct xe_migrate;
+struct xe_page_reclaim_list;
struct xe_tlb_inval;
struct xe_tlb_inval_job;
struct xe_vm;
@@ -21,6 +22,9 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
struct xe_dep_scheduler *dep_scheduler,
struct xe_vm *vm, u64 start, u64 end, int type);
+void xe_tlb_inval_job_add_page_reclaim(struct xe_tlb_inval_job *job,
+ struct xe_page_reclaim_list *prl);
+
int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job);
struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job,
--
2.52.0
* Re: [PATCH v4 07/10] drm/xe: Prep page reclaim in tlb inval job
2025-12-09 20:08 ` [PATCH v4 07/10] drm/xe: Prep page reclaim in tlb inval job Brian Nguyen
@ 2025-12-09 20:32 ` Matthew Brost
2025-12-10 1:00 ` Nguyen, Brian3
0 siblings, 1 reply; 13+ messages in thread
From: Matthew Brost @ 2025-12-09 20:32 UTC (permalink / raw)
To: Brian Nguyen
Cc: intel-xe, tejas.upadhyay, shuicheng.lin, stuart.summers,
Michal Wajdeczko
On Wed, Dec 10, 2025 at 04:08:23AM +0800, Brian Nguyen wrote:
> Use the page reclaim list as an indicator that a page reclaim action is
> desired and pass it to the TLB inval fence to handle.
>
> The job needs to maintain its own embedded copy to ensure the PRL's
> lifetime extends until the job has run.
>
> v2:
> - Use xe variant of WARN_ON (Michal)
>
> v3:
> - Add comments for PRL tile handling and flush behavior with media.
> (Matthew Brost)
>
> Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> ---
> drivers/gpu/drm/xe/xe_pt.c | 12 ++++++++++++
> drivers/gpu/drm/xe/xe_tlb_inval_job.c | 26 ++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_tlb_inval_job.h | 4 ++++
> 3 files changed, 42 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index 5f27d0a64013..b774195e32e9 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -2509,6 +2509,18 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> goto kill_vm_tile1;
> }
> update.ijob = ijob;
> + /*
> + * Only add page reclaim for the primary GT. Media GT does not have
> + * any PPC to flush, so enabling the PPC flush bit for media is
> + * effectively a NOP and provides no performance benefit, nor does it
> + * interfere with the primary GT.
> + */
> + if (pt_update_ops->prl.num_entries != XE_PAGE_RECLAIM_INVALID_LIST) {
I hate to be that guy, but would a helper like this be good throughout the series:
static inline bool xe_page_reclaim_list_valid(struct xe_page_reclaim_list *prl)
{
return prl->num_entries != XE_PAGE_RECLAIM_INVALID_LIST;
}
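so the check above would read e.g.:
	if (xe_page_reclaim_list_valid(&pt_update_ops->prl)) {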
> + xe_tlb_inval_job_add_page_reclaim(ijob, &pt_update_ops->prl);
> + /* Release ref from alloc, job will now handle it */
> + xe_page_reclaim_entries_put(pt_update_ops->prl.entries);
> + pt_update_ops->prl.entries = NULL;
> + }
>
> if (tile->media_gt) {
> dep_scheduler = to_dep_scheduler(q, tile->media_gt);
> diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
> index dbd3171fff12..40e689063a57 100644
> --- a/drivers/gpu/drm/xe/xe_tlb_inval_job.c
> +++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
> @@ -7,7 +7,9 @@
> #include "xe_dep_job_types.h"
> #include "xe_dep_scheduler.h"
> #include "xe_exec_queue.h"
> +#include "xe_gt_printk.h"
> #include "xe_gt_types.h"
> +#include "xe_page_reclaim.h"
> #include "xe_tlb_inval.h"
> #include "xe_tlb_inval_job.h"
> #include "xe_migrate.h"
> @@ -116,6 +118,7 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
> job->start = start;
> job->end = end;
> job->fence_armed = false;
> + xe_page_reclaim_list_init(&job->prl);
> job->dep.ops = &dep_job_ops;
> job->type = type;
> kref_init(&job->refcount);
> @@ -149,6 +152,25 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
> return ERR_PTR(err);
> }
>
> +/**
> + * xe_tlb_inval_job_add_page_reclaim() - Embed PRL into a TLB job
> + * @job: TLB invalidation job that may trigger reclamation
> + * @prl: Page reclaim list populated during unbind
> + *
> + * Copies @prl into the job and takes an extra reference to the entry page so
> + * ownership can transfer to the TLB fence when the job is pushed.
> + */
> +void xe_tlb_inval_job_add_page_reclaim(struct xe_tlb_inval_job *job,
> + struct xe_page_reclaim_list *prl)
> +{
> + struct xe_device *xe = gt_to_xe(job->q->gt);
> +
> + xe_gt_WARN_ON(job->q->gt, !xe->info.has_page_reclaim_hw_assist);
> + job->prl = *prl;
> + /* Pair with put in job_destroy */
> + xe_page_reclaim_entries_get(job->prl.entries);
> +}
> +
> static void xe_tlb_inval_job_destroy(struct kref *ref)
> {
> struct xe_tlb_inval_job *job = container_of(ref, typeof(*job),
> @@ -159,6 +181,10 @@ static void xe_tlb_inval_job_destroy(struct kref *ref)
> struct xe_device *xe = gt_to_xe(q->gt);
> struct xe_vm *vm = job->vm;
>
> + /* BO creation retains a copy (if used), so no longer needed */
> + if (job->prl.entries)
> + xe_page_reclaim_entries_put(job->prl.entries);
Nit: xe_page_reclaim_entries_put has a NULL check.
Matt
> +
> if (!job->fence_armed)
> kfree(ifence);
> else
> diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.h b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
> index 4d6df1a6c6ca..03d6e21cd611 100644
> --- a/drivers/gpu/drm/xe/xe_tlb_inval_job.h
> +++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
> @@ -12,6 +12,7 @@ struct dma_fence;
> struct xe_dep_scheduler;
> struct xe_exec_queue;
> struct xe_migrate;
> +struct xe_page_reclaim_list;
> struct xe_tlb_inval;
> struct xe_tlb_inval_job;
> struct xe_vm;
> @@ -21,6 +22,9 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
> struct xe_dep_scheduler *dep_scheduler,
> struct xe_vm *vm, u64 start, u64 end, int type);
>
> +void xe_tlb_inval_job_add_page_reclaim(struct xe_tlb_inval_job *job,
> + struct xe_page_reclaim_list *prl);
> +
> int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job);
>
> struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job,
> --
> 2.52.0
>
* RE: [PATCH v4 07/10] drm/xe: Prep page reclaim in tlb inval job
2025-12-09 20:32 ` Matthew Brost
@ 2025-12-10 1:00 ` Nguyen, Brian3
0 siblings, 0 replies; 13+ messages in thread
From: Nguyen, Brian3 @ 2025-12-10 1:00 UTC (permalink / raw)
To: Brost, Matthew
Cc: intel-xe@lists.freedesktop.org, Upadhyay, Tejas, Lin, Shuicheng,
Summers, Stuart, Wajdeczko, Michal
On Tuesday, December 9, 2025 12:32 PM, Matthew Brost wrote:
> On Wed, Dec 10, 2025 at 04:08:23AM +0800, Brian Nguyen wrote:
> > Use the page reclaim list as an indicator that a page reclaim action
> > is desired and pass it to the TLB inval fence to handle.
> >
> > The job needs to maintain its own embedded copy to ensure the PRL's
> > lifetime extends until the job has run.
> >
> > v2:
> > - Use xe variant of WARN_ON (Michal)
> >
> > v3:
> > - Add comments for PRL tile handling and flush behavior with media.
> > (Matthew Brost)
> >
> > Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
> > Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_pt.c | 12 ++++++++++++
> > drivers/gpu/drm/xe/xe_tlb_inval_job.c | 26 ++++++++++++++++++++++++++
> > drivers/gpu/drm/xe/xe_tlb_inval_job.h | 4 ++++
> > 3 files changed, 42 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> > index 5f27d0a64013..b774195e32e9 100644
> > --- a/drivers/gpu/drm/xe/xe_pt.c
> > +++ b/drivers/gpu/drm/xe/xe_pt.c
> > @@ -2509,6 +2509,18 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
> > goto kill_vm_tile1;
> > }
> > update.ijob = ijob;
> > + /*
> > + * Only add page reclaim for the primary GT. Media GT does not have
> > + * any PPC to flush, so enabling the PPC flush bit for media is
> > + * effectively a NOP and provides no performance benefit, nor does it
> > + * interfere with the primary GT.
> > + */
> > + if (pt_update_ops->prl.num_entries != XE_PAGE_RECLAIM_INVALID_LIST)
> > +{
>
> I hate to be that guy, but would a helper like this be good thoughout the series:
>
> static inline bool xe_page_reclaim_list_valid(struct xe_page_reclaim_list *prl) {
> return prl->num_entries != XE_PAGE_RECLAIM_INVALID_LIST; }
>
NP, makes sense. I'll add it mainly to the "Create page reclaim list" patch
(patch 5) and use it throughout the series. Alongside this, I realize we
should clean up the other open-coded uses of XE_PAGE_RECLAIM_INVALID_LIST
with the xe_page_reclaim_list_invalidate helper, which also clears the
entries pointer, to make things clearer. Will be adding that too in the
next revision.
Looks like only minor 1-2 line changes in the other previously reviewed
patches; most changes will be in patch #5.
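Roughly, call sites that currently open-code:
	xe_walk->prl->num_entries = XE_PAGE_RECLAIM_INVALID_LIST;
would become:
	xe_page_reclaim_list_invalidate(xe_walk->prl);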
> > + xe_tlb_inval_job_add_page_reclaim(ijob, &pt_update_ops->prl);
> > + /* Release ref from alloc, job will now handle it */
> > + xe_page_reclaim_entries_put(pt_update_ops->prl.entries);
> > + pt_update_ops->prl.entries = NULL;
> > + }
> >
> > if (tile->media_gt) {
> > dep_scheduler = to_dep_scheduler(q, tile->media_gt); diff --git
> > a/drivers/gpu/drm/xe/xe_tlb_inval_job.c
> > b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
> > index dbd3171fff12..40e689063a57 100644
> > --- a/drivers/gpu/drm/xe/xe_tlb_inval_job.c
> > +++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
> > @@ -7,7 +7,9 @@
> > #include "xe_dep_job_types.h"
> > #include "xe_dep_scheduler.h"
> > #include "xe_exec_queue.h"
> > +#include "xe_gt_printk.h"
> > #include "xe_gt_types.h"
> > +#include "xe_page_reclaim.h"
> > #include "xe_tlb_inval.h"
> > #include "xe_tlb_inval_job.h"
> > #include "xe_migrate.h"
> > @@ -116,6 +118,7 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
> > job->start = start;
> > job->end = end;
> > job->fence_armed = false;
> > + xe_page_reclaim_list_init(&job->prl);
> > job->dep.ops = &dep_job_ops;
> > job->type = type;
> > kref_init(&job->refcount);
> > @@ -149,6 +152,25 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
> > return ERR_PTR(err);
> > }
> >
> > +/**
> > + * xe_tlb_inval_job_add_page_reclaim() - Embed PRL into a TLB job
> > + * @job: TLB invalidation job that may trigger reclamation
> > + * @prl: Page reclaim list populated during unbind
> > + *
> > + * Copies @prl into the job and takes an extra reference to the entry
> > +page so
> > + * ownership can transfer to the TLB fence when the job is pushed.
> > + */
> > +void xe_tlb_inval_job_add_page_reclaim(struct xe_tlb_inval_job *job,
> > + struct xe_page_reclaim_list *prl) {
> > + struct xe_device *xe = gt_to_xe(job->q->gt);
> > +
> > + xe_gt_WARN_ON(job->q->gt, !xe->info.has_page_reclaim_hw_assist);
> > + job->prl = *prl;
> > + /* Pair with put in job_destroy */
> > + xe_page_reclaim_entries_get(job->prl.entries);
> > +}
> > +
> > static void xe_tlb_inval_job_destroy(struct kref *ref) {
> > struct xe_tlb_inval_job *job = container_of(ref, typeof(*job), @@
> > -159,6 +181,10 @@ static void xe_tlb_inval_job_destroy(struct kref *ref)
> > struct xe_device *xe = gt_to_xe(q->gt);
> > struct xe_vm *vm = job->vm;
> >
> > + /* BO creation retains a copy (if used), so no longer needed */
> > + if (job->prl.entries)
> > + xe_page_reclaim_entries_put(job->prl.entries);
>
> Nit: xe_page_reclaim_entries_put has a NULL check.
>
Ahh yea, missed that, removing the check.
Brian
> Matt
>
> > +
> > if (!job->fence_armed)
> > kfree(ifence);
> > else
> > diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.h
> > b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
> > index 4d6df1a6c6ca..03d6e21cd611 100644
> > --- a/drivers/gpu/drm/xe/xe_tlb_inval_job.h
> > +++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
> > @@ -12,6 +12,7 @@ struct dma_fence;
> > struct xe_dep_scheduler;
> > struct xe_exec_queue;
> > struct xe_migrate;
> > +struct xe_page_reclaim_list;
> > struct xe_tlb_inval;
> > struct xe_tlb_inval_job;
> > struct xe_vm;
> > @@ -21,6 +22,9 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
> > struct xe_dep_scheduler *dep_scheduler,
> > struct xe_vm *vm, u64 start, u64 end, int type);
> >
> > +void xe_tlb_inval_job_add_page_reclaim(struct xe_tlb_inval_job *job,
> > + struct xe_page_reclaim_list *prl);
> > +
> > int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job);
> >
> > struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job,
> > --
> > 2.52.0
> >
* [PATCH v4 08/10] drm/xe: Append page reclamation action to tlb inval
2025-12-09 20:08 [PATCH v4 00/10] Page Reclamation Support for Xe3p Platforms Brian Nguyen
` (6 preceding siblings ...)
2025-12-09 20:08 ` [PATCH v4 07/10] drm/xe: Prep page reclaim in tlb inval job Brian Nguyen
@ 2025-12-09 20:08 ` Brian Nguyen
2025-12-09 20:08 ` [PATCH v4 09/10] drm/xe: Optimize flushing of L2$ by skipping unnecessary page reclaim Brian Nguyen
2025-12-09 20:08 ` [PATCH v4 10/10] drm/xe: Add debugfs support for page reclamation Brian Nguyen
9 siblings, 0 replies; 13+ messages in thread
From: Brian Nguyen @ 2025-12-09 20:08 UTC (permalink / raw)
To: intel-xe; +Cc: tejas.upadhyay, matthew.brost, shuicheng.lin, stuart.summers
Add a page reclamation action to the TLB inval backend. The page
reclamation action is paired with range TLB invalidations so both are
issued at the same time.
Page reclamation issues the TLB invalidation with an invalid seqno plus
an H2G page reclamation action carrying the fence's corresponding
seqno, and the fence is handled accordingly in the page reclaim done
handler. If page reclamation fails, the TLB timeout handler is
responsible for signalling the fence and cleaning up.
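The resulting message flow for a reclaim-backed invalidation is
roughly:
	H2G TLB_INVALIDATION   seqno = TLB_INVALIDATION_SEQNO_INVALID, no PPC flush
	H2G PAGE_RECLAMATION   seqno = fence seqno, GGTT address of the PRL
	G2H ..._DONE (invalid) resets the TDR only
	G2H ..._DONE (seqno)   signals the fence and releases the PRL BO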
v2:
- add send_page_reclaim to patch.
- Remove flush_cache and use prl_sa pointer to determine PPC flush
instead of explicit bool. Add NULL as fallback for others. (Matthew B)
v3:
- Add comments for flush_cache with media.
Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
Suggested-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/gpu/drm/xe/xe_guc_tlb_inval.c | 30 ++++++++++++++++++++-----
drivers/gpu/drm/xe/xe_tlb_inval.c | 7 +++---
drivers/gpu/drm/xe/xe_tlb_inval.h | 2 +-
drivers/gpu/drm/xe/xe_tlb_inval_job.c | 2 +-
drivers/gpu/drm/xe/xe_tlb_inval_types.h | 4 +++-
drivers/gpu/drm/xe/xe_vm.c | 4 ++--
6 files changed, 36 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
index 37ac943cb10f..6532a88d51e2 100644
--- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
@@ -13,6 +13,7 @@
#include "xe_guc_tlb_inval.h"
#include "xe_force_wake.h"
#include "xe_mmio.h"
+#include "xe_sa.h"
#include "xe_tlb_inval.h"
#include "regs/xe_guc_regs.h"
@@ -93,6 +94,20 @@ static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno)
return -ECANCELED;
}
+static int send_page_reclaim(struct xe_guc *guc, u32 seqno,
+ u64 gpu_addr)
+{
+ u32 action[] = {
+ XE_GUC_ACTION_PAGE_RECLAMATION,
+ seqno,
+ lower_32_bits(gpu_addr),
+ upper_32_bits(gpu_addr),
+ };
+
+ return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+ G2H_LEN_DW_PAGE_RECLAMATION, 1);
+}
+
/*
* Ensure that roundup_pow_of_two(length) doesn't overflow.
* Note that roundup_pow_of_two() operates on unsigned long,
@@ -101,20 +116,21 @@ static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno)
#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX))
static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
- u64 start, u64 end, u32 asid)
+ u64 start, u64 end, u32 asid,
+ struct drm_suballoc *prl_sa)
{
#define MAX_TLB_INVALIDATION_LEN 7
struct xe_guc *guc = tlb_inval->private;
struct xe_gt *gt = guc_to_gt(guc);
u32 action[MAX_TLB_INVALIDATION_LEN];
u64 length = end - start;
- int len = 0;
+ int len = 0, err;
if (guc_to_xe(guc)->info.force_execlist)
return -ECANCELED;
action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
- action[len++] = seqno;
+ action[len++] = !prl_sa ? seqno : TLB_INVALIDATION_SEQNO_INVALID;
if (!gt_to_xe(gt)->info.has_range_tlb_inval ||
length > MAX_RANGE_TLB_INVALIDATION_LENGTH) {
action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
@@ -155,7 +171,8 @@ static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
ilog2(SZ_2M) + 1)));
xe_gt_assert(gt, IS_ALIGNED(start, length));
- action[len++] = MAKE_INVAL_OP_FLUSH(XE_GUC_TLB_INVAL_PAGE_SELECTIVE, true);
+ /* Flush when prl_sa is NULL; media has no PPC, so the flush bit is a NOP there */
+ action[len++] = MAKE_INVAL_OP_FLUSH(XE_GUC_TLB_INVAL_PAGE_SELECTIVE, !prl_sa);
action[len++] = asid;
action[len++] = lower_32_bits(start);
action[len++] = upper_32_bits(start);
@@ -164,7 +181,10 @@ static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
- return send_tlb_inval(guc, action, len);
+ err = send_tlb_inval(guc, action, len);
+ if (!err && prl_sa)
+ err = send_page_reclaim(guc, seqno, xe_sa_bo_gpu_addr(prl_sa));
+ return err;
}
static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval)
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c
index a122fbb9fc4a..dec042248164 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_tlb_inval.c
@@ -313,6 +313,7 @@ int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval)
* @start: start address
* @end: end address
* @asid: address space id
+ * @prl_sa: suballocation of page reclaim list if used, NULL indicates PPC flush
*
* Issue a range based TLB invalidation if supported, if not fallback to a full
* TLB invalidation. Completion of TLB is asynchronous and caller can use
@@ -322,10 +323,10 @@ int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval)
*/
int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval,
struct xe_tlb_inval_fence *fence, u64 start, u64 end,
- u32 asid)
+ u32 asid, struct drm_suballoc *prl_sa)
{
return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->ppgtt,
- start, end, asid);
+ start, end, asid, prl_sa);
}
/**
@@ -341,7 +342,7 @@ void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm)
u64 range = 1ull << vm->xe->info.va_bits;
xe_tlb_inval_fence_init(tlb_inval, &fence, true);
- xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid);
+ xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid, NULL);
xe_tlb_inval_fence_wait(&fence);
}
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.h b/drivers/gpu/drm/xe/xe_tlb_inval.h
index 05614915463a..858d0690f995 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval.h
+++ b/drivers/gpu/drm/xe/xe_tlb_inval.h
@@ -23,7 +23,7 @@ int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval);
void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm);
int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval,
struct xe_tlb_inval_fence *fence,
- u64 start, u64 end, u32 asid);
+ u64 start, u64 end, u32 asid, struct drm_suballoc *prl_sa);
void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval,
struct xe_tlb_inval_fence *fence,
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
index 40e689063a57..a934fab6d51e 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_job.c
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
@@ -60,7 +60,7 @@ static struct dma_fence *xe_tlb_inval_job_run(struct xe_dep_job *dep_job)
}
xe_tlb_inval_range(job->tlb_inval, ifence, job->start,
- job->end, job->vm->usm.asid);
+ job->end, job->vm->usm.asid, prl_sa);
return job->fence;
}
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_types.h b/drivers/gpu/drm/xe/xe_tlb_inval_types.h
index 7a6967ce3b76..48d1503e8460 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_types.h
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_types.h
@@ -9,6 +9,7 @@
#include <linux/workqueue.h>
#include <linux/dma-fence.h>
+struct drm_suballoc;
struct xe_tlb_inval;
/** struct xe_tlb_inval_ops - TLB invalidation ops (backend) */
@@ -40,12 +41,13 @@ struct xe_tlb_inval_ops {
* @start: Start address
* @end: End address
* @asid: Address space ID
+ * @prl_sa: Suballocation for page reclaim list
*
* Return 0 on success, -ECANCELED if backend is mid-reset, error on
* failure
*/
int (*ppgtt)(struct xe_tlb_inval *tlb_inval, u32 seqno, u64 start,
- u64 end, u32 asid);
+ u64 end, u32 asid, struct drm_suballoc *prl_sa);
/**
* @initialized: Backend is initialized
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index c2012d20faa6..bd787aae4248 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3928,7 +3928,7 @@ int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
&fence[fence_id], start, end,
- vm->usm.asid);
+ vm->usm.asid, NULL);
if (err)
goto wait;
++fence_id;
@@ -3941,7 +3941,7 @@ int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
&fence[fence_id], start, end,
- vm->usm.asid);
+ vm->usm.asid, NULL);
if (err)
goto wait;
++fence_id;
--
2.52.0
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH v4 09/10] drm/xe: Optimize flushing of L2$ by skipping unnecessary page reclaim
2025-12-09 20:08 [PATCH v4 00/10] Page Reclamation Support for Xe3p Platforms Brian Nguyen
` (7 preceding siblings ...)
2025-12-09 20:08 ` [PATCH v4 08/10] drm/xe: Append page reclamation action to tlb inval Brian Nguyen
@ 2025-12-09 20:08 ` Brian Nguyen
2025-12-09 20:08 ` [PATCH v4 10/10] drm/xe: Add debugfs support for page reclamation Brian Nguyen
9 siblings, 0 replies; 13+ messages in thread
From: Brian Nguyen @ 2025-12-09 20:08 UTC (permalink / raw)
To: intel-xe
Cc: tejas.upadhyay, matthew.brost, shuicheng.lin, stuart.summers,
Matthew Auld
There are additional hardware-managed L2$ flushes, such as the one for
transient display. In those scenarios page reclamation is unnecessary
and only results in redundant cacheline flushes, so skip over the
corresponding ranges.
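Condensed, hedged sketch of the check this patch adds (the helper, the
policy macro and the VMA wiring are all introduced below; treat this as
illustration only):

/*
 * Illustrative sketch: lines with an XD ("WB - Transient Display") L3
 * policy are flushed by HW at sync points, so reclaiming them again is
 * redundant.
 */
if (xe_pat_index_get_l3_policy(tile->xe, vma->attr.pat_index) ==
    XE_L3_POLICY_XD)
	return true;	/* skip page reclamation for this range */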
v2:
- Elaborated on reasoning for page reclamation skip based on
Tejas's discussion. (Matthew A, Tejas)
v3:
- Removed MEDIA_IS_ON due to a racy condition, which also resulted in
removal of the relevant registers and values. (Matthew A)
- Moved l3 policy access to xe_pat. (Matthew A)
v4:
- Updated comments based on previous change. (Tejas)
- Move back PAT index macros to xe_pat.c.
Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
Cc: Tejas Upadhyay <tejas.upadhyay@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
---
drivers/gpu/drm/xe/xe_page_reclaim.c | 32 ++++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_page_reclaim.h | 3 +++
drivers/gpu/drm/xe/xe_pat.c | 8 +++++++
drivers/gpu/drm/xe/xe_pat.h | 10 +++++++++
drivers/gpu/drm/xe/xe_pt.c | 4 +++-
5 files changed, 56 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/xe_page_reclaim.c b/drivers/gpu/drm/xe/xe_page_reclaim.c
index 50961307a557..ef4a0f54cba9 100644
--- a/drivers/gpu/drm/xe/xe_page_reclaim.c
+++ b/drivers/gpu/drm/xe/xe_page_reclaim.c
@@ -13,8 +13,40 @@
#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_pat.h"
#include "xe_sa.h"
#include "xe_tlb_inval_types.h"
+#include "xe_vm.h"
+
+/**
+ * xe_page_reclaim_skip() - Decide whether PRL should be skipped for a VMA
+ * @tile: Tile owning the VMA
+ * @vma: VMA under consideration
+ *
+ * PPC flushing may be handled by HW for specific PAT encodings.
+ * Skip PPC flushing/page reclaim for the scenarios below to avoid
+ * redundant flushes.
+ * - pat_index is transient display (1)
+ *
+ * Return: true when page reclamation is unnecessary, false otherwise.
+ */
+bool xe_page_reclaim_skip(struct xe_tile *tile, struct xe_vma *vma)
+{
+ u8 l3_policy;
+
+ l3_policy = xe_pat_index_get_l3_policy(tile->xe, vma->attr.pat_index);
+
+ /*
+ * - l3_policy: 0=WB, 1=XD ("WB - Transient Display"), 3=UC
+ * Transient display flushes are taken care of by HW (l3_policy == 1).
+ *
+ * HW sequences these transient flushes at various sync points, so any
+ * page reclamation event hits those sync points before the
+ * reclamation could execute.
+ */
+ return (l3_policy == XE_L3_POLICY_XD);
+}
/**
* xe_page_reclaim_create_prl_bo() - Back a PRL with a suballocated GGTT BO
diff --git a/drivers/gpu/drm/xe/xe_page_reclaim.h b/drivers/gpu/drm/xe/xe_page_reclaim.h
index b151f15e0b80..25be26598067 100644
--- a/drivers/gpu/drm/xe/xe_page_reclaim.h
+++ b/drivers/gpu/drm/xe/xe_page_reclaim.h
@@ -18,6 +18,8 @@
struct xe_tlb_inval;
struct xe_tlb_inval_fence;
+struct xe_tile;
+struct xe_vma;
struct xe_guc_page_reclaim_entry {
u64 qw;
@@ -44,6 +46,7 @@ struct xe_page_reclaim_list {
#define XE_PAGE_RECLAIM_INVALID_LIST -1
};
+bool xe_page_reclaim_skip(struct xe_tile *tile, struct xe_vma *vma);
struct drm_suballoc *xe_page_reclaim_create_prl_bo(struct xe_tlb_inval *tlb_inval,
struct xe_page_reclaim_list *prl,
struct xe_tlb_inval_fence *fence);
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 6f48d34711a6..2c3375e0250b 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -9,6 +9,7 @@
#include <generated/xe_wa_oob.h>
+#include "regs/xe_gt_regs.h"
#include "regs/xe_reg_defs.h"
#include "xe_assert.h"
#include "xe_device.h"
@@ -231,6 +232,13 @@ bool xe_pat_index_get_comp_en(struct xe_device *xe, u16 pat_index)
return !!(xe->pat.table[pat_index].value & XE2_COMP_EN);
}
+u16 xe_pat_index_get_l3_policy(struct xe_device *xe, u16 pat_index)
+{
+ WARN_ON(pat_index >= xe->pat.n_entries);
+
+ return REG_FIELD_GET(XE2_L3_POLICY, xe->pat.table[pat_index].value);
+}
+
static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
int n_entries)
{
diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h
index 5749a488d9a9..d5dadfb7f924 100644
--- a/drivers/gpu/drm/xe/xe_pat.h
+++ b/drivers/gpu/drm/xe/xe_pat.h
@@ -69,4 +69,14 @@ u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index);
*/
bool xe_pat_index_get_comp_en(struct xe_device *xe, u16 pat_index);
+#define XE_L3_POLICY_WB 0 /* Write-back */
+#define XE_L3_POLICY_XD 1 /* WB - Transient Display */
+#define XE_L3_POLICY_UC 3 /* Uncached */
+/**
+ * xe_pat_index_get_l3_policy - Extract the L3 policy for the given pat_index.
+ * @xe: xe device
+ * @pat_index: The pat_index to query
+ */
+u16 xe_pat_index_get_l3_policy(struct xe_device *xe, u16 pat_index);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index b774195e32e9..4da7bb1145bc 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -2020,7 +2020,9 @@ static int unbind_op_prepare(struct xe_tile *tile,
if (xe->info.has_page_reclaim_hw_assist && !pt_update_ops->prl.entries)
xe_page_reclaim_list_alloc_entries(&pt_update_ops->prl);
- pt_op->prl = (pt_update_ops->prl.entries) ? &pt_update_ops->prl : NULL;
+ /* Page reclaim may be unneeded for this VMA (e.g. HW-managed flushes), so skip it */
+ pt_op->prl = (pt_update_ops->prl.entries &&
+ !xe_page_reclaim_skip(tile, vma)) ? &pt_update_ops->prl : NULL;
err = vma_reserve_fences(tile_to_xe(tile), vma);
if (err)
--
2.52.0
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH v4 10/10] drm/xe: Add debugfs support for page reclamation
2025-12-09 20:08 [PATCH v4 00/10] Page Reclamation Support for Xe3p Platforms Brian Nguyen
` (8 preceding siblings ...)
2025-12-09 20:08 ` [PATCH v4 09/10] drm/xe: Optimize flushing of L2$ by skipping unnecessary page reclaim Brian Nguyen
@ 2025-12-09 20:08 ` Brian Nguyen
9 siblings, 0 replies; 13+ messages in thread
From: Brian Nguyen @ 2025-12-09 20:08 UTC (permalink / raw)
To: intel-xe
Cc: tejas.upadhyay, matthew.brost, shuicheng.lin, stuart.summers,
Michal Wajdeczko
Allow runtime modification of the page reclamation feature through a
debugfs file. The parameter only takes effect if the platform supports
the page reclamation feature by default.
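With this applied, the flag can be toggled at runtime, for example
(debugfs root path assumed) by writing 0 or 1 to
/sys/kernel/debug/dri/<device>/page_reclaim_hw_assist and reading the
file back to confirm the current state.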
v2:
- Minor comment tweaks. (Shuicheng)
- Convert to kstrtobool_from_user. (Michal)
- Only expose the page reclaim file if the page reclaim flag is
initially supported and, with that, remove the
xe_match_desc usage. (Michal)
Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Shuicheng Lin <shuicheng.lin@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
---
drivers/gpu/drm/xe/xe_debugfs.c | 41 +++++++++++++++++++++++++++++++++
1 file changed, 41 insertions(+)
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 0f8a96a05a8e..d24e5aca30ed 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -291,6 +291,39 @@ static const struct file_operations wedged_mode_fops = {
.write = wedged_mode_set,
};
+static ssize_t page_reclaim_hw_assist_show(struct file *f, char __user *ubuf,
+ size_t size, loff_t *pos)
+{
+ struct xe_device *xe = file_inode(f)->i_private;
+ char buf[8];
+ int len;
+
+ len = scnprintf(buf, sizeof(buf), "%d\n", xe->info.has_page_reclaim_hw_assist);
+ return simple_read_from_buffer(ubuf, size, pos, buf, len);
+}
+
+static ssize_t page_reclaim_hw_assist_set(struct file *f, const char __user *ubuf,
+ size_t size, loff_t *pos)
+{
+ struct xe_device *xe = file_inode(f)->i_private;
+ bool val;
+ ssize_t ret;
+
+ ret = kstrtobool_from_user(ubuf, size, &val);
+ if (ret)
+ return ret;
+
+ xe->info.has_page_reclaim_hw_assist = val;
+
+ return size;
+}
+
+static const struct file_operations page_reclaim_hw_assist_fops = {
+ .owner = THIS_MODULE,
+ .read = page_reclaim_hw_assist_show,
+ .write = page_reclaim_hw_assist_set,
+};
+
static ssize_t atomic_svm_timeslice_ms_show(struct file *f, char __user *ubuf,
size_t size, loff_t *pos)
{
@@ -396,6 +429,14 @@ void xe_debugfs_register(struct xe_device *xe)
debugfs_create_file("disable_late_binding", 0600, root, xe,
&disable_late_binding_fops);
+ /*
+ * Don't expose the page reclaim configuration file if the hardware
+ * doesn't support it initially.
+ */
+ if (xe->info.has_page_reclaim_hw_assist)
+ debugfs_create_file("page_reclaim_hw_assist", 0600, root, xe,
+ &page_reclaim_hw_assist_fops);
+
man = ttm_manager_type(bdev, XE_PL_TT);
ttm_resource_manager_create_debugfs(man, root, "gtt_mm");
--
2.52.0
^ permalink raw reply related [flat|nested] 13+ messages in thread