Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Brian Nguyen <brian3.nguyen@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: tejas.upadhyay@intel.com, matthew.brost@intel.com,
	shuicheng.lin@intel.com, stuart.summers@intel.com,
	Brian Nguyen <brian3.nguyen@intel.com>
Subject: [PATCH 06/11] drm/xe: Create page reclaim list on unbind
Date: Tue, 18 Nov 2025 17:05:47 +0800	[thread overview]
Message-ID: <20251118090552.246243-7-brian3.nguyen@intel.com> (raw)
In-Reply-To: <20251118090552.246243-1-brian3.nguyen@intel.com>

The page reclaim list (PRL) is preparation work for the page reclaim feature.
The PRL is initially owned by pt_update_ops, and all other page reclaim
operations point back to this PRL. The PRL entries are generated during
the unbind page-table walk.

This PRL is restricted to a 4K page, so 512 page entries at most.

Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
---
 drivers/gpu/drm/xe/Makefile           |   1 +
 drivers/gpu/drm/xe/regs/xe_gtt_defs.h |   1 +
 drivers/gpu/drm/xe/xe_page_reclaim.c  |  52 ++++++++++++
 drivers/gpu/drm/xe/xe_page_reclaim.h  |  49 ++++++++++++
 drivers/gpu/drm/xe/xe_pt.c            | 109 ++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_pt_types.h      |   5 ++
 6 files changed, 217 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/xe_page_reclaim.c
 create mode 100644 drivers/gpu/drm/xe/xe_page_reclaim.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index e4b273b025d2..048e6c93271c 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -95,6 +95,7 @@ xe-y += xe_bb.o \
 	xe_oa.o \
 	xe_observation.o \
 	xe_pagefault.o \
+	xe_page_reclaim.o \
 	xe_pat.o \
 	xe_pci.o \
 	xe_pcode.o \
diff --git a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h
index 4389e5a76f89..4d83461e538b 100644
--- a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h
@@ -9,6 +9,7 @@
 #define XELPG_GGTT_PTE_PAT0	BIT_ULL(52)
 #define XELPG_GGTT_PTE_PAT1	BIT_ULL(53)
 
+#define XE_PTE_ADDR_MASK	GENMASK_ULL(51, 12)
 #define GGTT_PTE_VFID		GENMASK_ULL(11, 2)
 
 #define GUC_GGTT_TOP		0xFEE00000
diff --git a/drivers/gpu/drm/xe/xe_page_reclaim.c b/drivers/gpu/drm/xe/xe_page_reclaim.c
new file mode 100644
index 000000000000..a0d15efff58c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_page_reclaim.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include "xe_page_reclaim.h"
+
+#include "regs/xe_gt_regs.h"
+#include "xe_assert.h"
+#include "xe_macros.h"
+
+/**
+ * xe_page_reclaim_list_invalidate() - Mark a PRL as invalid
+ * @prl: Page reclaim list to reset
+ *
+ * Clears the entries pointer and marks the list as invalid so that
+ * later users know the PRL is unusable. Nothing is released here; the
+ * caller is expected to have already dropped the entries reference.
+ */
+void xe_page_reclaim_list_invalidate(struct xe_page_reclaim_list *prl)
+{
+	prl->entries = NULL;
+	prl->num_entries = XE_PAGE_RECLAIM_INVALID_LIST;
+}
+
+/**
+ * xe_page_reclaim_list_alloc_entries() - Allocate page reclaim list entries
+ * @prl: Page reclaim list to allocate entries for; must not already have entries
+ *
+ * Return: 0 after attaching one zeroed page to @prl, -ENOMEM leaving @prl unchanged.
+ */
+int xe_page_reclaim_list_alloc_entries(struct xe_page_reclaim_list *prl)
+{
+	struct page *page;
+
+	XE_WARN_ON(prl->entries != NULL);
+	if (prl->entries)
+		return 0;
+
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (page) {
+		prl->entries = page_address(page);
+		prl->num_entries = 0;
+	}
+
+	return page ? 0 : -ENOMEM;
+}
diff --git a/drivers/gpu/drm/xe/xe_page_reclaim.h b/drivers/gpu/drm/xe/xe_page_reclaim.h
new file mode 100644
index 000000000000..d066d7d97f79
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_page_reclaim.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_PAGE_RECLAIM_H_
+#define _XE_PAGE_RECLAIM_H_
+
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+#define XE_PAGE_RECLAIM_MAX_ENTRIES	512
+#define XE_PAGE_RECLAIM_LIST_MAX_SIZE	SZ_4K
+
+struct xe_guc_page_reclaim_entry {
+	u32 valid:1;
+	u32 reclamation_size:6;
+	u32 reserved:5;
+	u32 address_lo:20;
+	u32 address_hi:20;
+	u32 reserved1:12;
+} __packed;
+
+struct xe_page_reclaim_list {
+	/** @entries: page-backed array of reclaim entries, NULL when no page is attached */
+	struct xe_guc_page_reclaim_entry *entries;
+	/** @num_entries: entries in use, or XE_PAGE_RECLAIM_INVALID_LIST if unusable */
+	int num_entries;
+#define XE_PAGE_RECLAIM_INVALID_LIST	-1
+};
+
+void xe_page_reclaim_list_invalidate(struct xe_page_reclaim_list *prl);
+int xe_page_reclaim_list_alloc_entries(struct xe_page_reclaim_list *prl);
+static inline void xe_page_reclaim_entries_get(struct xe_guc_page_reclaim_entry *entries)
+{
+	if (entries)
+		get_page(virt_to_page(entries));
+}
+
+static inline void xe_page_reclaim_entries_put(struct xe_guc_page_reclaim_entry *entries)
+{
+	if (entries)
+		put_page(virt_to_page(entries));
+}
+
+#endif	/* _XE_PAGE_RECLAIM_H_ */
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 884127b4d97d..532a047676d4 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -12,6 +12,7 @@
 #include "xe_exec_queue.h"
 #include "xe_gt.h"
 #include "xe_migrate.h"
+#include "xe_page_reclaim.h"
 #include "xe_pt_types.h"
 #include "xe_pt_walk.h"
 #include "xe_res_cursor.h"
@@ -1538,6 +1539,9 @@ struct xe_pt_stage_unbind_walk {
 	/* Output */
 	/* @wupd: Structure to track the page-table updates we're building */
 	struct xe_walk_update wupd;
+
+	/** @prl: Backing pointer to page reclaim list in pt_update_ops */
+	struct xe_page_reclaim_list *prl;
 };
 
 /*
@@ -1572,6 +1576,69 @@ static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level,
 	return false;
 }
 
+/* True for a level-1 xe_pt with no children array; callers treat it as a 2MB leaf */
+static bool is_large_pte(struct xe_pt *pte)
+{
+	return pte->level == 1 && !pte->base.children;
+}
+
+/* Yields reclamation_size = ilog2(page_size) - 12 for a compile-time page_size */
+#define COMPUTE_RECLAIM_ADDRESS_MASK(page_size)				\
+({									\
+	BUILD_BUG_ON(!__builtin_constant_p(page_size));			\
+	ilog2(page_size) - 12;						\
+})
+
+/* Append one PRL entry for leaf @pte; on overflow the whole PRL is marked invalid */
+static void generate_reclaim_entry(struct xe_tile *tile,
+				   struct xe_page_reclaim_list *prl,
+				   u64 pte, struct xe_pt *xe_child)
+{
+	struct xe_guc_page_reclaim_entry *reclaim_entries = prl->entries;
+	/* Page frame number: PTE address bits 51:12 shifted down to bit 0 */
+	u64 phys_page = FIELD_GET(XE_PTE_ADDR_MASK, pte);
+	const u64 field_mask = GENMASK_ULL(19, 0);
+	u32 reclamation_size;
+	const uint max_entries = XE_PAGE_RECLAIM_MAX_ENTRIES;
+	int num_entries = prl->num_entries;
+
+	xe_tile_assert(tile, xe_child->level <= MAX_HUGEPTE_LEVEL);
+	xe_tile_assert(tile, reclaim_entries);
+
+	if (num_entries == XE_PAGE_RECLAIM_INVALID_LIST)
+		return;
+
+	/* Overflow: mark as invalid through num_entries */
+	if (num_entries >= max_entries) {
+		prl->num_entries = XE_PAGE_RECLAIM_INVALID_LIST;
+		return;
+	}
+
+	/*
+	 * reclamation_size indicates the size of the page to be
+	 * invalidated and flushed from non-coherent cache.
+	 * Page size is computed as 2^(reclamation_size+12) bytes.
+	 * Only valid for these specific levels.
+	 */
+	if (xe_child->level == 0 && !(pte & XE_PTE_PS64))
+		reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_4K);  /* reclamation_size = 0 */
+	else if (xe_child->level == 0)
+		reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_64K); /* reclamation_size = 4 */
+	else if (is_large_pte(xe_child))
+		reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_2M);  /* reclamation_size = 9 */
+	else
+		return;
+
+	/* address_lo/address_hi store physical address bits 31:12 and 51:32 */
+	reclaim_entries[num_entries].valid = 1;
+	reclaim_entries[num_entries].reclamation_size = reclamation_size;
+	reclaim_entries[num_entries].address_lo =
+		FIELD_GET(field_mask, phys_page);
+	reclaim_entries[num_entries].address_hi =
+		FIELD_GET(field_mask, phys_page >> 20);
+	prl->num_entries++;
+}
+
 static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
 				    unsigned int level, u64 addr, u64 next,
 				    struct xe_ptw **child,
@@ -1579,10 +1646,27 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
 				    struct xe_pt_walk *walk)
 {
 	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+	struct xe_pt_stage_unbind_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	struct xe_device *xe = tile_to_xe(xe_walk->tile);
 
 	XE_WARN_ON(!*child);
 	XE_WARN_ON(!level);
 
+	/* 4K and 64K Pages are level 0, large pte needs additional handling. */
+	if (xe_walk->prl && (xe_child->level == 0 || is_large_pte(xe_child))) {
+		struct iosys_map *leaf_map = &xe_child->bo->vmap;
+		pgoff_t first = xe_pt_offset(addr, 0, walk);
+		pgoff_t count = xe_pt_num_entries(addr, next, 0, walk);
+
+		for (pgoff_t i = 0; i < count; i++) {
+			u64 pte = xe_map_rd(xe, leaf_map, (first + i) * sizeof(u64), u64);
+
+			generate_reclaim_entry(xe_walk->tile, xe_walk->prl,
+					       pte, xe_child);
+		}
+	}
+
 	xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk);
 
 	return 0;
@@ -1654,6 +1738,8 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile,
 {
 	u64 start = range ? xe_svm_range_start(range) : xe_vma_start(vma);
 	u64 end = range ? xe_svm_range_end(range) : xe_vma_end(vma);
+	struct xe_vm_pgtable_update_op *pt_update_op =
+		container_of(entries, struct xe_vm_pgtable_update_op, entries[0]);
 	struct xe_pt_stage_unbind_walk xe_walk = {
 		.base = {
 			.ops = &xe_pt_stage_unbind_ops,
@@ -1665,6 +1751,7 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile,
 		.modified_start = start,
 		.modified_end = end,
 		.wupd.entries = entries,
+		.prl = pt_update_op->prl,
 	};
 	struct xe_pt *pt = vm->pt_root[tile->id];
 
@@ -1897,6 +1984,7 @@ static int unbind_op_prepare(struct xe_tile *tile,
 			     struct xe_vm_pgtable_update_ops *pt_update_ops,
 			     struct xe_vma *vma)
 {
+	struct xe_device *xe = tile_to_xe(tile);
 	u32 current_op = pt_update_ops->current_op;
 	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
 	int err;
@@ -1914,6 +2002,13 @@ static int unbind_op_prepare(struct xe_tile *tile,
 	pt_op->vma = vma;
 	pt_op->bind = false;
 	pt_op->rebind = false;
+	/* Maintain one PRL located in pt_update_ops that all others in unbind op reference */
+	if (xe->info.has_page_reclaim_hw_assist && !pt_update_ops->prl.entries) {
+		err = xe_page_reclaim_list_alloc_entries(&pt_update_ops->prl);
+		if (err < 0)
+			xe_page_reclaim_list_invalidate(&pt_update_ops->prl);
+	}
+	pt_op->prl = (pt_update_ops->prl.entries) ? &pt_update_ops->prl : NULL;
 
 	err = vma_reserve_fences(tile_to_xe(tile), vma);
 	if (err)
@@ -1921,6 +2016,13 @@ static int unbind_op_prepare(struct xe_tile *tile,
 
 	pt_op->num_entries = xe_pt_stage_unbind(tile, xe_vma_vm(vma),
 						vma, NULL, pt_op->entries);
+	/* Free PRL if list declared as invalid */
+	if (pt_update_ops->prl.entries &&
+	    pt_update_ops->prl.num_entries == XE_PAGE_RECLAIM_INVALID_LIST) {
+		xe_page_reclaim_entries_put(pt_update_ops->prl.entries);
+		pt_op->prl = NULL;
+		pt_update_ops->prl.entries = NULL;
+	}
 
 	xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
 				pt_op->num_entries, false);
@@ -1979,6 +2081,7 @@ static int unbind_range_prepare(struct xe_vm *vm,
 	pt_op->vma = XE_INVALID_VMA;
 	pt_op->bind = false;
 	pt_op->rebind = false;
+	pt_op->prl = NULL;
 
 	pt_op->num_entries = xe_pt_stage_unbind(tile, vm, NULL, range,
 						pt_op->entries);
@@ -2096,6 +2199,7 @@ xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops)
 	init_llist_head(&pt_update_ops->deferred);
 	pt_update_ops->start = ~0x0ull;
 	pt_update_ops->last = 0x0ull;
+	xe_page_reclaim_list_invalidate(&pt_update_ops->prl);
 }
 
 /**
@@ -2518,6 +2622,11 @@ void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops)
 		&vops->pt_update_ops[tile->id];
 	int i;
 
+	if (pt_update_ops->prl.entries) {
+		xe_page_reclaim_entries_put(pt_update_ops->prl.entries);
+		xe_page_reclaim_list_invalidate(&pt_update_ops->prl);
+	}
+
 	lockdep_assert_held(&vops->vm->lock);
 	xe_vm_assert_held(vops->vm);
 
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
index 881f01e14db8..26e5295f118e 100644
--- a/drivers/gpu/drm/xe/xe_pt_types.h
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -8,6 +8,7 @@
 
 #include <linux/types.h>
 
+#include "xe_page_reclaim.h"
 #include "xe_pt_walk.h"
 
 struct xe_bo;
@@ -85,6 +86,8 @@ struct xe_vm_pgtable_update_op {
 	bool bind;
 	/** @rebind: is a rebind */
 	bool rebind;
+	/** @prl: Backing pointer to page reclaim list of pt_update_ops */
+	struct xe_page_reclaim_list *prl;
 };
 
 /** struct xe_vm_pgtable_update_ops: page table update operations */
@@ -119,6 +122,8 @@ struct xe_vm_pgtable_update_ops {
 	 * slots are idle.
 	 */
 	bool wait_vm_kernel;
+	/** @prl: embedded page reclaim list */
+	struct xe_page_reclaim_list prl;
 };
 
 #endif
-- 
2.51.2


  parent reply	other threads:[~2025-11-18  9:06 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-18  9:05 [PATCH 00/11] Page Reclamation Support for Xe3p Platforms Brian Nguyen
2025-11-18  9:05 ` [PATCH 01/11] [DO, NOT, REVIEW] drm/xe: Do not forward invalid TLB invalidation seqnos to upper layers Brian Nguyen
2025-11-18  9:05 ` [PATCH 02/11] drm/xe: Reset tlb fence timeout on invalid seqno received Brian Nguyen
2025-11-21 17:23   ` Lin, Shuicheng
2025-11-22  1:53     ` Nguyen, Brian3
2025-11-22 18:25   ` Matthew Brost
2025-11-25 11:01     ` Nguyen, Brian3
2025-11-18  9:05 ` [PATCH 03/11] drm/xe/xe_tlb_inval: Modify fence interface to support PPC flush Brian Nguyen
2025-11-21 18:02   ` Lin, Shuicheng
2025-11-22  1:54     ` Nguyen, Brian3
2025-11-22 19:32   ` Matthew Brost
2025-11-25 11:07     ` Nguyen, Brian3
2025-11-18  9:05 ` [PATCH 04/11] drm/xe: Add page reclamation info to device info Brian Nguyen
2025-11-21 18:15   ` Lin, Shuicheng
2025-11-22 18:31   ` Matthew Brost
2025-11-18  9:05 ` [PATCH 05/11] drm/xe/guc: Add page reclamation interface to GuC Brian Nguyen
2025-11-21 18:32   ` Lin, Shuicheng
2025-11-22  1:56     ` Nguyen, Brian3
2025-11-22 18:39       ` Matthew Brost
2025-11-25 11:13         ` Nguyen, Brian3
2025-11-18  9:05 ` Brian Nguyen [this message]
2025-11-21 21:29   ` [PATCH 06/11] drm/xe: Create page reclaim list on unbind Lin, Shuicheng
2025-11-22  1:57     ` Nguyen, Brian3
2025-11-22 19:18   ` Matthew Brost
2025-11-25 11:18     ` Nguyen, Brian3
2025-11-25 18:34       ` Matthew Brost
2025-11-25 19:01         ` Nguyen, Brian3
2025-11-25 19:07           ` Matthew Brost
2025-11-25 19:46             ` Nguyen, Brian3
2025-11-25 22:35               ` Matthew Brost
2025-11-26  2:33                 ` Nguyen, Brian3
2025-11-18  9:05 ` [PATCH 07/11] drm/xe: Suballocate BO for page reclaim Brian Nguyen
2025-11-22 19:42   ` Matthew Brost
2025-11-25 11:20     ` Nguyen, Brian3
2025-11-18  9:05 ` [PATCH 08/11] drm/xe: Prep page reclaim in tlb inval job Brian Nguyen
2025-11-22 13:52   ` Michal Wajdeczko
2025-11-25 11:20     ` Nguyen, Brian3
2025-11-18  9:05 ` [PATCH 09/11] drm/xe: Append page reclamation action to tlb inval Brian Nguyen
2025-11-18  9:05 ` [PATCH 10/11] drm/xe: Optimize flushing of L2$ by skipping unnecessary page reclaim Brian Nguyen
2025-11-24 12:29   ` Matthew Auld
2025-11-25  6:12     ` Nguyen, Brian3
2025-11-25 11:48     ` Upadhyay, Tejas
2025-11-25 13:05       ` Upadhyay, Tejas
2025-11-18  9:05 ` [PATCH 11/11] drm/xe: Add debugfs support for page reclamation Brian Nguyen
2025-11-21 22:32   ` Lin, Shuicheng
2025-11-22  1:57     ` Nguyen, Brian3
2025-11-22 14:18   ` Michal Wajdeczko
2025-11-25 11:21     ` Nguyen, Brian3
2025-11-18  9:52 ` ✗ CI.checkpatch: warning for Page Reclamation Support for Xe3p Platforms Patchwork
2025-11-18  9:53 ` ✓ CI.KUnit: success " Patchwork
2025-11-18 13:02 ` ✗ Xe.CI.Full: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251118090552.246243-7-brian3.nguyen@intel.com \
    --to=brian3.nguyen@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.brost@intel.com \
    --cc=shuicheng.lin@intel.com \
    --cc=stuart.summers@intel.com \
    --cc=tejas.upadhyay@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox