From: Brian Nguyen <brian3.nguyen@intel.com>
To: igt-dev@lists.freedesktop.org
Cc: x.wang@intel.com, Brian Nguyen <brian3.nguyen@intel.com>
Subject: [PATCH 1/4] tests/xe: Add page reclaim test
Date: Mon, 6 Apr 2026 18:42:28 +0000 [thread overview]
Message-ID: <20260406184226.1294486-7-brian3.nguyen@intel.com> (raw)
In-Reply-To: <20260406184226.1294486-6-brian3.nguyen@intel.com>
Page Reclamation is a feature enabled in Xe3p that allows for some
performance gain by optimizing TLB invalidations. Xe2 and beyond have
a physically noncoherent L2 cache that requires a full PPC flush
every time a TLB invalidation occurs. With page reclamation, only the
pages associated with the unmap that triggered the TLB invalidation
are flushed.
The xe_page_reclaim test cases create pages of a specific size, bind
them to a VM, and unbind them, observing through gt stats whether the
expected pages are added to the PRL.
Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
Cc: Xin Wang <x.wang@intel.com>
---
tests/intel/xe_page_reclaim.c | 441 ++++++++++++++++++++++++++++++++++
tests/meson.build | 1 +
2 files changed, 442 insertions(+)
create mode 100644 tests/intel/xe_page_reclaim.c
diff --git a/tests/intel/xe_page_reclaim.c b/tests/intel/xe_page_reclaim.c
new file mode 100644
index 000000000..acc237d43
--- /dev/null
+++ b/tests/intel/xe_page_reclaim.c
@@ -0,0 +1,441 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#include <fcntl.h>
+
+#include "ioctl_wrappers.h"
+#include "xe/xe_gt.h"
+#include "xe/xe_ioctl.h"
+
+#define OVERFLOW_PRL_SIZE 512
+
+/**
+ * TEST: xe_page_reclaim
+ * Category: Core
+ * Mega feature: General Core features
+ * Sub-category: VM bind
+ * Functionality: Page Reclamation
+ * Test category: functionality test
+ */
+/* Snapshot of the GT's page-reclaim-list (PRL) counters read from gt stats. */
+struct xe_prl_stats {
+	int prl_4k_entry_count;		/* 4K-page entries added to the PRL */
+	int prl_64k_entry_count;	/* 64K-page entries added to the PRL */
+	int prl_2m_entry_count;		/* 2M-page entries added to the PRL */
+	int prl_issued_count;		/* PRL-based invalidations issued */
+	int prl_aborted_count;		/* PRLs aborted (fell back to full flush) */
+};
+
+/*
+ * Read a snapshot of all PRL counters for @gt.
+ *
+ * PRL is only active on the render GT (gt0); media tiles do not participate
+ * in page reclamation. Callers typically pass gt=0.
+ */
+static struct xe_prl_stats get_prl_stats(int fd, int gt)
+{
+	struct xe_prl_stats snap = {
+		.prl_4k_entry_count = xe_gt_stats_get_count(fd, gt, "prl_4k_entry_count"),
+		.prl_64k_entry_count = xe_gt_stats_get_count(fd, gt, "prl_64k_entry_count"),
+		.prl_2m_entry_count = xe_gt_stats_get_count(fd, gt, "prl_2m_entry_count"),
+		.prl_issued_count = xe_gt_stats_get_count(fd, gt, "prl_issued_count"),
+		.prl_aborted_count = xe_gt_stats_get_count(fd, gt, "prl_aborted_count"),
+	};
+
+	return snap;
+}
+
+/* Log the before/after PRL counter pairs at debug level for diagnosis. */
+static void log_prl_stat_diff(struct xe_prl_stats *stats_before, struct xe_prl_stats *stats_after)
+{
+	/* Use the same "%d->%d" separator for every counter (2M previously differed) */
+	igt_debug("PRL stats diff: 4K: %d->%d, 64K: %d->%d, 2M: %d->%d, issued: %d->%d, aborted: %d->%d\n",
+		  stats_before->prl_4k_entry_count,
+		  stats_after->prl_4k_entry_count,
+		  stats_before->prl_64k_entry_count,
+		  stats_after->prl_64k_entry_count,
+		  stats_before->prl_2m_entry_count,
+		  stats_after->prl_2m_entry_count,
+		  stats_before->prl_issued_count,
+		  stats_after->prl_issued_count,
+		  stats_before->prl_aborted_count,
+		  stats_after->prl_aborted_count);
+}
+
+/*
+ * Compare differences between stats and determine if expected.
+ *
+ * Asserts that the delta of each PRL counter between @before and @after is
+ * exactly the count in @expected; the diff is logged first so a failing
+ * assertion can be diagnosed from the debug output.
+ */
+static void compare_prl_stats(struct xe_prl_stats *before, struct xe_prl_stats *after,
+			      struct xe_prl_stats *expected)
+{
+	log_prl_stat_diff(before, after);
+
+	igt_assert_eq(after->prl_4k_entry_count - before->prl_4k_entry_count,
+		      expected->prl_4k_entry_count);
+	igt_assert_eq(after->prl_64k_entry_count - before->prl_64k_entry_count,
+		      expected->prl_64k_entry_count);
+	igt_assert_eq(after->prl_2m_entry_count - before->prl_2m_entry_count,
+		      expected->prl_2m_entry_count);
+	igt_assert_eq(after->prl_issued_count - before->prl_issued_count,
+		      expected->prl_issued_count);
+	igt_assert_eq(after->prl_aborted_count - before->prl_aborted_count,
+		      expected->prl_aborted_count);
+}
+
+/*
+ * Helper with more flexibility on unbinding and offsets.
+ *
+ * Creates one BO per entry of @vma_sizes and binds each at consecutive
+ * addresses from @start_addr (or at start_addr + vma_offsets[i] when
+ * @vma_offsets is non-NULL), then unbinds @unbind_size bytes from
+ * @start_addr in a single operation (the packed span if @unbind_size is 0).
+ */
+static void vma_range_list_with_unbind_and_offsets(int fd, const uint64_t *vma_sizes, unsigned int n_vmas,
+						   uint64_t start_addr, uint64_t unbind_size, const uint64_t *vma_offsets)
+{
+	uint32_t vm;
+	uint32_t *bos;
+	uint64_t addr;
+
+	igt_assert(vma_sizes);
+	igt_assert(n_vmas);
+
+	vm = xe_vm_create(fd, 0, 0);
+
+	bos = calloc(n_vmas, sizeof(*bos));
+	igt_assert(bos);
+
+	addr = start_addr;
+	for (unsigned int i = 0; i < n_vmas; i++) {
+		igt_assert(vma_sizes[i]);
+
+		/* One BO per VMA, placed in system memory */
+		bos[i] = xe_bo_create(fd, 0, vma_sizes[i], system_memory(fd), 0);
+		/* Explicit offsets override the default contiguous packing */
+		if (vma_offsets)
+			addr = start_addr + vma_offsets[i];
+		xe_vm_bind_sync(fd, vm, bos[i], 0, addr, vma_sizes[i]);
+		addr += vma_sizes[i];
+	}
+
+	/* Unbind the whole contiguous VA span in one operation. */
+	xe_vm_unbind_sync(fd, vm, 0, start_addr, unbind_size ? unbind_size : addr - start_addr);
+
+	for (unsigned int i = 0; i < n_vmas; i++)
+		gem_close(fd, bos[i]);
+
+	free(bos);
+	xe_vm_destroy(fd, vm);
+}
+
+/*
+ * Allocate and bind one BO per entry of @vma_sizes at consecutive addresses
+ * starting at @start_addr, then unbind the entire span in one operation.
+ */
+static void test_vma_ranges_list(int fd, const uint64_t *vma_sizes,
+				 unsigned int n_vmas, uint64_t start_addr)
+{
+	/* No explicit unbind size and no per-VMA offsets: pack contiguously */
+	vma_range_list_with_unbind_and_offsets(fd, vma_sizes, n_vmas,
+					       start_addr, 0, NULL);
+}
+
+/**
+ * SUBTEST: basic-mixed
+ * Description: Create multiple different sizes of page (4K, 64K, 2M)
+ * GPU VMA ranges, bind them into a VM at unique addresses, then
+ * unbind all to trigger page reclamation on different page sizes
+ * in one page reclaim list.
+ */
+static void test_vma_ranges_basic_mixed(int fd)
+{
+	struct xe_prl_stats stats_before, stats_after, expected_stats = { 0 };
+	/* Unsigned counts: avoids signed/unsigned comparisons in the fill loops */
+	const unsigned int num_4k_pages = 16;
+	const unsigned int num_64k_pages = 31;
+	const unsigned int num_2m_pages = 2;
+	uint64_t *sizes = calloc(num_4k_pages + num_64k_pages + num_2m_pages, sizeof(uint64_t));
+	unsigned int count = 0;
+
+	igt_assert(sizes);
+	for (unsigned int i = 0; i < num_4k_pages; i++)
+		sizes[count++] = SZ_4K;
+
+	for (unsigned int i = 0; i < num_64k_pages; i++)
+		sizes[count++] = SZ_64K;
+
+	for (unsigned int i = 0; i < num_2m_pages; i++)
+		sizes[count++] = SZ_2M;
+
+	/* Every unbound page should land in a single issued PRL, none aborted */
+	expected_stats.prl_4k_entry_count = num_4k_pages;
+	expected_stats.prl_64k_entry_count = num_64k_pages;
+	expected_stats.prl_2m_entry_count = num_2m_pages;
+	expected_stats.prl_issued_count = 1;
+	expected_stats.prl_aborted_count = 0;
+
+	stats_before = get_prl_stats(fd, 0);
+	test_vma_ranges_list(fd, sizes, count, 1ull << 30);
+	stats_after = get_prl_stats(fd, 0);
+
+	free(sizes);
+	compare_prl_stats(&stats_before, &stats_after, &expected_stats);
+}
+
+/**
+ * SUBTEST: prl-invalidate-full
+ * Description: Create 512 4K page entries at the maximum page reclaim list
+ * size boundary and bind them into a VM.
+ * Expects to trigger a fallback to full PPC flush due to page reclaim
+ * list size limitations (512 entries max).
+ *
+ * SUBTEST: prl-max-entries
+ * Description: Create the maximum page reclaim list without overflow and
+ * bind it into a VM.
+ * Expects no fallback to PPC flush due to page reclaim
+ * list size limitations (512 entries max).
+ */
+static void test_vma_ranges_prl_entries(int fd, unsigned int num_entries,
+					int expected_issued, int expected_aborted)
+{
+	struct xe_prl_stats stats_before, stats_after, expected_stats = { 0 };
+	const uint64_t page_size = SZ_4K;
+	/* Start address aligned but offset by a page to ensure no large PTEs are created */
+	uint64_t addr = (1ull << 30) + page_size;
+
+	/* Capped at OVERFLOW_PRL_SIZE - 1: on overflow the last entry triggers abort */
+	expected_stats.prl_4k_entry_count = min_t(int, num_entries, OVERFLOW_PRL_SIZE - 1);
+	expected_stats.prl_64k_entry_count = 0;
+	expected_stats.prl_2m_entry_count = 0;
+	expected_stats.prl_issued_count = expected_issued;
+	expected_stats.prl_aborted_count = expected_aborted;
+
+	stats_before = get_prl_stats(fd, 0);
+	/* A single VMA spanning num_entries 4K pages; the page offset above keeps PTEs at 4K */
+	test_vma_ranges_list(fd, &(uint64_t){page_size * num_entries}, 1, addr);
+	stats_after = get_prl_stats(fd, 0);
+	compare_prl_stats(&stats_before, &stats_after, &expected_stats);
+}
+
+/*
+ * Bind a single BO of @vma_size at @n_vmas consecutive VA ranges, then
+ * unbind the whole span with one operation.
+ */
+static void test_many_ranges_one_bo(int fd,
+				    const uint64_t vma_size,
+				    unsigned int n_vmas,
+				    uint64_t start_addr)
+{
+	uint32_t vm;
+	uint64_t addr;
+	uint32_t bo;
+
+	igt_assert(n_vmas);
+
+	vm = xe_vm_create(fd, 0, 0);
+
+	igt_assert(vma_size);
+	bo = xe_bo_create(fd, 0, vma_size, system_memory(fd), 0);
+
+	addr = start_addr;
+	for (unsigned int i = 0; i < n_vmas; i++) {
+		/* Bind the same BO (offset 0) at a new VA location */
+		xe_vm_bind_sync(fd, vm, bo, 0, addr, vma_size);
+		addr += vma_size;
+	}
+
+	/* Unbind all VMAs */
+	xe_vm_unbind_sync(fd, vm, 0, start_addr, addr - start_addr);
+
+	gem_close(fd, bo);
+	xe_vm_destroy(fd, vm);
+}
+
+/**
+ * SUBTEST: many-vma-same-bo
+ * Description: Create multiple 4K page VMA ranges bound to the same BO,
+ * bind them into a VM at unique addresses, then unbind all to trigger
+ * page reclamation handling when the same BO is bound to multiple
+ * virtual addresses.
+ */
+static void test_vma_ranges_many_vma_same_bo(int fd, uint64_t vma_size, unsigned int n_vmas)
+{
+	struct xe_prl_stats stats_before, stats_after, expected_stats = { 0 };
+
+	/*
+	 * NOTE(review): the expectation below assumes vma_size == SZ_4K (the
+	 * only value currently passed); larger sizes would land in a different
+	 * counter — confirm before reusing with other page sizes.
+	 */
+	expected_stats.prl_4k_entry_count = n_vmas;
+	expected_stats.prl_issued_count = 1;
+
+	stats_before = get_prl_stats(fd, 0);
+	test_many_ranges_one_bo(fd, vma_size, n_vmas, 1ull << 30);
+	stats_after = get_prl_stats(fd, 0);
+	compare_prl_stats(&stats_before, &stats_after, &expected_stats);
+}
+
+/**
+ * SUBTEST: invalid-1g
+ * Description: Create a 1G page VMA followed by a 4K page VMA to test
+ * handling of 1G page mappings during page reclamation.
+ * Expected is to fallback to invalidation.
+ */
+static void test_vma_range_invalid_1g(int fd)
+{
+	struct xe_prl_stats stats_before, stats_after, expected_stats = { 0 };
+	static const uint64_t sizes[] = {
+		SZ_1G,
+		SZ_4K,
+	};
+	int delta_4k, delta_64k, delta_2m, delta_issued, delta_aborted;
+	bool expected_2m_entries, all_entries_dropped;
+
+	/* 1G page broken into 512 2M pages, but it should invalidate the last entry */
+	expected_stats.prl_2m_entry_count = OVERFLOW_PRL_SIZE - 1;
+	/* No page size because PRL should be invalidated before the second page */
+	expected_stats.prl_4k_entry_count = 0;
+	expected_stats.prl_issued_count = 0;
+	expected_stats.prl_aborted_count = 1;
+
+	stats_before = get_prl_stats(fd, 0);
+	/* Offset 2G to avoid alignment issues */
+	test_vma_ranges_list(fd, sizes, ARRAY_SIZE(sizes), SZ_2G);
+	stats_after = get_prl_stats(fd, 0);
+	log_prl_stat_diff(&stats_before, &stats_after);
+
+	/*
+	 * Depending on page placement, 1G page directory could be dropped from page walk
+	 * which would not generate any entries
+	 */
+	delta_4k = stats_after.prl_4k_entry_count - stats_before.prl_4k_entry_count;
+	delta_64k = stats_after.prl_64k_entry_count - stats_before.prl_64k_entry_count;
+	delta_2m = stats_after.prl_2m_entry_count - stats_before.prl_2m_entry_count;
+	delta_issued = stats_after.prl_issued_count - stats_before.prl_issued_count;
+	delta_aborted = stats_after.prl_aborted_count - stats_before.prl_aborted_count;
+	/* Either all 511 2M entries were recorded before the abort... */
+	expected_2m_entries = (delta_2m == expected_stats.prl_2m_entry_count);
+	/* ...or the whole 1G directory was dropped and no entries were generated */
+	all_entries_dropped = (delta_2m == 0 && delta_64k == 0 && delta_4k == 0);
+
+	igt_assert_eq(delta_issued, expected_stats.prl_issued_count);
+	igt_assert_eq(delta_aborted, expected_stats.prl_aborted_count);
+	igt_assert_eq(delta_4k, expected_stats.prl_4k_entry_count);
+	igt_assert(expected_2m_entries || all_entries_dropped);
+}
+
+/**
+ * SUBTEST: pde-vs-pd
+ * Description: Test case to trigger invalidation of both PDE (2M pages)
+ * and PD (page directory filled with 64K pages) to determine correct
+ * handling of both cases for PRL.
+ */
+static void test_vma_ranges_pde_vs_pd(int fd)
+{
+	struct xe_prl_stats stats_before, stats_after, expected_stats = { 0 };
+	/* Ensure no alignment issue by using 1G */
+	uint64_t start_addr = 1ull << 30;
+	static const uint64_t size_pde[] = {
+		SZ_2M,
+	};
+	/*
+	 * 32 pages of 64K fill one page directory. Sizing the array with a
+	 * constant expression (a const variable is not one in C) avoids a VLA.
+	 */
+	uint64_t size_pd[SZ_2M / SZ_64K];
+
+	for (unsigned int i = 0; i < ARRAY_SIZE(size_pd); i++)
+		size_pd[i] = SZ_64K;
+
+	/* PD case: a directory full of 64K pages reclaimed as 64K entries */
+	expected_stats = (struct xe_prl_stats) {
+		.prl_64k_entry_count = ARRAY_SIZE(size_pd),
+		.prl_issued_count = 1,
+	};
+	stats_before = get_prl_stats(fd, 0);
+	test_vma_ranges_list(fd, size_pd, ARRAY_SIZE(size_pd), start_addr);
+	stats_after = get_prl_stats(fd, 0);
+	compare_prl_stats(&stats_before, &stats_after, &expected_stats);
+
+	/* PDE case: a single 2M page reclaimed as one 2M entry */
+	expected_stats = (struct xe_prl_stats) {
+		.prl_2m_entry_count = 1,
+		.prl_issued_count = 1,
+	};
+	stats_before = get_prl_stats(fd, 0);
+	test_vma_ranges_list(fd, size_pde, ARRAY_SIZE(size_pde), start_addr);
+	stats_after = get_prl_stats(fd, 0);
+	compare_prl_stats(&stats_before, &stats_after, &expected_stats);
+}
+
+/**
+ * SUBTEST: boundary-split
+ * Description: Test case to trigger PRL generation beyond a page size alignment
+ * to ensure correct handling of PRL entries that span page size boundaries.
+ */
+static void test_boundary_split(int fd)
+{
+	struct xe_prl_stats stats_before, stats_after, expected_stats = { 0 };
+	/* Dangle a page past the boundary with a combination of address offset and size */
+	uint64_t size_boundary = 64 * SZ_2M + SZ_4K;
+	uint64_t addr = (1ull << 30) + 64 * SZ_2M;
+
+	/* 64 full 2M entries plus the single dangling 4K page */
+	expected_stats.prl_4k_entry_count = 1;
+	expected_stats.prl_64k_entry_count = 0;
+	expected_stats.prl_2m_entry_count = 64;
+	expected_stats.prl_issued_count = 1;
+	expected_stats.prl_aborted_count = 0;
+
+	stats_before = get_prl_stats(fd, 0);
+	test_vma_ranges_list(fd, &(uint64_t){size_boundary}, 1, addr);
+	stats_after = get_prl_stats(fd, 0);
+	compare_prl_stats(&stats_before, &stats_after, &expected_stats);
+}
+
+/**
+ * SUBTEST: binds-1g-partial
+ * Description: Bind a 1G VMA and a 2M VMA into a VM and unbind only
+ * the 1G range to verify that decomposing a 1G mapping into its
+ * constituent 2M PRL entries overflows the PRL capacity limit,
+ * triggering a full TLB invalidation fallback (aborted PRL) instead
+ * of a targeted page reclaim list flush.
+ */
+static void test_binds_1g_partial(int fd)
+{
+	struct xe_prl_stats stats_before, stats_after, expected_stats = { 0 };
+
+	/* The 2M VMA sits immediately after the 1G VMA; both are unbound below */
+	uint64_t sizes[] = { SZ_1G, SZ_2M };
+	uint64_t offsets[] = { 0, SZ_1G };
+	int count = ARRAY_SIZE(sizes);
+
+	/* Overflow during 1G decomposition: nothing issued, one abort */
+	expected_stats.prl_4k_entry_count = 0;
+	expected_stats.prl_64k_entry_count = 0;
+	expected_stats.prl_2m_entry_count = 0;
+	expected_stats.prl_issued_count = 0;
+	expected_stats.prl_aborted_count = 1;
+
+	stats_before = get_prl_stats(fd, 0);
+	vma_range_list_with_unbind_and_offsets(fd, sizes, count, (1ull << 30), SZ_1G + SZ_2M, offsets);
+	stats_after = get_prl_stats(fd, 0);
+
+	compare_prl_stats(&stats_before, &stats_after, &expected_stats);
+}
+
+/*
+ * igt_main and igt_fixture are IGT macros that expand to the test entry
+ * point and fixture scaffolding; they must be used bare — without a return
+ * type or parentheses — or the macro expansion is broken (see igt_core.h).
+ */
+igt_main
+{
+	int fd;
+	/* Buffer to read debugfs entries boolean */
+	char buf[16] = {0};
+
+	igt_fixture {
+		fd = drm_open_driver(DRIVER_XE);
+
+		/* PRL requires HW support, the feature enabled, and gt stats present */
+		igt_require_f(igt_debugfs_exists(fd, "page_reclaim_hw_assist", O_RDONLY),
+			      "Page Reclamation feature is not supported.\n");
+
+		igt_debugfs_read(fd, "page_reclaim_hw_assist", buf);
+		igt_require_f(buf[0] == '1',
+			      "Page Reclamation feature is not enabled.\n");
+
+		igt_require_f(xe_gt_stats_get_count(fd, 0, "prl_4k_entry_count") >= 0,
+			      "gt_stats is required for Page Reclamation tests.\n");
+	}
+
+	igt_subtest("basic-mixed")
+		test_vma_ranges_basic_mixed(fd);
+
+	igt_subtest("prl-invalidate-full")
+		test_vma_ranges_prl_entries(fd, OVERFLOW_PRL_SIZE, 0, 1);
+
+	igt_subtest("prl-max-entries")
+		test_vma_ranges_prl_entries(fd, OVERFLOW_PRL_SIZE - 1, 1, 0);
+
+	igt_subtest("many-vma-same-bo")
+		test_vma_ranges_many_vma_same_bo(fd, SZ_4K, 16);
+
+	igt_subtest("pde-vs-pd")
+		test_vma_ranges_pde_vs_pd(fd);
+
+	igt_subtest("invalid-1g")
+		test_vma_range_invalid_1g(fd);
+
+	igt_subtest("boundary-split")
+		test_boundary_split(fd);
+
+	igt_subtest("binds-1g-partial")
+		test_binds_1g_partial(fd);
+
+	igt_fixture
+		drm_close_driver(fd);
+}
diff --git a/tests/meson.build b/tests/meson.build
index 26d9345ec..2637033ea 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -321,6 +321,7 @@ intel_xe_progs = [
'xe_noexec_ping_pong',
'xe_non_msix',
'xe_oa',
+ 'xe_page_reclaim',
'xe_pat',
'xe_peer2peer',
'xe_pm',
--
2.43.0
next prev parent reply other threads:[~2026-04-06 18:42 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-06 18:42 [PATCH 0/4] tests/xe: Add xe_page_reclaim test suite Brian Nguyen
2026-04-06 18:42 ` Brian Nguyen [this message]
2026-04-06 18:42 ` [PATCH 2/4] tests/xe: Add random page reclaim subtest Brian Nguyen
2026-04-06 18:42 ` [PATCH 3/4] tests/xe: Add transient display PRL skip Brian Nguyen
2026-04-06 18:42 ` [PATCH 4/4] tests/xe: Add large VMA range tests for better coverage Brian Nguyen
2026-04-06 19:29 ` ✓ Xe.CI.BAT: success for tests/xe: Add xe_page_reclaim test suite Patchwork
2026-04-06 19:45 ` ✓ i915.CI.BAT: " Patchwork
2026-04-06 21:45 ` ✓ i915.CI.Full: " Patchwork
2026-04-07 0:23 ` ✗ Xe.CI.FULL: failure " Patchwork
2026-04-07 19:15 ` [PATCH 0/4] " Summers, Stuart
2026-04-07 22:02 ` Nguyen, Brian3
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260406184226.1294486-7-brian3.nguyen@intel.com \
--to=brian3.nguyen@intel.com \
--cc=igt-dev@lists.freedesktop.org \
--cc=x.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox