From: Brian Nguyen <brian3.nguyen@intel.com>
To: igt-dev@lists.freedesktop.org
Cc: x.wang@intel.com, Brian Nguyen <brian3.nguyen@intel.com>
Subject: [PATCH 1/4] tests/xe: Add page reclaim test
Date: Mon, 6 Apr 2026 18:42:28 +0000 [thread overview]
Message-ID: <20260406184226.1294486-7-brian3.nguyen@intel.com> (raw)
In-Reply-To: <20260406184226.1294486-6-brian3.nguyen@intel.com>
Page Reclamation is a feature enabled in Xe3p that allows for some
performance gain by optimizing TLB invalidations. Xe2 and beyond have
a physically noncoherent L2 cache that requires a full PPC flush
every time a TLB invalidation occurs. With page reclamation, only the
pages associated with the unmap that triggered the TLB invalidation
are flushed.
The xe_page_reclaim test cases create pages of a specific size, bind
them to a VM, and unbind them, observing through gt stats whether the
expected pages are added to the PRL.
Signed-off-by: Brian Nguyen <brian3.nguyen@intel.com>
Cc: Xin Wang <x.wang@intel.com>
---
tests/intel/xe_page_reclaim.c | 441 ++++++++++++++++++++++++++++++++++
tests/meson.build | 1 +
2 files changed, 442 insertions(+)
create mode 100644 tests/intel/xe_page_reclaim.c
diff --git a/tests/intel/xe_page_reclaim.c b/tests/intel/xe_page_reclaim.c
new file mode 100644
index 000000000..acc237d43
--- /dev/null
+++ b/tests/intel/xe_page_reclaim.c
@@ -0,0 +1,441 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#include <fcntl.h>
+
+#include "ioctl_wrappers.h"
+#include "xe/xe_gt.h"
+#include "xe/xe_ioctl.h"
+
+#define OVERFLOW_PRL_SIZE 512
+
+/**
+ * TEST: xe_page_reclaim
+ * Category: Core
+ * Mega feature: General Core features
+ * Sub-category: VM bind
+ * Functionality: Page Reclamation
+ * Test category: functionality test
+ */
+/* Snapshot of the GT's page-reclaim-list (PRL) counters read from gt stats. */
+struct xe_prl_stats {
+	int prl_4k_entry_count;		/* 4K-page entries added to the PRL */
+	int prl_64k_entry_count;	/* 64K-page entries added to the PRL */
+	int prl_2m_entry_count;		/* 2M-page entries added to the PRL */
+	int prl_issued_count;		/* PRL-based invalidations issued */
+	int prl_aborted_count;		/* PRLs aborted (fell back to full flush) */
+};
+
+/*
+ * Read a snapshot of all PRL counters for @gt.
+ *
+ * PRL is only active on the render GT (gt0); media tiles do not participate
+ * in page reclamation. Callers typically pass gt=0.
+ */
+static struct xe_prl_stats get_prl_stats(int fd, int gt)
+{
+	struct xe_prl_stats snap = {
+		.prl_4k_entry_count = xe_gt_stats_get_count(fd, gt, "prl_4k_entry_count"),
+		.prl_64k_entry_count = xe_gt_stats_get_count(fd, gt, "prl_64k_entry_count"),
+		.prl_2m_entry_count = xe_gt_stats_get_count(fd, gt, "prl_2m_entry_count"),
+		.prl_issued_count = xe_gt_stats_get_count(fd, gt, "prl_issued_count"),
+		.prl_aborted_count = xe_gt_stats_get_count(fd, gt, "prl_aborted_count"),
+	};
+
+	return snap;
+}
+
+/* Log the before/after PRL counter pairs at debug level for diagnosis. */
+static void log_prl_stat_diff(struct xe_prl_stats *stats_before, struct xe_prl_stats *stats_after)
+{
+	/* Use the same "%d->%d" separator for every counter (2M previously differed) */
+	igt_debug("PRL stats diff: 4K: %d->%d, 64K: %d->%d, 2M: %d->%d, issued: %d->%d, aborted: %d->%d\n",
+		  stats_before->prl_4k_entry_count,
+		  stats_after->prl_4k_entry_count,
+		  stats_before->prl_64k_entry_count,
+		  stats_after->prl_64k_entry_count,
+		  stats_before->prl_2m_entry_count,
+		  stats_after->prl_2m_entry_count,
+		  stats_before->prl_issued_count,
+		  stats_after->prl_issued_count,
+		  stats_before->prl_aborted_count,
+		  stats_after->prl_aborted_count);
+}
+
+/*
+ * Compare differences between stats and determine if expected.
+ *
+ * Asserts that the delta of each PRL counter between @before and @after is
+ * exactly the count in @expected; the diff is logged first so a failing
+ * assertion can be diagnosed from the debug output.
+ */
+static void compare_prl_stats(struct xe_prl_stats *before, struct xe_prl_stats *after,
+			      struct xe_prl_stats *expected)
+{
+	log_prl_stat_diff(before, after);
+
+	igt_assert_eq(after->prl_4k_entry_count - before->prl_4k_entry_count,
+		      expected->prl_4k_entry_count);
+	igt_assert_eq(after->prl_64k_entry_count - before->prl_64k_entry_count,
+		      expected->prl_64k_entry_count);
+	igt_assert_eq(after->prl_2m_entry_count - before->prl_2m_entry_count,
+		      expected->prl_2m_entry_count);
+	igt_assert_eq(after->prl_issued_count - before->prl_issued_count,
+		      expected->prl_issued_count);
+	igt_assert_eq(after->prl_aborted_count - before->prl_aborted_count,
+		      expected->prl_aborted_count);
+}
+
+/*
+ * Helper with more flexibility on unbinding and offsets.
+ *
+ * Creates one BO per entry of @vma_sizes and binds each at consecutive
+ * addresses from @start_addr (or at start_addr + vma_offsets[i] when
+ * @vma_offsets is non-NULL), then unbinds @unbind_size bytes from
+ * @start_addr in a single operation (the packed span if @unbind_size is 0).
+ */
+static void vma_range_list_with_unbind_and_offsets(int fd, const uint64_t *vma_sizes, unsigned int n_vmas,
+						   uint64_t start_addr, uint64_t unbind_size, const uint64_t *vma_offsets)
+{
+	uint32_t vm;
+	uint32_t *bos;
+	uint64_t addr;
+
+	igt_assert(vma_sizes);
+	igt_assert(n_vmas);
+
+	vm = xe_vm_create(fd, 0, 0);
+
+	bos = calloc(n_vmas, sizeof(*bos));
+	igt_assert(bos);
+
+	addr = start_addr;
+	for (unsigned int i = 0; i < n_vmas; i++) {
+		igt_assert(vma_sizes[i]);
+
+		/* One BO per VMA, placed in system memory */
+		bos[i] = xe_bo_create(fd, 0, vma_sizes[i], system_memory(fd), 0);
+		/* Explicit offsets override the default contiguous packing */
+		if (vma_offsets)
+			addr = start_addr + vma_offsets[i];
+		xe_vm_bind_sync(fd, vm, bos[i], 0, addr, vma_sizes[i]);
+		addr += vma_sizes[i];
+	}
+
+	/* Unbind the whole contiguous VA span in one operation. */
+	xe_vm_unbind_sync(fd, vm, 0, start_addr, unbind_size ? unbind_size : addr - start_addr);
+
+	for (unsigned int i = 0; i < n_vmas; i++)
+		gem_close(fd, bos[i]);
+
+	free(bos);
+	xe_vm_destroy(fd, vm);
+}
+
+/*
+ * Allocate and bind one BO per entry of @vma_sizes at consecutive addresses
+ * starting at @start_addr, then unbind the entire span in one operation.
+ */
+static void test_vma_ranges_list(int fd, const uint64_t *vma_sizes,
+				 unsigned int n_vmas, uint64_t start_addr)
+{
+	/* No explicit unbind size and no per-VMA offsets: pack contiguously */
+	vma_range_list_with_unbind_and_offsets(fd, vma_sizes, n_vmas,
+					       start_addr, 0, NULL);
+}
+
+/**
+ * SUBTEST: basic-mixed
+ * Description: Create multiple different sizes of page (4K, 64K, 2M)
+ * GPU VMA ranges, bind them into a VM at unique addresses, then
+ * unbind all to trigger page reclamation on different page sizes
+ * in one page reclaim list.
+ */
+static void test_vma_ranges_basic_mixed(int fd)
+{
+	struct xe_prl_stats stats_before, stats_after, expected_stats = { 0 };
+	/* Unsigned counts: avoids signed/unsigned comparisons in the fill loops */
+	const unsigned int num_4k_pages = 16;
+	const unsigned int num_64k_pages = 31;
+	const unsigned int num_2m_pages = 2;
+	uint64_t *sizes = calloc(num_4k_pages + num_64k_pages + num_2m_pages, sizeof(uint64_t));
+	unsigned int count = 0;
+
+	igt_assert(sizes);
+	for (unsigned int i = 0; i < num_4k_pages; i++)
+		sizes[count++] = SZ_4K;
+
+	for (unsigned int i = 0; i < num_64k_pages; i++)
+		sizes[count++] = SZ_64K;
+
+	for (unsigned int i = 0; i < num_2m_pages; i++)
+		sizes[count++] = SZ_2M;
+
+	/* Every unbound page should land in a single issued PRL, none aborted */
+	expected_stats.prl_4k_entry_count = num_4k_pages;
+	expected_stats.prl_64k_entry_count = num_64k_pages;
+	expected_stats.prl_2m_entry_count = num_2m_pages;
+	expected_stats.prl_issued_count = 1;
+	expected_stats.prl_aborted_count = 0;
+
+	stats_before = get_prl_stats(fd, 0);
+	test_vma_ranges_list(fd, sizes, count, 1ull << 30);
+	stats_after = get_prl_stats(fd, 0);
+
+	free(sizes);
+	compare_prl_stats(&stats_before, &stats_after, &expected_stats);
+}
+
+/**
+ * SUBTEST: prl-invalidate-full
+ * Description: Create 512 4K page entries at the maximum page reclaim list
+ * size boundary and bind them into a VM.
+ * Expects to trigger a fallback to full PPC flush due to page reclaim
+ * list size limitations (512 entries max).
+ *
+ * SUBTEST: prl-max-entries
+ * Description: Create the maximum page reclaim list without overflow and
+ * bind it into a VM.
+ * Expects no fallback to PPC flush due to page reclaim
+ * list size limitations (512 entries max).
+ */
+static void test_vma_ranges_prl_entries(int fd, unsigned int num_entries,
+					int expected_issued, int expected_aborted)
+{
+	struct xe_prl_stats stats_before, stats_after, expected_stats = { 0 };
+	const uint64_t page_size = SZ_4K;
+	/* Start address aligned but offset by a page to ensure no large PTEs are created */
+	uint64_t addr = (1ull << 30) + page_size;
+
+	/* Capped at OVERFLOW_PRL_SIZE - 1: on overflow the last entry triggers abort */
+	expected_stats.prl_4k_entry_count = min_t(int, num_entries, OVERFLOW_PRL_SIZE - 1);
+	expected_stats.prl_64k_entry_count = 0;
+	expected_stats.prl_2m_entry_count = 0;
+	expected_stats.prl_issued_count = expected_issued;
+	expected_stats.prl_aborted_count = expected_aborted;
+
+	stats_before = get_prl_stats(fd, 0);
+	/* A single VMA spanning num_entries 4K pages; the page offset above keeps PTEs at 4K */
+	test_vma_ranges_list(fd, &(uint64_t){page_size * num_entries}, 1, addr);
+	stats_after = get_prl_stats(fd, 0);
+	compare_prl_stats(&stats_before, &stats_after, &expected_stats);
+}
+
+/*
+ * Bind a single BO of @vma_size at @n_vmas consecutive VA ranges, then
+ * unbind the whole span with one operation.
+ */
+static void test_many_ranges_one_bo(int fd,
+				    const uint64_t vma_size,
+				    unsigned int n_vmas,
+				    uint64_t start_addr)
+{
+	uint32_t vm;
+	uint64_t addr;
+	uint32_t bo;
+
+	igt_assert(n_vmas);
+
+	vm = xe_vm_create(fd, 0, 0);
+
+	igt_assert(vma_size);
+	bo = xe_bo_create(fd, 0, vma_size, system_memory(fd), 0);
+
+	addr = start_addr;
+	for (unsigned int i = 0; i < n_vmas; i++) {
+		/* Bind the same BO (offset 0) at a new VA location */
+		xe_vm_bind_sync(fd, vm, bo, 0, addr, vma_size);
+		addr += vma_size;
+	}
+
+	/* Unbind all VMAs */
+	xe_vm_unbind_sync(fd, vm, 0, start_addr, addr - start_addr);
+
+	gem_close(fd, bo);
+	xe_vm_destroy(fd, vm);
+}
+
+/**
+ * SUBTEST: many-vma-same-bo
+ * Description: Create multiple 4K page VMA ranges bound to the same BO,
+ * bind them into a VM at unique addresses, then unbind all to trigger
+ * page reclamation handling when the same BO is bound to multiple
+ * virtual addresses.
+ */
+static void test_vma_ranges_many_vma_same_bo(int fd, uint64_t vma_size, unsigned int n_vmas)
+{
+	struct xe_prl_stats stats_before, stats_after, expected_stats = { 0 };
+
+	/*
+	 * NOTE(review): the expectation below assumes vma_size == SZ_4K (the
+	 * only value currently passed); larger sizes would land in a different
+	 * counter — confirm before reusing with other page sizes.
+	 */
+	expected_stats.prl_4k_entry_count = n_vmas;
+	expected_stats.prl_issued_count = 1;
+
+	stats_before = get_prl_stats(fd, 0);
+	test_many_ranges_one_bo(fd, vma_size, n_vmas, 1ull << 30);
+	stats_after = get_prl_stats(fd, 0);
+	compare_prl_stats(&stats_before, &stats_after, &expected_stats);
+}
+
+/**
+ * SUBTEST: invalid-1g
+ * Description: Create a 1G page VMA followed by a 4K page VMA to test
+ * handling of 1G page mappings during page reclamation.
+ * Expected is to fallback to invalidation.
+ */
+static void test_vma_range_invalid_1g(int fd)
+{
+	struct xe_prl_stats stats_before, stats_after, expected_stats = { 0 };
+	static const uint64_t sizes[] = {
+		SZ_1G,
+		SZ_4K,
+	};
+	int delta_4k, delta_64k, delta_2m, delta_issued, delta_aborted;
+	bool expected_2m_entries, all_entries_dropped;
+
+	/* 1G page broken into 512 2M pages, but it should invalidate the last entry */
+	expected_stats.prl_2m_entry_count = OVERFLOW_PRL_SIZE - 1;
+	/* No page size because PRL should be invalidated before the second page */
+	expected_stats.prl_4k_entry_count = 0;
+	expected_stats.prl_issued_count = 0;
+	expected_stats.prl_aborted_count = 1;
+
+	stats_before = get_prl_stats(fd, 0);
+	/* Offset 2G to avoid alignment issues */
+	test_vma_ranges_list(fd, sizes, ARRAY_SIZE(sizes), SZ_2G);
+	stats_after = get_prl_stats(fd, 0);
+	log_prl_stat_diff(&stats_before, &stats_after);
+
+	/*
+	 * Depending on page placement, 1G page directory could be dropped from page walk
+	 * which would not generate any entries
+	 */
+	delta_4k = stats_after.prl_4k_entry_count - stats_before.prl_4k_entry_count;
+	delta_64k = stats_after.prl_64k_entry_count - stats_before.prl_64k_entry_count;
+	delta_2m = stats_after.prl_2m_entry_count - stats_before.prl_2m_entry_count;
+	delta_issued = stats_after.prl_issued_count - stats_before.prl_issued_count;
+	delta_aborted = stats_after.prl_aborted_count - stats_before.prl_aborted_count;
+	/* Either all 511 2M entries were recorded before the abort... */
+	expected_2m_entries = (delta_2m == expected_stats.prl_2m_entry_count);
+	/* ...or the whole 1G directory was dropped and no entries were generated */
+	all_entries_dropped = (delta_2m == 0 && delta_64k == 0 && delta_4k == 0);
+
+	igt_assert_eq(delta_issued, expected_stats.prl_issued_count);
+	igt_assert_eq(delta_aborted, expected_stats.prl_aborted_count);
+	igt_assert_eq(delta_4k, expected_stats.prl_4k_entry_count);
+	igt_assert(expected_2m_entries || all_entries_dropped);
+}
+
+/**
+ * SUBTEST: pde-vs-pd
+ * Description: Test case to trigger invalidation of both PDE (2M pages)
+ * and PD (page directory filled with 64K pages) to determine correct
+ * handling of both cases for PRL.
+ */
+static void test_vma_ranges_pde_vs_pd(int fd)
+{
+	struct xe_prl_stats stats_before, stats_after, expected_stats = { 0 };
+	/* Ensure no alignment issue by using 1G */
+	uint64_t start_addr = 1ull << 30;
+	static const uint64_t size_pde[] = {
+		SZ_2M,
+	};
+	/*
+	 * 32 pages of 64K fill one page directory. Sizing the array with a
+	 * constant expression (a const variable is not one in C) avoids a VLA.
+	 */
+	uint64_t size_pd[SZ_2M / SZ_64K];
+
+	for (unsigned int i = 0; i < ARRAY_SIZE(size_pd); i++)
+		size_pd[i] = SZ_64K;
+
+	/* PD case: a directory full of 64K pages reclaimed as 64K entries */
+	expected_stats = (struct xe_prl_stats) {
+		.prl_64k_entry_count = ARRAY_SIZE(size_pd),
+		.prl_issued_count = 1,
+	};
+	stats_before = get_prl_stats(fd, 0);
+	test_vma_ranges_list(fd, size_pd, ARRAY_SIZE(size_pd), start_addr);
+	stats_after = get_prl_stats(fd, 0);
+	compare_prl_stats(&stats_before, &stats_after, &expected_stats);
+
+	/* PDE case: a single 2M page reclaimed as one 2M entry */
+	expected_stats = (struct xe_prl_stats) {
+		.prl_2m_entry_count = 1,
+		.prl_issued_count = 1,
+	};
+	stats_before = get_prl_stats(fd, 0);
+	test_vma_ranges_list(fd, size_pde, ARRAY_SIZE(size_pde), start_addr);
+	stats_after = get_prl_stats(fd, 0);
+	compare_prl_stats(&stats_before, &stats_after, &expected_stats);
+}
+
+/**
+ * SUBTEST: boundary-split
+ * Description: Test case to trigger PRL generation beyond a page size alignment
+ * to ensure correct handling of PRL entries that span page size boundaries.
+ */
+static void test_boundary_split(int fd)
+{
+	struct xe_prl_stats stats_before, stats_after, expected_stats = { 0 };
+	/* Dangle a page past the boundary with a combination of address offset and size */
+	uint64_t size_boundary = 64 * SZ_2M + SZ_4K;
+	uint64_t addr = (1ull << 30) + 64 * SZ_2M;
+
+	/* 64 full 2M entries plus the single dangling 4K page */
+	expected_stats.prl_4k_entry_count = 1;
+	expected_stats.prl_64k_entry_count = 0;
+	expected_stats.prl_2m_entry_count = 64;
+	expected_stats.prl_issued_count = 1;
+	expected_stats.prl_aborted_count = 0;
+
+	stats_before = get_prl_stats(fd, 0);
+	test_vma_ranges_list(fd, &(uint64_t){size_boundary}, 1, addr);
+	stats_after = get_prl_stats(fd, 0);
+	compare_prl_stats(&stats_before, &stats_after, &expected_stats);
+}
+
+/**
+ * SUBTEST: binds-1g-partial
+ * Description: Bind a 1G VMA and a 2M VMA into a VM and unbind only
+ * the 1G range to verify that decomposing a 1G mapping into its
+ * constituent 2M PRL entries overflows the PRL capacity limit,
+ * triggering a full TLB invalidation fallback (aborted PRL) instead
+ * of a targeted page reclaim list flush.
+ */
+static void test_binds_1g_partial(int fd)
+{
+	struct xe_prl_stats stats_before, stats_after, expected_stats = { 0 };
+
+	/* The 2M VMA sits immediately after the 1G VMA; both are unbound below */
+	uint64_t sizes[] = { SZ_1G, SZ_2M };
+	uint64_t offsets[] = { 0, SZ_1G };
+	int count = ARRAY_SIZE(sizes);
+
+	/* Overflow during 1G decomposition: nothing issued, one abort */
+	expected_stats.prl_4k_entry_count = 0;
+	expected_stats.prl_64k_entry_count = 0;
+	expected_stats.prl_2m_entry_count = 0;
+	expected_stats.prl_issued_count = 0;
+	expected_stats.prl_aborted_count = 1;
+
+	stats_before = get_prl_stats(fd, 0);
+	vma_range_list_with_unbind_and_offsets(fd, sizes, count, (1ull << 30), SZ_1G + SZ_2M, offsets);
+	stats_after = get_prl_stats(fd, 0);
+
+	compare_prl_stats(&stats_before, &stats_after, &expected_stats);
+}
+
+/*
+ * igt_main and igt_fixture are IGT macros that expand to the test entry
+ * point and fixture scaffolding; they must be used bare — without a return
+ * type or parentheses — or the macro expansion is broken (see igt_core.h).
+ */
+igt_main
+{
+	int fd;
+	/* Buffer to read debugfs entries boolean */
+	char buf[16] = {0};
+
+	igt_fixture {
+		fd = drm_open_driver(DRIVER_XE);
+
+		/* PRL requires HW support, the feature enabled, and gt stats present */
+		igt_require_f(igt_debugfs_exists(fd, "page_reclaim_hw_assist", O_RDONLY),
+			      "Page Reclamation feature is not supported.\n");
+
+		igt_debugfs_read(fd, "page_reclaim_hw_assist", buf);
+		igt_require_f(buf[0] == '1',
+			      "Page Reclamation feature is not enabled.\n");
+
+		igt_require_f(xe_gt_stats_get_count(fd, 0, "prl_4k_entry_count") >= 0,
+			      "gt_stats is required for Page Reclamation tests.\n");
+	}
+
+	igt_subtest("basic-mixed")
+		test_vma_ranges_basic_mixed(fd);
+
+	igt_subtest("prl-invalidate-full")
+		test_vma_ranges_prl_entries(fd, OVERFLOW_PRL_SIZE, 0, 1);
+
+	igt_subtest("prl-max-entries")
+		test_vma_ranges_prl_entries(fd, OVERFLOW_PRL_SIZE - 1, 1, 0);
+
+	igt_subtest("many-vma-same-bo")
+		test_vma_ranges_many_vma_same_bo(fd, SZ_4K, 16);
+
+	igt_subtest("pde-vs-pd")
+		test_vma_ranges_pde_vs_pd(fd);
+
+	igt_subtest("invalid-1g")
+		test_vma_range_invalid_1g(fd);
+
+	igt_subtest("boundary-split")
+		test_boundary_split(fd);
+
+	igt_subtest("binds-1g-partial")
+		test_binds_1g_partial(fd);
+
+	igt_fixture
+		drm_close_driver(fd);
+}
diff --git a/tests/meson.build b/tests/meson.build
index 26d9345ec..2637033ea 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -321,6 +321,7 @@ intel_xe_progs = [
'xe_noexec_ping_pong',
'xe_non_msix',
'xe_oa',
+ 'xe_page_reclaim',
'xe_pat',
'xe_peer2peer',
'xe_pm',
--
2.43.0
next prev parent reply other threads:[~2026-04-06 18:42 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-06 18:42 [PATCH 0/4] tests/xe: Add xe_page_reclaim test suite Brian Nguyen
2026-04-06 18:42 ` Brian Nguyen [this message]
2026-04-06 18:42 ` [PATCH 2/4] tests/xe: Add random page reclaim subtest Brian Nguyen
2026-04-06 18:42 ` [PATCH 3/4] tests/xe: Add transient display PRL skip Brian Nguyen
2026-04-06 18:42 ` [PATCH 4/4] tests/xe: Add large VMA range tests for better coverage Brian Nguyen
2026-04-06 19:29 ` ✓ Xe.CI.BAT: success for tests/xe: Add xe_page_reclaim test suite Patchwork
2026-04-06 19:45 ` ✓ i915.CI.BAT: " Patchwork
2026-04-06 21:45 ` ✓ i915.CI.Full: " Patchwork
2026-04-07 0:23 ` ✗ Xe.CI.FULL: failure " Patchwork
2026-04-07 19:15 ` [PATCH 0/4] " Summers, Stuart
2026-04-07 22:02 ` Nguyen, Brian3
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260406184226.1294486-7-brian3.nguyen@intel.com \
--to=brian3.nguyen@intel.com \
--cc=igt-dev@lists.freedesktop.org \
--cc=x.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox