All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: igt-dev@lists.freedesktop.org
Cc: dev@lankhorst.se, "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
Subject: [PATCH i-g-t 5/5] tests/xe_cgroups: add dmem cgroup eviction test
Date: Thu, 26 Mar 2026 17:10:07 +0100	[thread overview]
Message-ID: <20260326161007.39294-6-thomas.hellstrom@linux.intel.com> (raw)
In-Reply-To: <20260326161007.39294-1-thomas.hellstrom@linux.intel.com>

Add xe_cgroups, a test exercising the dmem cgroup controller on xe
devices.

The write_eviction subtest:
 - Skips if the dmem cgroup controller is not available.
 - Skips if no VRAM region is registered with the dmem controller.
 - Creates a sub-cgroup and moves the test process into it.
 - Sets dmem.max on the first VRAM region to 4 GiB, or to the region's
   capacity minus one 128 MiB BO (rounded down to 256 MiB) if that is less.
 - Creates an LR VM and fills VRAM by repeatedly creating BOs with
   DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING and binding them via
   __xe_vm_bind_lr_sync() until -ENOMEM or -ENOSPC is returned.
 - Verifies that cgroup current usage is within the expected range when
   the limit is hit.
 - Lowers dmem.max in 256 MiB steps, waiting for usage to follow each
   reduction.  -EBUSY is accepted when usage is already at or below
   256 MiB.

The write_eviction_interruptible subtest runs the same test with
SIGCONT signals injected via igt_fork_signal_helper() and reports the
number of signals received.

Assisted-by: GitHub Copilot:claude-sonnet-4.6
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 tests/intel/xe_cgroups.c | 296 +++++++++++++++++++++++++++++++++++++++
 tests/meson.build        |   1 +
 2 files changed, 297 insertions(+)
 create mode 100644 tests/intel/xe_cgroups.c

diff --git a/tests/intel/xe_cgroups.c b/tests/intel/xe_cgroups.c
new file mode 100644
index 000000000..08cf8e3bd
--- /dev/null
+++ b/tests/intel/xe_cgroups.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+/**
+ * TEST: xe_cgroups
+ * DESCRIPTION: Tests exercising the dmem cgroup controller on xe devices.
+ * Category: Core
+ * Mega feature: General Core features
+ * Sub-category: cgroup
+ * FUNCTIONALITY: cgroup dmem controller
+ * SUBSETS: xe
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <stdatomic.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "drmtest.h"
+#include "igt.h"
+#include "igt_aux.h"
+#include "igt_cgroup.h"
+#include "xe_drm.h"
+#include "xe/xe_ioctl.h"
+#include "xe/xe_query.h"
+
+#define BO_SIZE			SZ_128M		/* size of each BO created and bound */
+#define MAX_LIMIT		((uint64_t)4 * SZ_1G)	/* upper bound for the initial dmem.max */
+#define EVICT_STEP		SZ_256M		/* amount dmem.max is lowered per step */
+#define BIND_BASE		0x100000000ULL	/* 4 GiB VA base */
+#define USAGE_SLACK		SZ_128M		/* tolerance above the set max */
+#define USAGE_POLL_MS		10		/* polling interval for usage drop */
+#define USAGE_DROP_TIMEOUT_MS	50		/* max wait for usage to drop */
+
+#define TEST_INTERRUPTIBLE	(1 << 0)	/* run with SIGCONT injection and counting */
+
+/**
+ * SUBTEST: write_eviction
+ * DESCRIPTION:
+ *   Create a dmem cgroup, move the current process into it and set the max
+ *   device memory limit for the first VRAM region to 4 GiB.  Then fill VRAM
+ *   by creating BOs with %DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING (so that the
+ *   physical allocation is deferred until VM_BIND) and binding them into an
+ *   LR VM until the cgroup limit is hit.  Verify that the reported cgroup
+ *   current usage is within the expected range when the error occurs.
+ *   Finally lower the max limit in 256 MiB steps and verify that the cgroup
+ *   usage follows.
+ * REQUIREMENTS: must run as root; xe device with at least one VRAM region
+ */
+
+/**
+ * SUBTEST: write_eviction_interruptible
+ * DESCRIPTION:
+ *   Same as write_eviction but with SIGCONT signals injected throughout via
+ *   igt_fork_signal_helper() to verify that the dmem.max write path handles
+ *   signal interruption correctly.  A signal handler counts received signals
+ *   and the count is logged with igt_info() at the end of the test.
+ * REQUIREMENTS: must run as root; xe device with at least one VRAM region
+ */
+
+/* Number of SIGCONT deliveries seen since install_sigcont_counter() reset it */
+static atomic_int signal_count;
+/* SIGCONT disposition that was active before sigcont_handler() was installed */
+static struct sigaction sigcont_oldact;
+
+/*
+ * SIGCONT handler: count the delivery, then pass the signal on to the handler
+ * saved in sigcont_oldact (IGT's dummy sig_handler) when there is one.
+ */
+static void sigcont_handler(int sig)
+{
+	void (*chained)(int);
+
+	atomic_fetch_add(&signal_count, 1);
+
+	chained = sigcont_oldact.sa_handler;
+
+	/* SIG_IGN and SIG_DFL are dispositions, not functions that can be called */
+	if (!chained || chained == SIG_IGN || chained == SIG_DFL)
+		return;
+
+	chained(sig);
+}
+
+/*
+ * Reset the signal counter, start IGT's signal helper and put
+ * sigcont_handler() in front of whatever SIGCONT handler was active, keeping
+ * the previous disposition in sigcont_oldact for chaining.
+ */
+static void install_sigcont_counter(void)
+{
+	struct sigaction counter_act;
+
+	atomic_store(&signal_count, 0);
+
+	/*
+	 * The helper has to be started first: installing our handler before
+	 * igt_fork_signal_helper() would let it be overwritten.
+	 */
+	igt_fork_signal_helper();
+
+	memset(&counter_act, 0, sizeof(counter_act));
+	sigemptyset(&counter_act.sa_mask);
+	counter_act.sa_handler = sigcont_handler;
+
+	/* The third argument captures the old handler so we can chain to it */
+	sigaction(SIGCONT, &counter_act, &sigcont_oldact);
+}
+
+/*
+ * Poll the cgroup's current usage of @region until it is at or below @limit
+ * or roughly USAGE_DROP_TIMEOUT_MS of sleeping has accumulated.
+ *
+ * @cg:     cgroup whose usage is sampled
+ * @region: dmem region name
+ * @limit:  usage, in bytes, at or below which polling stops early
+ *
+ * Returns the most recent usage sample.  A value still above @limit means the
+ * timeout expired; the caller decides whether that is acceptable.
+ */
+static uint64_t wait_for_usage_drop(struct igt_cgroup *cg, const char *region,
+				    uint64_t limit)
+{
+	uint64_t current;
+	unsigned int elapsed;
+
+	for (elapsed = 0; ; elapsed += USAGE_POLL_MS) {
+		igt_cgroup_dmem_get_current(cg, region, &current);
+
+		/*
+		 * Check the deadline only after sampling, so the value handed
+		 * back on timeout was read after the final sleep rather than
+		 * before it.
+		 */
+		if (current <= limit || elapsed >= USAGE_DROP_TIMEOUT_MS)
+			return current;
+
+		usleep(USAGE_POLL_MS * 1000);
+	}
+}
+
+/*
+ * Create BO_SIZE buffer objects with deferred backing in @vram_region and bind
+ * them into @vm at consecutive addresses starting at BIND_BASE, until BO
+ * creation or the bind fails or @max_bo objects have been bound.
+ *
+ * @fd:          open xe DRM device
+ * @vm:          VM to bind into
+ * @vram_region: memory region to place the BOs in
+ * @handles:     array of at least @max_bo entries; slot i receives the handle
+ *               of the i-th successfully bound BO
+ * @max_bo:      maximum number of BOs to create
+ *
+ * Asserts that the loop ended with -ENOMEM or -ENOSPC; running out of slots
+ * without an error therefore fails the test.
+ *
+ * Returns the number of BOs that were created and bound.
+ */
+static int fill_vram(int fd, uint32_t vm, uint64_t vram_region,
+		     uint32_t *handles, int max_bo)
+{
+	uint32_t handle;
+	uint64_t addr = BIND_BASE;
+	int n_bo, err = 0;
+
+	for (n_bo = 0; n_bo < max_bo; n_bo++, addr += BO_SIZE) {
+		err = __xe_bo_create(fd, 0, BO_SIZE, vram_region,
+				     DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING,
+				     NULL, &handle);
+		if (err)
+			break;
+
+		err = __xe_vm_bind_lr_sync(fd, vm, handle, 0, addr, BO_SIZE, 0);
+		if (err) {
+			/*
+			 * This BO is not included in the returned count, so
+			 * the caller's cleanup would never see it.  Close it
+			 * here instead of leaking the handle.
+			 */
+			gem_close(fd, handle);
+			break;
+		}
+
+		/* Only record BOs that were actually bound */
+		handles[n_bo] = handle;
+	}
+
+	igt_assert_f(err == -ENOMEM || err == -ENOSPC,
+		     "Expected -ENOMEM or -ENOSPC, got %d (%s)\n",
+		     err, strerror(-err));
+
+	return n_bo;
+}
+
+/*
+ * Walk the first @n_bo slots of @handles.  For every non-zero slot, unbind the
+ * BO_SIZE range belonging to it (slot i sits at BIND_BASE + i * BO_SIZE) from
+ * @vm and close the handle.  @handles itself is freed at the end, so the
+ * caller must not use the pointer afterwards.
+ */
+static void unfill_vram(int fd, uint32_t vm, uint32_t *handles, int n_bo)
+{
+	uint64_t va;
+	int slot;
+
+	for (slot = 0, va = BIND_BASE; slot < n_bo; slot++, va += BO_SIZE) {
+		/* An empty slot still occupies its address range; skip it */
+		if (!handles[slot])
+			continue;
+
+		xe_vm_unbind_lr_sync(fd, vm, 0, va, BO_SIZE);
+		gem_close(fd, handles[slot]);
+	}
+
+	free(handles);
+}
+
+/*
+ * Common body of the write_eviction subtests.
+ *
+ * @fd:    open xe DRM device
+ * @flags: TEST_INTERRUPTIBLE to run with the SIGCONT signal helper active and
+ *         log the number of signals received; 0 to run without it
+ *
+ * Skips unless the dmem controller is available, the device has a VRAM region
+ * that the controller tracks, and that region's capacity is at least four
+ * times BO_SIZE.
+ */
+static void test_write_eviction(int fd, unsigned int flags)
+{
+	struct igt_cgroup *cg;
+	char *cg_region;
+	uint32_t vm;
+	uint64_t vram_region = 0;
+	uint64_t region;
+	uint32_t *handles = NULL;
+	int n_bo = 0, max_bo;
+	uint64_t current, capacity, cg_max, limit, after;
+	int set_err;
+
+	/* Check dmem cgroup controller is available before doing anything else */
+	igt_require_f(igt_cgroup_dmem_available(),
+		      "dmem cgroup controller not available (no cgroup v2 or no registered regions)\n");
+
+	/* Find first VRAM region */
+	xe_for_each_mem_region(fd, all_memory_regions(fd), region) {
+		if (xe_region_class(fd, region) == DRM_XE_MEM_REGION_CLASS_VRAM) {
+			vram_region = region;
+			break;
+		}
+	}
+	igt_require_f(vram_region, "No VRAM region found on this device\n");
+
+	/* Name under which the dmem controller tracks this region; freed below */
+	cg_region = xe_cgroup_region_name(fd, vram_region);
+	igt_require_f(cg_region, "Region not tracked by dmem cgroup controller\n");
+
+	igt_cgroup_dmem_get_capacity(cg_region, &capacity);
+	igt_require_f(capacity >= 4 * BO_SIZE,
+		      "VRAM capacity (%"PRIu64" MiB) too small to test\n",
+		      capacity / SZ_1M);
+
+	/*
+	 * Use up to 4 GiB, or the full capacity if the device has less.
+	 * Leave one BO_SIZE worth of headroom so the device isn't completely
+	 * exhausted before the cgroup limit is hit.
+	 *
+	 * The result is rounded down to a multiple of EVICT_STEP, so the
+	 * phase-2 loop below can step the limit down to exactly 0.
+	 */
+	cg_max = min(MAX_LIMIT, capacity - BO_SIZE);
+	cg_max = ALIGN_DOWN(cg_max, EVICT_STEP);
+
+	/* Start signal injection only after all skip checks have passed */
+	if (flags & TEST_INTERRUPTIBLE)
+		install_sigcont_counter();
+
+	/* Create cgroup and move into it */
+	cg = igt_cgroup_new("xe_cgroups_test");
+	igt_cgroup_move_current(cg);
+	igt_cgroup_dmem_set_max(cg, cg_region, cg_max);
+
+	/* Phase 1: bind BOs until fill_vram() sees -ENOMEM or -ENOSPC */
+	vm = xe_vm_create(fd, DRM_XE_VM_CREATE_FLAG_LR_MODE, 0);
+
+	max_bo = (cg_max / BO_SIZE) + 8; /* headroom for overcommit */
+	handles = calloc(max_bo, sizeof(*handles));
+	igt_assert(handles);
+
+	n_bo = fill_vram(fd, vm, vram_region, handles, max_bo);
+
+	igt_cgroup_dmem_get_current(cg, cg_region, &current);
+	igt_debug("After fill: cgroup current = %"PRIu64" MiB, "
+		  "max = %"PRIu64" MiB\n",
+		  current / SZ_1M, cg_max / SZ_1M);
+
+	/* Usage may overshoot the limit by at most USAGE_SLACK */
+	igt_assert_f(current <= cg_max + USAGE_SLACK,
+		     "Usage %"PRIu64" MiB exceeds max %"PRIu64" MiB + slack\n",
+		     current / SZ_1M, cg_max / SZ_1M);
+
+	/* Phase 2: lower max in 256 MiB steps, verify usage follows */
+	limit = cg_max;
+	while (limit >= EVICT_STEP) {
+
+		limit -= EVICT_STEP;
+		set_err = __igt_cgroup_dmem_set_max(cg, cg_region, limit);
+		/*
+		 * -EBUSY is tolerated only when usage is already at or below
+		 * one EVICT_STEP; lowering stops at that point.
+		 */
+		if (set_err == -EBUSY) {
+			igt_cgroup_dmem_get_current(cg, cg_region, &after);
+			igt_assert_f(after <= (uint64_t)EVICT_STEP,
+				     "dmem.max rejected with -EBUSY but usage "
+				     "%"PRIu64" MiB > %"PRIu64" MiB\n",
+				     after / SZ_1M,
+				     (uint64_t)EVICT_STEP / SZ_1M);
+			igt_debug("dmem.max set to %"PRIu64" MiB returned "
+				  "-EBUSY, usage = %"PRIu64" MiB (acceptable)\n",
+				  limit / SZ_1M, after / SZ_1M);
+			break;
+		}
+		/* Any other failure to write dmem.max fails the test */
+		igt_assert_f(set_err == 0,
+			     "Failed to set dmem.max to %"PRIu64" MiB: %s\n",
+			     limit / SZ_1M, strerror(-set_err));
+
+		/* Give usage up to USAGE_DROP_TIMEOUT_MS to follow the new limit */
+		after = wait_for_usage_drop(cg, cg_region, limit);
+
+		igt_debug("Lowered max to %"PRIu64" MiB: usage = %"PRIu64" MiB\n",
+			  limit / SZ_1M, after / SZ_1M);
+
+		igt_assert_f(after <= limit + USAGE_SLACK,
+			     "Usage %"PRIu64" MiB did not follow max %"PRIu64" MiB\n",
+			     after / SZ_1M, limit / SZ_1M);
+	}
+
+	/* Stop signal injection before cleanup and report how many arrived */
+	if (flags & TEST_INTERRUPTIBLE) {
+		igt_stop_signal_helper();
+		igt_info("Signals received during test: %d\n",
+			 atomic_load(&signal_count));
+	}
+
+	/* Cleanup */
+	igt_cgroup_dmem_set_max(cg, cg_region, IGT_CGROUP_DMEM_MAX);
+	unfill_vram(fd, vm, handles, n_bo);
+	/* unfill_vram() freed the array; drop the dangling pointer */
+	handles = NULL;
+	xe_vm_destroy(fd, vm);
+	free(cg_region);
+	igt_cgroup_free(cg);
+}
+
+/*
+ * Subtest name and the flags handed to test_write_eviction() for it.
+ * The table is terminated by an entry whose name is NULL.
+ */
+static const struct {
+	const char *name;
+	unsigned int flags;
+} subtests[] = {
+	{ .name = "write_eviction", .flags = 0 },
+	{ .name = "write_eviction_interruptible", .flags = TEST_INTERRUPTIBLE },
+	{ .name = NULL },
+};
+
+/*
+ * Test entry point: open the xe device and require root in the first fixture,
+ * run test_write_eviction() once per entry of subtests[], then close the
+ * device in the last fixture.
+ */
+int igt_main()
+{
+	int fd = -1;
+
+	igt_fixture() {
+		fd = drm_open_driver(DRIVER_XE);
+		igt_require_f(getuid() == 0, "Test requires root\n");
+	}
+
+	/* The first entry without a name ends the table */
+	for (int idx = 0; subtests[idx].name != NULL; idx++) {
+		igt_subtest(subtests[idx].name) {
+			test_write_eviction(fd, subtests[idx].flags);
+		}
+	}
+
+	igt_fixture() {
+		drm_close_driver(fd);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index f2326d293..cee0d89e2 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -292,6 +292,7 @@ intel_xe_progs = [
 	'xe_dma_buf_sync',
 	'xe_drm_fdinfo',
 	'xe_eu_stall',
+	'xe_cgroups',
 	'xe_evict',
 	'xe_evict_ccs',
 	'xe_exec_atomic',
-- 
2.53.0


  parent reply	other threads:[~2026-03-26 16:11 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-26 16:10 [PATCH i-g-t 0/5] Initial dmem cgroup support Thomas Hellström
2026-03-26 16:10 ` [PATCH i-g-t 1/5] lib/igt_cgroup: add cgroup v2 and dmem controller helpers Thomas Hellström
2026-03-26 16:10 ` [PATCH i-g-t 2/5] tests/cgroup_dmem: add dmem cgroup controller test Thomas Hellström
2026-03-26 16:10 ` [PATCH i-g-t 3/5] lib/xe: add xe_cgroup_region_name() helper Thomas Hellström
2026-03-26 16:10 ` [PATCH i-g-t 4/5] lib/xe: add __xe_vm_bind_lr_sync() failable bind helper Thomas Hellström
2026-03-26 16:10 ` Thomas Hellström [this message]
2026-03-26 23:42 ` ✓ Xe.CI.BAT: success for Initial dmem cgroup support Patchwork
2026-03-27  0:00 ` ✓ i915.CI.BAT: " Patchwork
2026-03-27 17:49 ` ✓ Xe.CI.FULL: " Patchwork
2026-03-28  0:45 ` ✗ i915.CI.Full: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260326161007.39294-6-thomas.hellstrom@linux.intel.com \
    --to=thomas.hellstrom@linux.intel.com \
    --cc=dev@lankhorst.se \
    --cc=igt-dev@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.