From: Sobin Thomas <sobin.thomas@intel.com>
To: igt-dev@lists.freedesktop.org
Cc: nishit.sharma@intel.com, Sobin Thomas <sobin.thomas@intel.com>
Subject: [PATCH i-g-t 1/1] tests/xe_vm: Add oversubscribe concurrent bind stress test
Date: Wed, 18 Feb 2026 16:44:17 +0000 [thread overview]
Message-ID: <20260218164417.856114-2-sobin.thomas@intel.com> (raw)
In-Reply-To: <20260218164417.856114-1-sobin.thomas@intel.com>
Add an xe_vm subtest that oversubscribes VRAM and issues
concurrent binds into a single VM (scratch-page mode) to
reproduce the dma-resv/bind race found under memory pressure.
Prior coverage lacked any case that combined multi-process bind
pressure with VRAM oversubscription, so bind/submit could
panic (NULL deref in xe_pt_stage_bind) instead of failing cleanly.
The new test expects successful completion or ENOMEM/EDEADLK.
Signed-off-by: Sobin Thomas <sobin.thomas@intel.com>
---
tests/intel/xe_vm.c | 421 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 421 insertions(+)
diff --git a/tests/intel/xe_vm.c b/tests/intel/xe_vm.c
index ccff8f804..5c9d5ff0f 100644
--- a/tests/intel/xe_vm.c
+++ b/tests/intel/xe_vm.c
@@ -21,6 +21,176 @@
#include "xe/xe_spin.h"
#include <string.h>
+/*
+ * Command encodings used to hand-assemble the small "sum a few integers"
+ * batch in test_vm_oversubscribe_concurrent_bind().
+ *
+ * Each MI_* header is spelled out field by field: a value at bit 29, the
+ * opcode at bit 23 and a small count in the low bits.
+ * NOTE(review): bit 19 in the LOAD/STORE headers is presumably what makes
+ * the GPR_RX_ADDR() offsets engine-relative - confirm against the command
+ * reference.
+ */
+#define MI_BB_END (0 << 29 | 0x0A << 23 | 0)
+#define MI_LOAD_REG_MEM (0 << 29 | 0x29 << 23 | 0 << 22 | 0 << 21 | 1 << 19 | 2)
+#define MI_STORE_REG_MEM (0 << 29 | 0x24 << 23 | 0 << 22 | 0 << 21 | 1 << 19 | 2)
+/* MI_MATH header; only the low 8 bits of @length are kept. */
+#define MI_MATH_R(length) (0 << 29 | 0x1A << 23 | ((length) & 0xFF))
+/* Register offset of general purpose register @x (8 bytes apart from 0x600). */
+#define GPR_RX_ADDR(x) (0x600 + (x) * 8)
+/* ALU instruction dwords that follow an MI_MATH header. */
+#define ALU_LOAD(dst, src) (0x080 << 20 | ((dst) << 10) | (src))
+#define ALU_STORE(dst, src) (0x180 << 20 | (dst) << 10 | (src))
+#define ALU_ADD (0x100 << 20)
+/* ALU operand selectors: GPR @x, the two source operands and the accumulator. */
+#define ALU_RX(x) (x)
+#define ALU_SRCA 0x20
+#define ALU_SRCB 0x21
+#define ALU_ACCU 0x31
+/* @x gibibytes expressed in bytes (unsigned long long arithmetic). */
+#define GB(x) (1024ULL * 1024ULL * 1024ULL * (x))
+
+/* Bookkeeping for one buffer object used by the oversubscribe test. */
+struct gem_bo {
+ /* GEM handle as returned by xe_bo_create_caching() */
+ uint32_t handle;
+ /* size of the object in bytes; also used as the bind range */
+ uint64_t size;
+ /* CPU mapping obtained with xe_bo_map(), or NULL while not mapped */
+ int *ptr;
+ /* GPU virtual address the object is bound at */
+ uint64_t addr;
+};
+
+/* Per-child state of the oversubscribe test. */
+struct xe_test_ctx {
+ /* NOTE(review): never assigned in this patch, the helpers take fd as a parameter */
+ int fd;
+ /* VM that every bind and the exec queue of this child target */
+ uint32_t vm_id;
+
+ /* exec queue filled in by create_exec_queue() */
+ uint32_t exec_queue_id;
+
+ /* memory region instances and VRAM availability, filled in by query_mem_info() */
+ uint16_t sram_instance;
+ uint16_t vram_instance;
+ bool has_vram;
+};
+
+/*
+ * Round @size (in bytes) up to the next multiple of 4096; a value that is
+ * already a multiple is returned unchanged.
+ *
+ * The mask is kept in a 64-bit variable on purpose: an unsigned long
+ * constant is only 32 bits wide on ILP32 targets, so its complement would
+ * zero-extend and clear bits 32..63 of @size.
+ */
+static uint64_t align_to_page_size(uint64_t size)
+{
+	const uint64_t page_mask = 4095;
+
+	return (size + page_mask) & ~page_mask;
+}
+
+/*
+ * Create an exec queue on the VM of @ctx and store its id in
+ * ctx->exec_queue_id.
+ *
+ * The queue is placed on the first engine of class RENDER reported by
+ * xe_for_each_engine(). When no such engine is listed, the fallback is an
+ * otherwise zeroed instance that only has the render class set.
+ */
+static void create_exec_queue(int fd, struct xe_test_ctx *ctx)
+{
+	struct drm_xe_engine_class_instance render = {
+		.engine_class = DRM_XE_ENGINE_CLASS_RENDER,
+	};
+	struct drm_xe_engine_class_instance *it;
+
+	xe_for_each_engine(fd, it) {
+		if (it->engine_class != DRM_XE_ENGINE_CLASS_RENDER)
+			continue;
+
+		render = *it;
+		break;
+	}
+
+	ctx->exec_queue_id = xe_exec_queue_create(fd, ctx->vm_id, &render, 0);
+}
+
+/*
+ * Map one buffer object into the VM of @ctx and wait for the bind to finish.
+ *
+ * @handle: GEM handle of the object to map
+ * @addr:   GPU virtual address to map it at
+ * @size:   number of bytes to map, starting at offset 0 of the object
+ *
+ * The bind signals point 1 of a freshly created timeline syncobj. That point
+ * is waited for right here (timeout INT64_MAX), which makes the helper fully
+ * synchronous; the syncobj is destroyed before returning. A failing ioctl or
+ * wait trips an igt_assert.
+ */
+static void vm_bind_gem_bo(int fd, struct xe_test_ctx *ctx, uint32_t handle,
+			   uint64_t addr, uint64_t size)
+{
+	uint64_t fence_point = 1;
+	uint32_t fence = syncobj_create(fd, 0);
+	struct drm_xe_sync signal_fence = {
+		.type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
+		.flags = DRM_XE_SYNC_FLAG_SIGNAL,
+		.handle = fence,
+		.timeline_value = fence_point,
+	};
+	struct drm_xe_vm_bind args = {
+		.vm_id = ctx->vm_id,
+		.num_binds = 1,
+		.bind = {
+			.obj = handle,
+			.range = size,
+			.addr = addr,
+			.op = DRM_XE_VM_BIND_OP_MAP,
+		},
+		.num_syncs = 1,
+		.syncs = (uintptr_t)&signal_fence,
+	};
+	int ret;
+
+	ret = igt_ioctl(fd, DRM_IOCTL_XE_VM_BIND, &args);
+	igt_info("Bind returned %d\n", ret);
+	igt_assert(ret == 0);
+
+	/*
+	 * Waiting here serialises bind and submission. The asynchronous
+	 * alternative is to hand the syncobj to the exec ioctl as an in-fence.
+	 */
+	igt_assert(syncobj_timeline_wait(fd, &fence, &fence_point,
+					 1, INT64_MAX, 0, NULL));
+
+	syncobj_destroy(fd, fence);
+}
+
+/*
+ * Queue MAP binds for @size buffer objects with a single VM_BIND ioctl and
+ * return without waiting for them.
+ *
+ * @bos:  array of @size objects; handle, size and addr of each are used
+ * @size: number of entries in @bos, must be at least 1
+ *
+ * Returns the handle of a new timeline syncobj whose point 1 is signalled by
+ * the bind. The caller owns that syncobj. A failing ioctl trips an
+ * igt_assert.
+ */
+static uint32_t
+vm_bind_gem_bos(int fd, struct xe_test_ctx *ctx, struct gem_bo *bos, int size)
+{
+	/* A VLA must have a positive length, even if the assert below fires. */
+	struct drm_xe_vm_bind_op binds[size > 0 ? size : 1];
+	struct drm_xe_sync bind_sync = {
+		.type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
+		.flags = DRM_XE_SYNC_FLAG_SIGNAL,
+		.timeline_value = 1,
+	};
+	struct drm_xe_vm_bind vm_bind = {
+		.vm_id = ctx->vm_id,
+		.exec_queue_id = 0,
+		.num_syncs = 1,
+		.syncs = (uintptr_t)&bind_sync,
+	};
+	int rc;
+
+	igt_assert(size > 0);
+
+	for (int i = 0; i < size; i++) {
+		binds[i] = (struct drm_xe_vm_bind_op) {
+			.extensions = 0,
+			.obj = bos[i].handle,
+			.pat_index = 0,
+			.pad = 0,
+			.obj_offset = 0,
+			.range = bos[i].size,
+			.addr = bos[i].addr,
+			.op = DRM_XE_VM_BIND_OP_MAP,
+			.flags = 0,
+			.prefetch_mem_region_instance = 0,
+			.pad2 = 0,
+		};
+	}
+
+	/*
+	 * The uAPI takes a single operation inline in .bind and only reads
+	 * .vector_of_binds when there is more than one operation.
+	 */
+	vm_bind.num_binds = size;
+	if (size == 1)
+		vm_bind.bind = binds[0];
+	else
+		vm_bind.vector_of_binds = (uintptr_t)binds;
+
+	bind_sync.handle = syncobj_create(fd, 0);
+
+	rc = igt_ioctl(fd, DRM_IOCTL_XE_VM_BIND, &vm_bind);
+	igt_assert(rc == 0);
+
+	return bind_sync.handle;
+}
+
+/*
+ * Fill in the memory related fields of @ctx: has_vram, vram_instance (only
+ * when VRAM is present) and sram_instance.
+ */
+static void query_mem_info(int fd, struct xe_test_ctx *ctx)
+{
+	struct drm_xe_mem_region *vram, *sram;
+
+	ctx->has_vram = xe_has_vram(fd);
+
+	if (ctx->has_vram) {
+		/* Look up the region description behind the VRAM region id. */
+		vram = xe_mem_region(fd, vram_memory(fd, 0));
+		ctx->vram_instance = vram->instance;
+	}
+
+	/* System memory is always looked up. */
+	sram = xe_mem_region(fd, system_memory(fd));
+	ctx->sram_instance = sram->instance;
+
+	igt_debug("has_vram: %d\n", ctx->has_vram);
+}
+
static uint32_t
addr_low(uint64_t addr)
{
@@ -2450,6 +2620,252 @@ static void test_oom(int fd)
}
}
+/**
+ * SUBTEST: oversubscribe-concurrent-bind
+ * Description: Test for oversubscribing the VM with multiple processes
+ * doing binds at the same time, and ensure they all complete successfully.
+ * Functionality: This check is for a specific bug where if multiple processes
+ * oversubscribe the VM, some of the binds may fail with ENOMEM due to
+ * deadlock in the bind code.
+ * Test category: stress test
+ */
+/*
+ * Fork @n_proc children that all use the same DRM @fd. Each child:
+ *  1. creates its own scratch-page VM and an exec queue,
+ *  2. creates @n_vram_bufs VRAM and @n_sram_bufs system buffers of 1 GiB each
+ *     and queues their binds without waiting for them,
+ *  3. builds a small batch that sums four integers on the GPU,
+ *  4. sleeps until 5 s after its start time so that the children overlap,
+ *  5. submits the batch with the bind fences as in-fences, retrying on ENOMEM,
+ *  6. checks the sum, keeps its memory until 15 s after start, then cleans up.
+ */
+static void test_vm_oversubscribe_concurrent_bind(int fd, int n_vram_bufs,
+						  int n_sram_bufs, int n_proc)
+{
+	igt_fork(child, n_proc) {
+		struct xe_test_ctx ctx = {0};
+		int rc;
+		int exec_errno = 0;
+		uint64_t addr = GB(1);
+		struct timespec start, end;
+		uint32_t vram_binds_syncobj = 0, sram_binds_syncobj = 0;
+		/* A zero-length VLA is undefined, so keep at least one slot. */
+		struct gem_bo vram_bufs[n_vram_bufs > 0 ? n_vram_bufs : 1];
+		struct gem_bo sram_bufs[n_sram_bufs > 0 ? n_sram_bufs : 1];
+		int expected_result = 0;
+		int ints_to_add = 4;
+		int gpu_result;
+		int retries;
+		int max_retries = 1024;
+		uint32_t batch_syncobj;
+		/* integers_bo contains the integers we're going to add. */
+		struct gem_bo integers_bo, result_bo, batch_bo;
+		uint64_t tmp_addr;
+		struct drm_xe_sync batch_syncs[3];
+		int n_batch_syncs = 0;
+		int pos = 0;
+		uint64_t timeline_val = 1;
+		struct drm_xe_exec exec;
+
+		rc = clock_gettime(CLOCK_MONOTONIC, &start);
+		igt_assert(rc == 0);
+		ctx.vm_id = xe_vm_create(fd, DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE, 0);
+		query_mem_info(fd, &ctx);
+		create_exec_queue(fd, &ctx);
+
+		/* Large buffers are laid out back to back, starting at 1 GiB. */
+		for (int i = 0; i < n_vram_bufs; i++) {
+			struct gem_bo *bo = &vram_bufs[i];
+
+			bo->size = GB(1);
+			bo->handle = xe_bo_create_caching(fd, ctx.vm_id, bo->size,
+							  vram_memory(fd, 0), 0,
+							  DRM_XE_GEM_CPU_CACHING_WC);
+			bo->ptr = NULL;
+			bo->addr = addr;
+			addr += bo->size;
+			igt_info("vram buffer %d created at 0x%016llx\n",
+				 i, (unsigned long long)bo->addr);
+		}
+		for (int i = 0; i < n_sram_bufs; i++) {
+			struct gem_bo *bo = &sram_bufs[i];
+
+			bo->size = GB(1);
+			bo->handle = xe_bo_create_caching(fd, ctx.vm_id, bo->size,
+							  system_memory(fd), 0,
+							  DRM_XE_GEM_CPU_CACHING_WC);
+			bo->ptr = NULL;
+			bo->addr = addr;
+			addr += bo->size;
+			igt_info("sram buffer %d created at 0x%016llx\n",
+				 i, (unsigned long long)bo->addr);
+		}
+		igt_info("\n Binding the buffers to the vm");
+
+		/* These binds are only queued; the batch waits on their fences. */
+		if (n_vram_bufs) {
+			igt_info("binding vram buffers");
+			vram_binds_syncobj = vm_bind_gem_bos(fd, &ctx, vram_bufs, n_vram_bufs);
+		}
+		if (n_sram_bufs) {
+			igt_info("binding sram buffers");
+			sram_binds_syncobj = vm_bind_gem_bos(fd, &ctx, sram_bufs, n_sram_bufs);
+		}
+
+		integers_bo.size = align_to_page_size(sizeof(int) * ints_to_add);
+		integers_bo.handle = xe_bo_create_caching(fd, ctx.vm_id, integers_bo.size,
+							  system_memory(fd), 0,
+							  DRM_XE_GEM_CPU_CACHING_WC);
+		integers_bo.ptr = (int *)xe_bo_map(fd, integers_bo.handle, integers_bo.size);
+
+		integers_bo.addr = 0x100000;
+
+		/* Fill the inputs and compute the reference sum on the CPU. */
+		for (int i = 0; i < ints_to_add; i++) {
+			int random_int = rand() % 8;
+
+			integers_bo.ptr[i] = random_int;
+			expected_result += random_int;
+
+			igt_info("%d", random_int);
+			if (i + 1 != ints_to_add)
+				igt_info(" + ");
+			else
+				igt_info(" = ");
+		}
+		igt_assert_eq(munmap(integers_bo.ptr, integers_bo.size), 0);
+		integers_bo.ptr = NULL;
+
+		igt_info("Creating the result buffer object");
+
+		result_bo.size = align_to_page_size(sizeof(int));
+		result_bo.handle = xe_bo_create_caching(fd, ctx.vm_id, result_bo.size,
+							system_memory(fd), 0,
+							DRM_XE_GEM_CPU_CACHING_WC);
+		result_bo.ptr = NULL;
+		result_bo.addr = 0x200000;
+
+		/* batch_bo contains the commands the GPU will run. */
+		igt_info("Creating the batch buffer object");
+		batch_bo.size = 4096;
+		batch_bo.handle = xe_bo_create_caching(fd, ctx.vm_id, batch_bo.size,
+						       system_memory(fd), 0,
+						       DRM_XE_GEM_CPU_CACHING_WC);
+
+		batch_bo.ptr = (int *)xe_bo_map(fd, batch_bo.handle, batch_bo.size);
+		batch_bo.addr = 0x300000;
+
+		/* r0 = integers_bo[0] */
+		batch_bo.ptr[pos++] = MI_LOAD_REG_MEM;
+		batch_bo.ptr[pos++] = GPR_RX_ADDR(0);
+		tmp_addr = integers_bo.addr + 0 * sizeof(uint32_t);
+		batch_bo.ptr[pos++] = tmp_addr & 0xFFFFFFFF;
+		batch_bo.ptr[pos++] = (tmp_addr >> 32) & 0xFFFFFFFF;
+		for (int i = 1; i < ints_to_add; i++) {
+			/* r1 = integers_bo[i] */
+			batch_bo.ptr[pos++] = MI_LOAD_REG_MEM;
+			batch_bo.ptr[pos++] = GPR_RX_ADDR(1);
+			tmp_addr = integers_bo.addr + i * sizeof(uint32_t);
+			batch_bo.ptr[pos++] = tmp_addr & 0xFFFFFFFF;
+			batch_bo.ptr[pos++] = (tmp_addr >> 32) & 0xFFFFFFFF;
+			/* r0 = r0 + r1 */
+			batch_bo.ptr[pos++] = MI_MATH_R(3);
+			batch_bo.ptr[pos++] = ALU_LOAD(ALU_SRCA, ALU_RX(0));
+			batch_bo.ptr[pos++] = ALU_LOAD(ALU_SRCB, ALU_RX(1));
+			batch_bo.ptr[pos++] = ALU_ADD;
+			batch_bo.ptr[pos++] = ALU_STORE(ALU_RX(0), ALU_ACCU);
+		}
+		/* result_bo[0] = r0 */
+		batch_bo.ptr[pos++] = MI_STORE_REG_MEM;
+		batch_bo.ptr[pos++] = GPR_RX_ADDR(0);
+		tmp_addr = result_bo.addr + 0 * sizeof(uint32_t);
+		batch_bo.ptr[pos++] = tmp_addr & 0xFFFFFFFF;
+		batch_bo.ptr[pos++] = (tmp_addr >> 32) & 0xFFFFFFFF;
+
+		batch_bo.ptr[pos++] = MI_BB_END;
+		/* Pad the batch to a multiple of four dwords. */
+		while (pos % 4 != 0)
+			batch_bo.ptr[pos++] = MI_NOOP;
+
+		igt_assert(pos * sizeof(int) <= batch_bo.size);
+
+		/* The small objects are bound synchronously. */
+		vm_bind_gem_bo(fd, &ctx, integers_bo.handle, integers_bo.addr, integers_bo.size);
+		vm_bind_gem_bo(fd, &ctx, result_bo.handle, result_bo.addr, result_bo.size);
+		vm_bind_gem_bo(fd, &ctx, batch_bo.handle, batch_bo.addr, batch_bo.size);
+
+		/* Now we do the actual batch submission to the GPU. */
+		batch_syncobj = syncobj_create(fd, 0);
+
+		/* Wait for the other processes to create their stuff too. */
+		end = start;
+		end.tv_sec += 5;
+		rc = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &end, NULL);
+		igt_assert_eq(rc, 0);
+
+		/* Out-fence first, then one in-fence per queued bulk bind. */
+		batch_syncs[n_batch_syncs++] = (struct drm_xe_sync) {
+			.extensions = 0,
+			.type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
+			.flags = DRM_XE_SYNC_FLAG_SIGNAL,
+			.handle = batch_syncobj,
+			.timeline_value = timeline_val,
+		};
+		if (n_vram_bufs) {
+			batch_syncs[n_batch_syncs++] = (struct drm_xe_sync) {
+				.extensions = 0,
+				.type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
+				.flags = 0, /* wait */
+				.handle = vram_binds_syncobj,
+				.timeline_value = 1,
+			};
+		}
+		if (n_sram_bufs) {
+			batch_syncs[n_batch_syncs++] = (struct drm_xe_sync) {
+				.extensions = 0,
+				.type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
+				.flags = 0, /* wait */
+				.handle = sram_binds_syncobj,
+				.timeline_value = 1,
+			};
+		}
+		exec = (struct drm_xe_exec) {
+			.exec_queue_id = ctx.exec_queue_id,
+			.num_syncs = n_batch_syncs,
+			.syncs = (uintptr_t)batch_syncs,
+			.address = batch_bo.addr,
+			.num_batch_buffer = 1,
+		};
+		for (retries = 0; retries < max_retries; retries++) {
+			rc = igt_ioctl(fd, DRM_IOCTL_XE_EXEC, &exec);
+			/* Save errno before usleep() or logging can change it. */
+			exec_errno = errno;
+
+			if (!(rc && exec_errno == ENOMEM))
+				break;
+
+			usleep(100 * retries);
+			if (retries == 0)
+				igt_warn("got ENOMEM\n");
+		}
+		if (retries == max_retries)
+			igt_warn("gave up after %d retries\n", retries);
+
+		if (rc) {
+			igt_warn("errno: %d (%s)\n", exec_errno, strerror(exec_errno));
+			errno = exec_errno;
+			perror(__func__);
+		}
+		igt_assert_eq(rc, 0);
+
+		if (retries)
+			igt_info("!!!!!! succeeded after %d retries !!!!!!\n",
+				 retries);
+
+		/* We need to wait for the GPU to finish. */
+		igt_assert(syncobj_timeline_wait(fd, &batch_syncobj,
+						 &timeline_val, 1, INT64_MAX, 0, NULL));
+		result_bo.ptr = (int *)xe_bo_map(fd, result_bo.handle, result_bo.size);
+		gpu_result = result_bo.ptr[0];
+		igt_info("gpu_result = %d\n", gpu_result);
+		igt_info("expected_result = %d\n", expected_result);
+
+		igt_assert_eq(gpu_result, expected_result);
+		igt_assert_eq(munmap(result_bo.ptr, result_bo.size), 0);
+		result_bo.ptr = NULL;
+
+		/* Hold on to the memory until 15 s after this child started. */
+		end.tv_sec += 10;
+		rc = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &end, NULL);
+		igt_assert_eq(rc, 0);
+
+		/* Release everything this child created. */
+		igt_assert_eq(munmap(batch_bo.ptr, batch_bo.size), 0);
+		batch_bo.ptr = NULL;
+
+		syncobj_destroy(fd, batch_syncobj);
+		if (n_vram_bufs)
+			syncobj_destroy(fd, vram_binds_syncobj);
+		if (n_sram_bufs)
+			syncobj_destroy(fd, sram_binds_syncobj);
+
+		xe_exec_queue_destroy(fd, ctx.exec_queue_id);
+
+		gem_close(fd, batch_bo.handle);
+		gem_close(fd, result_bo.handle);
+		gem_close(fd, integers_bo.handle);
+		for (int i = 0; i < n_vram_bufs; i++)
+			gem_close(fd, vram_bufs[i].handle);
+		for (int i = 0; i < n_sram_bufs; i++)
+			gem_close(fd, sram_bufs[i].handle);
+
+		xe_vm_destroy(fd, ctx.vm_id);
+		close(fd);
+	}
+	igt_waitchildren();
+}
+
int igt_main()
{
struct drm_xe_engine_class_instance *hwe, *hwe_non_copy = NULL;
@@ -2850,6 +3266,11 @@ int igt_main()
test_oom(fd);
}
+ igt_subtest("oversubscribe-concurrent-bind") {
+ igt_require(xe_has_vram(fd));
+ test_vm_oversubscribe_concurrent_bind(fd, 2, 4, 4);
+ }
+
igt_fixture()
drm_close_driver(fd);
}
--
2.52.0
next prev parent reply other threads:[~2026-02-18 16:44 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-18 16:44 [PATCH i-g-t 0/1] tests/xe_vm: Add oversubscribe concurrent bind stress test Sobin Thomas
2026-02-18 16:44 ` Sobin Thomas [this message]
2026-02-19 10:43 ` [PATCH i-g-t 1/1] " Sharma, Nishit
2026-03-05 3:25 ` Thomas, Sobin
2026-02-25 16:38 ` Kamil Konieczny
2026-03-23 17:37 ` Thomas Hellström
2026-02-18 18:09 ` ✓ Xe.CI.BAT: success for " Patchwork
2026-02-18 18:17 ` ✓ i915.CI.BAT: " Patchwork
2026-02-18 20:20 ` ✗ Xe.CI.FULL: failure " Patchwork
2026-02-18 20:42 ` ✗ i915.CI.Full: " Patchwork
2026-03-23 17:05 ` [PATCH i-g-t 0/1] " Thomas Hellström
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260218164417.856114-2-sobin.thomas@intel.com \
--to=sobin.thomas@intel.com \
--cc=igt-dev@lists.freedesktop.org \
--cc=nishit.sharma@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox