* [PATCH v4 i-g-t 2/2] tests/intel/xe_vm: Add support for overcommit tests
2026-03-11 6:06 [PATCH v4 i-g-t 0/2] test/intel/xe_vm: Add overcommit and no‑overcommit handling tests Sobin Thomas
2026-03-11 6:06 ` [PATCH v4 i-g-t 1/2] drm-uapi/xe: sync with kernel header Sobin Thomas
@ 2026-03-11 6:06 ` Sobin Thomas
1 sibling, 0 replies; 4+ messages in thread
From: Sobin Thomas @ 2026-03-11 6:06 UTC (permalink / raw)
To: igt-dev, thomas.hellstrom; +Cc: nishit.sharma, Sobin Thomas, root
Current tests focus on VM creation with basic mode selection and do not
support overcommit scenarios.
This change adds tests to verify overcommit behavior across different VM
modes.
Non-fault mode tests:
- vram-lr-defer: DEFER_BACKING rejects overcommit at bind time
- vram-lr-external-nodefer: Long-running mode with external BO and
no defer backing
- vram-no-lr: Non-LR mode; overcommit is expected to fail at exec
Fault mode tests:
- vram-lr-fault: Fault handling allows graceful overcommit via page
faults
- vram-lr-fault-no-overcommit: Verifies NO_VM_OVERCOMMIT blocks same-VM
BO eviction during VM_BIND while still allowing eviction during
pagefault OOM
These tests validate that VMs handle memory pressure appropriately based
on their configuration—rejecting at bind, failing at exec, or handling
it gracefully via page faults.
v2 - Added additional test cases for LR mode and no-overcommit.
v3 - Refactored into single api call based on the VM / BO Flags.
v4 - Addressed review comments (reset sync objects and nits).
Added check in cleanup
Signed-off-by: Sobin Thomas <sobin.thomas@intel.com>
Signed-off-by: root <root@DUT2933PVC.iind.intel.com>
Signed-off-by: Sobin Thomas <sobin.thomas@intel.com>
---
tests/intel/xe_vm.c | 483 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 482 insertions(+), 1 deletion(-)
diff --git a/tests/intel/xe_vm.c b/tests/intel/xe_vm.c
index ccff8f804..c02bb6945 100644
--- a/tests/intel/xe_vm.c
+++ b/tests/intel/xe_vm.c
@@ -20,6 +20,7 @@
#include "xe/xe_query.h"
#include "xe/xe_spin.h"
#include <string.h>
+#define USER_FENCE_VALUE 0xdeadbeefdeadbeefull
static uint32_t
addr_low(uint64_t addr)
@@ -2376,6 +2377,475 @@ static void invalid_vm_id(int fd)
do_ioctl_err(fd, DRM_IOCTL_XE_VM_DESTROY, &destroy, ENOENT);
}
+/*
+ * Wait for a user fence to reach USER_FENCE_VALUE.
+ *
+ * @sync_data points at the 64-bit fence location. @timeout is optional: when
+ * NULL the wait uses INT64_MAX. The caller's timeout is copied to a local
+ * before being handed to __xe_wait_ufence(), so *timeout is never written.
+ *
+ * Returns whatever __xe_wait_ufence() returns.
+ */
+static int wait_fault(int fd, uint32_t exec_queue, void *sync_data,
+		      int64_t *timeout)
+{
+	uint64_t *fence = sync_data;
+	int64_t budget_ns = INT64_MAX;
+
+	if (timeout)
+		budget_ns = *timeout;
+
+	return __xe_wait_ufence(fd, fence, USER_FENCE_VALUE, exec_queue, &budget_ns);
+}
+
+/*
+ * Wait for a single syncobj to signal.
+ *
+ * @sync_data points at the syncobj handle. @exec_queue is unused; it is kept
+ * so the signature mirrors wait_fault(). @timeout is optional: when NULL the
+ * wait uses INT64_MAX.
+ *
+ * Returns 0 when syncobj_wait() reports success, -1 otherwise.
+ *
+ * NOTE(review): syncobj_wait() is believed to take an absolute timeout,
+ * while the caller in this patch passes a relative 20 s value - confirm
+ * against lib/igt_syncobj before relying on the timeout.
+ */
+static int wait_nonfault(int fd, uint32_t exec_queue, void *sync_data,
+			 int64_t *timeout)
+{
+	uint32_t *handle = sync_data;
+	int64_t budget_ns = INT64_MAX;
+
+	if (timeout)
+		budget_ns = *timeout;
+
+	return syncobj_wait(fd, handle, 1, budget_ns, 0, NULL) ? 0 : -1;
+}
+
+/*
+ * Create a BO of @bo_size bytes, preferring VRAM of @gt_id and falling back
+ * to system memory.
+ *
+ * @vm:       VM the BO is created against; ignored (0 is passed) when
+ *            @external is true.
+ * @bo_flags: DRM_XE_GEM_CREATE_FLAG_* passed through to __xe_bo_create().
+ * @bo:       receives the new handle on success.
+ *
+ * VRAM is only attempted when vram_memory() reports a region. Previously a
+ * device without VRAM logged "Using VRAM placement", then "VRAM allocation
+ * failed", and retried the exact same system-memory request.
+ *
+ * Returns 0 on success or the error from the last __xe_bo_create() attempt.
+ */
+static int vm_overcommit_create_bo(int fd, uint32_t vm, size_t bo_size,
+				   uint32_t bo_flags, uint16_t gt_id,
+				   bool external, uint32_t *bo)
+{
+	uint32_t owner_vm = external ? 0 : vm;
+	uint32_t placement;
+	int ret;
+
+	/* Try VRAM first */
+	placement = vram_memory(fd, gt_id);
+	if (placement) {
+		igt_debug("Using VRAM placement: 0x%x\n", placement);
+
+		ret = __xe_bo_create(fd, owner_vm, bo_size, placement,
+				     bo_flags, NULL, bo);
+		if (!ret)
+			return 0;
+
+		igt_info("VRAM allocation failed, falling back to system memory\n");
+	}
+
+	placement = system_memory(fd);
+	igt_assert_f(placement != 0, "Invalid placement: system_memory(fd) returned 0\n");
+	igt_debug("Using system memory placement: 0x%x\n", placement);
+
+	return __xe_bo_create(fd, owner_vm, bo_size, placement,
+			      bo_flags, NULL, bo);
+}
+
+/**
+ * SUBTEST: overcommit-fault-%s
+ * Description: Test VM overcommit behavior in fault mode with %arg[1] configuration
+ * Functionality: overcommit
+ * Test category: functionality test
+ *
+ * arg[1]:
+ *
+ * @vram-lr-fault:VRAM with LR and fault mode, expects exec to pass
+ * @vram-lr-fault-no-overcommit:VRAM with LR, fault and NO_VM_OVERCOMMIT, expects bind rejection
+ */
+
+/**
+ * SUBTEST: overcommit-nonfault-%s
+ * Description: Test VM overcommit behavior in nonfault mode with %arg[1] configuration
+ * Functionality: overcommit
+ * Test category: functionality test
+ *
+ * arg[1]:
+ *
+ * @vram-lr-defer:VRAM with LR and defer backing, expects bind rejection
+ * @vram-lr-external-nodefer:VRAM with LR and external BO without defer, expects exec to fail
+ * @vram-no-lr:VRAM without LR mode, expects exec to fail
+ */
+/* One overcommit scenario; each named entry becomes an overcommit-* subtest. */
+struct vm_overcommit_case {
+ /* Subtest name suffix; a NULL name terminates the case table. */
+ const char *name;
+ /* DRM_XE_VM_CREATE_FLAG_* passed to xe_vm_create(). */
+ uint32_t vm_flags;
+ /* DRM_XE_GEM_CREATE_FLAG_* used for the data BOs. */
+ uint32_t bo_flags;
+ /* Selects VRAM (true) or system memory (false) for sizing and placement. */
+ bool use_vram;
+ /* When true the batch BO is created without a VM (external BO). */
+ bool external;
+ /* GPU virtual address at which the first data BO is bound. */
+ uint64_t data_addr;
+ /* PAT index passed to the bind ioctl. */
+ uint32_t pat_index;
+ /* Multiplier applied to the base memory size to get the overcommit size. */
+ int overcommit_mult;
+};
+
+/*
+ * Table of overcommit scenarios. The subtest loop walks it in order and
+ * stops at the first entry whose name is NULL.
+ */
+static const struct vm_overcommit_case overcommit_cases[] = {
+ /* Case 1: LR mode with DEFER_BACKING BOs - documented to reject at bind */
+ {
+ .name = "vram-lr-defer",
+ .vm_flags = DRM_XE_VM_CREATE_FLAG_LR_MODE,
+ .bo_flags = DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
+ DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM,
+ .external = false,
+ .use_vram = true,
+ .data_addr = 0x1a0000,
+ .pat_index = DEFAULT_PAT_INDEX,
+ .overcommit_mult = 2,
+ },
+ /* Case 1b: LR mode, external BO, no defer backing - documented to fail at exec */
+ {
+ .name = "vram-lr-external-nodefer",
+ .vm_flags = DRM_XE_VM_CREATE_FLAG_LR_MODE,
+ .bo_flags = DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM,
+ .external = true,
+ .use_vram = true,
+ .data_addr = 0x1a0000,
+ .pat_index = DEFAULT_PAT_INDEX,
+ .overcommit_mult = 2,
+ },
+ /* Case 2: LR + FAULT - should not fail on exec */
+ {
+ .name = "vram-lr-fault",
+ .vm_flags = DRM_XE_VM_CREATE_FLAG_LR_MODE |
+ DRM_XE_VM_CREATE_FLAG_FAULT_MODE,
+ .bo_flags = DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM,
+ .external = false,
+ .use_vram = true,
+ .data_addr = 0x300000000,
+ .pat_index = 0,
+ .overcommit_mult = 2,
+ },
+ /* Case 3: !LR (no VM flags) - overcommit should fail on exec */
+ {
+ .name = "vram-no-lr",
+ .vm_flags = 0,
+ .bo_flags = DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM,
+ .external = false,
+ .use_vram = true,
+ .data_addr = 0x300000000,
+ .pat_index = 0,
+ .overcommit_mult = 2,
+ },
+ /* Case 4: LR + FAULT + NO_VM_OVERCOMMIT with DEFER_BACKING - documented to reject at bind */
+ {
+ .name = "vram-lr-fault-no-overcommit",
+ .vm_flags = DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT | DRM_XE_VM_CREATE_FLAG_LR_MODE |
+ DRM_XE_VM_CREATE_FLAG_FAULT_MODE,
+ .bo_flags = DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
+ DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM,
+ .external = false,
+ .use_vram = true,
+ .data_addr = 0x300000000,
+ .pat_index = 0,
+ .overcommit_mult = 2,
+ },
+ /* Sentinel: zeroed entry (NULL name) marks the end of the table */
+ { }
+};
+
+/*
+ * Run one overcommit scenario.
+ *
+ * @fd:          Xe DRM file descriptor.
+ * @eci:         engine used for the exec queue and for VRAM placement (gt_id).
+ * @c:           scenario description (VM flags, BO flags, addresses, ...).
+ * @system_size: available system memory in bytes, as supplied by the caller.
+ * @vram_size:   VRAM size in bytes, as supplied by the caller.
+ *
+ * Flow:
+ *  1. Create a VM with c->vm_flags and bind 64MB BOs until the overcommit
+ *     size is covered. A create/bind failure with an expected errno ends the
+ *     test early through the cleanup path.
+ *  2. Bind a small batch BO and, per 1MB stride of every data BO, submit a
+ *     MI_STORE_DWORD_IMM batch writing 0xBB.
+ *  3. An exec error of ENOMEM/ENOSPC, or a failed wait, also ends the test
+ *     through cleanup without failing it.
+ *  4. If every batch completed, read the BOs back and assert on the 0xBB
+ *     pattern.
+ *
+ * Completion is tracked with a user fence for fault and/or LR VMs and with a
+ * syncobj otherwise.
+ */
+static void
+test_vm_overcommit(int fd, struct drm_xe_engine_class_instance *eci,
+		   const struct vm_overcommit_case *c,
+		   uint64_t system_size, uint64_t vram_size)
+{
+	/* Only meaningful in fault mode; zeroed so it is never read uninitialised. */
+	size_t sync_size = 0;
+	size_t nf_bo_size = 64 * 1024 * 1024; /* 64MB per BO */
+	uint64_t overcommit_size, off, bind_exec_queue, data_addr;
+	uint32_t vm = 0, *bos, batch_bo = 0, exec_queue = 0, placement = 0;
+	uint64_t sync_addr = 0x101a0000, batch_addr = 0x200000000;
+	uint64_t stride = 1024 * 1024, lr_vm_sync = 0, base_size;
+	int64_t timeout = 20 * NSEC_PER_SEC, ret;
+	int i, b, create_ret, bind_err, res, num_bos;
+	bool overcommit_detected = false;
+	bool is_fault_mode = (c->vm_flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) != 0;
+	bool is_lr_mode = (c->vm_flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) != 0;
+	struct drm_xe_sync bind_sync[1] = {
+		{
+			.type = DRM_XE_SYNC_TYPE_USER_FENCE,
+			.flags = DRM_XE_SYNC_FLAG_SIGNAL,
+			.timeline_value = USER_FENCE_VALUE
+		},
+	};
+	struct drm_xe_sync lr_sync[1] = {
+		{
+			.type = DRM_XE_SYNC_TYPE_USER_FENCE,
+			.flags = DRM_XE_SYNC_FLAG_SIGNAL,
+			.timeline_value = USER_FENCE_VALUE,
+			.addr = to_user_pointer(&lr_vm_sync),
+		},
+	};
+
+	/* Fault and/or LR mode: user fence; plain (non-LR, non-fault) mode: syncobj */
+	struct drm_xe_sync exec_sync[1] = {
+		{
+			.type = (is_fault_mode || is_lr_mode) ?
+				DRM_XE_SYNC_TYPE_USER_FENCE : DRM_XE_SYNC_TYPE_SYNCOBJ,
+			.flags = DRM_XE_SYNC_FLAG_SIGNAL,
+			.timeline_value = (is_fault_mode || is_lr_mode) ? USER_FENCE_VALUE : 0,
+			.handle = 0,
+		},
+	};
+	struct drm_xe_exec exec = {
+		.num_batch_buffer = 1,
+		.num_syncs = 1,
+		.syncs = to_user_pointer(exec_sync),
+	};
+	/*
+	 * Must start out NULL: the early "goto cleanup" after a rejected
+	 * create/bind happens before the batch BO is mapped, and cleanup tests
+	 * this pointer before unmapping it.
+	 */
+	struct {
+		uint32_t batch[16];
+		uint64_t pad;
+		uint32_t data;
+		uint64_t vm_sync;
+	} *batch_data = NULL;
+	uint64_t *user_fence_sync = NULL;
+
+	data_addr = c->data_addr;
+
+	base_size = c->use_vram ? vram_size : system_size;
+	overcommit_size = (uint64_t)(base_size * c->overcommit_mult);
+	overcommit_size = ALIGN(overcommit_size, 4096);
+	/*
+	 * Cap the request at the available system memory. The comparison used
+	 * to be against base_size, which any multiplier > 1 always exceeds, so
+	 * the clamp fired unconditionally.
+	 */
+	if (overcommit_size > system_size) {
+		igt_debug("Limiting overcommit size from %llu MB to %llu MB\n",
+			  (unsigned long long)(overcommit_size >> 20),
+			  (unsigned long long)(system_size >> 20));
+		overcommit_size = ALIGN(system_size, 4096);
+	}
+
+	num_bos = (overcommit_size / nf_bo_size) + 1;
+	bos = calloc(num_bos, sizeof(*bos));
+	igt_assert(bos);
+
+	/* The two debug calls below form a single log line. */
+	igt_debug("Overcommit test: allocating %d BOs of %llu MB each",
+		  num_bos, (unsigned long long)(nf_bo_size >> 20));
+	igt_debug(" total=%llu MB, vram=%llu MB\n",
+		  (unsigned long long)(num_bos * nf_bo_size >> 20),
+		  (unsigned long long)(vram_size >> 20));
+	/* Create VM with appropriate flags */
+	vm = xe_vm_create(fd, c->vm_flags, 0);
+	igt_assert(vm);
+	bind_exec_queue = xe_bind_exec_queue_create(fd, vm, 0);
+	placement = c->use_vram ? vram_memory(fd, eci->gt_id) : system_memory(fd);
+	/* For fault mode: create user fence sync area */
+	if (is_fault_mode) {
+		sync_size = sizeof(uint64_t) * num_bos;
+		sync_size = xe_bb_size(fd, sync_size);
+		user_fence_sync = mmap(NULL, sync_size, PROT_READ | PROT_WRITE,
+				       MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+		igt_assert(user_fence_sync != MAP_FAILED);
+		memset(user_fence_sync, 0, sync_size);
+	}
+	/* Create and bind BOs */
+	for (i = 0; i < num_bos; i++) {
+		struct {
+			uint64_t vm_sync;
+		} *data;
+
+		/*
+		 * Data BOs are always created against the VM.
+		 * NOTE(review): c->external only affects the batch BO below;
+		 * confirm whether external cases should create data BOs with
+		 * vm == 0 as well.
+		 */
+		create_ret = __xe_bo_create(fd, vm, nf_bo_size, placement,
+					    c->bo_flags, NULL, &bos[i]);
+
+		if (create_ret) {
+			/* Log before num_bos is trimmed so "i/total" stays meaningful. */
+			igt_debug("BO create failed at %d/%d with error %d (%s)\n",
+				  i, num_bos, -create_ret, strerror(-create_ret));
+			igt_assert_f(create_ret == -ENOMEM || create_ret == -ENOSPC ||
+				     create_ret == -E2BIG || create_ret == -EPERM,
+				     "Unexpected error %d (%s)\n",
+				     -create_ret, strerror(-create_ret));
+			overcommit_detected = true;
+			/* Only bos[0..i) hold handles from here on. */
+			num_bos = i;
+			break;
+		}
+
+		/* Map and bind BO */
+		data = xe_bo_map(fd, bos[i], nf_bo_size);
+		igt_assert(data);
+		memset(data, 0, nf_bo_size);
+		/* The bind fence lives in the first qword of the BO itself. */
+		bind_sync[0].addr = to_user_pointer(&data->vm_sync);
+
+		bind_err = __xe_vm_bind(fd, vm, bind_exec_queue, bos[i], 0,
+					data_addr + (i * nf_bo_size), nf_bo_size,
+					DRM_XE_VM_BIND_OP_MAP, 0, bind_sync, 1, 0,
+					c->pat_index, 0);
+
+		if (bind_err) {
+			munmap(data, nf_bo_size);
+			gem_close(fd, bos[i]);
+			bos[i] = 0;
+			igt_debug("Bind failed at %d/%d with error %d (%s)\n",
+				  i, num_bos, -bind_err, strerror(-bind_err));
+			igt_assert_f(bind_err == -ENOMEM || bind_err == -ENOSPC ||
+				     bind_err == -EPERM,
+				     "Unexpected bind error %d (%s)\n",
+				     -bind_err, strerror(-bind_err));
+			overcommit_detected = true;
+			num_bos = i;
+			break;
+		}
+		if (data->vm_sync != USER_FENCE_VALUE)
+			xe_wait_ufence(fd, &data->vm_sync, USER_FENCE_VALUE,
+				       bind_exec_queue, 20 * NSEC_PER_SEC);
+		data->vm_sync = 0;
+		munmap(data, nf_bo_size);
+
+		igt_debug("Created and bound BO %d/%d at 0x%llx\n",
+			  i + 1, num_bos,
+			  (unsigned long long)(data_addr + (i * nf_bo_size)));
+	}
+	if (overcommit_detected) {
+		igt_debug("Overcommit correctly rejected at BO creation/bind (created %d BOs)\n",
+			  num_bos);
+		goto cleanup;
+	}
+
+	/* Create batch buffer */
+	ret = vm_overcommit_create_bo(fd, vm, 0x1000, 0, eci->gt_id, c->external, &batch_bo);
+	igt_assert_eq(ret, 0);
+
+	igt_debug("Mapping the created BO\n");
+	batch_data = xe_bo_map(fd, batch_bo, 0x1000);
+	igt_assert(batch_data);
+	memset(batch_data, 0, 0x1000);
+
+	/* Bind batch buffer and sync areas */
+	if (is_fault_mode) {
+		batch_data[0].vm_sync = 0;
+		bind_sync[0].addr = to_user_pointer(&batch_data[0].vm_sync);
+
+		/* Expose the CPU-side fence area to the GPU at sync_addr. */
+		xe_vm_bind_userptr_async(fd, vm, bind_exec_queue, to_user_pointer(user_fence_sync),
+					 sync_addr, sync_size, bind_sync, 1);
+		if (batch_data[0].vm_sync != USER_FENCE_VALUE)
+			xe_wait_ufence(fd, &batch_data[0].vm_sync, USER_FENCE_VALUE,
+				       bind_exec_queue, NSEC_PER_SEC);
+
+		/* Re-arm the fence before reusing it for the batch bind. */
+		batch_data[0].vm_sync = 0;
+
+		xe_vm_bind_async(fd, vm, bind_exec_queue, batch_bo, 0, batch_addr, 0x1000,
+				 bind_sync, 1);
+		if (batch_data[0].vm_sync != USER_FENCE_VALUE)
+			xe_wait_ufence(fd, &batch_data[0].vm_sync, USER_FENCE_VALUE,
+				       bind_exec_queue, NSEC_PER_SEC);
+	} else if (is_lr_mode) {
+		/* LR mode without fault - bind batch using user fence */
+		lr_vm_sync = 0; /* Reset before use */
+		lr_sync[0].addr = to_user_pointer(&lr_vm_sync);
+		bind_err = __xe_vm_bind(fd, vm, 0, batch_bo, 0, batch_addr, 0x1000,
+					DRM_XE_VM_BIND_OP_MAP, 0, lr_sync, 1, 0, c->pat_index, 0);
+		if (bind_err) {
+			igt_debug("Batch buffer bind failed with error %d (%s) - skipping GPU test\n",
+				  -bind_err, strerror(-bind_err));
+			goto cleanup;
+		}
+		xe_wait_ufence(fd, &lr_vm_sync, USER_FENCE_VALUE, 0, NSEC_PER_SEC);
+		lr_vm_sync = 0;
+	} else {
+		igt_debug("Going for vm bind sync\n");
+		xe_vm_bind_sync(fd, vm, batch_bo, 0, batch_addr, 0x1000);
+	}
+
+	igt_debug("VM binds done - batch_bo at 0x%llx\n", (unsigned long long)batch_addr);
+	/* Create exec queue */
+	exec_queue = xe_exec_queue_create(fd, vm, eci, 0);
+
+	/* Setup sync for exec */
+	if (is_fault_mode) {
+		/* GPU address of the userptr-bound fence area */
+		exec_sync[0].addr = sync_addr;
+	} else if (is_lr_mode) {
+		/* LR mode - the fence is the vm_sync field of the mapped batch BO */
+		batch_data->vm_sync = 0;
+		exec_sync[0].addr = to_user_pointer(&batch_data->vm_sync);
+	} else {
+		exec_sync[0].handle = syncobj_create(fd, 0);
+	}
+
+	/* Use GPU to write to each BO */
+	for (i = 0; i < num_bos; i++) {
+		igt_debug("Writing to BO %d/%d via GPU\n", i + 1, num_bos);
+
+		for (off = 0; off < nf_bo_size; off += stride) {
+			uint64_t target_addr = data_addr + (i * nf_bo_size) + off;
+
+			/* The single batch BO is rewritten for every store. */
+			b = 0;
+			batch_data->batch[b++] = MI_STORE_DWORD_IMM_GEN4;
+			batch_data->batch[b++] = target_addr & 0xFFFFFFFF;
+			batch_data->batch[b++] = (target_addr >> 32) & 0xFFFFFFFF;
+			batch_data->batch[b++] = 0xBB;
+			batch_data->batch[b++] = MI_BATCH_BUFFER_END;
+
+			/* Reset sync for next exec */
+			if (!is_fault_mode && !is_lr_mode) {
+				/* Nothing has been submitted yet on the very first pass. */
+				if (off != 0 || i != 0) {
+					igt_assert(syncobj_wait(fd, &exec_sync[0].handle,
+								1, INT64_MAX, 0, NULL));
+				}
+				syncobj_reset(fd, &exec_sync[0].handle, 1);
+			}
+
+			/* Submit batch */
+			exec.exec_queue_id = exec_queue;
+			exec.address = batch_addr;
+
+			res = igt_ioctl(fd, DRM_IOCTL_XE_EXEC, &exec);
+			if (res != 0) {
+				if (errno == ENOMEM || errno == ENOSPC) {
+					igt_debug("Expected fault/error: %d (%s)\n",
+						  errno, strerror(errno));
+					goto cleanup;
+				}
+				igt_assert_f(0, "Unexpected exec error: %d\n", errno);
+			}
+
+			/* Wait for completion using appropriate method */
+			if (is_fault_mode) {
+				ret = wait_fault(fd, exec_queue, &user_fence_sync[0], &timeout);
+				user_fence_sync[0] = 0;
+			} else if (is_lr_mode) {
+				/* Reloaded each time because the wait is handed a pointer to it. */
+				timeout = 5 * NSEC_PER_SEC;
+				ret = __xe_wait_ufence(fd, &batch_data->vm_sync, USER_FENCE_VALUE,
+						       exec_queue, &timeout);
+				batch_data->vm_sync = 0;
+
+			} else {
+				ret = wait_nonfault(fd, exec_queue, &exec_sync[0].handle, &timeout);
+			}
+
+			/* A failed wait ends the test through cleanup; it is not asserted on. */
+			if (ret != 0) {
+				igt_debug("Batch wait failed at BO %d offset 0x%llx\n",
+					  i, (unsigned long long)off);
+				goto cleanup;
+			}
+		}
+
+		igt_debug("Accessed BO %d/%d via GPU\n", i + 1, num_bos);
+	}
+
+	igt_debug("All batches completed successfully\n");
+	/* Verify GPU writes */
+	igt_debug("Verifying GPU writes to BOs...\n");
+	for (i = 0; i < num_bos; i++) {
+		uint32_t *verify_data;
+		int errors = 0;
+
+		verify_data = xe_bo_map(fd, bos[i], nf_bo_size);
+		igt_assert(verify_data);
+
+		for (off = 0; off < nf_bo_size; off += stride) {
+			uint32_t expected = 0xBB;
+			uint32_t actual = *(uint32_t *)((char *)verify_data + off);
+
+			if (actual != expected) {
+				/* Log only the first few mismatches per BO. */
+				if (errors < 5)
+					igt_debug("Mismatch at BO %d offset 0x%llx\n",
+						  i, (unsigned long long)off);
+				errors++;
+			}
+		}
+
+		munmap(verify_data, nf_bo_size);
+		igt_assert_f(errors == 0, "Data verification failed for BO %d with %d errors\n",
+			     i, errors);
+	}
+
+cleanup:
+	/* Cleanup: every resource is checked because several paths jump here early */
+	if (!is_fault_mode && !is_lr_mode && exec_sync[0].handle)
+		syncobj_destroy(fd, exec_sync[0].handle);
+	if (exec_queue)
+		xe_exec_queue_destroy(fd, exec_queue);
+	if (bind_exec_queue)
+		xe_exec_queue_destroy(fd, bind_exec_queue);
+	if (batch_data)
+		munmap(batch_data, 0x1000);
+	if (batch_bo)
+		gem_close(fd, batch_bo);
+
+	if (is_fault_mode && user_fence_sync)
+		munmap(user_fence_sync, sync_size);
+
+	if (bos) {
+		for (i = 0; i < num_bos; i++) {
+			if (bos[i])
+				gem_close(fd, bos[i]);
+		}
+		free(bos);
+	}
+	if (vm > 0)
+		xe_vm_destroy(fd, vm);
+}
+
/**
* SUBTEST: out-of-memory
* Description: Test if vm_bind ioctl results in oom
@@ -2385,7 +2855,6 @@ static void invalid_vm_id(int fd)
*/
static void test_oom(int fd)
{
-#define USER_FENCE_VALUE 0xdeadbeefdeadbeefull
#define BO_SIZE xe_bb_size(fd, SZ_512M)
#define MAX_BUFS ((int)(xe_visible_vram_size(fd, 0) / BO_SIZE))
uint64_t addr = 0x1a0000;
@@ -2850,6 +3319,18 @@ int igt_main()
test_oom(fd);
}
+ for (int i = 0; overcommit_cases[i].name; i++) {
+ const struct vm_overcommit_case *c = &overcommit_cases[i];
+ const char *mode = (c->vm_flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) ?
+ "fault" : "nonfault";
+ igt_subtest_f("overcommit-%s-%s", mode, c->name) {
+ igt_require(xe_has_vram(fd));
+ igt_assert(xe_visible_vram_size(fd, 0));
+ test_vm_overcommit(fd, hwe, c, (igt_get_avail_ram_mb() << 20),
+ xe_visible_vram_size(fd, 0));
+ }
+ }
+
igt_fixture()
drm_close_driver(fd);
}
--
2.52.0
^ permalink raw reply related [flat|nested] 4+ messages in thread