From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: intel-xe@lists.freedesktop.org
Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
"Matthew Brost" <matthew.brost@intel.com>,
"Maarten Lankhorst" <maarten.lankhorst@linux.intel.com>,
"Michal Mrozek" <michal.mrozek@intel.com>,
"John Falkowski" <john.falkowski@intel.com>,
"Rodrigo Vivi" <rodrigo.vivi@intel.com>,
"Lahtinen Joonas" <joonas.lahtinen@linux.intel.com>,
"David Howells" <dhowells@redhat.com>,
"Christian Brauner" <brauner@kernel.org>,
"Kees Cook" <kees@kernel.org>,
"Davidlohr Bueso" <dave@stgolabs.net>,
"Christian König" <christian.koenig@amd.com>,
"Dave Airlie" <airlied@gmail.com>,
"Simona Vetter" <simona.vetter@ffwll.ch>,
dri-devel@lists.freedesktop.org,
LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 1/4] drm/xe: Add DRM_IOCTL_XE_VM_RESTART IOCTL
Date: Fri, 12 Jun 2026 15:53:37 +0200 [thread overview]
Message-ID: <20260612135340.116100-2-thomas.hellstrom@linux.intel.com> (raw)
In-Reply-To: <20260612135340.116100-1-thomas.hellstrom@linux.intel.com>
Add an async VM restart IOCTL that allows userspace to re-queue the
preempt-rebind worker for a VM that has been paused after a recoverable
error.
Add xe_vm_restart_ioctl() which:
- Looks up the VM by id via xe_vm_lookup()
- Returns -EINVAL if the VM is not in preempt-fence mode or not restartable
- Returns -EALREADY if the VM is not currently paused
- Queues the rebind worker via xe_vm_reactivate_rebind() and returns 0
If the optional @timestamp_ns field is non-zero, logs the latency
between that timestamp and the point the vm lock is taken, regardless
of whether the VM turns out to be paused.
Add DRM_XE_VM_CREATE_FLAG_RESTARTABLE to opt a VM in to the restartable
behaviour: on recoverable errors (-ENOMEM, -ENOSPC) the rebind worker
is deactivated rather than the VM being killed. Requires
DRM_XE_VM_CREATE_FLAG_LR_MODE and may not be used with
DRM_XE_VM_CREATE_FLAG_FAULT_MODE.
Add struct drm_xe_vm_restart UAPI struct with vm_id, pad, timestamp_ns
and reserved fields, and register the IOCTL at slot 0x10.
Assisted-by: GitHub_Copilot:claude-sonnet-4.6
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 51e3a2dd7b22..867d7c55dc03 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -215,6 +215,7 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(XE_VM_GET_PROPERTY, xe_vm_get_property_ioctl,
DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_RESTART, xe_vm_restart_ioctl, DRM_RENDER_ALLOW),
};
static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 75841f3e9afa..86ed8f31a219 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -563,8 +563,14 @@ static void preempt_rebind_work_func(struct work_struct *w)
}
if (err) {
- drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
- xe_vm_kill(vm, true);
+ if ((err == -ENOMEM || err == -ENOSPC) && xe_vm_is_restartable(vm)) {
+ vm->preempt.rebind_deactivated = true;
+ drm_dbg(&vm->xe->drm, "Rebind deactivated VM on error %pe\n",
+ ERR_PTR(err));
+ } else {
+ drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
+ xe_vm_kill(vm, true);
+ }
}
up_write(&vm->lock);
@@ -573,6 +579,85 @@ static void preempt_rebind_work_func(struct work_struct *w)
trace_xe_vm_rebind_worker_exit(vm);
}
+/**
+ * xe_vm_restart_ioctl() - Queue the preempt-rebind worker for a paused VM
+ * @dev: DRM device
+ * @data: pointer to &struct drm_xe_vm_restart from userspace
+ * @file: DRM file handle
+ *
+ * Looks up the VM identified by @vm_id and, if it is currently paused (its
+ * rebind worker was deactivated after a recoverable error), clears the paused
+ * state and queues the rebind worker. Only valid for VMs in preempt-fence
+ * mode.
+ *
+ * If @timestamp_ns is non-zero, logs the latency between that timestamp and
+ * the point the vm lock is taken, regardless of whether the VM was paused.
+ *
+ * Return: 0 if the worker was queued, -EALREADY if the VM is not paused,
+ * -EINVAL if the VM is not in preempt-fence mode or not restartable,
+ * -ENOENT if the VM was not found.
+ */
+int xe_vm_restart_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_vm_restart *args = data;
+ struct xe_vm *vm;
+ int err = 0;
+
+ if (XE_IOCTL_DBG(xe, args->reserved || args->pad))
+ return -EINVAL;
+
+ vm = xe_vm_lookup(xef, args->vm_id);
+ if (XE_IOCTL_DBG(xe, !vm))
+ return -ENOENT;
+
+ if (XE_IOCTL_DBG(xe, !xe_vm_in_preempt_fence_mode(vm))) {
+ xe_vm_put(vm);
+ return -EINVAL;
+ }
+
+ if (XE_IOCTL_DBG(xe, !xe_vm_is_restartable(vm))) {
+ xe_vm_put(vm);
+ return -EINVAL;
+ }
+
+ err = down_read_interruptible(&vm->lock);
+ if (err)
+ goto out;
+
+ if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
+ err = -ENOENT;
+ goto out_unlock_read;
+ }
+
+ if (args->timestamp_ns) {
+ u64 delay_us = (ktime_get_ns() - args->timestamp_ns) / NSEC_PER_USEC;
+
+ drm_dbg(&xe->drm, "VM %u restart latency: %llu us\n",
+ args->vm_id, delay_us);
+ }
+
+ err = xe_vm_lock(vm, true);
+ if (err)
+ goto out_unlock_read;
+
+ if (!vm->preempt.rebind_deactivated) {
+ err = -EALREADY;
+ goto out_unlock_resv;
+ }
+
+ xe_vm_reactivate_rebind(vm);
+out_unlock_resv:
+ xe_vm_unlock(vm);
+out_unlock_read:
+ up_read(&vm->lock);
+out:
+ xe_vm_put(vm);
+ return err;
+}
+
/**
* xe_vm_add_fault_entry_pf() - Add pagefault to vm fault list
* @vm: The VM.
@@ -2049,7 +2134,8 @@ find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
DRM_XE_VM_CREATE_FLAG_LR_MODE | \
DRM_XE_VM_CREATE_FLAG_FAULT_MODE | \
- DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)
+ DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT | \
+ DRM_XE_VM_CREATE_FLAG_RESTARTABLE)
int xe_vm_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
@@ -2092,6 +2178,11 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT))
return -EINVAL;
+ if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_RESTARTABLE &&
+ (!(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) ||
+ args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)))
+ return -EINVAL;
+
if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
flags |= XE_VM_FLAG_SCRATCH_PAGE;
if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
@@ -2100,6 +2191,8 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
flags |= XE_VM_FLAG_FAULT_MODE;
if (args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)
flags |= XE_VM_FLAG_NO_VM_OVERCOMMIT;
+ if (args->flags & DRM_XE_VM_CREATE_FLAG_RESTARTABLE)
+ flags |= XE_VM_FLAG_RESTARTABLE;
vm = xe_vm_create(xe, flags, xef);
if (IS_ERR(vm))
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index c5b900f38ded..9ee44599cacd 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -212,7 +212,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data,
int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
int xe_vm_get_property_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
-
+int xe_vm_restart_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
void xe_vm_close_and_put(struct xe_vm *vm);
static inline bool xe_vm_in_fault_mode(struct xe_vm *vm)
@@ -237,6 +238,11 @@ static inline bool xe_vm_allow_vm_eviction(struct xe_vm *vm)
!(vm->flags & XE_VM_FLAG_NO_VM_OVERCOMMIT));
}
+static inline bool xe_vm_is_restartable(struct xe_vm *vm)
+{
+ return vm->flags & XE_VM_FLAG_RESTARTABLE;
+}
+
int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 635ed29b9a69..7d295c3b8456 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -264,6 +264,7 @@ struct xe_vm {
#define XE_VM_FLAG_SET_TILE_ID(tile) FIELD_PREP(GENMASK(7, 6), (tile)->id)
#define XE_VM_FLAG_GSC BIT(8)
#define XE_VM_FLAG_NO_VM_OVERCOMMIT BIT(9)
+#define XE_VM_FLAG_RESTARTABLE BIT(10)
unsigned long flags;
/**
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 48e9f1fdb78d..bebb0167bd31 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -85,6 +85,7 @@ extern "C" {
* - &DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS
* - &DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY
* - &DRM_IOCTL_XE_VM_GET_PROPERTY
+ * - &DRM_IOCTL_XE_VM_RESTART
*/
/*
@@ -110,6 +111,7 @@ extern "C" {
#define DRM_XE_VM_QUERY_MEM_RANGE_ATTRS 0x0d
#define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x0e
#define DRM_XE_VM_GET_PROPERTY 0x0f
+#define DRM_XE_VM_RESTART 0x10
/* Must be kept compact -- no holes */
@@ -129,6 +131,7 @@ extern "C" {
#define DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_QUERY_MEM_RANGE_ATTRS, struct drm_xe_vm_query_mem_range_attr)
#define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property)
#define DRM_IOCTL_XE_VM_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_GET_PROPERTY, struct drm_xe_vm_get_property)
+#define DRM_IOCTL_XE_VM_RESTART DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_RESTART, struct drm_xe_vm_restart)
/**
* DOC: Xe IOCTL Extensions
@@ -985,6 +988,10 @@ struct drm_xe_gem_mmap_offset {
* but only during a &DRM_IOCTL_XE_VM_BIND operation with the
* %DRM_XE_VM_BIND_FLAG_IMMEDIATE flag set. This may be useful for
* user-space naively probing the amount of available memory.
+ * - %DRM_XE_VM_CREATE_FLAG_RESTARTABLE - Requires also
+ * DRM_XE_VM_CREATE_FLAG_LR_MODE. Marks the VM as restartable, enabling
+ * use of &DRM_IOCTL_XE_VM_RESTART to resume the preempt-rebind worker
+ * after an error has paused it.
*/
struct drm_xe_vm_create {
/** @extensions: Pointer to the first extension struct, if any */
@@ -994,6 +1001,7 @@ struct drm_xe_vm_create {
#define DRM_XE_VM_CREATE_FLAG_LR_MODE (1 << 1)
#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE (1 << 2)
#define DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT (1 << 3)
+#define DRM_XE_VM_CREATE_FLAG_RESTARTABLE (1 << 4)
/** @flags: Flags */
__u32 flags;
@@ -2531,8 +2539,44 @@ struct drm_xe_exec_queue_set_property {
};
/**
- * DOC: Xe DRM RAS
+ * DOC: DRM_XE_VM_RESTART
+ *
+ * Restart a paused VM by queuing its preempt-rebind worker. The VM must be
+ * in preempt-fence mode and must currently be paused (i.e. its rebind worker
+ * was deactivated after a recoverable error such as -ENOMEM or -ENOSPC).
+ *
+ * Returns 0 if the rebind worker was successfully queued. Returns -EALREADY
+ * if the VM is not currently paused. Returns -EINVAL if the VM is not in
+ * preempt-fence mode or not restartable.
*
+ * An optional @timestamp_ns can be provided to measure the latency between
+ * event delivery and the point the vm lock is taken; the driver logs this
+ * even if the VM then turns out not to be paused.
+ */
+
+/**
+ * struct drm_xe_vm_restart - restart a VM's preempt-rebind worker
+ *
+ * Used with %DRM_IOCTL_XE_VM_RESTART.
+ */
+struct drm_xe_vm_restart {
+ /** @vm_id: ID of the VM to restart */
+ __u32 vm_id;
+ /** @pad: reserved, must be zero */
+ __u32 pad;
+ /**
+ * @timestamp_ns: optional CLOCK_MONOTONIC timestamp in nanoseconds.
+ * When non-zero, the driver logs the delay between this timestamp and
+ * the point the vm lock is taken, regardless of whether the VM is
+ * currently paused. Pass zero to disable the logging.
+ */
+ __u64 timestamp_ns;
+ /** @reserved: reserved, must be zero */
+ __u64 reserved;
+};
+
+/**
+ * DOC: Xe DRM RAS
* The enums and strings defined below map to the attributes of the DRM RAS Netlink Interface.
* Refer to Documentation/netlink/specs/drm_ras.yaml for complete interface specification.
*
---
drivers/gpu/drm/xe/xe_device.c | 1 +
drivers/gpu/drm/xe/xe_vm.c | 99 +++++++++++++++++++++++++++++++-
drivers/gpu/drm/xe/xe_vm.h | 8 ++-
drivers/gpu/drm/xe/xe_vm_types.h | 1 +
include/uapi/drm/xe_drm.h | 46 ++++++++++++++-
5 files changed, 150 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 51e3a2dd7b22..867d7c55dc03 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -215,6 +215,7 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(XE_VM_GET_PROPERTY, xe_vm_get_property_ioctl,
DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_RESTART, xe_vm_restart_ioctl, DRM_RENDER_ALLOW),
};
static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 75841f3e9afa..86ed8f31a219 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -563,8 +563,14 @@ static void preempt_rebind_work_func(struct work_struct *w)
}
if (err) {
- drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
- xe_vm_kill(vm, true);
+ if ((err == -ENOMEM || err == -ENOSPC) && xe_vm_is_restartable(vm)) {
+ vm->preempt.rebind_deactivated = true;
+ drm_dbg(&vm->xe->drm, "Rebind deactivated VM on error %pe\n",
+ ERR_PTR(err));
+ } else {
+ drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
+ xe_vm_kill(vm, true);
+ }
}
up_write(&vm->lock);
@@ -573,6 +579,85 @@ static void preempt_rebind_work_func(struct work_struct *w)
trace_xe_vm_rebind_worker_exit(vm);
}
+/**
+ * xe_vm_restart_ioctl() - Queue the preempt-rebind worker for a paused VM
+ * @dev: DRM device
+ * @data: pointer to &struct drm_xe_vm_restart from userspace
+ * @file: DRM file handle
+ *
+ * Looks up the VM identified by @vm_id and, if it is currently paused (its
+ * rebind worker was deactivated after a recoverable error), clears the paused
+ * state and queues the rebind worker. Only valid for VMs in preempt-fence
+ * mode.
+ *
+ * If @timestamp_ns is non-zero, logs the latency between that timestamp and
+ * the point the vm lock is taken, regardless of whether the VM was paused.
+ *
+ * Return: 0 if the worker was queued, -EALREADY if the VM is not paused,
+ * -EINVAL if the VM is not in preempt-fence mode or not restartable,
+ * -ENOENT if the VM was not found.
+ */
+int xe_vm_restart_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_vm_restart *args = data;
+ struct xe_vm *vm;
+ int err = 0;
+
+ if (XE_IOCTL_DBG(xe, args->reserved || args->pad))
+ return -EINVAL;
+
+ vm = xe_vm_lookup(xef, args->vm_id);
+ if (XE_IOCTL_DBG(xe, !vm))
+ return -ENOENT;
+
+ if (XE_IOCTL_DBG(xe, !xe_vm_in_preempt_fence_mode(vm))) {
+ xe_vm_put(vm);
+ return -EINVAL;
+ }
+
+ if (XE_IOCTL_DBG(xe, !xe_vm_is_restartable(vm))) {
+ xe_vm_put(vm);
+ return -EINVAL;
+ }
+
+ err = down_read_interruptible(&vm->lock);
+ if (err)
+ goto out;
+
+ if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
+ err = -ENOENT;
+ goto out_unlock_read;
+ }
+
+ if (args->timestamp_ns) {
+ u64 delay_us = (ktime_get_ns() - args->timestamp_ns) / NSEC_PER_USEC;
+
+ drm_dbg(&xe->drm, "VM %u restart latency: %llu us\n",
+ args->vm_id, delay_us);
+ }
+
+ err = xe_vm_lock(vm, true);
+ if (err)
+ goto out_unlock_read;
+
+ if (!vm->preempt.rebind_deactivated) {
+ err = -EALREADY;
+ goto out_unlock_resv;
+ }
+
+ xe_vm_reactivate_rebind(vm);
+out_unlock_resv:
+ xe_vm_unlock(vm);
+out_unlock_read:
+ up_read(&vm->lock);
+out:
+ xe_vm_put(vm);
+ return err;
+}
+
/**
* xe_vm_add_fault_entry_pf() - Add pagefault to vm fault list
* @vm: The VM.
@@ -2049,7 +2134,8 @@ find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
DRM_XE_VM_CREATE_FLAG_LR_MODE | \
DRM_XE_VM_CREATE_FLAG_FAULT_MODE | \
- DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)
+ DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT | \
+ DRM_XE_VM_CREATE_FLAG_RESTARTABLE)
int xe_vm_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
@@ -2092,6 +2178,11 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT))
return -EINVAL;
+ if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_RESTARTABLE &&
+ (!(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) ||
+ args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)))
+ return -EINVAL;
+
if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
flags |= XE_VM_FLAG_SCRATCH_PAGE;
if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
@@ -2100,6 +2191,8 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
flags |= XE_VM_FLAG_FAULT_MODE;
if (args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)
flags |= XE_VM_FLAG_NO_VM_OVERCOMMIT;
+ if (args->flags & DRM_XE_VM_CREATE_FLAG_RESTARTABLE)
+ flags |= XE_VM_FLAG_RESTARTABLE;
vm = xe_vm_create(xe, flags, xef);
if (IS_ERR(vm))
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index c5b900f38ded..9ee44599cacd 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -212,7 +212,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data,
int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
int xe_vm_get_property_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
-
+int xe_vm_restart_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
void xe_vm_close_and_put(struct xe_vm *vm);
static inline bool xe_vm_in_fault_mode(struct xe_vm *vm)
@@ -237,6 +238,11 @@ static inline bool xe_vm_allow_vm_eviction(struct xe_vm *vm)
!(vm->flags & XE_VM_FLAG_NO_VM_OVERCOMMIT));
}
+static inline bool xe_vm_is_restartable(struct xe_vm *vm)
+{
+ return vm->flags & XE_VM_FLAG_RESTARTABLE;
+}
+
int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 635ed29b9a69..7d295c3b8456 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -264,6 +264,7 @@ struct xe_vm {
#define XE_VM_FLAG_SET_TILE_ID(tile) FIELD_PREP(GENMASK(7, 6), (tile)->id)
#define XE_VM_FLAG_GSC BIT(8)
#define XE_VM_FLAG_NO_VM_OVERCOMMIT BIT(9)
+#define XE_VM_FLAG_RESTARTABLE BIT(10)
unsigned long flags;
/**
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 48e9f1fdb78d..bebb0167bd31 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -85,6 +85,7 @@ extern "C" {
* - &DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS
* - &DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY
* - &DRM_IOCTL_XE_VM_GET_PROPERTY
+ * - &DRM_IOCTL_XE_VM_RESTART
*/
/*
@@ -110,6 +111,7 @@ extern "C" {
#define DRM_XE_VM_QUERY_MEM_RANGE_ATTRS 0x0d
#define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x0e
#define DRM_XE_VM_GET_PROPERTY 0x0f
+#define DRM_XE_VM_RESTART 0x10
/* Must be kept compact -- no holes */
@@ -129,6 +131,7 @@ extern "C" {
#define DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_QUERY_MEM_RANGE_ATTRS, struct drm_xe_vm_query_mem_range_attr)
#define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property)
#define DRM_IOCTL_XE_VM_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_GET_PROPERTY, struct drm_xe_vm_get_property)
+#define DRM_IOCTL_XE_VM_RESTART DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_RESTART, struct drm_xe_vm_restart)
/**
* DOC: Xe IOCTL Extensions
@@ -985,6 +988,10 @@ struct drm_xe_gem_mmap_offset {
* but only during a &DRM_IOCTL_XE_VM_BIND operation with the
* %DRM_XE_VM_BIND_FLAG_IMMEDIATE flag set. This may be useful for
* user-space naively probing the amount of available memory.
+ * - %DRM_XE_VM_CREATE_FLAG_RESTARTABLE - Requires also
+ * DRM_XE_VM_CREATE_FLAG_LR_MODE. Marks the VM as restartable, enabling
+ * use of &DRM_IOCTL_XE_VM_RESTART to resume the preempt-rebind worker
+ * after an error has paused it.
*/
struct drm_xe_vm_create {
/** @extensions: Pointer to the first extension struct, if any */
@@ -994,6 +1001,7 @@ struct drm_xe_vm_create {
#define DRM_XE_VM_CREATE_FLAG_LR_MODE (1 << 1)
#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE (1 << 2)
#define DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT (1 << 3)
+#define DRM_XE_VM_CREATE_FLAG_RESTARTABLE (1 << 4)
/** @flags: Flags */
__u32 flags;
@@ -2531,8 +2539,44 @@ struct drm_xe_exec_queue_set_property {
};
/**
- * DOC: Xe DRM RAS
+ * DOC: DRM_XE_VM_RESTART
+ *
+ * Restart a paused VM by queuing its preempt-rebind worker. The VM must be
+ * in preempt-fence mode and must currently be paused (i.e. its rebind worker
+ * was deactivated after a recoverable error such as -ENOMEM or -ENOSPC).
+ *
+ * Returns 0 if the rebind worker was successfully queued. Returns -EALREADY
+ * if the VM is not currently paused. Returns -EINVAL if the VM is not in
+ * preempt-fence mode or not restartable.
*
+ * An optional @timestamp_ns can be provided to measure the latency between
+ * event delivery and the point the vm lock is taken; the driver logs this
+ * even if the VM then turns out not to be paused.
+ */
+
+/**
+ * struct drm_xe_vm_restart - restart a VM's preempt-rebind worker
+ *
+ * Used with %DRM_IOCTL_XE_VM_RESTART.
+ */
+struct drm_xe_vm_restart {
+ /** @vm_id: ID of the VM to restart */
+ __u32 vm_id;
+ /** @pad: reserved, must be zero */
+ __u32 pad;
+ /**
+ * @timestamp_ns: optional CLOCK_MONOTONIC timestamp in nanoseconds.
+ * When non-zero, the driver logs the delay between this timestamp and
+ * the point the vm lock is taken, regardless of whether the VM is
+ * currently paused. Pass zero to disable the logging.
+ */
+ __u64 timestamp_ns;
+ /** @reserved: reserved, must be zero */
+ __u64 reserved;
+};
+
+/**
+ * DOC: Xe DRM RAS
* The enums and strings defined below map to the attributes of the DRM RAS Netlink Interface.
* Refer to Documentation/netlink/specs/drm_ras.yaml for complete interface specification.
*
--
2.54.0
next prev parent reply other threads:[~2026-06-12 13:54 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-12 13:53 [RFC PATCH 0/4] Xe driver asynchronous notification mechanism Thomas Hellström
2026-06-12 13:53 ` Thomas Hellström [this message]
2026-06-12 13:53 ` [PATCH 2/4] drm/xe: Add fault injection for rebind worker -ENOSPC Thomas Hellström
2026-06-12 13:53 ` [PATCH 3/4] watch_queue: Add a DRM_XE_NOTIFY watch type and export init_watch() Thomas Hellström
2026-06-12 13:53 ` [PATCH 4/4] drm/xe: Add watch_queue-based device event notification Thomas Hellström
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260612135340.116100-2-thomas.hellstrom@linux.intel.com \
--to=thomas.hellstrom@linux.intel.com \
--cc=airlied@gmail.com \
--cc=brauner@kernel.org \
--cc=christian.koenig@amd.com \
--cc=dave@stgolabs.net \
--cc=dhowells@redhat.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=intel-xe@lists.freedesktop.org \
--cc=john.falkowski@intel.com \
--cc=joonas.lahtinen@linux.intel.com \
--cc=kees@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=maarten.lankhorst@linux.intel.com \
--cc=matthew.brost@intel.com \
--cc=michal.mrozek@intel.com \
--cc=rodrigo.vivi@intel.com \
--cc=simona.vetter@ffwll.ch \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.