From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: intel-xe@lists.freedesktop.org
Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
"Matthew Brost" <matthew.brost@intel.com>,
"Maarten Lankhorst" <maarten.lankhorst@linux.intel.com>,
"Michal Mrozek" <michal.mrozek@intel.com>,
"John Falkowski" <john.falkowski@intel.com>,
"Rodrigo Vivi" <rodrigo.vivi@intel.com>,
"Lahtinen Joonas" <joonas.lahtinen@intel.com>,
"David Howells" <dhowells@redhat.com>,
"Christian Brauner" <brauner@kernel.org>,
"Kees Cook" <kees@kernel.org>,
"Davidlohr Bueso" <dave@stgolabs.net>,
"Christian König" <christian.koenig@amd.com>,
"Dave Airlie" <airlied@gmail.com>,
"Simona Vetter" <simona.vetter@ffwll.ch>,
dri-devel@lists.freedesktop.org,
LKML <linux-kernel@vger.kernel.org>
Subject: [RFC PATCH RESEND 2/2] drm/xe: Add watch_queue-based device event notification
Date: Tue, 10 Mar 2026 16:57:41 +0100 [thread overview]
Message-ID: <20260310155741.87191-3-thomas.hellstrom@linux.intel.com> (raw)
In-Reply-To: <20260310155741.87191-1-thomas.hellstrom@linux.intel.com>
Add a watch_queue notification channel tied to struct xe_file so that
userspace can subscribe to asynchronous GPU device events via the
general kernel notification mechanism.
Introduce DRM_IOCTL_XE_WATCH_QUEUE to let userspace subscribe a
notification pipe (opened with pipe2(O_NOTIFICATION_PIPE)) to the device
event stream. Embed the watch_id field (0-255) in the WATCH_INFO_ID
field of every notification, allowing multiple watches to share a single
pipe and be told apart by the reader.
Deliver notifications as event-specific records that extend struct
watch_notification, with type always set to WATCH_TYPE_DRM_XE_NOTIFY and
subtype drawn from enum drm_xe_watch_event. Define
DRM_XE_WATCH_EVENT_VM_ERR as the first event, posted by the preempt
rebind worker when it fails, with the VM id and the error code carried in
struct drm_xe_watch_notification_vm_err. Expose
xe_watch_queue_post_vm_err_event() as the in-kernel posting API.
Add event definitions in a separate uapi header, <drm/xe_drm_events.h>.
The main reason is that the header needs to include <linux/watch_queue.h>
which in turn includes <linux/fcntl.h> which may conflict with the
system <fcntl.h>. Hence user-space must pay special attention when including
this file.
Assisted-by: N/A:claude-sonnet-4.6 Tool1 Tool2
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/xe/Kconfig | 1 +
drivers/gpu/drm/xe/Makefile | 1 +
drivers/gpu/drm/xe/xe_device.c | 7 ++
drivers/gpu/drm/xe/xe_device_types.h | 6 ++
drivers/gpu/drm/xe/xe_vm.c | 7 +-
drivers/gpu/drm/xe/xe_vm_types.h | 2 +
drivers/gpu/drm/xe/xe_watch_queue.c | 107 +++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_watch_queue.h | 20 +++++
include/uapi/drm/xe_drm.h | 46 ++++++++++++
include/uapi/drm/xe_drm_events.h | 56 ++++++++++++++
10 files changed, 251 insertions(+), 2 deletions(-)
create mode 100644 drivers/gpu/drm/xe/xe_watch_queue.c
create mode 100644 drivers/gpu/drm/xe/xe_watch_queue.h
create mode 100644 include/uapi/drm/xe_drm_events.h
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 4d7dcaff2b91..dbdc2fb49c53 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -25,6 +25,7 @@ config DRM_XE
select DRM_MIPI_DSI
select RELAY
select IRQ_WORK
+ select WATCH_QUEUE
# xe depends on ACPI_VIDEO when ACPI is enabled
# but for select to work, need to select ACPI_VIDEO's dependencies, ick
select BACKLIGHT_CLASS_DEVICE if ACPI
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index ff778fb2d4ff..1129583865ad 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -144,6 +144,7 @@ xe-y += xe_bb.o \
xe_vsec.o \
xe_wa.o \
xe_wait_user_fence.o \
+ xe_watch_queue.o \
xe_wopcm.o
xe-$(CONFIG_I2C) += xe_i2c.o
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 3462645ca13c..89bc221546ce 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -9,6 +9,7 @@
#include <linux/delay.h>
#include <linux/fault-inject.h>
#include <linux/units.h>
+#include <linux/watch_queue.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_client.h>
@@ -75,6 +76,7 @@
#include "xe_vsec.h"
#include "xe_wait_user_fence.h"
#include "xe_wa.h"
+#include "xe_watch_queue.h"
#include <generated/xe_device_wa_oob.h>
#include <generated/xe_wa_oob.h>
@@ -110,6 +112,8 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
file->driver_priv = xef;
kref_init(&xef->refcount);
+ init_watch_list(&xef->watch_list, NULL);
+
task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
if (task) {
xef->process_name = kstrdup(task->comm, GFP_KERNEL);
@@ -124,6 +128,8 @@ static void xe_file_destroy(struct kref *ref)
{
struct xe_file *xef = container_of(ref, struct xe_file, refcount);
+ remove_watch_from_object(&xef->watch_list, NULL, 0, true);
+
xa_destroy(&xef->exec_queue.xa);
mutex_destroy(&xef->exec_queue.lock);
xa_destroy(&xef->vm.xa);
@@ -211,6 +217,7 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl,
DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_WATCH_QUEUE, xe_watch_queue_ioctl, DRM_RENDER_ALLOW),
};
static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index caa8f34a6744..a42e6125c069 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -11,6 +11,7 @@
#include <drm/drm_device.h>
#include <drm/drm_file.h>
#include <drm/ttm/ttm_device.h>
+#include <linux/watch_queue.h>
#include "xe_devcoredump_types.h"
#include "xe_heci_gsc.h"
@@ -629,6 +630,11 @@ struct xe_file {
/** @refcount: ref count of this xe file */
struct kref refcount;
+
+#ifdef CONFIG_WATCH_QUEUE
+ /** @watch_list: per-file notification source for device events */
+ struct watch_list watch_list;
+#endif
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 548b0769b3ef..1f331a2b2ecc 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -13,6 +13,7 @@
#include <drm/drm_print.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
+#include <uapi/drm/xe_drm_events.h>
#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
@@ -40,6 +41,7 @@
#include "xe_tlb_inval.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"
+#include "xe_watch_queue.h"
static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
@@ -567,13 +569,13 @@ static void preempt_rebind_work_func(struct work_struct *w)
}
if (err) {
- drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
+ xe_watch_queue_post_vm_err_event(vm->xef, vm->id, err);
+ drm_dbg(&vm->xe->drm, "VM worker error: %d\n", err);
xe_vm_kill(vm, true);
}
up_write(&vm->lock);
free_preempt_fences(&preempt_fences);
-
trace_xe_vm_rebind_worker_exit(vm);
}
@@ -2008,6 +2010,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
if (err)
goto err_close_and_put;
+ vm->id = id;
args->vm_id = id;
return 0;
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 1f6f7e30e751..df559cf87b4c 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -365,6 +365,8 @@ struct xe_vm {
bool batch_invalidate_tlb;
/** @xef: Xe file handle for tracking this VM's drm client */
struct xe_file *xef;
+ /** @id: The id of the VM in the VM table of @xef. */
+ u32 id;
};
/** struct xe_vma_op_map - VMA map operation */
diff --git a/drivers/gpu/drm/xe/xe_watch_queue.c b/drivers/gpu/drm/xe/xe_watch_queue.c
new file mode 100644
index 000000000000..14c93cdebefe
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_watch_queue.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#include <linux/slab.h>
+#include <linux/watch_queue.h>
+
+#include <uapi/drm/xe_drm.h>
+#include <uapi/drm/xe_drm_events.h>
+
+#include "xe_device.h"
+#include "xe_device_types.h"
+#include "xe_macros.h"
+#include "xe_watch_queue.h"
+
+/**
+ * struct xe_watch_notification_vm_err - kernel-side VM error event notification
+ * @base: common watch notification header; type is %WATCH_TYPE_DRM_XE_NOTIFY,
+ * subtype is %DRM_XE_WATCH_EVENT_VM_ERR
+ * @vm_id: ID of the VM that hit the error
+ * @error_code: error code describing the error condition (negative errno)
+ *
+ * In-kernel record filled in by xe_watch_queue_post_vm_err_event(). Its layout
+ * mirrors the uapi &struct drm_xe_watch_notification_vm_err.
+ */
+struct xe_watch_notification_vm_err {
+ struct watch_notification base;
+ u32 vm_id;
+ s32 error_code;
+};
+
+/**
+ * xe_watch_queue_ioctl() - Subscribe a pipe to per-file device event notifications
+ * @dev: DRM device
+ * @data: pointer to &struct drm_xe_watch_queue from userspace
+ * @file: DRM file handle of the subscribing process
+ *
+ * Adds a watch on the notification pipe named by the fd argument to the watch
+ * list of the calling file, so that only events posted for this file (e.g. an
+ * error on a VM it owns) are delivered. The pipe must have been opened with
+ * O_NOTIFICATION_PIPE and sized with %IOC_WATCH_QUEUE_SET_SIZE beforehand.
+ *
+ * Return: 0 on success; -EINVAL if flags or pad is nonzero or watch_id exceeds
+ * 255; -ENOMEM on allocation failure; otherwise the watch_queue helper's error.
+ */
+int xe_watch_queue_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct xe_file *xef = file->driver_priv;
+ struct xe_device *xe = to_xe_device(dev);
+ struct drm_xe_watch_queue *args = data;
+ struct watch_queue *wqueue;
+ struct watch *watch;
+ int ret;
+
+ if (XE_IOCTL_DBG(xe, args->flags || args->pad))
+ return -EINVAL;
+ if (XE_IOCTL_DBG(xe, args->watch_id > 0xff))
+ return -EINVAL;
+
+ wqueue = get_watch_queue(args->fd);
+ if (XE_IOCTL_DBG(xe, IS_ERR(wqueue)))
+ return PTR_ERR(wqueue);
+
+ watch = kzalloc(sizeof(*watch), GFP_KERNEL | __GFP_ACCOUNT);
+ if (XE_IOCTL_DBG(xe, !watch)) {
+ ret = -ENOMEM;
+ goto out_put_queue;
+ }
+
+ init_watch(watch, wqueue);
+ watch->id = 0;
+ watch->info_id = (u32)args->watch_id << WATCH_INFO_ID__SHIFT;
+ /* NOTE(review): no lock is held here; confirm concurrent subscribers are safe */
+ ret = add_watch_to_object(watch, &xef->watch_list);
+ if (XE_IOCTL_DBG(xe, ret))
+ kfree(watch);
+ /* Common exit for success and failure: the queue looked up above is always put. */
+out_put_queue:
+ put_watch_queue(wqueue);
+ return ret;
+}
+
+/**
+ * xe_watch_queue_post_vm_err_event() - Post a VM error event
+ * @xef: xe file handle that owns the VM
+ * @vm_id: userspace ID of the VM that hit the error
+ * @error_code: error code describing the error condition (negative errno)
+ *
+ * Builds a zero-initialised %DRM_XE_WATCH_EVENT_VM_ERR record carrying @vm_id
+ * and @error_code and posts it on the watch list of @xef, i.e. to the pipes
+ * that file subscribed via %DRM_IOCTL_XE_WATCH_QUEUE. Other files' watch
+ * lists are not touched, so the event is not leaked to other clients.
+ */
+void xe_watch_queue_post_vm_err_event(struct xe_file *xef, u32 vm_id,
+ int error_code)
+{
+ struct xe_watch_notification_vm_err n = {};
+
+ n.base.type = WATCH_TYPE_DRM_XE_NOTIFY;
+ n.base.subtype = DRM_XE_WATCH_EVENT_VM_ERR;
+ n.base.info = watch_sizeof(struct xe_watch_notification_vm_err);
+ n.vm_id = vm_id;
+ n.error_code = error_code;
+ /* NOTE(review): current_cred() is the posting context's credential - confirm intended */
+ post_watch_notification(&xef->watch_list, &n.base, current_cred(), 0);
+}
diff --git a/drivers/gpu/drm/xe/xe_watch_queue.h b/drivers/gpu/drm/xe/xe_watch_queue.h
new file mode 100644
index 000000000000..ad199ee68205
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_watch_queue.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef _XE_WATCH_QUEUE_H_
+#define _XE_WATCH_QUEUE_H_
+
+#include <linux/types.h>
+
+struct drm_device;
+struct drm_file;
+struct xe_file;
+
+int xe_watch_queue_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+void xe_watch_queue_post_vm_err_event(struct xe_file *xef, u32 vm_id,
+ int error_code);
+
+#endif /* _XE_WATCH_QUEUE_H_ */
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index ef2565048bdf..bc3917700c82 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -83,6 +83,7 @@ extern "C" {
* - &DRM_IOCTL_XE_OBSERVATION
* - &DRM_IOCTL_XE_MADVISE
* - &DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS
+ * - &DRM_IOCTL_XE_WATCH_QUEUE
*/
/*
@@ -107,6 +108,7 @@ extern "C" {
#define DRM_XE_MADVISE 0x0c
#define DRM_XE_VM_QUERY_MEM_RANGE_ATTRS 0x0d
#define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x0e
+#define DRM_XE_WATCH_QUEUE 0x0f
/* Must be kept compact -- no holes */
@@ -125,6 +127,7 @@ extern "C" {
#define DRM_IOCTL_XE_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_MADVISE, struct drm_xe_madvise)
#define DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_QUERY_MEM_RANGE_ATTRS, struct drm_xe_vm_query_mem_range_attr)
#define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property)
+#define DRM_IOCTL_XE_WATCH_QUEUE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_WATCH_QUEUE, struct drm_xe_watch_queue)
/**
* DOC: Xe IOCTL Extensions
@@ -2357,6 +2360,49 @@ struct drm_xe_exec_queue_set_property {
__u64 reserved[2];
};
+/**
+ * DOC: DRM_XE_WATCH_QUEUE
+ *
+ * Subscribe a notification pipe to receive device events for the calling
+ * process's DRM file handle. Events are scoped to the subscribing file:
+ * only events that belong to that file (for example, VM error on a VM created
+ * through the same file) are delivered, preventing information leaks between
+ * processes sharing the same GPU device.
+ *
+ * The pipe must first be opened with O_NOTIFICATION_PIPE (i.e. O_EXCL passed
+ * to pipe2()) and sized via %IOC_WATCH_QUEUE_SET_SIZE before subscribing.
+ *
+ * Events are delivered as notification records read from the pipe. The
+ * @watch_id field is embedded in the notification info field and can be used
+ * to distinguish multiple watches sharing a pipe.
+ *
+ * Currently defined event subtypes:
+ * - %DRM_XE_WATCH_EVENT_VM_ERR - a VM owned by this file has encountered an error
+ */
+
+/**
+ * struct drm_xe_watch_queue - subscribe to device event notifications
+ *
+ * Argument of %DRM_IOCTL_XE_WATCH_QUEUE. Notifications are scoped to the
+ * DRM file handle on which the IOCTL is issued.
+ */
+struct drm_xe_watch_queue {
+ /** @fd: file descriptor of pipe opened with O_NOTIFICATION_PIPE */
+ __u32 fd;
+
+ /**
+ * @watch_id: identifier (0-255, larger values give -EINVAL) embedded in the
+ * notification info field so several watches can share one pipe
+ */
+ __u32 watch_id;
+
+ /** @flags: must be zero, otherwise the IOCTL fails with -EINVAL */
+ __u32 flags;
+
+ /** @pad: reserved, must be zero, otherwise the IOCTL fails with -EINVAL */
+ __u32 pad;
+};
+
#if defined(__cplusplus)
}
#endif
diff --git a/include/uapi/drm/xe_drm_events.h b/include/uapi/drm/xe_drm_events.h
new file mode 100644
index 000000000000..91813548ae01
--- /dev/null
+++ b/include/uapi/drm/xe_drm_events.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef _UAPI_XE_DRM_EVENTS_H_
+#define _UAPI_XE_DRM_EVENTS_H_
+
+#include <linux/types.h>
+#include <linux/watch_queue.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/**
+ * enum drm_xe_watch_event - Xe device watch event subtypes
+ *
+ * Subtypes for notifications delivered via %WATCH_TYPE_DRM_XE_NOTIFY when
+ * reading from a pipe subscribed with %DRM_IOCTL_XE_WATCH_QUEUE.
+ */
+enum drm_xe_watch_event {
+ /**
+ * @DRM_XE_WATCH_EVENT_VM_ERR: a VM has encountered an error.
+ *
+ * Delivered as an extended notification record,
+ * &struct drm_xe_watch_notification_vm_err, which carries the ID of
+ * the affected VM in its vm_id field and the error (a negative
+ * errno) in its error_code field.
+ */
+ DRM_XE_WATCH_EVENT_VM_ERR = 0,
+};
+
+/**
+ * struct drm_xe_watch_notification_vm_err - VM error event notification
+ *
+ * Extended notification record delivered for %DRM_XE_WATCH_EVENT_VM_ERR.
+ * The record type is always %WATCH_TYPE_DRM_XE_NOTIFY and the subtype is
+ * %DRM_XE_WATCH_EVENT_VM_ERR.
+ */
+struct drm_xe_watch_notification_vm_err {
+ /** @base: common watch notification header */
+ struct watch_notification base;
+
+ /** @vm_id: ID of the VM that hit the error */
+ __u32 vm_id;
+
+ /** @error_code: error code describing the error condition (negative errno) */
+ __s32 error_code;
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _UAPI_XE_DRM_EVENTS_H_ */
--
2.53.0
prev parent reply other threads:[~2026-03-10 15:58 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-10 15:57 [RFC PATCH RESEND 0/2] Xe driver asynchronous notification mechanism Thomas Hellström
2026-03-10 15:57 ` [RFC PATCH RESEND 1/2] watch_queue: Add a DRM_XE_NOTIFY watch type and export init_watch() Thomas Hellström
2026-03-18 19:54 ` Watch queue maintainer? WAS:[RFC " Thomas Hellström
2026-03-10 15:57 ` Thomas Hellström [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260310155741.87191-3-thomas.hellstrom@linux.intel.com \
--to=thomas.hellstrom@linux.intel.com \
--cc=airlied@gmail.com \
--cc=brauner@kernel.org \
--cc=christian.koenig@amd.com \
--cc=dave@stgolabs.net \
--cc=dhowells@redhat.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=intel-xe@lists.freedesktop.org \
--cc=john.falkowski@intel.com \
--cc=joonas.lahtinen@intel.com \
--cc=kees@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=maarten.lankhorst@linux.intel.com \
--cc=matthew.brost@intel.com \
--cc=michal.mrozek@intel.com \
--cc=rodrigo.vivi@intel.com \
--cc=simona.vetter@ffwll.ch \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox