From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: intel-xe@lists.freedesktop.org
Cc: simona.vetter@ffwll.ch, matthew.brost@intel.com,
christian.koenig@amd.com, thomas.hellstrom@linux.intel.com,
joonas.lahtinen@linux.intel.com, christoph.manszewski@intel.com,
rodrigo.vivi@intel.com, andrzej.hajda@intel.com,
matthew.auld@intel.com, maciej.patelczyk@intel.com,
gwan-gyeong.mun@intel.com,
Mika Kuoppala <mika.kuoppala@linux.intel.com>
Subject: [PATCH 07/20] drm/xe/eudebug: Introduce vm bind and vm bind debug data events
Date: Tue, 2 Dec 2025 15:52:26 +0200 [thread overview]
Message-ID: <20251202135241.880267-8-mika.kuoppala@linux.intel.com> (raw)
In-Reply-To: <20251202135241.880267-1-mika.kuoppala@linux.intel.com>
From: Christoph Manszewski <christoph.manszewski@intel.com>
This patch adds events to track the bind ioctl and the associated debug
data add and remove operations, since a single bind can involve multiple
operations and may fail mid-process.
Add a bind event to signal to the debugger when a bind operation is
executed. Further, add debug data add and remove operations so the
debugger can keep track of the regions where they reside. The bind event
is important because we will want to introduce a ufence event later in
the series and tie it to this bind.
Only deliver the bind event and its operations to the debugger if the
vm bind op execution chain succeeds.
Signed-off-by: Christoph Manszewski <christoph.manszewski@intel.com>
Co-developed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
drivers/gpu/drm/xe/xe_eudebug.c | 221 +++++++++++++++++++++++++-
drivers/gpu/drm/xe/xe_eudebug.h | 7 +
drivers/gpu/drm/xe/xe_eudebug_types.h | 2 +-
drivers/gpu/drm/xe/xe_vm.c | 4 +
include/uapi/drm/xe_drm_eudebug.h | 50 ++++++
5 files changed, 279 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_eudebug.c b/drivers/gpu/drm/xe/xe_eudebug.c
index b8a9462eed17..3f3654f4a700 100644
--- a/drivers/gpu/drm/xe/xe_eudebug.c
+++ b/drivers/gpu/drm/xe/xe_eudebug.c
@@ -12,6 +12,7 @@
#include <uapi/drm/xe_drm.h>
#include "xe_assert.h"
+#include "xe_debug_data_types.h"
#include "xe_device.h"
#include "xe_eudebug.h"
#include "xe_eudebug_types.h"
@@ -841,6 +842,162 @@ void xe_eudebug_exec_queue_destroy(struct xe_file *xef, struct xe_exec_queue *q)
xe_eudebug_event_put(d, exec_queue_destroy_event(d, xef, q));
}
+static int send_vm_bind_event(struct xe_eudebug *d,
+ struct xe_vm *vm,
+ u64 vm_handle,
+ u32 bind_flags,
+ u32 num_ops, u64 *seqno)
+{
+ struct drm_xe_eudebug_event_vm_bind *e;
+ struct drm_xe_eudebug_event *event;
+ const u32 sz = sizeof(*e);
+ const u32 base_flags = DRM_XE_EUDEBUG_EVENT_STATE_CHANGE;
+
+ *seqno = atomic_long_inc_return(&d->events.seqno);
+
+ event = xe_eudebug_create_event(d, DRM_XE_EUDEBUG_EVENT_VM_BIND,
+ *seqno, base_flags, sz);
+ if (!event)
+ return -ENOMEM;
+
+ e = cast_event(e, event);
+
+ e->vm_handle = vm_handle;
+ e->flags = bind_flags;
+ e->num_bind_ops = num_ops;
+
+ return xe_eudebug_queue_event(d, event);
+}
+
+static int vm_bind_event(struct xe_eudebug *d,
+ struct xe_vm *vm,
+ u32 flags,
+ u32 num_ops,
+ u64 *seqno)
+{
+ int h_vm;
+
+ h_vm = find_handle(d->res, XE_EUDEBUG_RES_TYPE_VM, vm);
+ if (h_vm < 0)
+ return h_vm;
+
+ return send_vm_bind_event(d, vm, h_vm, flags,
+ num_ops, seqno);
+}
+
+static int vm_bind_op_event(struct xe_eudebug *d,
+ struct xe_vm *vm,
+ const u32 flags,
+ const u64 bind_ref_seqno,
+ const u64 num_extensions,
+ struct xe_debug_data *debug_data,
+ u64 *op_seqno)
+{
+ struct drm_xe_eudebug_event_vm_bind_op_debug_data *e;
+ struct drm_xe_eudebug_event *event;
+ const u32 sz = sizeof(*e);
+
+ *op_seqno = atomic_long_inc_return(&d->events.seqno);
+
+ event = xe_eudebug_create_event(d, DRM_XE_EUDEBUG_EVENT_VM_BIND_OP_DEBUG_DATA,
+ *op_seqno, flags, sz);
+ if (!event)
+ return -ENOMEM;
+
+ e = cast_event(e, event);
+
+ e->vm_bind_ref_seqno = bind_ref_seqno;
+ e->num_extensions = num_extensions;
+ e->addr = debug_data->addr;
+ e->range = debug_data->range;
+ e->flags = debug_data->flags;
+ e->offset = debug_data->offset;
+
+ if (debug_data->flags & DRM_XE_VM_BIND_DEBUG_DATA_FLAG_PSEUDO)
+ e->pseudopath = debug_data->pseudopath;
+ else
+ strscpy(e->pathname, debug_data->pathname, PATH_MAX);
+
+ return xe_eudebug_queue_event(d, event);
+}
+
+static int vm_bind_op(struct xe_eudebug *d, struct xe_vm *vm,
+ const u32 flags, const u64 bind_ref_seqno,
+ struct xe_debug_data *debug_data)
+{
+ u64 op_seqno = 0;
+ u64 num_extensions = 0;
+ int ret;
+
+ ret = vm_bind_op_event(d, vm, flags, bind_ref_seqno, num_extensions,
+ debug_data, &op_seqno);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+void xe_eudebug_vm_bind_execute(struct xe_vm *vm,
+ struct xe_vma_ops *ops)
+{
+ struct xe_eudebug *d;
+ struct xe_vma_op *op;
+ u64 bind_seqno = 0;
+ u32 num_ops;
+ int err;
+
+ if (!xe_vm_in_lr_mode(vm))
+ return;
+
+ d = xe_eudebug_get(vm->xef);
+ if (!d)
+ return;
+
+ num_ops = 0;
+ list_for_each_entry(op, &ops->list, link) {
+ if (op->base.op != DRM_GPUVA_OP_DRIVER)
+ continue;
+
+ if (op->subop == XE_VMA_SUBOP_ADD_DEBUG_DATA ||
+ op->subop == XE_VMA_SUBOP_REMOVE_DEBUG_DATA)
+ num_ops++;
+ }
+
+ lockdep_assert_held_write(&vm->lock);
+
+ err = vm_bind_event(d, vm, 0,
+ num_ops, &bind_seqno);
+ if (err)
+ goto out_err;
+
+ list_for_each_entry(op, &ops->list, link) {
+ u32 flags = 0;
+
+ if (op->base.op != DRM_GPUVA_OP_DRIVER)
+ continue;
+
+ if (op->subop == XE_VMA_SUBOP_ADD_DEBUG_DATA)
+ flags = DRM_XE_EUDEBUG_EVENT_CREATE;
+
+ if (op->subop == XE_VMA_SUBOP_REMOVE_DEBUG_DATA)
+ flags = DRM_XE_EUDEBUG_EVENT_DESTROY;
+
+ if (!flags)
+ continue;
+
+ err = vm_bind_op(d, vm, flags, bind_seqno,
+ &op->modify_debug_data.debug_data);
+ if (err)
+ goto out_err;
+ }
+
+out_err:
+ if (err)
+ xe_eudebug_disconnect(d, err);
+
+ xe_eudebug_put(d);
+}
+
static struct xe_file *xe_eudebug_target_get(struct xe_eudebug *d)
{
struct xe_file *xef = NULL;
@@ -853,19 +1010,67 @@ static struct xe_file *xe_eudebug_target_get(struct xe_eudebug *d)
return xef;
}
+static int vm_discover_binds(struct xe_eudebug *d, struct xe_vm *vm)
+{
+ struct xe_debug_data *dd;
+ struct list_head *pos;
+ unsigned int ops, count;
+ u64 ref_seqno;
+ int err;
+
+ if (list_empty(&vm->debug_data.list))
+ return 0;
+
+ count = 0;
+ list_for_each(pos, &vm->debug_data.list)
+ count++;
+
+ ops = count;
+ ref_seqno = 0;
+ err = vm_bind_event(d, vm, 0, ops, &ref_seqno);
+ if (err) {
+ eu_dbg(d, "vm_bind_event error %d\n", err);
+ return err;
+ }
+
+ list_for_each_entry(dd, &vm->debug_data.list, link) {
+ err = vm_bind_op(d, vm, DRM_XE_EUDEBUG_EVENT_CREATE, ref_seqno, dd);
+ if (err) {
+ eu_dbg(d, "vm_bind_op error %d\n", err);
+ return err;
+ }
+
+ ops--;
+ }
+
+ XE_WARN_ON(ops);
+
+ return ops ? -EIO : count;
+}
+
static void discover_client(struct xe_eudebug *d)
{
struct xe_file *xef;
struct xe_exec_queue *q;
struct xe_vm *vm;
unsigned long i;
- unsigned int vm_count = 0, eq_count = 0;
+ unsigned int vm_count = 0, eq_count = 0, ops_count = 0;
int err = 0;
xef = xe_eudebug_target_get(d);
if (!xef)
return;
+ /*
+ * xe_eudebug ref is taken for discovery worker. It will
+ * hold target xe_file ref and xe_file holds vm and exec_queue
+ * refs.
+ *
+ * The relevant ioctls through xe_file are through
+ * down_read(&xef->eudebug.lock). That means we can peek inside
+ * the resources without taking their respective locks by
+ * taking write lock.
+ */
down_write(&xef->eudebug.ioctl_lock);
eu_dbg(d, "Discovery start for %lld", d->session);
@@ -875,6 +1080,12 @@ static void discover_client(struct xe_eudebug *d)
if (err)
break;
vm_count++;
+
+ err = vm_discover_binds(d, vm);
+ if (err < 0)
+ break;
+
+ ops_count += err;
}
xa_for_each(&xef->exec_queue.xa, i, q) {
@@ -884,6 +1095,8 @@ static void discover_client(struct xe_eudebug *d)
err = exec_queue_create_event(d, xef, q);
if (err)
break;
+
+ eq_count++;
}
complete_all(&d->discovery);
@@ -892,9 +1105,9 @@ static void discover_client(struct xe_eudebug *d)
up_write(&xef->eudebug.ioctl_lock);
- if (vm_count || eq_count)
- eu_dbg(d, "Discovery found %u vms, %u exec_queues",
- vm_count, eq_count);
+ if (vm_count || eq_count || ops_count)
+ eu_dbg(d, "Discovery found %u vms, %u exec_queues, %u bind_ops",
+ vm_count, eq_count, ops_count);
xe_file_put(xef);
}
diff --git a/drivers/gpu/drm/xe/xe_eudebug.h b/drivers/gpu/drm/xe/xe_eudebug.h
index 10480a226fac..9c622362c0f7 100644
--- a/drivers/gpu/drm/xe/xe_eudebug.h
+++ b/drivers/gpu/drm/xe/xe_eudebug.h
@@ -10,10 +10,14 @@
struct drm_device;
struct drm_file;
+struct xe_debug_data;
struct xe_device;
struct xe_file;
struct xe_vm;
+struct xe_vma;
+struct xe_vma_ops;
struct xe_exec_queue;
+struct xe_user_fence;
#if IS_ENABLED(CONFIG_DRM_XE_EUDEBUG)
@@ -50,6 +54,8 @@ int xe_eudebug_enable(struct xe_device *xe, bool enable);
void xe_eudebug_exec_queue_create(struct xe_file *xef, struct xe_exec_queue *q);
void xe_eudebug_exec_queue_destroy(struct xe_file *xef, struct xe_exec_queue *q);
+void xe_eudebug_vm_bind_execute(struct xe_vm *vm, struct xe_vma_ops *ops);
+
#else
static inline int xe_eudebug_connect_ioctl(struct drm_device *dev,
@@ -67,6 +73,7 @@ static inline void xe_eudebug_vm_destroy(struct xe_file *xef, struct xe_vm *vm)
static inline void xe_eudebug_exec_queue_create(struct xe_file *xef, struct xe_exec_queue *q) { }
static inline void xe_eudebug_exec_queue_destroy(struct xe_file *xef, struct xe_exec_queue *q) { }
+static inline void xe_eudebug_vm_bind_execute(struct xe_vm *vm, struct xe_vma_ops *ops) { }
#endif /* CONFIG_DRM_XE_EUDEBUG */
#endif /* _XE_EUDEBUG_H_ */
diff --git a/drivers/gpu/drm/xe/xe_eudebug_types.h b/drivers/gpu/drm/xe/xe_eudebug_types.h
index 57bff7482163..502b121114df 100644
--- a/drivers/gpu/drm/xe/xe_eudebug_types.h
+++ b/drivers/gpu/drm/xe/xe_eudebug_types.h
@@ -33,7 +33,7 @@ enum xe_eudebug_state {
};
#define CONFIG_DRM_XE_DEBUGGER_EVENT_QUEUE_SIZE 64
-#define XE_EUDEBUG_MAX_EVENT_TYPE DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE
+#define XE_EUDEBUG_MAX_EVENT_TYPE DRM_XE_EUDEBUG_EVENT_VM_BIND_OP_DEBUG_DATA
/**
* struct xe_eudebug_handle - eudebug resource handle
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 4bc23d384134..6052bb81a827 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3360,6 +3360,10 @@ static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
xe_vm_set_validation_exec(vm, &exec);
fence = ops_execute(vm, vops);
xe_vm_set_validation_exec(vm, NULL);
+
+ if (!IS_ERR(fence) || PTR_ERR(fence) == -ENODATA)
+ xe_eudebug_vm_bind_execute(vm, vops);
+
if (IS_ERR(fence)) {
if (PTR_ERR(fence) == -ENODATA)
vm_bind_ioctl_ops_fini(vm, vops, NULL);
diff --git a/include/uapi/drm/xe_drm_eudebug.h b/include/uapi/drm/xe_drm_eudebug.h
index 360d7a7ecb67..5891f4d91358 100644
--- a/include/uapi/drm/xe_drm_eudebug.h
+++ b/include/uapi/drm/xe_drm_eudebug.h
@@ -49,6 +49,8 @@ struct drm_xe_eudebug_event {
#define DRM_XE_EUDEBUG_EVENT_READ 1
#define DRM_XE_EUDEBUG_EVENT_VM 2
#define DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE 3
+#define DRM_XE_EUDEBUG_EVENT_VM_BIND 4
+#define DRM_XE_EUDEBUG_EVENT_VM_BIND_OP_DEBUG_DATA 5
__u16 flags;
#define DRM_XE_EUDEBUG_EVENT_CREATE (1 << 0)
@@ -81,6 +83,54 @@ struct drm_xe_eudebug_event_exec_queue {
__u64 lrc_handle[];
};
+/*
+ * When the client (debuggee) calls the vm_bind_ioctl with the
+ * DRM_XE_VM_BIND_OP_[ADD|REMOVE]_DEBUG_DATA operation, the following event
+ * sequence will be created (for the debugger):
+ *
+ * ┌───────────────────────┐
+ * │ EVENT_VM_BIND ├──────────────────┬─┬┄┐
+ * └───────────────────────┘ │ │ ┊
+ * ┌──────────────────────────────────┐ │ │ ┊
+ * │ EVENT_VM_BIND_OP_DEBUG_DATA #1 ├───┘ │ ┊
+ * └──────────────────────────────────┘ │ ┊
+ * ... │ ┊
+ * ┌──────────────────────────────────┐ │ ┊
+ * │ EVENT_VM_BIND_OP_DEBUG_DATA #n ├─────┘ ┊
+ * └──────────────────────────────────┘ ┊
+ * ┊
+ * ┌┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┐ ┊
+ * ┊ EVENT_UFENCE ├┄┄┄┄┄┄┄┘
+ * └┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┘
+ *
+ * All the events below VM_BIND will reference the VM_BIND
+ * they associate with, by field .vm_bind_ref_seqno.
+ */
+
+struct drm_xe_eudebug_event_vm_bind {
+ struct drm_xe_eudebug_event base;
+
+ __u64 vm_handle;
+ __u32 flags;
+ __u32 num_bind_ops;
+};
+
+struct drm_xe_eudebug_event_vm_bind_op_debug_data {
+ struct drm_xe_eudebug_event base;
+ __u64 vm_bind_ref_seqno; /* *_event_vm_bind.base.seqno */
+ __u64 num_extensions;
+
+ __u64 addr;
+ __u64 range;
+ __u64 flags;
+ __u32 offset;
+ __u32 reserved;
+ union {
+ __u64 pseudopath;
+ char pathname[PATH_MAX];
+ };
+};
+
#if defined(__cplusplus)
}
#endif
--
2.43.0
next prev parent reply other threads:[~2025-12-02 13:53 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-12-02 13:52 [PATCH 00/20] Intel Xe GPU Debug Support (eudebug) v6 Mika Kuoppala
2025-12-02 13:52 ` [PATCH 01/20] drm/xe/eudebug: Introduce eudebug interface Mika Kuoppala
2025-12-10 16:48 ` [PATCH 01/21] " Mika Kuoppala
2025-12-02 13:52 ` [PATCH 02/20] drm/xe/eudebug: Introduce discovery for resources Mika Kuoppala
2025-12-02 13:52 ` [PATCH 03/20] drm/xe/eudebug: Introduce exec_queue events Mika Kuoppala
2025-12-02 13:52 ` [PATCH 04/20] drm/xe: Add EUDEBUG_ENABLE exec queue property Mika Kuoppala
2025-12-02 13:52 ` [PATCH 05/20] drm/xe/eudebug: Mark guc contexts as debuggable Mika Kuoppala
2025-12-06 2:03 ` Daniele Ceraolo Spurio
2025-12-02 13:52 ` [PATCH 06/20] drm/xe: Introduce ADD_DEBUG_DATA and REMOVE_DEBUG_DATA vm bind ops Mika Kuoppala
2025-12-02 13:52 ` Mika Kuoppala [this message]
2025-12-02 13:52 ` [PATCH 08/20] drm/xe/eudebug: Add UFENCE events with acks Mika Kuoppala
2025-12-02 13:52 ` [PATCH 09/20] drm/xe/eudebug: vm open/pread/pwrite Mika Kuoppala
2025-12-02 13:52 ` [PATCH 10/20] drm/xe/eudebug: userptr vm pread/pwrite Mika Kuoppala
2025-12-02 13:52 ` [PATCH 11/20] drm/xe/eudebug: hw enablement for eudebug Mika Kuoppala
2025-12-02 13:52 ` [PATCH 12/20] drm/xe/eudebug: Introduce EU control interface Mika Kuoppala
2025-12-02 13:52 ` [PATCH 13/20] drm/xe/eudebug: Introduce per device attention scan worker Mika Kuoppala
2025-12-02 13:52 ` [PATCH 14/20] drm/xe/eudebug_test: Introduce xe_eudebug wa kunit test Mika Kuoppala
2025-12-02 13:52 ` [PATCH 15/20] drm/xe: Implement SR-IOV and eudebug exclusivity Mika Kuoppala
2025-12-02 13:52 ` [PATCH 16/20] drm/xe: Add xe_client_debugfs and introduce debug_data file Mika Kuoppala
2025-12-03 9:07 ` Mika Kuoppala
2025-12-02 13:52 ` [PATCH 17/20] drm/xe/eudebug: Add read/count/compare helper for eu attention Mika Kuoppala
2025-12-02 13:52 ` [PATCH 18/20] drm/xe/vm: Support for adding null page VMA to VM on request Mika Kuoppala
2025-12-02 13:52 ` [PATCH 19/20] drm/xe/eudebug: Introduce EU pagefault handling interface Mika Kuoppala
2025-12-02 13:52 ` [PATCH 20/20] drm/xe/eudebug: Enable EU pagefault handling Mika Kuoppala
2025-12-02 14:02 ` ✗ CI.checkpatch: warning for Intel Xe GPU Debug Support (eudebug) v6 Patchwork
2025-12-02 14:04 ` ✓ CI.KUnit: success " Patchwork
2025-12-02 15:34 ` ✓ Xe.CI.BAT: " Patchwork
2025-12-02 18:30 ` ✗ Xe.CI.Full: failure " Patchwork
2025-12-03 9:13 ` ✗ CI.checkpatch: warning for Intel Xe GPU Debug Support (eudebug) v6 (rev2) Patchwork
2025-12-03 9:15 ` ✓ CI.KUnit: success " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251202135241.880267-8-mika.kuoppala@linux.intel.com \
--to=mika.kuoppala@linux.intel.com \
--cc=andrzej.hajda@intel.com \
--cc=christian.koenig@amd.com \
--cc=christoph.manszewski@intel.com \
--cc=gwan-gyeong.mun@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=joonas.lahtinen@linux.intel.com \
--cc=maciej.patelczyk@intel.com \
--cc=matthew.auld@intel.com \
--cc=matthew.brost@intel.com \
--cc=rodrigo.vivi@intel.com \
--cc=simona.vetter@ffwll.ch \
--cc=thomas.hellstrom@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox