From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: intel-xe@lists.freedesktop.org
Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
"Matthew Brost" <matthew.brost@intel.com>,
"Maarten Lankhorst" <maarten.lankhorst@linux.intel.com>,
"Michal Mrozek" <michal.mrozek@intel.com>,
"John Falkowski" <john.falkowski@intel.com>,
"Rodrigo Vivi" <rodrigo.vivi@intel.com>,
"Lahtinen Joonas" <joonas.lahtinen@linux.intel.com>,
"David Howells" <dhowells@redhat.com>,
"Christian Brauner" <brauner@kernel.org>,
"Kees Cook" <kees@kernel.org>,
"Davidlohr Bueso" <dave@stgolabs.net>,
"Christian König" <christian.koenig@amd.com>,
"Dave Airlie" <airlied@gmail.com>,
"Simona Vetter" <simona.vetter@ffwll.ch>,
dri-devel@lists.freedesktop.org,
LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 2/4] drm/xe: Add fault injection for rebind worker -ENOSPC
Date: Fri, 12 Jun 2026 15:53:38 +0200 [thread overview]
Message-ID: <20260612135340.116100-3-thomas.hellstrom@linux.intel.com> (raw)
In-Reply-To: <20260612135340.116100-1-thomas.hellstrom@linux.intel.com>
Add fault injection support, using the kernel fault injection
infrastructure, to inject -ENOSPC early in the success path of
preempt_rebind_work_func(), before xe_svm_notifier_lock() is taken.
This allows the error handling paths to be tested without depending
on real resource exhaustion.
Injection is restricted to restartable VMs. When triggered, the
worker deactivates the VM (rebind_deactivated).
Upcoming patches will then also post an error event to userspace.
Enable via debugfs:
echo 1 > /sys/kernel/debug/dri/0/fail_rebind/times
echo 100 > /sys/kernel/debug/dri/0/fail_rebind/probability
Assisted-by: GitHub_Copilot:claude-sonnet-4.6
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/xe/xe_debugfs.c | 4 +++-
drivers/gpu/drm/xe/xe_vm.c | 32 ++++++++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_vm.h | 5 +++++
3 files changed, 40 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 22b471303984..1a92c52ccd83 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -35,8 +35,8 @@
#ifdef CONFIG_DRM_XE_DEBUG
#include "xe_bo_evict.h"
#include "xe_migrate.h"
-#include "xe_vm.h"
#endif
+#include "xe_vm.h"
DECLARE_FAULT_ATTR(gt_reset_failure);
DECLARE_FAULT_ATTR(inject_csc_hw_error);
@@ -612,6 +612,8 @@ void xe_debugfs_register(struct xe_device *xe)
fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure);
+ xe_vm_debugfs_register(root);
+
if (IS_SRIOV_PF(xe))
xe_sriov_pf_debugfs_register(xe, root);
else if (IS_SRIOV_VF(xe))
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 86ed8f31a219..b69a2e5bd9c9 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -18,6 +18,9 @@
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif
#include <generated/xe_wa_oob.h>
@@ -43,6 +46,17 @@
#include "xe_vm_madvise.h"
#include "xe_wa.h"
+#ifdef CONFIG_FAULT_INJECTION
+static DECLARE_FAULT_ATTR(rebind_enospc);
+/* Create debugfs "fail_rebind" under @root; the only caller is CONFIG_DEBUG_FS code. */
+static void __maybe_unused xe_vm_register_fault_attrs(struct dentry *root)
+{
+ fault_create_debugfs_attr("fail_rebind", root, &rebind_enospc);
+}
+#else /* !CONFIG_FAULT_INJECTION */
+static inline void xe_vm_register_fault_attrs(struct dentry *root) {}
+#endif /* CONFIG_FAULT_INJECTION */
+
static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
return vm->gpuvm.r_obj;
@@ -529,6 +543,13 @@ static void preempt_rebind_work_func(struct work_struct *w)
goto out_unlock;
}
+#ifdef CONFIG_FAULT_INJECTION
+ if (xe_vm_is_restartable(vm) && should_fail(&rebind_enospc, 1)) {
+ err = -ENOSPC;
+ goto out_unlock;
+ }
+#endif
+
#define retry_required(__tries, __vm) \
(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
@@ -5042,3 +5063,14 @@ void xe_vm_remove_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
}
up_write(&vm->exec_queues.lock);
}
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * xe_vm_debugfs_register() - Register xe_vm debugfs entries
+ * @root: debugfs root dentry for this device
+ */
+void xe_vm_debugfs_register(struct dentry *root)
+{
+ xe_vm_register_fault_attrs(root); /* empty stub if !CONFIG_FAULT_INJECTION */
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 9ee44599cacd..0f9a38d97bf6 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -216,6 +216,11 @@ int xe_vm_restart_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
void xe_vm_close_and_put(struct xe_vm *vm);
+#ifdef CONFIG_DEBUG_FS
+struct dentry;
+void xe_vm_debugfs_register(struct dentry *root);
+#endif /* CONFIG_DEBUG_FS */
+
static inline bool xe_vm_in_fault_mode(struct xe_vm *vm)
{
return vm->flags & XE_VM_FLAG_FAULT_MODE;
--
2.54.0
next prev parent reply other threads:[~2026-06-12 13:54 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-12 13:53 [RFC PATCH 0/4] Xe driver asynchronous notification mechanism Thomas Hellström
2026-06-12 13:53 ` [PATCH 1/4] drm/xe: Add DRM_IOCTL_XE_VM_RESTART IOCTL Thomas Hellström
2026-06-12 13:53 ` Thomas Hellström [this message]
2026-06-12 13:53 ` [PATCH 3/4] watch_queue: Add a DRM_XE_NOTIFY watch type and export init_watch() Thomas Hellström
2026-06-12 13:53 ` [PATCH 4/4] drm/xe: Add watch_queue-based device event notification Thomas Hellström
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260612135340.116100-3-thomas.hellstrom@linux.intel.com \
--to=thomas.hellstrom@linux.intel.com \
--cc=airlied@gmail.com \
--cc=brauner@kernel.org \
--cc=christian.koenig@amd.com \
--cc=dave@stgolabs.net \
--cc=dhowells@redhat.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=intel-xe@lists.freedesktop.org \
--cc=john.falkowski@intel.com \
--cc=joonas.lahtinen@linux.intel.com \
--cc=kees@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=maarten.lankhorst@linux.intel.com \
--cc=matthew.brost@intel.com \
--cc=michal.mrozek@intel.com \
--cc=rodrigo.vivi@intel.com \
--cc=simona.vetter@ffwll.ch \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox