The Linux Kernel Mailing List
 help / color / mirror / Atom feed
From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: intel-xe@lists.freedesktop.org
Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
	"Matthew Brost" <matthew.brost@intel.com>,
	"Maarten Lankhorst" <maarten.lankhorst@linux.intel.com>,
	"Michal Mrozek" <michal.mrozek@intel.com>,
	"John Falkowski" <john.falkowski@intel.com>,
	"Rodrigo Vivi" <rodrigo.vivi@intel.com>,
	"Lahtinen Joonas" <joonas.lahtinen@linux.intel.com>,
	"David Howells" <dhowells@redhat.com>,
	"Christian Brauner" <brauner@kernel.org>,
	"Kees Cook" <kees@kernel.org>,
	"Davidlohr Bueso" <dave@stgolabs.net>,
	"Christian König" <christian.koenig@amd.com>,
	"Dave Airlie" <airlied@gmail.com>,
	"Simona Vetter" <simona.vetter@ffwll.ch>,
	dri-devel@lists.freedesktop.org,
	LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 2/4] drm/xe: Add fault injection for rebind worker -ENOSPC
Date: Fri, 12 Jun 2026 15:53:38 +0200	[thread overview]
Message-ID: <20260612135340.116100-3-thomas.hellstrom@linux.intel.com> (raw)
In-Reply-To: <20260612135340.116100-1-thomas.hellstrom@linux.intel.com>

Use the kernel fault injection infrastructure to inject -ENOSPC early
in the success path of preempt_rebind_work_func(), before
xe_svm_notifier_lock() is taken. This allows the rebind worker's
error handling paths to be tested without interference from real
resource exhaustion.

Injection is restricted to restartable VMs. When triggered, the
worker deactivates the VM (rebind_deactivated). Upcoming patches
will additionally post an error event to userspace.

Enable via debugfs:

  echo 1 > /sys/kernel/debug/dri/0/fail_rebind/times
  echo 100 > /sys/kernel/debug/dri/0/fail_rebind/probability

Assisted-by: GitHub_Copilot:claude-sonnet-4.6
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_debugfs.c |  4 +++-
 drivers/gpu/drm/xe/xe_vm.c      | 32 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_vm.h      |  5 +++++
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 22b471303984..1a92c52ccd83 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -35,8 +35,8 @@
 #ifdef CONFIG_DRM_XE_DEBUG
 #include "xe_bo_evict.h"
 #include "xe_migrate.h"
-#include "xe_vm.h"
 #endif
+#include "xe_vm.h"
 
 DECLARE_FAULT_ATTR(gt_reset_failure);
 DECLARE_FAULT_ATTR(inject_csc_hw_error);
@@ -612,6 +612,8 @@ void xe_debugfs_register(struct xe_device *xe)
 
 	fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure);
 
+	xe_vm_debugfs_register(root);
+
 	if (IS_SRIOV_PF(xe))
 		xe_sriov_pf_debugfs_register(xe, root);
 	else if (IS_SRIOV_VF(xe))
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 86ed8f31a219..b69a2e5bd9c9 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -18,6 +18,9 @@
 #include <linux/kthread.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif
 
 #include <generated/xe_wa_oob.h>
 
@@ -43,6 +46,17 @@
 #include "xe_vm_madvise.h"
 #include "xe_wa.h"
 
+#ifdef CONFIG_FAULT_INJECTION
+static DECLARE_FAULT_ATTR(rebind_enospc);
+
+static void xe_vm_register_fault_attrs(struct dentry *root)
+{
+	fault_create_debugfs_attr("fail_rebind", root, &rebind_enospc);
+}
+#else
+static inline void xe_vm_register_fault_attrs(struct dentry *root) {}
+#endif
+
 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
 {
 	return vm->gpuvm.r_obj;
@@ -529,6 +543,13 @@ static void preempt_rebind_work_func(struct work_struct *w)
 		goto out_unlock;
 	}
 
+#ifdef CONFIG_FAULT_INJECTION
+	if (xe_vm_is_restartable(vm) && should_fail(&rebind_enospc, 1)) {
+		err = -ENOSPC;
+		goto out_unlock;
+	}
+#endif
+
 #define retry_required(__tries, __vm) \
 	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
 	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
@@ -5042,3 +5063,14 @@ void xe_vm_remove_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 	}
 	up_write(&vm->exec_queues.lock);
 }
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * xe_vm_debugfs_register() - Register xe_vm debugfs entries
+ * @root: debugfs root dentry for this device
+ */
+void xe_vm_debugfs_register(struct dentry *root)
+{
+	xe_vm_register_fault_attrs(root);
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 9ee44599cacd..0f9a38d97bf6 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -216,6 +216,11 @@ int xe_vm_restart_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file);
 void xe_vm_close_and_put(struct xe_vm *vm);
 
+#ifdef CONFIG_DEBUG_FS
+struct dentry;
+void xe_vm_debugfs_register(struct dentry *root);
+#endif
+
 static inline bool xe_vm_in_fault_mode(struct xe_vm *vm)
 {
 	return vm->flags & XE_VM_FLAG_FAULT_MODE;
-- 
2.54.0


  parent reply	other threads:[~2026-06-12 13:54 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-12 13:53 [RFC PATCH 0/4] Xe driver asynchronous notification mechanism Thomas Hellström
2026-06-12 13:53 ` [PATCH 1/4] drm/xe: Add DRM_IOCTL_XE_VM_RESTART IOCTL Thomas Hellström
2026-06-12 13:53 ` Thomas Hellström [this message]
2026-06-12 13:53 ` [PATCH 3/4] watch_queue: Add a DRM_XE_NOTIFY watch type and export init_watch() Thomas Hellström
2026-06-12 13:53 ` [PATCH 4/4] drm/xe: Add watch_queue-based device event notification Thomas Hellström

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260612135340.116100-3-thomas.hellstrom@linux.intel.com \
    --to=thomas.hellstrom@linux.intel.com \
    --cc=airlied@gmail.com \
    --cc=brauner@kernel.org \
    --cc=christian.koenig@amd.com \
    --cc=dave@stgolabs.net \
    --cc=dhowells@redhat.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=john.falkowski@intel.com \
    --cc=joonas.lahtinen@linux.intel.com \
    --cc=kees@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maarten.lankhorst@linux.intel.com \
    --cc=matthew.brost@intel.com \
    --cc=michal.mrozek@intel.com \
    --cc=rodrigo.vivi@intel.com \
    --cc=simona.vetter@ffwll.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox