Igt-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] tests/core_hotunplug: exercise fd-holding subtests with Xe workload
@ 2026-05-06 13:57 Nitin Gote
  2026-05-06 14:18 ` Kamil Konieczny
                   ` (8 more replies)
  0 siblings, 9 replies; 13+ messages in thread
From: Nitin Gote @ 2026-05-06 13:57 UTC (permalink / raw)
  To: igt-dev; +Cc: matthew.auld, nitin.r.gote

The six fd-holding subtests (hotunbind-rebind, hotunplug-rescan,
hotrebind, hotreplug, hotrebind-lateclose, hotreplug-lateclose) keep
a DRM fd open across unbind/unplug but leave the GPU idle, so on Xe
the unbind/unplug path is never exercised against an active VM,
exec queue and BOs.

Add xe_workload_start()/xe_workload_stop() helpers, gated on
is_xe_device(), that run a spinner on the GPU across the
unbind/unplug sequence, so the device is removed while a workload
is using it. i915 already has equivalent coverage via
local_i915_healthcheck().

Validated on BMG with a KASAN-enabled kernel: all subtests pass with
no KASAN reports.

Cc: Matthew Auld <matthew.auld@intel.com>
Assisted-by: Copilot:claude-opus-4.7
Signed-off-by: Nitin Gote <nitin.r.gote@intel.com>
---
 tests/core_hotunplug.c | 74 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/tests/core_hotunplug.c b/tests/core_hotunplug.c
index 7c9dae1bf..88a2133af 100644
--- a/tests/core_hotunplug.c
+++ b/tests/core_hotunplug.c
@@ -565,6 +565,44 @@ static void set_filter_from_device(int fd)
 	igt_assert_eq(igt_device_filter_add(filter), 1);
 }
 
+/* Xe GPU workload helpers */
+
+struct xe_workload {
+	igt_spin_t *xe_spin;
+	uint64_t xe_ahnd;
+};
+
+static void xe_workload_start(struct hotunplug *priv, int fd,
+			      struct xe_workload *w)
+{
+	if (!is_xe_device(fd))
+		return;
+
+	local_debug("%s\n", "starting Xe GPU spinner");
+	priv->failure = "Xe workload start failure!";
+	w->xe_ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_RELOC);
+	w->xe_spin = igt_spin_new(fd, .ahnd = w->xe_ahnd);
+	priv->failure = NULL;
+}
+
+static void xe_workload_stop(struct xe_workload *w)
+{
+	if (!w->xe_spin)
+		return;
+
+	/*
+	 * The device may be gone (unbind/unplug), so avoid igt_spin_free():
+	 * on Xe it asserts in xe_spin_sync_wait() because the syncobj will
+	 * never signal. Stop the spinner via a userspace mmap write and let
+	 * the DRM fd close reclaim the kernel-side resources.
+	 */
+	local_debug("%s\n", "stopping Xe GPU spinner");
+	igt_spin_end(w->xe_spin);
+	put_ahnd(w->xe_ahnd);
+	w->xe_spin = NULL;
+	w->xe_ahnd = 0;
+}
+
 /* Subtests */
 
 static void unbind_rebind(struct hotunplug *priv)
@@ -591,12 +629,18 @@ static void unplug_rescan(struct hotunplug *priv)
 
 static void hotunbind_rebind(struct hotunplug *priv)
 {
+	struct xe_workload w = { 0 };
+
 	pre_check(priv);
 
 	priv->fd.drm = local_drm_open_driver(false, "", " for hot unbind");
 
+	xe_workload_start(priv, priv->fd.drm, &w);
+
 	driver_unbind(priv, "hot ", 0);
 
+	xe_workload_stop(&w);
+
 	priv->fd.drm = close_device(priv->fd.drm, "late ", "unbound ");
 	igt_assert_eq(priv->fd.drm, -1);
 
@@ -607,12 +651,18 @@ static void hotunbind_rebind(struct hotunplug *priv)
 
 static void hotunplug_rescan(struct hotunplug *priv)
 {
+	struct xe_workload w = { 0 };
+
 	pre_check(priv);
 
 	priv->fd.drm = local_drm_open_driver(false, "", " for hot unplug");
 
+	xe_workload_start(priv, priv->fd.drm, &w);
+
 	device_unplug(priv, "hot ", 0);
 
+	xe_workload_stop(&w);
+
 	priv->fd.drm = close_device(priv->fd.drm, "late ", "removed ");
 	igt_assert_eq(priv->fd.drm, -1);
 
@@ -623,40 +673,58 @@ static void hotunplug_rescan(struct hotunplug *priv)
 
 static void hotrebind(struct hotunplug *priv)
 {
+	struct xe_workload w = { 0 };
+
 	pre_check(priv);
 
 	priv->fd.drm = local_drm_open_driver(false, "", " for hot rebind");
 
+	xe_workload_start(priv, priv->fd.drm, &w);
+
 	driver_unbind(priv, "hot ", 60);
 
 	driver_bind(priv, 0);
 
+	xe_workload_stop(&w);
+
 	igt_assert_f(healthcheck(priv, false), "%s\n", priv->failure);
 }
 
 static void hotreplug(struct hotunplug *priv)
 {
+	struct xe_workload w = { 0 };
+
 	pre_check(priv);
 
 	priv->fd.drm = local_drm_open_driver(false, "", " for hot replug");
 
+	xe_workload_start(priv, priv->fd.drm, &w);
+
 	device_unplug(priv, "hot ", 60);
 
 	bus_rescan(priv, 0);
 
+	xe_workload_stop(&w);
+
 	igt_assert_f(healthcheck(priv, false), "%s\n", priv->failure);
 }
 
 static void hotrebind_lateclose(struct hotunplug *priv)
 {
+	struct xe_workload w = { 0 };
+
 	pre_check(priv);
 
 	priv->fd.drm = local_drm_open_driver(false, "", " for hot rebind");
 
+	xe_workload_start(priv, priv->fd.drm, &w);
+
 	driver_unbind(priv, "hot ", 60);
 
 	driver_bind(priv, 0);
 
+	xe_workload_stop(&w);
+
 	priv->fd.drm = close_device(priv->fd.drm, "late ", "unbound ");
 	igt_assert_eq(priv->fd.drm, -1);
 
@@ -665,14 +733,20 @@ static void hotrebind_lateclose(struct hotunplug *priv)
 
 static void hotreplug_lateclose(struct hotunplug *priv)
 {
+	struct xe_workload w = { 0 };
+
 	pre_check(priv);
 
 	priv->fd.drm = local_drm_open_driver(false, "", " for hot replug");
 
+	xe_workload_start(priv, priv->fd.drm, &w);
+
 	device_unplug(priv, "hot ", 60);
 
 	bus_rescan(priv, 0);
 
+	xe_workload_stop(&w);
+
 	priv->fd.drm = close_device(priv->fd.drm, "late ", "removed ");
 	igt_assert_eq(priv->fd.drm, -1);
 
-- 
2.50.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2026-05-12 11:03 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-06 13:57 [PATCH] tests/core_hotunplug: exercise fd-holding subtests with Xe workload Nitin Gote
2026-05-06 14:18 ` Kamil Konieczny
2026-05-06 14:37   ` Gote, Nitin R
2026-05-08  9:04     ` Janusz Krzysztofik
2026-05-12 11:02       ` Gote, Nitin R
2026-05-06 17:08 ` ✓ Xe.CI.BAT: success for " Patchwork
2026-05-06 17:15 ` ✓ i915.CI.BAT: " Patchwork
2026-05-06 18:15 ` ✗ Xe.CI.FULL: failure " Patchwork
2026-05-06 23:19 ` ✗ i915.CI.Full: " Patchwork
2026-05-07 18:49 ` ✓ i915.CI.BAT: success for tests/core_hotunplug: exercise fd-holding subtests with Xe workload (rev2) Patchwork
2026-05-07 19:12 ` ✓ Xe.CI.BAT: " Patchwork
2026-05-08  6:07 ` ✗ Xe.CI.FULL: failure " Patchwork
2026-05-08  8:24 ` ✓ i915.CI.Full: success " Patchwork

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox