[PATCH 1/5] drm/xe/guc: Defer user exec queue scheduler start until after page table restore

Linux kernel -stable discussions
 help / color / mirror / Atom feed

* [PATCH 1/5] drm/xe/guc: Defer user exec queue scheduler start until after page table restore
       [not found] <20260522164355.2773-1-thomas.hellstrom@linux.intel.com>
@ 2026-05-22 16:43 ` Thomas Hellström
  2026-05-22 16:43 ` [PATCH 2/5] drm/xe/guc: Don't ban LR VM exec queues on PM suspend Thomas Hellström
  2026-05-22 16:43 ` [PATCH 5/5] drm/xe: Suspend fault-mode LR jobs before VRAM eviction on S3/S4 Thomas Hellström
  2 siblings, 0 replies; 3+ messages in thread
From: Thomas Hellström @ 2026-05-22 16:43 UTC (permalink / raw)
  To: intel-xe; +Cc: Thomas Hellström, Matthew Auld, Satyanarayana K V P, stable

On S3/S4 and d3cold runtime PM resume, exec queue schedulers are
restarted before xe_bo_restore_late() has restored userspace VM page
table BOs and LRC BOs. If a pending job is submitted in this window,
GuC will attempt to load the context using stale or invalid data in
VRAM, leading to GuC exceptions.

Defer user exec queue scheduler start until after page tables and LRC
BOs are restored, ensuring no job can be submitted before the backing
storage is valid. Migrate and kernel VM exec queues are still started
immediately as they are required by the restore process itself.

For GT reset, VRAM is not evicted and all BOs remain valid, so user
exec queue schedulers are started without deferral.

This covers both LR and non-LR userspace exec queues.

Fixes: 7f387e6012b6 ("drm/xe: add XE_BO_FLAG_PINNED_LATE_RESTORE")
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Cc: <stable@vger.kernel.org> # v6.16+
Assisted-by: GitHub_Copilot:claude-sonnet-4.6
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c         | 16 +++++++++++
 drivers/gpu/drm/xe/xe_gt.h         |  2 ++
 drivers/gpu/drm/xe/xe_guc.c        | 13 +++++++++
 drivers/gpu/drm/xe/xe_guc.h        |  1 +
 drivers/gpu/drm/xe/xe_guc_submit.c | 44 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_guc_submit.h |  1 +
 drivers/gpu/drm/xe/xe_pm.c         |  6 ++++
 drivers/gpu/drm/xe/xe_uc.c         | 16 +++++++++++
 drivers/gpu/drm/xe/xe_uc.h         |  1 +
 9 files changed, 100 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 783eb6d631b5..2c63e4d6a649 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -955,6 +955,8 @@ static void gt_reset_worker(struct work_struct *w)
 	if (err)
 		goto err_out;
 
+	xe_uc_start_user_queues(&gt->uc);
+
 	xe_force_wake_put(gt_to_fw(gt), fw_ref);
 
 	/* Pair with get while enqueueing the work in xe_gt_reset_async() */
@@ -967,6 +969,7 @@ static void gt_reset_worker(struct work_struct *w)
 err_out:
 	xe_force_wake_put(gt_to_fw(gt), fw_ref);
 	XE_WARN_ON(xe_uc_start(&gt->uc));
+	xe_uc_start_user_queues(&gt->uc);
 
 err_fail:
 	xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
@@ -1050,6 +1053,19 @@ int xe_gt_sanitize_freq(struct xe_gt *gt)
 	return ret;
 }
 
+/**
+ * xe_gt_start_user_queues() - Start user exec queues after page table restore
+ * @gt: the GT object
+ *
+ * Starts the DRM schedulers for all user exec queues on the GT. This must be
+ * called after xe_bo_restore_late() to ensure that userspace page table BOs
+ * are valid before any job submission triggers GuC context registration.
+ */
+void xe_gt_start_user_queues(struct xe_gt *gt)
+{
+	xe_uc_start_user_queues(&gt->uc);
+}
+
 int xe_gt_resume(struct xe_gt *gt)
 {
 	int err;
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 4150aa594f05..b6ba05a317f7 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -170,4 +170,6 @@ static inline bool xe_gt_supports_multi_queue(const struct xe_gt *gt,
 	return gt->info.multi_queue_engine_class_mask & BIT(class);
 }
 
+void xe_gt_start_user_queues(struct xe_gt *gt);
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 4023700ff2a9..0359909b8b27 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -1717,6 +1717,19 @@ int xe_guc_start(struct xe_guc *guc)
 	return xe_guc_submit_start(guc);
 }
 
+/**
+ * xe_guc_start_user_queues() - Start user exec queue schedulers on the GuC
+ * @guc: the GuC object
+ *
+ * Starts the DRM schedulers for all user exec queues managed by this GuC.
+ * Must be called after xe_bo_restore_late() to ensure page tables are valid
+ * before any job submission triggers GuC context registration.
+ */
+void xe_guc_start_user_queues(struct xe_guc *guc)
+{
+	xe_guc_submit_start_user_queues(guc);
+}
+
 /**
  * xe_guc_runtime_suspend() - GuC runtime suspend
  * @guc: The GuC object
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index 02514914f404..ad2a6521852c 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -60,6 +60,7 @@ void xe_guc_reset_wait(struct xe_guc *guc);
 void xe_guc_stop_prepare(struct xe_guc *guc);
 void xe_guc_stop(struct xe_guc *guc);
 int xe_guc_start(struct xe_guc *guc);
+void xe_guc_start_user_queues(struct xe_guc *guc);
 void xe_guc_declare_wedged(struct xe_guc *guc);
 bool xe_guc_using_main_gamctrl_queues(struct xe_guc *guc);
 
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 4d32b430bc15..084ecc8e7efa 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -2535,6 +2535,16 @@ static void guc_exec_queue_start(struct xe_exec_queue *q)
 	xe_sched_submission_resume_tdr(sched);
 }
 
+/*
+ * Returns true for user exec queues whose page tables may not yet be
+ * restored when xe_guc_submit_start() is called during GT resume.
+ * These queues must be started later, after xe_bo_restore_late().
+ */
+static bool exec_queue_needs_late_start(const struct xe_exec_queue *q)
+{
+	return !(q->flags & (EXEC_QUEUE_FLAG_MIGRATE | EXEC_QUEUE_FLAG_VM));
+}
+
 int xe_guc_submit_start(struct xe_guc *guc)
 {
 	struct xe_exec_queue *q;
@@ -2549,6 +2559,10 @@ int xe_guc_submit_start(struct xe_guc *guc)
 		if (q->guc->id != index)
 			continue;
 
+		/* User queues are deferred until page tables are restored */
+		if (exec_queue_needs_late_start(q))
+			continue;
+
 		guc_exec_queue_start(q);
 	}
 	mutex_unlock(&guc->submission_state.lock);
@@ -2558,6 +2572,36 @@ int xe_guc_submit_start(struct xe_guc *guc)
 	return 0;
 }
 
+/**
+ * xe_guc_submit_start_user_queues() - Start user exec queues after late restore
+ * @guc: the GuC object
+ *
+ * Starts the DRM schedulers for all user exec queues (those not flagged as
+ * migrate or VM queues). Must be called after xe_bo_restore_late() to ensure
+ * page tables are valid before any job submission is attempted.
+ */
+void xe_guc_submit_start_user_queues(struct xe_guc *guc)
+{
+	struct xe_exec_queue *q;
+	unsigned long index;
+
+	if (!guc->submission_state.initialized)
+		return;
+
+	mutex_lock(&guc->submission_state.lock);
+	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+		/* Prevent redundant attempts to start parallel queues */
+		if (q->guc->id != index)
+			continue;
+
+		if (!exec_queue_needs_late_start(q))
+			continue;
+
+		guc_exec_queue_start(q);
+	}
+	mutex_unlock(&guc->submission_state.lock);
+}
+
 static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
 					   struct xe_exec_queue *q)
 {
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index b3839a90c142..b210b2f6cd2d 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -20,6 +20,7 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc);
 void xe_guc_submit_reset_wait(struct xe_guc *guc);
 void xe_guc_submit_stop(struct xe_guc *guc);
 int xe_guc_submit_start(struct xe_guc *guc);
+void xe_guc_submit_start_user_queues(struct xe_guc *guc);
 void xe_guc_submit_pause(struct xe_guc *guc);
 void xe_guc_submit_pause_abort(struct xe_guc *guc);
 void xe_guc_submit_pause_vf(struct xe_guc *guc);
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index d4672eb07476..c203a59d7000 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -282,6 +282,9 @@ int xe_pm_resume(struct xe_device *xe)
 	if (err)
 		goto err;
 
+	for_each_gt(gt, xe, id)
+		xe_gt_start_user_queues(gt);
+
 	xe_pxp_pm_resume(xe->pxp);
 
 	if (IS_VF_CCS_READY(xe))
@@ -696,6 +699,9 @@ int xe_pm_runtime_resume(struct xe_device *xe)
 		err = xe_bo_restore_late(xe);
 		if (err)
 			goto out;
+
+		for_each_gt(gt, xe, id)
+			xe_gt_start_user_queues(gt);
 	}
 
 	xe_pxp_pm_resume(xe->pxp);
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 75091bde0d50..12606133f5bc 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -263,6 +263,22 @@ int xe_uc_start(struct xe_uc *uc)
 	return xe_guc_start(&uc->guc);
 }
 
+/**
+ * xe_uc_start_user_queues() - Start user exec queues after late restore
+ * @uc: the UC object
+ *
+ * Starts the DRM schedulers for all user exec queues. Must be called after
+ * xe_bo_restore_late() to ensure page tables are valid before any job
+ * submission is attempted. Has no effect if GuC submission is not enabled.
+ */
+void xe_uc_start_user_queues(struct xe_uc *uc)
+{
+	if (!xe_device_uc_enabled(uc_to_xe(uc)))
+		return;
+
+	xe_guc_start_user_queues(&uc->guc);
+}
+
 static void uc_reset_wait(struct xe_uc *uc)
 {
 	int ret;
diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
index 255a54a8f876..2fd056cfa1d0 100644
--- a/drivers/gpu/drm/xe/xe_uc.h
+++ b/drivers/gpu/drm/xe/xe_uc.h
@@ -18,6 +18,7 @@ void xe_uc_runtime_suspend(struct xe_uc *uc);
 void xe_uc_stop_prepare(struct xe_uc *uc);
 void xe_uc_stop(struct xe_uc *uc);
 int xe_uc_start(struct xe_uc *uc);
+void xe_uc_start_user_queues(struct xe_uc *uc);
 void xe_uc_suspend_prepare(struct xe_uc *uc);
 int xe_uc_suspend(struct xe_uc *uc);
 int xe_uc_sanitize_reset(struct xe_uc *uc);
-- 
2.54.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/5] drm/xe/guc: Don't ban LR VM exec queues on PM suspend
       [not found] <20260522164355.2773-1-thomas.hellstrom@linux.intel.com>
  2026-05-22 16:43 ` [PATCH 1/5] drm/xe/guc: Defer user exec queue scheduler start until after page table restore Thomas Hellström
@ 2026-05-22 16:43 ` Thomas Hellström
  2026-05-22 16:43 ` [PATCH 5/5] drm/xe: Suspend fault-mode LR jobs before VRAM eviction on S3/S4 Thomas Hellström
  2 siblings, 0 replies; 3+ messages in thread
From: Thomas Hellström @ 2026-05-22 16:43 UTC (permalink / raw)
  To: intel-xe
  Cc: Thomas Hellström, Matthew Brost, Tomasz Lis, Rodrigo Vivi,
	stable

When xe_guc_submit_stop() is called during an S3/S4 suspend or GT
reset, guc_exec_queue_stop() bans any user exec queue that has a job
which has started but not yet completed.  For normal (non-LR) exec
queues this is the correct behaviour: a started-but-incomplete job at
reset time may indicate a hung workload.

For exec queues attached to Long Running (LR) VMs the same condition
is always true during normal operation: LR jobs are designed to run
indefinitely and are never "completed" in the DRM scheduler sense —
they are preempted and resumed via the preempt-fence mechanism.
Banning such an exec queue on PM suspend permanently prevents the job
from restarting after resume, causing the userspace compute workload to
fail silently.

Fix this by not banning LR VM exec queues when a system suspend or
hibernation is in progress, while preserving the ban for GT reset where
a started-but-incomplete job is a legitimate indicator of a hang.

Fixes: f6375fb3aa94 ("drm/xe: Track LR jobs in DRM scheduler pending list")
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Tomasz Lis <tomasz.lis@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: <stable@vger.kernel.org> # v6.19+
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Assisted-by: GitHub_Copilot:claude-sonnet-4.6
---
 drivers/gpu/drm/xe/xe_device_types.h |  8 ++++++++
 drivers/gpu/drm/xe/xe_guc_submit.c   | 10 +++++++++-
 drivers/gpu/drm/xe/xe_pm.c           |  5 ++++-
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 32dd2ffbc796..9dbf7b3a0c49 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -433,6 +433,14 @@ struct xe_device {
 	struct notifier_block pm_notifier;
 	/** @pm_block: Completion to block validating tasks on suspend / hibernate prepare */
 	struct completion pm_block;
+	/**
+	 * @pm_suspend_in_progress: True while the device is going through
+	 * system suspend or hibernation (set at xe_pm_suspend() entry, cleared
+	 * at xe_pm_resume() entry or on suspend error). Used to suppress exec
+	 * queue bans that should only apply during GT reset, not PM suspend.
+	 * Serialised by the PM suspend sequence; no lock required.
+	 */
+	bool pm_suspend_in_progress;
 	/** @rebind_resume_list: List of wq items to kick on resume. */
 	struct list_head rebind_resume_list;
 	/** @rebind_resume_lock: Lock to protect the rebind_resume_list */
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 084ecc8e7efa..42bc7425de0d 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -2268,8 +2268,16 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
 	 * Ban any engine (aside from kernel and engines used for VM ops) with a
 	 * started but not complete job or if a job has gone through a GT reset
 	 * more than twice.
+	 *
+	 * LR VM exec queues are excluded from this ban during PM suspend: their
+	 * jobs are intentionally long-running and are preempted and resumed via
+	 * the preempt-fence mechanism. Banning them on PM suspend would
+	 * permanently prevent the job from restarting after resume.
+	 * On GT reset however we do want to ban them, as that may indicate a
+	 * genuinely hung workload.
 	 */
-	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
+	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM)) &&
+	    !(q->vm && xe_vm_in_lr_mode(q->vm) && guc_to_xe(guc)->pm_suspend_in_progress)) {
 		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
 		bool ban = false;
 
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index c203a59d7000..76d211986822 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -176,6 +176,7 @@ int xe_pm_suspend(struct xe_device *xe)
 	int err;
 
 	drm_dbg(&xe->drm, "Suspending device\n");
+	xe->pm_suspend_in_progress = true;
 	xe_pm_block_begin_signalling();
 	trace_xe_pm_suspend(xe, __builtin_return_address(0));
 
@@ -217,6 +218,7 @@ int xe_pm_suspend(struct xe_device *xe)
 	xe_pxp_pm_resume(xe->pxp);
 err:
 	drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
+	xe->pm_suspend_in_progress = false;
 	xe_pm_block_end_signalling();
 	return err;
 }
@@ -234,8 +236,9 @@ int xe_pm_resume(struct xe_device *xe)
 	u8 id;
 	int err;
 
-	xe_pm_block_begin_signalling();
+	xe->pm_suspend_in_progress = false;
 	drm_dbg(&xe->drm, "Resuming device\n");
+	xe_pm_block_begin_signalling();
 	trace_xe_pm_resume(xe, __builtin_return_address(0));
 
 	for_each_gt(gt, xe, id)
-- 
2.54.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 5/5] drm/xe: Suspend fault-mode LR jobs before VRAM eviction on S3/S4
       [not found] <20260522164355.2773-1-thomas.hellstrom@linux.intel.com>
  2026-05-22 16:43 ` [PATCH 1/5] drm/xe/guc: Defer user exec queue scheduler start until after page table restore Thomas Hellström
  2026-05-22 16:43 ` [PATCH 2/5] drm/xe/guc: Don't ban LR VM exec queues on PM suspend Thomas Hellström
@ 2026-05-22 16:43 ` Thomas Hellström
  2 siblings, 0 replies; 3+ messages in thread
From: Thomas Hellström @ 2026-05-22 16:43 UTC (permalink / raw)
  To: intel-xe; +Cc: Thomas Hellström, Matthew Auld, Rodrigo Vivi, stable

Fault-mode (SVM) exec queues run persistent LR jobs that can re-fault
GPU page table entries at any time. During S3/S4 suspend, VRAM eviction
unmaps GPU VMAs, but a running fault-mode job can immediately re-fault
those pages back in, racing with the eviction.

Fix this by suspending and draining fault-mode exec queues before any
VRAM eviction begins, ensuring the GPU is quiescent before page tables
or BOs are invalidated. On resume, re-register and restart all
previously suspended fault-mode exec queues once hardware is restored
and page fault handlers are ready to run.

Fault-mode exec queues created concurrently with PM suspend are
immediately suspended so the resume path picks them up, closing the
window where a newly-created queue could race with eviction.

Remove the stale "FIXME: Super racey..." comment from xe_pm_suspend():
the race it described is now prevented by suspending fault-mode jobs
before any eviction begins.

v2:
 - Add xe_device::pm_suspend_in_progress flag to suppress erroneous LR
   exec queue bans during PM suspend (now handled in a separate patch)
 - Rebase on exec queue suspend refcount and EXEC_MODE_LR rename patches

Fixes: eb5723a75104 ("drm/xe: Block exec and rebind worker while evicting for suspend / hibernate")
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: <stable@vger.kernel.org> # v6.17+
Assisted-by: GitHub_Copilot:claude-sonnet-4.6
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue_types.h      |   7 +
 drivers/gpu/drm/xe/xe_guc_submit.c            |  25 +++
 drivers/gpu/drm/xe/xe_guc_submit.h            |   1 +
 drivers/gpu/drm/xe/xe_hw_engine_group.c       | 161 ++++++++++++++++--
 drivers/gpu/drm/xe/xe_hw_engine_group.h       |   3 +
 drivers/gpu/drm/xe/xe_hw_engine_group_types.h |   7 +
 drivers/gpu/drm/xe/xe_pm.c                    |  15 +-
 7 files changed, 206 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 2f5ccf294675..77f2bc5ff2f6 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -200,6 +200,13 @@ struct xe_exec_queue {
 		u32 seqno;
 		/** @lr.link: link into VM's list of exec queues */
 		struct list_head link;
+		/**
+		 * @lr.pm_suspended: Marks that this fault-mode exec
+		 * queue was suspended for PM and must be resumed on
+		 * PM post-suspend. Protected by the hw engine group's
+		 * mode_sem.
+		 */
+		bool pm_suspended;
 	} lr;
 
 #define XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT	0
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 7da7db2059ff..a97a4caf6dc8 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -2616,6 +2616,31 @@ void xe_guc_submit_start_user_queues(struct xe_guc *guc)
 	mutex_unlock(&guc->submission_state.lock);
 }
 
+/**
+ * xe_guc_submit_pm_resume_exec_queue() - Re-enable a fault-mode exec queue after PM resume
+ * @q: the exec queue to resume
+ *
+ * Re-enables a fault-mode LR exec queue for execution after PM resume.
+ * Has no effect if GuC is stopped or if the queue is in a terminal state
+ * (killed, banned, wedged, or destroyed).
+ */
+void xe_guc_submit_pm_resume_exec_queue(struct xe_exec_queue *q)
+{
+	struct xe_guc *guc = exec_queue_to_guc(q);
+
+	if (!guc->submission_state.initialized)
+		return;
+
+	mutex_lock(&guc->submission_state.lock);
+	if (!xe_guc_read_stopped(guc) &&
+	    !exec_queue_killed_or_banned_or_wedged(q) && !exec_queue_destroyed(q)) {
+		if (!exec_queue_registered(q))
+			register_exec_queue(q, GUC_CONTEXT_NORMAL);
+		q->ops->resume(q);
+	}
+	mutex_unlock(&guc->submission_state.lock);
+}
+
 static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
 					   struct xe_exec_queue *q)
 {
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index b210b2f6cd2d..c312fe31d917 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -21,6 +21,7 @@ void xe_guc_submit_reset_wait(struct xe_guc *guc);
 void xe_guc_submit_stop(struct xe_guc *guc);
 int xe_guc_submit_start(struct xe_guc *guc);
 void xe_guc_submit_start_user_queues(struct xe_guc *guc);
+void xe_guc_submit_pm_resume_exec_queue(struct xe_exec_queue *q);
 void xe_guc_submit_pause(struct xe_guc *guc);
 void xe_guc_submit_pause_abort(struct xe_guc *guc);
 void xe_guc_submit_pause_vf(struct xe_guc *guc);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
index fba0ed039bad..1561fb95fdcf 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
@@ -6,11 +6,14 @@
 #include <drm/drm_managed.h>
 
 #include "xe_assert.h"
+#include "xe_device.h"
 #include "xe_device_types.h"
 #include "xe_exec_queue.h"
 #include "xe_gt.h"
 #include "xe_gt_stats.h"
+#include "xe_guc_submit.h"
 #include "xe_hw_engine_group.h"
+#include "xe_hw_engine_types.h"
 #include "xe_sync.h"
 #include "xe_vm.h"
 
@@ -126,11 +129,10 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt)
 int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
 {
 	int err;
-	struct xe_device *xe = gt_to_xe(q->gt);
 
-	xe_assert(xe, group);
-	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM));
-	xe_assert(xe, q->vm);
+	xe_assert(gt_to_xe(q->gt), group);
+	xe_assert(gt_to_xe(q->gt), !(q->flags & EXEC_QUEUE_FLAG_VM));
+	xe_assert(gt_to_xe(q->gt), q->vm);
 
 	if (xe_vm_in_preempt_fence_mode(q->vm))
 		return 0;
@@ -139,13 +141,22 @@ int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct x
 	if (err)
 		return err;
 
-	if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) {
-		q->ops->suspend(q);
-		err = q->ops->suspend_wait(q);
-		if (err)
-			goto err_suspend;
+	if (xe_vm_in_fault_mode(q->vm)) {
+		if (group->pm_suspended) {
+			q->lr.pm_suspended = true;
+			q->ops->suspend(q);
+			err = q->ops->suspend_wait(q);
+			if (err)
+				goto err_suspend;
+		}
+		if (group->cur_mode == EXEC_MODE_DMA_FENCE) {
+			q->ops->suspend(q);
+			err = q->ops->suspend_wait(q);
+			if (err)
+				goto err_suspend;
 
-		xe_hw_engine_group_resume_faulting_lr_jobs(group);
+			xe_hw_engine_group_resume_faulting_lr_jobs(group);
+		}
 	}
 
 	list_add(&q->hw_engine_group_link, &group->exec_queue_list);
@@ -176,6 +187,8 @@ void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct
 	if (!list_empty(&q->hw_engine_group_link))
 		list_del(&q->hw_engine_group_link);
 
+	q->lr.pm_suspended = false;
+
 	up_write(&group->mode_sem);
 }
 
@@ -189,6 +202,134 @@ void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group
 	queue_work(group->resume_wq, &group->resume_work);
 }
 
+/**
+ * xe_suspend_all_faulting_lr_jobs() - Suspend all fault-mode exec queues on the device
+ * @xe: the xe device
+ *
+ * Suspends all fault-mode LR exec queues across all GTs before VRAM eviction
+ * during PM suspend. Fault-mode jobs can re-fault GPU page table entries at
+ * any time, racing with the eviction process. Must be paired with
+ * xe_resume_all_faulting_lr_jobs() after hardware is restored on resume.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_suspend_all_faulting_lr_jobs(struct xe_device *xe)
+{
+	struct xe_hw_engine_group *visited[XE_ENGINE_CLASS_MAX] = {};
+	int n_visited = 0;
+	struct xe_gt *gt;
+	u8 gt_id;
+	int err;
+
+	for_each_gt(gt, xe, gt_id) {
+		struct xe_hw_engine *hwe;
+		enum xe_hw_engine_id hwe_id;
+
+		for_each_hw_engine(hwe, gt, hwe_id) {
+			struct xe_hw_engine_group *group = hwe->hw_engine_group;
+			struct xe_exec_queue *q;
+			bool already_seen = false;
+			int i;
+
+			if (!group)
+				continue;
+
+			for (i = 0; i < n_visited; i++) {
+				if (visited[i] == group) {
+					already_seen = true;
+					break;
+				}
+			}
+			if (already_seen)
+				continue;
+
+			visited[n_visited++] = group;
+
+			err = down_write_killable(&group->mode_sem);
+			if (err)
+				goto err_resume;
+
+			group->pm_suspended = true;
+			list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
+				if (xe_vm_in_fault_mode(q->vm)) {
+					q->lr.pm_suspended = true;
+					q->ops->suspend(q);
+				}
+			}
+
+			list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
+				if (!xe_vm_in_fault_mode(q->vm))
+					continue;
+
+				err = q->ops->suspend_wait(q);
+				if (err) {
+					up_write(&group->mode_sem);
+					goto err_resume;
+				}
+			}
+
+			up_write(&group->mode_sem);
+		}
+	}
+
+	return 0;
+
+err_resume:
+	xe_resume_all_faulting_lr_jobs(xe);
+	return err;
+}
+
+/**
+ * xe_resume_all_faulting_lr_jobs() - Resume all fault-mode exec queues on the device
+ * @xe: the xe device
+ *
+ * Re-enables all fault-mode LR exec queues that were suspended for PM. Must be
+ * called after hardware is restored and page fault handlers are free to run.
+ */
+void xe_resume_all_faulting_lr_jobs(struct xe_device *xe)
+{
+	struct xe_hw_engine_group *visited[XE_ENGINE_CLASS_MAX] = {};
+	int n_visited = 0;
+	struct xe_gt *gt;
+	u8 gt_id;
+
+	for_each_gt(gt, xe, gt_id) {
+		struct xe_hw_engine *hwe;
+		enum xe_hw_engine_id hwe_id;
+
+		for_each_hw_engine(hwe, gt, hwe_id) {
+			struct xe_hw_engine_group *group = hwe->hw_engine_group;
+			struct xe_exec_queue *q;
+			bool already_seen = false;
+			int i;
+
+			if (!group)
+				continue;
+
+			for (i = 0; i < n_visited; i++) {
+				if (visited[i] == group) {
+					already_seen = true;
+					break;
+				}
+			}
+			if (already_seen)
+				continue;
+
+			visited[n_visited++] = group;
+
+			down_write(&group->mode_sem);
+			group->pm_suspended = false;
+			list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
+				if (!q->lr.pm_suspended)
+					continue;
+				q->lr.pm_suspended = false;
+				xe_guc_submit_pm_resume_exec_queue(q);
+			}
+			up_write(&group->mode_sem);
+		}
+	}
+}
+
 /**
  * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group
  * @group: The hw engine group
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h
index 8b17ccd30b70..67807d67530c 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_group.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h
@@ -9,6 +9,7 @@
 #include "xe_hw_engine_group_types.h"
 
 struct drm_device;
+struct xe_device;
 struct xe_exec_queue;
 struct xe_gt;
 struct xe_sync_entry;
@@ -27,5 +28,7 @@ void xe_hw_engine_group_put(struct xe_hw_engine_group *group);
 enum xe_hw_engine_group_execution_mode
 xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q);
 void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group);
+int xe_suspend_all_faulting_lr_jobs(struct xe_device *xe);
+void xe_resume_all_faulting_lr_jobs(struct xe_device *xe);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group_types.h b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h
index b4c41de6ba5f..090313da2f25 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_group_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h
@@ -46,6 +46,13 @@ struct xe_hw_engine_group {
 	struct rw_semaphore mode_sem;
 	/** @cur_mode: current execution mode of this hw engine group */
 	enum xe_hw_engine_group_execution_mode cur_mode;
+	/**
+	 * @pm_suspended: true while PM suspend is in progress for this group.
+	 * New fault-mode exec queues added while this is set are immediately
+	 * suspended (with &xe_exec_queue.lr.pm_suspended set) and resumed by
+	 * xe_resume_all_faulting_lr_jobs(). Protected by @mode_sem.
+	 */
+	bool pm_suspended;
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 76d211986822..58afb44b1b0c 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -20,6 +20,7 @@
 #include "xe_ggtt.h"
 #include "xe_gt.h"
 #include "xe_gt_idle.h"
+#include "xe_hw_engine_group.h"
 #include "xe_i2c.h"
 #include "xe_irq.h"
 #include "xe_late_bind_fw.h"
@@ -191,7 +192,6 @@ int xe_pm_suspend(struct xe_device *xe)
 
 	xe_display_pm_suspend(xe);
 
-	/* FIXME: Super racey... */
 	err = xe_bo_evict_all(xe);
 	if (err)
 		goto err_display;
@@ -414,9 +414,17 @@ static int xe_pm_notifier_callback(struct notifier_block *nb,
 	{
 		struct xe_validation_ctx ctx;
 
-		reinit_completion(&xe->pm_block);
-		xe_pm_block_begin_signalling();
 		xe_pm_runtime_get(xe);
+
+		err = xe_suspend_all_faulting_lr_jobs(xe);
+		if (err) {
+			drm_err(&xe->drm, "Notifier suspend faulting LR jobs failed (%d)\n", err);
+			xe_pm_runtime_put(xe);
+			return notifier_from_errno(err);
+		}
+
+		xe_pm_block_begin_signalling();
+		reinit_completion(&xe->pm_block);
 		(void)xe_validation_ctx_init(&ctx, &xe->val, NULL,
 					     (struct xe_val_flags) {.exclusive = true});
 		err = xe_bo_evict_all_user(xe);
@@ -440,6 +448,7 @@ static int xe_pm_notifier_callback(struct notifier_block *nb,
 		complete_all(&xe->pm_block);
 		xe_pm_wake_rebind_workers(xe);
 		xe_bo_notifier_unprepare_all_pinned(xe);
+		xe_resume_all_faulting_lr_jobs(xe);
 		xe_pm_runtime_put(xe);
 		break;
 	}
-- 
2.54.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-05-22 16:44 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20260522164355.2773-1-thomas.hellstrom@linux.intel.com>
2026-05-22 16:43 ` [PATCH 1/5] drm/xe/guc: Defer user exec queue scheduler start until after page table restore Thomas Hellström
2026-05-22 16:43 ` [PATCH 2/5] drm/xe/guc: Don't ban LR VM exec queues on PM suspend Thomas Hellström
2026-05-22 16:43 ` [PATCH 5/5] drm/xe: Suspend fault-mode LR jobs before VRAM eviction on S3/S4 Thomas Hellström

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox