[RFC PATCH 4/4] drm/xe: Stop abusing DRM scheduler internals

Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed

From: Matthew Brost <matthew.brost@intel.com>
To: dri-devel@lists.freedesktop.org, intel-xe@lists.freedesktop.org
Cc: alexdeucher@gmail.com, dakr@kernel.org, christian.koenig@amd.com,
	pstanner@redhat.com
Subject: [RFC PATCH 4/4] drm/xe: Stop abusing DRM scheduler internals
Date: Wed,  1 Oct 2025 22:16:04 -0700	[thread overview]
Message-ID: <20251002051604.1865322-5-matthew.brost@intel.com> (raw)
In-Reply-To: <20251002051604.1865322-1-matthew.brost@intel.com>

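Use the new pending job list iterator and the new job helper functions
added earlier in this series so that Xe no longer reaches into DRM
scheduler internals (pending_list, job_list_lock and the scheduler
fence of a job).

As part of this conversion:

 - xe_sched_add_pending_job() is removed. guc_exec_queue_timedout_job()
   no longer re-adds the timed-out job to the pending list; it sets the
   error on that job directly, marks the remaining pending jobs as
   -ECANCELED, and returns DRM_GPU_SCHED_STAT_NO_HANG instead of
   DRM_GPU_SCHED_STAT_RESET.

 - The exec queue snapshot no longer records or prints the per-job
   "finished" state, and no longer takes job_list_lock while capturing
   the pending list.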

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_gpu_scheduler.h    | 27 +++++--------------
 drivers/gpu/drm/xe/xe_guc_submit.c       | 34 +++++++++++-------------
 drivers/gpu/drm/xe/xe_guc_submit_types.h |  1 -
 3 files changed, 21 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
index 04f85c4f7e80..ccfb7962e6c1 100644
--- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
@@ -7,7 +7,7 @@
 #define _XE_GPU_SCHEDULER_H_
 
 #include "xe_gpu_scheduler_types.h"
-#include "xe_sched_job_types.h"
+#include "xe_sched_job.h"
 
 int xe_sched_init(struct xe_gpu_scheduler *sched,
 		  const struct drm_sched_backend_ops *ops,
@@ -54,13 +54,9 @@ static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched)
 {
 	struct drm_sched_job *s_job;
 
-	list_for_each_entry(s_job, &sched->base.pending_list, list) {
-		struct drm_sched_fence *s_fence = s_job->s_fence;
-		struct dma_fence *hw_fence = s_fence->parent;
-
-		if (hw_fence && !dma_fence_is_signaled(hw_fence))
+	drm_sched_for_each_pending_job(s_job, &sched->base, NULL, true)
+		if (!drm_sched_job_is_signaled(s_job))
 			sched->base.ops->run_job(s_job);
-	}
 }
 
 static inline bool
@@ -69,25 +65,14 @@ xe_sched_invalidate_job(struct xe_sched_job *job, int threshold)
 	return drm_sched_invalidate_job(&job->drm, threshold);
 }
 
-static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched,
-					    struct xe_sched_job *job)
-{
-	spin_lock(&sched->base.job_list_lock);
-	list_add(&job->drm.list, &sched->base.pending_list);
-	spin_unlock(&sched->base.job_list_lock);
-}
-
 static inline
 struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched)
 {
-	struct xe_sched_job *job;
+	struct drm_sched_job *job;
 
-	spin_lock(&sched->base.job_list_lock);
-	job = list_first_entry_or_null(&sched->base.pending_list,
-				       struct xe_sched_job, drm.list);
-	spin_unlock(&sched->base.job_list_lock);
+	job = drm_sched_first_pending_job(&sched->base);
 
-	return job;
+	return job ? to_xe_sched_job(job) : NULL;
 }
 
 static inline int
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 53024eb5670b..da13c1380cb3 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1217,7 +1217,7 @@ static enum drm_gpu_sched_stat
 guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 {
 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
-	struct xe_sched_job *tmp_job;
+	struct drm_sched_job *tmp_job;
 	struct xe_exec_queue *q = job->q;
 	struct xe_gpu_scheduler *sched = &q->guc->sched;
 	struct xe_guc *guc = exec_queue_to_guc(q);
@@ -1226,7 +1226,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 	unsigned int fw_ref;
 	int err = -ETIME;
 	pid_t pid = -1;
-	int i = 0;
 	bool wedged = false, skip_timeout_check;
 
 	/*
@@ -1391,21 +1390,19 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 	 * Fence state now stable, stop / start scheduler which cleans up any
 	 * fences that are complete
 	 */
-	xe_sched_add_pending_job(sched, job);
+	xe_sched_job_set_error(job, err);
 	xe_sched_submission_start(sched);
 
 	xe_guc_exec_queue_trigger_cleanup(q);
 
 	/* Mark all outstanding jobs as bad, thus completing them */
-	spin_lock(&sched->base.job_list_lock);
-	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
-		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
-	spin_unlock(&sched->base.job_list_lock);
+	drm_sched_for_each_pending_job(tmp_job, &sched->base, NULL, false)
+		xe_sched_job_set_error(to_xe_sched_job(tmp_job), -ECANCELED);
 
 	/* Start fence signaling */
 	xe_hw_fence_irq_start(q->fence_irq);
 
-	return DRM_GPU_SCHED_STAT_RESET;
+	return DRM_GPU_SCHED_STAT_NO_HANG;
 
 sched_enable:
 	enable_scheduling(q);
@@ -2478,30 +2475,30 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
 	if (snapshot->parallel_execution)
 		guc_exec_queue_wq_snapshot_capture(q, snapshot);
 
-	spin_lock(&sched->base.job_list_lock);
-	snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
+	snapshot->pending_list_size = drm_sched_pending_job_count(&sched->base);
 	snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
 					       sizeof(struct pending_list_snapshot),
 					       GFP_ATOMIC);
 
 	if (snapshot->pending_list) {
 		struct xe_sched_job *job_iter;
+		struct drm_sched_job *drm_job;
 
 		i = 0;
-		list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) {
+		drm_sched_for_each_pending_job(drm_job, &sched->base, NULL, false) {
+			job_iter = to_xe_sched_job(drm_job);
+
+			if (i >= snapshot->pending_list_size)
+				break;
+
 			snapshot->pending_list[i].seqno =
 				xe_sched_job_seqno(job_iter);
 			snapshot->pending_list[i].fence =
 				dma_fence_is_signaled(job_iter->fence) ? 1 : 0;
-			snapshot->pending_list[i].finished =
-				dma_fence_is_signaled(&job_iter->drm.s_fence->finished)
-				? 1 : 0;
 			i++;
 		}
 	}
 
-	spin_unlock(&sched->base.job_list_lock);
-
 	return snapshot;
 }
 
@@ -2562,10 +2559,9 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
 
 	for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
 	     i++)
-		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
+		drm_printf(p, "\tJob: seqno=%d, fence=%d\n",
 			   snapshot->pending_list[i].seqno,
-			   snapshot->pending_list[i].fence,
-			   snapshot->pending_list[i].finished);
+			   snapshot->pending_list[i].fence);
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h
index dc7456c34583..59d88dd66e6e 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h
@@ -64,7 +64,6 @@ struct guc_submit_parallel_scratch {
 struct pending_list_snapshot {
 	u32 seqno;
 	bool fence;
-	bool finished;
 };
 
 /**
-- 
2.34.1

next prev parent reply	other threads:[~2025-10-02  5:16 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-02  5:16 [RFC PATCH 0/4] Fix DRM scheduler layering violations in Xe Matthew Brost
2025-10-02  5:16 ` [RFC PATCH 1/4] drm/sched: Add pending job list iterator Matthew Brost
2025-10-07  7:28   ` Christian König
2025-10-07  8:09     ` Matthew Brost
2025-10-07  8:28       ` Matthew Brost
2025-10-07  8:44         ` Christian König
2025-10-07  9:44           ` Matthew Brost
2025-10-07  9:51             ` Danilo Krummrich
2025-10-02  5:16 ` [RFC PATCH 2/4] drm/sched: Add several job helpers to avoid drivers touching scheduler state Matthew Brost
2025-10-02  5:16 ` [RFC PATCH 3/4] drm/xe: Add dedicated message lock Matthew Brost
2025-10-02  5:16 ` Matthew Brost [this message]
2025-10-02  6:01 ` ✗ CI.checkpatch: warning for Fix DRM scheduler layering violations in Xe Patchwork
2025-10-02  6:02 ` ✓ CI.KUnit: success " Patchwork
2025-10-02  6:38 ` ✓ Xe.CI.BAT: " Patchwork
2025-10-02  7:58 ` ✗ Xe.CI.Full: failure " Patchwork

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:04f85c4f7e8 dfblob:ccfb7962e6c dfblob:53024eb5670
dfblob:da13c1380cb dfblob:dc7456c3458 dfblob:59d88dd66e6 )
 OR (
bs:"[RFC PATCH 4/4] drm/xe: Stop abusing DRM scheduler internals" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251002051604.1865322-5-matthew.brost@intel.com \
    --to=matthew.brost@intel.com \
    --cc=alexdeucher@gmail.com \
    --cc=christian.koenig@amd.com \
    --cc=dakr@kernel.org \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=pstanner@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox