From: Matthew Brost <matthew.brost@intel.com>
To: intel-xe@lists.freedesktop.org
Subject: [PATCH v9 04/34] drm/xe: Track LR jobs in DRM scheduler pending list
Date: Wed,  8 Oct 2025 11:04:30 -0700
Message-ID: <20251008180500.3261209-5-matthew.brost@intel.com>
In-Reply-To: <20251008180500.3261209-1-matthew.brost@intel.com>

VF migration requires jobs to remain pending so they can be replayed
after the VF comes back. Previously, LR job fences were intentionally
signaled immediately after submission to avoid the risk of exporting
them, as these fences do not naturally signal in a timely manner and
could break dma-fence contracts. A side effect of this approach was that
LR jobs were never added to the DRM scheduler’s pending list, preventing
them from being tracked for later resubmission.
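
For reference, the run_job backend previously special-cased LR jobs
roughly like this (condensed from the logic removed in the diff below):

    if (lr) {
            /* Signal immediately: never exported, never tracked */
            xe_sched_job_set_error(job, -EOPNOTSUPP);
            dma_fence_put(job->fence); /* Drop ref from xe_sched_job_arm */
            return NULL;    /* NULL fence => drm_sched completes the
                             * job right away, so it never sits on the
                             * pending list */
    }
    return job->fence;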

We now avoid signaling LR job fences and ensure they are never exported;
Xe already guards against exporting these internal fences. With that
guarantee in place, we can safely track LR jobs in the scheduler’s
pending list so they are eligible for resubmission during VF
post-migration recovery (and similar recovery paths).
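
With the jobs tracked, recovery can walk the scheduler’s pending list
under the job-list lock. The LR cleanup path in this patch uses exactly
this pattern to cancel jobs; the replay itself lands later in the
series (illustrative sketch, names as in xe_guc_submit.c):

    struct xe_sched_job *job;

    spin_lock(&sched->base.job_list_lock);
    list_for_each_entry(job, &sched->base.pending_list, drm.list)
            xe_sched_job_set_error(job, -ECANCELED); /* or re-emit */
    spin_unlock(&sched->base.job_list_lock);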

An added benefit is that LR queues now gain the DRM scheduler’s
built-in flow control over ring usage, rather than having the exec
IOCTL reject new jobs with -EWOULDBLOCK when the ring is full.
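
The scheduler can take over this check because its submission credit
limit is sized in whole jobs, using the same formula as the removed
xe_exec_queue_ring_full() (sketch; the exact init wiring lives in the
GuC submission backend):

    /* One job per credit: drm_sched stops pulling new jobs from the
     * entity once this many are in flight, so the ring cannot
     * overflow.
     */
    u32 credit_limit = q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES;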

v2:
 - Ensure DRM scheduler TDR doesn't run for LR jobs
 - Stack variable for killed_or_banned_or_wedged
v4:
 - Clarify commit message (Tomasz)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Tomasz Lis <tomasz.lis@intel.com>
---
 drivers/gpu/drm/xe/xe_exec.c       | 12 ++-------
 drivers/gpu/drm/xe/xe_exec_queue.c | 19 -------------
 drivers/gpu/drm/xe/xe_exec_queue.h |  2 --
 drivers/gpu/drm/xe/xe_guc_submit.c | 43 ++++++++++++++++++++----------
 4 files changed, 31 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 83897950f0da..0dc27476832b 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -124,7 +124,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	struct xe_validation_ctx ctx;
 	struct xe_sched_job *job;
 	struct xe_vm *vm;
-	bool write_locked, skip_retry = false;
+	bool write_locked;
 	int err = 0;
 	struct xe_hw_engine_group *group;
 	enum xe_hw_engine_group_execution_mode mode, previous_mode;
@@ -266,12 +266,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto err_exec;
 	}
 
-	if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
-		err = -EWOULDBLOCK;	/* Aliased to -EAGAIN */
-		skip_retry = true;
-		goto err_exec;
-	}
-
 	if (xe_exec_queue_uses_pxp(q)) {
 		err = xe_vm_validate_protected(q->vm);
 		if (err)
@@ -328,8 +322,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		xe_sched_job_init_user_fence(job, &syncs[i]);
 	}
 
-	if (xe_exec_queue_is_lr(q))
-		q->ring_ops->emit_job(job);
 	if (!xe_vm_in_lr_mode(vm))
 		xe_exec_queue_last_fence_set(q, vm, &job->drm.s_fence->finished);
 	xe_sched_job_push(job);
@@ -355,7 +347,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		xe_validation_ctx_fini(&ctx);
 err_unlock_list:
 	up_read(&vm->lock);
-	if (err == -EAGAIN && !skip_retry)
+	if (err == -EAGAIN)
 		goto retry;
 err_hw_exec_mode:
 	if (mode == EXEC_MODE_DMA_FENCE)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index df82463b19f6..7621089a47fe 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -850,25 +850,6 @@ bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
 		!(q->flags & EXEC_QUEUE_FLAG_VM);
 }
 
-static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q)
-{
-	return q->lrc[0]->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc[0]) - 1;
-}
-
-/**
- * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full
- * @q: The exec_queue
- *
- * Return: True if the exec_queue's ring is full, false otherwise.
- */
-bool xe_exec_queue_ring_full(struct xe_exec_queue *q)
-{
-	struct xe_lrc *lrc = q->lrc[0];
-	s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES;
-
-	return xe_exec_queue_num_job_inflight(q) >= max_job;
-}
-
 /**
  * xe_exec_queue_is_idle() - Whether an exec_queue is idle.
  * @q: The exec_queue
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
index 8821ceb838d0..a4dfbe858bda 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -64,8 +64,6 @@ static inline bool xe_exec_queue_uses_pxp(struct xe_exec_queue *q)
 
 bool xe_exec_queue_is_lr(struct xe_exec_queue *q);
 
-bool xe_exec_queue_ring_full(struct xe_exec_queue *q);
-
 bool xe_exec_queue_is_idle(struct xe_exec_queue *q);
 
 void xe_exec_queue_kill(struct xe_exec_queue *q);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 13746f32b231..3a534d93505f 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -851,30 +851,31 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
 	struct xe_exec_queue *q = job->q;
 	struct xe_guc *guc = exec_queue_to_guc(q);
-	struct dma_fence *fence = NULL;
-	bool lr = xe_exec_queue_is_lr(q);
+	bool lr = xe_exec_queue_is_lr(q), killed_or_banned_or_wedged =
+		exec_queue_killed_or_banned_or_wedged(q);
 
 	xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
 		     exec_queue_banned(q) || exec_queue_suspended(q));
 
 	trace_xe_sched_job_run(job);
 
-	if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) {
+	if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
 		if (!exec_queue_registered(q))
 			register_exec_queue(q, GUC_CONTEXT_NORMAL);
-		if (!lr)	/* LR jobs are emitted in the exec IOCTL */
-			q->ring_ops->emit_job(job);
+		q->ring_ops->emit_job(job);
 		submit_exec_queue(q);
 	}
 
-	if (lr) {
-		xe_sched_job_set_error(job, -EOPNOTSUPP);
-		dma_fence_put(job->fence);	/* Drop ref from xe_sched_job_arm */
-	} else {
-		fence = job->fence;
-	}
+	/*
+	 * We don't care about job-fence ordering in LR VMs because these fences
+	 * are never exported; they are used solely to keep jobs on the pending
+	 * list. Once a queue enters an error state, there's no need to track
+	 * them.
+	 */
+	if (killed_or_banned_or_wedged && lr)
+		xe_sched_job_set_error(job, -ECANCELED);
 
-	return fence;
+	return job->fence;
 }
 
 static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
@@ -916,7 +917,8 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
 		xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
 		xe_sched_submission_start(sched);
 		xe_gt_reset_async(q->gt);
-		xe_sched_tdr_queue_imm(sched);
+		if (!xe_exec_queue_is_lr(q))
+			xe_sched_tdr_queue_imm(sched);
 		return;
 	}
 
@@ -1008,6 +1010,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
 	struct xe_exec_queue *q = ge->q;
 	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct xe_gpu_scheduler *sched = &ge->sched;
+	struct xe_sched_job *job;
 	bool wedged = false;
 
 	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q));
@@ -1058,7 +1061,16 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
 	if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0]))
 		xe_devcoredump(q, NULL, "LR job cleanup, guc_id=%d", q->guc->id);
 
+	xe_hw_fence_irq_stop(q->fence_irq);
+
 	xe_sched_submission_start(sched);
+
+	spin_lock(&sched->base.job_list_lock);
+	list_for_each_entry(job, &sched->base.pending_list, drm.list)
+		xe_sched_job_set_error(job, -ECANCELED);
+	spin_unlock(&sched->base.job_list_lock);
+
+	xe_hw_fence_irq_start(q->fence_irq);
 }
 
 #define ADJUST_FIVE_PERCENT(__t)	mul_u64_u32_div(__t, 105, 100)
@@ -1129,7 +1141,8 @@ static void enable_scheduling(struct xe_exec_queue *q)
 		xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
 		set_exec_queue_banned(q);
 		xe_gt_reset_async(q->gt);
-		xe_sched_tdr_queue_imm(&q->guc->sched);
+		if (!xe_exec_queue_is_lr(q))
+			xe_sched_tdr_queue_imm(&q->guc->sched);
 	}
 }
 
@@ -1187,6 +1200,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 	int i = 0;
 	bool wedged = false, skip_timeout_check;
 
+	xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_lr(q));
+
 	/*
 	 * TDR has fired before free job worker. Common if exec queue
 	 * immediately closed after last fence signaled. Add back to pending
-- 
2.34.1

