[PATCH 10/10] drm/amdgpu: rework ring reset backup and reemit

AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed

From: Alex Deucher <alexander.deucher@amd.com>
To: <amd-gfx@lists.freedesktop.org>
Cc: Alex Deucher <alexander.deucher@amd.com>
Subject: [PATCH 10/10] drm/amdgpu: rework ring reset backup and reemit
Date: Fri, 16 Jan 2026 11:20:27 -0500	[thread overview]
Message-ID: <20260116162027.21550-11-alexander.deucher@amd.com> (raw)
In-Reply-To: <20260116162027.21550-1-alexander.deucher@amd.com>

Store the start and end wptrs in the IB fence. On queue
reset, only save the ring contents of the non-guilty contexts.
Since the VM fence is a sub-fence of the the IB fence, the IB
fence stores the start and end wptrs for both fences as the IB
state encapsulates the VM fence state.

For reemit, only reemit the state for non-guilty contexts; for
guilty contexts, just emit the fences.  Split the reemit per
fence when when we reemit, update the start and end wptrs
with the new values from reemit.  This allows us to
reemit jobs repeatedly as the wptrs get properly updated
each time.  Submitting the fence directly also simplifies
the logic as we no longer need to store additional wptrs for
each fence within the IB ring sequence.  If the context is
guilty, we emit the fence(s) and if not, we reemit the
whole IB ring sequence for that IB.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 98 +++++++++--------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c    |  9 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c  | 37 +--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  | 20 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  1 +
 5 files changed, 54 insertions(+), 111 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index d48f61076c06a..541cdbe1e28bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -89,16 +89,6 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
 	return seq;
 }
 
-static void amdgpu_fence_save_fence_wptr_start(struct amdgpu_fence *af)
-{
-	af->fence_wptr_start = af->ring->wptr;
-}
-
-static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af)
-{
-	af->fence_wptr_end = af->ring->wptr;
-}
-
 /**
  * amdgpu_fence_emit - emit a fence on the requested ring
  *
@@ -126,11 +116,10 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
 		       &ring->fence_drv.lock,
 		       adev->fence_context + ring->idx, seq);
 
-	amdgpu_fence_save_fence_wptr_start(af);
+	af->flags = flags | AMDGPU_FENCE_FLAG_INT;
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
-			       seq, flags | AMDGPU_FENCE_FLAG_INT);
-	amdgpu_fence_save_fence_wptr_end(af);
-	amdgpu_fence_save_wptr(af);
+			       seq, af->flags);
+
 	pm_runtime_get_noresume(adev_to_drm(adev)->dev);
 	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
 	if (unlikely(rcu_dereference_protected(*ptr, 1))) {
@@ -241,7 +230,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
 
 	do {
 		struct dma_fence *fence, **ptr;
-		struct amdgpu_fence *am_fence;
 
 		++last_seq;
 		last_seq &= drv->num_fences_mask;
@@ -254,12 +242,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
 		if (!fence)
 			continue;
 
-		/* Save the wptr in the fence driver so we know what the last processed
-		 * wptr was.  This is required for re-emitting the ring state for
-		 * queues that are reset but are not guilty and thus have no guilty fence.
-		 */
-		am_fence = container_of(fence, struct amdgpu_fence, base);
-		drv->signalled_wptr = am_fence->wptr;
 		dma_fence_signal(fence);
 		dma_fence_put(fence);
 		pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
@@ -727,25 +709,27 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring,
  */
 
 /**
- * amdgpu_fence_driver_update_timedout_fence_state - Update fence state and set errors
+ * amdgpu_ring_set_fence_errors_and_reemit - Set dma_fence errors and reemit
  *
- * @af: fence of the ring to update
+ * @guilty_fence: fence of the ring to update
  *
  */
-void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af)
+void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring,
+					     struct amdgpu_fence *guilty_fence)
 {
 	struct dma_fence *unprocessed;
 	struct dma_fence __rcu **ptr;
 	struct amdgpu_fence *fence;
-	struct amdgpu_ring *ring = af->ring;
 	unsigned long flags;
 	u32 seq, last_seq;
-	bool reemitted = false;
+	unsigned int i;
 
 	last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
 	seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
 
 	/* mark all fences from the guilty context with an error */
+	amdgpu_ring_alloc(ring, ring->ring_backup_entries_to_copy +
+			  20 * ring->guilty_fence_count);
 	spin_lock_irqsave(&ring->fence_drv.lock, flags);
 	do {
 		last_seq++;
@@ -758,39 +742,41 @@ void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af)
 		if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
 			fence = container_of(unprocessed, struct amdgpu_fence, base);
 
-			if (fence->reemitted > 1)
-				reemitted = true;
-			else if (fence == af)
+			if (fence == guilty_fence)
 				dma_fence_set_error(&fence->base, -ETIME);
-			else if (fence->context == af->context)
+			else if (fence->context == guilty_fence->context)
 				dma_fence_set_error(&fence->base, -ECANCELED);
+
+			if (fence->context == guilty_fence->context) {
+				/* just emit the fence for the guilty context */
+				amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
+						       fence->base.seqno, fence->flags);
+			} else {
+				/* reemit the packet stream and update wptrs */
+				fence->wptr_start = ring->wptr;
+				for (i = 0; i < fence->backup_size; i++)
+					amdgpu_ring_write(ring, ring->ring_backup[fence->backup_idx + i]);
+				fence->wptr_end = ring->wptr;
+			}
 		}
 		rcu_read_unlock();
 	} while (last_seq != seq);
 	spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
-
-	if (reemitted) {
-		/* if we've already reemitted once then just cancel everything */
-		amdgpu_fence_driver_force_completion(af->ring, &af->base);
-		af->ring->ring_backup_entries_to_copy = 0;
-	}
-}
-
-void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
-{
-	af->wptr = af->ring->wptr;
+	amdgpu_ring_commit(ring);
 }
 
 static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
-						   u64 start_wptr, u64 end_wptr)
+						   struct amdgpu_fence *af)
 {
-	unsigned int first_idx = start_wptr & ring->buf_mask;
-	unsigned int last_idx = end_wptr & ring->buf_mask;
+	unsigned int first_idx = af->wptr_start & ring->buf_mask;
+	unsigned int last_idx = af->wptr_end & ring->buf_mask;
 	unsigned int i;
 
 	/* Backup the contents of the ring buffer. */
+	af->backup_idx = ring->ring_backup_entries_to_copy;
 	for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask)
 		ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i];
+	af->backup_size = ring->ring_backup_entries_to_copy - af->backup_idx;
 }
 
 void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
@@ -799,13 +785,12 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
 	struct dma_fence *unprocessed;
 	struct dma_fence __rcu **ptr;
 	struct amdgpu_fence *fence;
-	u64 wptr;
 	u32 seq, last_seq;
 
 	last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
 	seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
-	wptr = ring->fence_drv.signalled_wptr;
 	ring->ring_backup_entries_to_copy = 0;
+	ring->guilty_fence_count = 0;
 
 	do {
 		last_seq++;
@@ -818,21 +803,12 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
 		if (unprocessed && !dma_fence_is_signaled(unprocessed)) {
 			fence = container_of(unprocessed, struct amdgpu_fence, base);
 
-			/* save everything if the ring is not guilty, otherwise
-			 * just save the content from other contexts.
-			 */
-			if (!fence->reemitted &&
-			    (!guilty_fence || (fence->context != guilty_fence->context))) {
-				amdgpu_ring_backup_unprocessed_command(ring, wptr,
-								       fence->wptr);
-			} else if (!fence->reemitted) {
-				/* always save the fence */
-				amdgpu_ring_backup_unprocessed_command(ring,
-								       fence->fence_wptr_start,
-								       fence->fence_wptr_end);
-			}
-			wptr = fence->wptr;
-			fence->reemitted++;
+			/* keep track of the guilty fences for reemit */
+			if (fence->context == guilty_fence->context)
+				ring->guilty_fence_count++;
+			/* Non-vm fence has all the state.  Backup the non-guilty contexts */
+			if (!fence->is_vm_fence && (fence->context != guilty_fence->context))
+				amdgpu_ring_backup_unprocessed_command(ring, fence);
 		}
 		rcu_read_unlock();
 	} while (last_seq != seq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index fb58637ed1507..865a803026c3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -185,6 +185,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct amdgpu_job *job,
 		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
 		return r;
 	}
+	af->wptr_start = ring->wptr;
 
 	need_ctx_switch = ring->current_ctx != fence_ctx;
 	if (ring->funcs->emit_pipeline_sync && job &&
@@ -303,13 +304,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	    ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
 		ring->funcs->emit_wave_limit(ring, false);
 
-	/* Save the wptr associated with this fence.
-	 * This must be last for resets to work properly
-	 * as we need to save the wptr associated with this
-	 * fence so we know what rings contents to backup
-	 * after we reset the queue.
-	 */
-	amdgpu_fence_save_wptr(af);
+	af->wptr_end = ring->wptr;
 
 	amdgpu_ring_ib_end(ring);
 	amdgpu_ring_commit(ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index b82357c657237..df37335127979 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -104,29 +104,6 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
 	return 0;
 }
 
-/**
- * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit
- *
- * @ring: amdgpu_ring structure holding ring information
- * @ndw: number of dwords to allocate in the ring buffer
- *
- * Allocate @ndw dwords in the ring buffer (all asics).
- * doesn't check the max_dw limit as we may be reemitting
- * several submissions.
- */
-static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw)
-{
-	/* Align requested size with padding so unlock_commit can
-	 * pad safely */
-	ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
-
-	ring->count_dw = ndw;
-	ring->wptr_old = ring->wptr;
-
-	if (ring->funcs->begin_use)
-		ring->funcs->begin_use(ring);
-}
-
 /**
  * amdgpu_ring_insert_nop - insert NOP packets
  *
@@ -877,7 +854,6 @@ void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
 int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
 				 struct amdgpu_fence *guilty_fence)
 {
-	unsigned int i;
 	int r;
 
 	/* verify that the ring is functional */
@@ -885,16 +861,9 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
 	if (r)
 		return r;
 
-	/* set an error on all fences from the context */
-	if (guilty_fence)
-		amdgpu_fence_driver_update_timedout_fence_state(guilty_fence);
-	/* Re-emit the non-guilty commands */
-	if (ring->ring_backup_entries_to_copy) {
-		amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy);
-		for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
-			amdgpu_ring_write(ring, ring->ring_backup[i]);
-		amdgpu_ring_commit(ring);
-	}
+	/* set an error on all fences from the context and reemit */
+	amdgpu_ring_set_fence_errors_and_reemit(ring, guilty_fence);
+
 	/* Start the scheduler again */
 	drm_sched_wqueue_start(&ring->sched);
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index ce095427611fb..6dca1ccd2fc2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -121,7 +121,6 @@ struct amdgpu_fence_driver {
 	/* sync_seq is protected by ring emission lock */
 	uint32_t			sync_seq;
 	atomic_t			last_seq;
-	u64				signalled_wptr;
 	bool				initialized;
 	struct amdgpu_irq_src		*irq_src;
 	unsigned			irq_type;
@@ -146,15 +145,17 @@ struct amdgpu_fence {
 	struct amdgpu_ring		*ring;
 	ktime_t				start_timestamp;
 
+	bool				is_vm_fence;
+	unsigned int			flags;
 	/* wptr for the total submission for resets */
-	u64				wptr;
+	u64				wptr_start;
+	u64				wptr_end;
 	/* fence context for resets */
 	u64				context;
-	/* has this fence been reemitted */
-	unsigned int			reemitted;
-	/* wptr for the fence for the submission */
-	u64				fence_wptr_start;
-	u64				fence_wptr_end;
+	/* idx into the ring backup */
+	unsigned int			backup_idx;
+	unsigned int			backup_size;
+
 };
 
 extern const struct drm_sched_backend_ops amdgpu_sched_ops;
@@ -162,8 +163,8 @@ extern const struct drm_sched_backend_ops amdgpu_sched_ops;
 void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring,
 					  struct dma_fence *timedout_fence);
-void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af);
-void amdgpu_fence_save_wptr(struct amdgpu_fence *af);
+void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring,
+					     struct amdgpu_fence *guilty_fence);
 
 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
@@ -314,6 +315,7 @@ struct amdgpu_ring {
 	/* backups for resets */
 	uint32_t		*ring_backup;
 	unsigned int		ring_backup_entries_to_copy;
+	unsigned int		guilty_fence_count;
 	unsigned		rptr_offs;
 	u64			rptr_gpu_addr;
 	u32			*rptr_cpu_addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6a2ea200d90c8..bd2c01b1ef18f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -848,6 +848,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 		r = amdgpu_fence_emit(ring, job->hw_vm_fence, 0);
 		if (r)
 			return r;
+		job->hw_vm_fence->is_vm_fence = true;
 		fence = &job->hw_vm_fence->base;
 		/* get a ref for the job */
 		dma_fence_get(fence);
-- 
2.52.0

next prev parent reply	other threads:[~2026-01-16 16:21 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-16 16:20 [PATCH 00/10] Improvements for IB handling V3 Alex Deucher
2026-01-16 16:20 ` [PATCH 01/10] drm/amdgpu: fix type for wptr in ring backup Alex Deucher
2026-01-19 12:18   ` Christian König
2026-01-16 16:20 ` [PATCH 02/10] drm/amdgpu: rename amdgpu_fence_driver_guilty_force_completion() Alex Deucher
2026-01-19 12:22   ` Christian König
2026-01-16 16:20 ` [PATCH 03/10] drm/amdgpu/job: use GFP_ATOMIC while in gpu reset Alex Deucher
2026-01-19 12:27   ` Christian König
2026-01-16 16:20 ` [PATCH 04/10] drm/amdgpu: switch all IPs to using job for IBs Alex Deucher
2026-01-19 12:31   ` Christian König
2026-01-16 16:20 ` [PATCH 05/10] drm/amdgpu: require a job to schedule an IB Alex Deucher
2026-01-19 12:41   ` Christian König
2026-01-16 16:20 ` [PATCH 06/10] drm/amdgpu: don't call drm_sched_stop/start() in asic reset Alex Deucher
2026-01-19 12:42   ` Christian König
2026-01-16 16:20 ` [PATCH 07/10] drm/amdgpu: drop drm_sched_increase_karma() Alex Deucher
2026-01-19 12:44   ` Christian König
2026-01-16 16:20 ` [PATCH 08/10] drm/amdgpu: plumb timedout fence through to force completion Alex Deucher
2026-01-16 16:20 ` [PATCH 09/10] drm/amdgpu: simplify VCN reset helper Alex Deucher
2026-01-16 16:20 ` Alex Deucher [this message]
2026-01-19 13:19   ` [PATCH 10/10] drm/amdgpu: rework ring reset backup and reemit Christian König
2026-01-20  2:41   ` Timur Kristóf

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:d48f61076c06 dfblob:541cdbe1e28b dfblob:fb58637ed150
dfblob:865a803026c3 dfblob:b82357c65723 dfblob:df3733512797
dfblob:ce095427611f dfblob:6dca1ccd2fc2 dfblob:6a2ea200d90c
dfblob:bd2c01b1ef18 )
 OR (
bs:"[PATCH 10/10] drm/amdgpu: rework ring reset backup and reemit" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260116162027.21550-11-alexander.deucher@amd.com \
    --to=alexander.deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox