public inbox for stable@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu/mes: fix mes ring buffer overflow
@ 2024-08-27 14:10 Alex Deucher
  2024-08-27 14:21 ` Greg KH
  0 siblings, 1 reply; 4+ messages in thread
From: Alex Deucher @ 2024-08-27 14:10 UTC (permalink / raw)
  To: stable, gregkh, sashal; +Cc: Jack Xiao, Alex Deucher

From: Jack Xiao <Jack.Xiao@amd.com>

wait memory room until enough before writing mes packets
to avoid ring buffer overflow.

v2: squash in sched_hw_submission fix

Backport from 6.11.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3571
Fixes: de3246254156 ("drm/amdgpu: cleanup MES11 command submission")
Fixes: fffe347e1478 ("drm/amdgpu: cleanup MES12 command submission")
Signed-off-by: Jack Xiao <Jack.Xiao@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 34e087e8920e635c62e2ed6a758b0cd27f836d13)
Cc: stable@vger.kernel.org # 6.10.x
(cherry picked from commit 11752c013f562a1124088a35bd314aa0e9f0e88f)
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  2 ++
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c   | 18 ++++++++++++++----
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 06f0a6534a94..88ffb15e25cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -212,6 +212,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 	 */
 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
 		sched_hw_submission = max(sched_hw_submission, 256);
+	if (ring->funcs->type == AMDGPU_RING_TYPE_MES)
+		sched_hw_submission = 8;
 	else if (ring == &adev->sdma.instance[0].page)
 		sched_hw_submission = 256;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 32d4519541c6..e1a66d585f5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -163,7 +163,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 	const char *op_str, *misc_op_str;
 	unsigned long flags;
 	u64 status_gpu_addr;
-	u32 status_offset;
+	u32 seq, status_offset;
 	u64 *status_ptr;
 	signed long r;
 	int ret;
@@ -191,6 +191,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 	if (r)
 		goto error_unlock_free;
 
+	seq = ++ring->fence_drv.sync_seq;
+	r = amdgpu_fence_wait_polling(ring,
+				      seq - ring->fence_drv.num_fences_mask,
+				      timeout);
+	if (r < 1)
+		goto error_undo;
+
 	api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
 	api_status->api_completion_fence_addr = status_gpu_addr;
 	api_status->api_completion_fence_value = 1;
@@ -203,8 +210,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
 	mes_status_pkt.api_status.api_completion_fence_addr =
 		ring->fence_drv.gpu_addr;
-	mes_status_pkt.api_status.api_completion_fence_value =
-		++ring->fence_drv.sync_seq;
+	mes_status_pkt.api_status.api_completion_fence_value = seq;
 
 	amdgpu_ring_write_multiple(ring, &mes_status_pkt,
 				   sizeof(mes_status_pkt) / 4);
@@ -224,7 +230,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 		dev_dbg(adev->dev, "MES msg=%d was emitted\n",
 			x_pkt->header.opcode);
 
-	r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout);
+	r = amdgpu_fence_wait_polling(ring, seq, timeout);
 	if (r < 1 || !*status_ptr) {
 
 		if (misc_op_str)
@@ -247,6 +253,10 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 	amdgpu_device_wb_free(adev, status_offset);
 	return 0;
 
+error_undo:
+	dev_err(adev->dev, "MES ring buffer is full.\n");
+	amdgpu_ring_undo(ring);
+
 error_unlock_free:
 	spin_unlock_irqrestore(&mes->ring_lock, flags);
 
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-08-27 16:13 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-08-27 14:10 [PATCH] drm/amdgpu/mes: fix mes ring buffer overflow Alex Deucher
2024-08-27 14:21 ` Greg KH
2024-08-27 15:01   ` Deucher, Alexander
2024-08-27 16:13     ` Greg KH

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox