From: "Christian König" <ckoenig.leichtzumerken@gmail.com>
To: Jack Xiao <Jack.Xiao@amd.com>,
amd-gfx@lists.freedesktop.org, Alexander.Deucher@amd.com
Subject: Re: [PATCH] drm/amdgpu/mes: fix mes ring buffer overflow
Date: Fri, 19 Jul 2024 17:44:17 +0200 [thread overview]
Message-ID: <bfca2c44-9e40-4e04-bfcf-1285b3552707@gmail.com> (raw)
In-Reply-To: <20240719091615.1534436-1-Jack.Xiao@amd.com>
Am 19.07.24 um 11:16 schrieb Jack Xiao:
> Wait until there is enough room in the ring before writing MES packets,
> to avoid a ring buffer overflow.
>
> Signed-off-by: Jack Xiao <Jack.Xiao@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 18 ++++++++++++++----
> drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 18 ++++++++++++++----
> 2 files changed, 28 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index 8ce51b9236c1..68c74adf79f1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -168,7 +168,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> const char *op_str, *misc_op_str;
> unsigned long flags;
> u64 status_gpu_addr;
> - u32 status_offset;
> + u32 seq, status_offset;
> u64 *status_ptr;
> signed long r;
> int ret;
> @@ -196,6 +196,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> if (r)
> goto error_unlock_free;
>
> + seq = ++ring->fence_drv.sync_seq;
> + r = amdgpu_fence_wait_polling(ring,
> + seq - ring->fence_drv.num_fences_mask,
> + timeout);
> + if (r < 1)
> + goto error_undo;
> +
> api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
> api_status->api_completion_fence_addr = status_gpu_addr;
> api_status->api_completion_fence_value = 1;
> @@ -208,8 +215,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
> mes_status_pkt.api_status.api_completion_fence_addr =
> ring->fence_drv.gpu_addr;
> - mes_status_pkt.api_status.api_completion_fence_value =
> - ++ring->fence_drv.sync_seq;
> + mes_status_pkt.api_status.api_completion_fence_value = seq;
>
> amdgpu_ring_write_multiple(ring, &mes_status_pkt,
> sizeof(mes_status_pkt) / 4);
> @@ -229,7 +235,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> dev_dbg(adev->dev, "MES msg=%d was emitted\n",
> x_pkt->header.opcode);
>
> - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout);
> + r = amdgpu_fence_wait_polling(ring, seq, timeout);
> if (r < 1 || !*status_ptr) {
>
> if (misc_op_str)
> @@ -252,6 +258,10 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> amdgpu_device_wb_free(adev, status_offset);
> return 0;
>
> +error_undo:
> + dev_err(adev->dev, "MES ring buffer is full.\n");
> + amdgpu_ring_undo(ring);
> +
> error_unlock_free:
> spin_unlock_irqrestore(&mes->ring_lock, flags);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> index c9f74231ad59..48e01206bcc4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> @@ -154,7 +154,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> const char *op_str, *misc_op_str;
> unsigned long flags;
> u64 status_gpu_addr;
> - u32 status_offset;
> + u32 seq, status_offset;
> u64 *status_ptr;
> signed long r;
> int ret;
> @@ -182,6 +182,13 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> if (r)
> goto error_unlock_free;
>
> + seq = ++ring->fence_drv.sync_seq;
> + r = amdgpu_fence_wait_polling(ring,
> + seq - ring->fence_drv.num_fences_mask,
That's what amdgpu_fence_emit_polling() does anyway.
So this change just moves the polling a bit earlier.
I think we need to increase the MES ring size instead.
Regards,
Christian.
> + timeout);
> + if (r < 1)
> + goto error_undo;
> +
> api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
> api_status->api_completion_fence_addr = status_gpu_addr;
> api_status->api_completion_fence_value = 1;
> @@ -194,8 +201,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
> mes_status_pkt.api_status.api_completion_fence_addr =
> ring->fence_drv.gpu_addr;
> - mes_status_pkt.api_status.api_completion_fence_value =
> - ++ring->fence_drv.sync_seq;
> + mes_status_pkt.api_status.api_completion_fence_value = seq;
>
> amdgpu_ring_write_multiple(ring, &mes_status_pkt,
> sizeof(mes_status_pkt) / 4);
> @@ -215,7 +221,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> dev_dbg(adev->dev, "MES msg=%d was emitted\n",
> x_pkt->header.opcode);
>
> - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout);
> + r = amdgpu_fence_wait_polling(ring, seq, timeout);
> if (r < 1 || !*status_ptr) {
>
> if (misc_op_str)
> @@ -238,6 +244,10 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> amdgpu_device_wb_free(adev, status_offset);
> return 0;
>
> +error_undo:
> + dev_err(adev->dev, "MES ring buffer is full.\n");
> + amdgpu_ring_undo(ring);
> +
> error_unlock_free:
> spin_unlock_irqrestore(&mes->ring_lock, flags);
>
next prev parent reply other threads:[~2024-07-19 15:44 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-07-19 9:16 [PATCH] drm/amdgpu/mes: fix mes ring buffer overflow Jack Xiao
2024-07-19 13:56 ` Alex Deucher
2024-07-19 15:44 ` Christian König [this message]
2024-07-22 3:27 ` Xiao, Jack
2024-07-22 8:20 ` Christian König
2024-07-22 8:46 ` Xiao, Jack
2024-07-22 11:20 ` Christian König
2024-07-22 19:52 ` Alex Deucher
2024-07-23 3:08 ` Xiao, Jack
2024-07-23 8:15 ` Xiao, Jack
-- strict thread matches above, loose matches on Subject: below --
2024-08-27 14:10 Alex Deucher
2024-08-27 14:21 ` Greg KH
2024-08-27 15:01 ` Deucher, Alexander
2024-08-27 16:13 ` Greg KH
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=bfca2c44-9e40-4e04-bfcf-1285b3552707@gmail.com \
--to=ckoenig.leichtzumerken@gmail.com \
--cc=Alexander.Deucher@amd.com \
--cc=Jack.Xiao@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.