From: "Lazar, Lijo" <lijo.lazar@amd.com>
To: Victor Zhao <Victor.Zhao@amd.com>, amd-gfx@lists.freedesktop.org
Cc: HaiJun.Chang@amd.com
Subject: Re: [PATCH v3 2/2] drm/amdgpu: use GPU_HDP_FLUSH for sriov
Date: Tue, 14 Oct 2025 16:19:55 +0530 [thread overview]
Message-ID: <e8950119-5f43-48cb-b912-5f9dd205aec0@amd.com> (raw)
In-Reply-To: <20251014093711.434989-2-Victor.Zhao@amd.com>
On 10/14/2025 3:07 PM, Victor Zhao wrote:
> Currently, SRIOV runtime uses kiq to write HDP_MEM_FLUSH_CNTL for
> hdp flush. This register needs to be written from the CPU for nbif to
> be aware of it; otherwise the flush will not work.
>
> Implement amdgpu_kiq_hdp_flush and use kiq to do gpu hdp flush during
> sriov runtime.
>
> v2:
> - fall back to amdgpu_asic_flush_hdp when amdgpu_kiq_hdp_flush fails
> - add function amdgpu_mes_hdp_flush
>
> v3:
> - changed returned error
>
> Signed-off-by: Victor Zhao <Victor.Zhao@amd.com>
Series is -
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Thanks,
Lijo
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 +++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 71 ++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 12 ++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 1 +
> 5 files changed, 95 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 7a899fb4de29..65cc6f776536 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -7279,10 +7279,17 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
> if (adev->gmc.xgmi.connected_to_cpu)
> return;
>
> - if (ring && ring->funcs->emit_hdp_flush)
> + if (ring && ring->funcs->emit_hdp_flush) {
> amdgpu_ring_emit_hdp_flush(ring);
> - else
> - amdgpu_asic_flush_hdp(adev, ring);
> + return;
> + }
> +
> + if (!ring && amdgpu_sriov_runtime(adev)) {
> + if (!amdgpu_kiq_hdp_flush(adev))
> + return;
> + }
> +
> + amdgpu_asic_flush_hdp(adev, ring);
> }
>
> void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 7f02e36ccc1e..3d24f9cd750a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -33,6 +33,7 @@
> #include "amdgpu_reset.h"
> #include "amdgpu_xcp.h"
> #include "amdgpu_xgmi.h"
> +#include "amdgpu_mes.h"
> #include "nvd.h"
>
> /* delay 0.1 second to enable gfx off feature */
> @@ -1194,6 +1195,75 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3
> dev_err(adev->dev, "failed to write reg:%x\n", reg);
> }
>
> +int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev)
> +{
> + signed long r, cnt = 0;
> + unsigned long flags;
> + uint32_t seq;
> + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
> + struct amdgpu_ring *ring = &kiq->ring;
> +
> + if (amdgpu_device_skip_hw_access(adev))
> + return 0;
> +
> + if (adev->enable_mes_kiq && adev->mes.ring[0].sched.ready)
> + return amdgpu_mes_hdp_flush(adev);
> +
> + if (!ring->funcs->emit_hdp_flush) {
> + return -EOPNOTSUPP;
> + }
> +
> + spin_lock_irqsave(&kiq->ring_lock, flags);
> + r = amdgpu_ring_alloc(ring, 32);
> + if (r)
> + goto failed_unlock;
> +
> + amdgpu_ring_emit_hdp_flush(ring);
> + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> + if (r)
> + goto failed_undo;
> +
> + amdgpu_ring_commit(ring);
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> +
> + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
> +
> + /* don't wait anymore for gpu reset case because this way may
> + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
> + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
> + * never return if we keep waiting in virt_kiq_rreg, which cause
> + * gpu_recover() hang there.
> + *
> + * also don't wait anymore for IRQ context
> + * */
> + if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
> + goto failed_kiq_hdp_flush;
> +
> + might_sleep();
> + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
> + if (amdgpu_in_reset(adev))
> + goto failed_kiq_hdp_flush;
> +
> + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
> + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
> + }
> +
> + if (cnt > MAX_KIQ_REG_TRY) {
> + dev_err(adev->dev, "failed to flush HDP via KIQ timeout\n");
> + return -ETIMEDOUT;
> + }
> +
> + return 0;
> +
> +failed_undo:
> + amdgpu_ring_undo(ring);
> +failed_unlock:
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> +failed_kiq_hdp_flush:
> + dev_err(adev->dev, "failed to flush HDP via KIQ\n");
> + return r < 0 ? r : -EIO;
> +}
> +
> int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
> {
> if (amdgpu_num_kcq == -1) {
> @@ -2484,3 +2554,4 @@ void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
> &amdgpu_debugfs_compute_sched_mask_fops);
> #endif
> }
> +
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index fb5f7a0ee029..efd61a1ccc66 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -615,6 +615,7 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
> struct amdgpu_iv_entry *entry);
> uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id);
> void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id);
> +int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev);
> int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);
> void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> index 8d03e8c9cc6d..be62681b0c3a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> @@ -523,6 +523,18 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
> return r;
> }
>
> +int amdgpu_mes_hdp_flush(struct amdgpu_device *adev)
> +{
> + uint32_t hdp_flush_req_offset, hdp_flush_done_offset, ref_and_mask;
> +
> + hdp_flush_req_offset = adev->nbio.funcs->get_hdp_flush_req_offset(adev);
> + hdp_flush_done_offset = adev->nbio.funcs->get_hdp_flush_done_offset(adev);
> + ref_and_mask = adev->nbio.hdp_flush_reg->ref_and_mask_cp0;
> +
> + return amdgpu_mes_reg_write_reg_wait(adev, hdp_flush_req_offset, hdp_flush_done_offset,
> + ref_and_mask, ref_and_mask);
> +}
> +
> int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
> uint64_t process_context_addr,
> uint32_t spi_gdbg_per_vmid_cntl,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index 6b506fc72f58..3a51ace2fa14 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -427,6 +427,7 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev,
> int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
> uint32_t reg0, uint32_t reg1,
> uint32_t ref, uint32_t mask);
> +int amdgpu_mes_hdp_flush(struct amdgpu_device *adev);
> int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
> uint64_t process_context_addr,
> uint32_t spi_gdbg_per_vmid_cntl,
prev parent reply other threads:[~2025-10-14 10:50 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-14 9:37 [PATCH v3 1/2] drm/amdgpu: Add kiq hdp flush callbacks Victor Zhao
2025-10-14 9:37 ` [PATCH v3 2/2] drm/amdgpu: use GPU_HDP_FLUSH for sriov Victor Zhao
2025-10-14 10:49 ` Lazar, Lijo [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=e8950119-5f43-48cb-b912-5f9dd205aec0@amd.com \
--to=lijo.lazar@amd.com \
--cc=HaiJun.Chang@amd.com \
--cc=Victor.Zhao@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox