* [PATCH 1/2] drm/amdgpu: set default gfx reset masks for gfx6-8
@ 2025-10-14 21:17 Alex Deucher
2025-10-20 17:36 ` Alex Deucher
0 siblings, 1 reply; 5+ messages in thread
From: Alex Deucher @ 2025-10-14 21:17 UTC (permalink / raw)
To: amd-gfx; +Cc: Alex Deucher
These were not set so soft recovery was inadvertently
disabled.
Fixes: 6ac55eab4fc4 ("drm/amdgpu: move reset support type checks into the caller")
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 5 +++++
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 5 +++++
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 5 +++++
3 files changed, 15 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 7693b79534267..80565392313f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -3102,6 +3102,11 @@ static int gfx_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
return r;
}
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 5976ed55d9dbd..2b7aba22ecc19 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4399,6 +4399,11 @@ static int gfx_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
gfx_v7_0_gpu_early_init(adev);
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 0856ff65288c0..8a81713d97aac 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -2023,6 +2023,11 @@ static int gfx_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+
return 0;
}
--
2.51.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu: set default gfx reset masks for gfx6-8
2025-10-14 21:17 [PATCH 1/2] drm/amdgpu: set default gfx reset masks for gfx6-8 Alex Deucher
@ 2025-10-20 17:36 ` Alex Deucher
0 siblings, 0 replies; 5+ messages in thread
From: Alex Deucher @ 2025-10-20 17:36 UTC (permalink / raw)
To: Alex Deucher; +Cc: amd-gfx
Ping on this series.
On Tue, Oct 14, 2025 at 5:56 PM Alex Deucher <alexander.deucher@amd.com> wrote:
>
> These were not set so soft recovery was inadvertently
> disabled.
>
> Fixes: 6ac55eab4fc4 ("drm/amdgpu: move reset support type checks into the caller")
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 5 +++++
> drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 5 +++++
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 5 +++++
> 3 files changed, 15 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
> index 7693b79534267..80565392313f1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
> @@ -3102,6 +3102,11 @@ static int gfx_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
> return r;
> }
>
> + adev->gfx.gfx_supported_reset =
> + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
> + adev->gfx.compute_supported_reset =
> + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
> +
> return r;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index 5976ed55d9dbd..2b7aba22ecc19 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -4399,6 +4399,11 @@ static int gfx_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
>
> gfx_v7_0_gpu_early_init(adev);
>
> + adev->gfx.gfx_supported_reset =
> + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
> + adev->gfx.compute_supported_reset =
> + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
> +
> return r;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 0856ff65288c0..8a81713d97aac 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -2023,6 +2023,11 @@ static int gfx_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
> if (r)
> return r;
>
> + adev->gfx.gfx_supported_reset =
> + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
> + adev->gfx.compute_supported_reset =
> + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
> +
> return 0;
> }
>
> --
> 2.51.0
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/2] drm/amdgpu: set default gfx reset masks for gfx6-8
@ 2025-10-23 18:45 Alex Deucher
2025-10-23 18:45 ` [PATCH 2/2] drm/amdgpu: move reset debug disable handling Alex Deucher
0 siblings, 1 reply; 5+ messages in thread
From: Alex Deucher @ 2025-10-23 18:45 UTC (permalink / raw)
To: amd-gfx; +Cc: Alex Deucher
These were not set so soft recovery was inadvertently
disabled.
Fixes: 6ac55eab4fc4 ("drm/amdgpu: move reset support type checks into the caller")
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 5 +++++
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 5 +++++
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 5 +++++
3 files changed, 15 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 7693b79534267..80565392313f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -3102,6 +3102,11 @@ static int gfx_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
return r;
}
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 5976ed55d9dbd..2b7aba22ecc19 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4399,6 +4399,11 @@ static int gfx_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
gfx_v7_0_gpu_early_init(adev);
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index d3d0a4b0380cf..1c87375e1dd58 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -2023,6 +2023,11 @@ static int gfx_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+
return 0;
}
--
2.51.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/2] drm/amdgpu: move reset debug disable handling
2025-10-23 18:45 [PATCH 1/2] drm/amdgpu: set default gfx reset masks for gfx6-8 Alex Deucher
@ 2025-10-23 18:45 ` Alex Deucher
2025-10-24 1:04 ` Zhang, Jesse(Jie)
0 siblings, 1 reply; 5+ messages in thread
From: Alex Deucher @ 2025-10-23 18:45 UTC (permalink / raw)
To: amd-gfx; +Cc: Alex Deucher
Move everything to the supported reset masks rather than
having explicit misc checks for this.
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 8 +++-----
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 3 ---
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 ++-
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 6 ++++--
drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 3 ++-
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +-
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 6 ++++--
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 8 ++++++--
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 3 ++-
drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 6 ++++--
drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 3 ++-
drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 3 ++-
12 files changed, 32 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 3842a15e2df8a..3d396ab625f33 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -130,11 +130,9 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
}
/* attempt a per ring reset */
- if (unlikely(adev->debug_disable_gpu_ring_reset)) {
- dev_err(adev->dev, "Ring reset disabled by debug mask\n");
- } else if (amdgpu_gpu_recovery &&
- amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
- ring->funcs->reset) {
+ if (amdgpu_gpu_recovery &&
+ amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
+ ring->funcs->reset) {
dev_err(adev->dev, "Starting %s ring reset\n",
s_job->sched->name);
ring->in_ring_reset = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 43f769fed810e..bf1b90a341d8d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -468,9 +468,6 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
ktime_t deadline;
bool ret;
- if (unlikely(ring->adev->debug_disable_soft_recovery))
- return false;
-
deadline = ktime_add_us(ktime_get(), 10000);
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 5bbd264f8357c..39b8adf23a9fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4959,7 +4959,8 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
adev->gfx.compute_supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
- if (!amdgpu_sriov_vf(adev)) {
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 6994fb2cbf917..e1785a8984662 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1821,13 +1821,15 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(11, 0, 3):
if ((adev->gfx.me_fw_version >= 2280) &&
(adev->gfx.mec_fw_version >= 2410) &&
- !amdgpu_sriov_vf(adev)) {
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
break;
default:
- if (!amdgpu_sriov_vf(adev)) {
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 8d6000c7ce26d..b86a40e7c2d3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -1548,7 +1548,8 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(12, 0, 1):
if ((adev->gfx.me_fw_version >= 2660) &&
(adev->gfx.mec_fw_version >= 2920) &&
- !amdgpu_sriov_vf(adev)) {
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index f1a2efc2a8d0a..0148d7ff34d99 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2409,7 +2409,7 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
adev->gfx.compute_supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
- if (!amdgpu_sriov_vf(adev))
+ if (!amdgpu_sriov_vf(adev) && !adev->debug_disable_gpu_ring_reset)
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index e0b50c690f8cb..c4c551ef6b874 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1149,14 +1149,16 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
if ((adev->gfx.mec_fw_version >= 155) &&
- !amdgpu_sriov_vf(adev)) {
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
}
break;
case IP_VERSION(9, 5, 0):
if ((adev->gfx.mec_fw_version >= 21) &&
- !amdgpu_sriov_vf(adev)) {
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index b95afb4afd032..5ec8e28980d5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -2361,11 +2361,15 @@ static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
- if ((adev->gfx.mec_fw_version >= 0xb0) && amdgpu_dpm_reset_sdma_is_supported(adev))
+ if ((adev->gfx.mec_fw_version >= 0xb0) &&
+ amdgpu_dpm_reset_sdma_is_supported(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
case IP_VERSION(9, 5, 0):
- if ((adev->gfx.mec_fw_version >= 0xf) && amdgpu_dpm_reset_sdma_is_supported(adev))
+ if ((adev->gfx.mec_fw_version >= 0xf) &&
+ amdgpu_dpm_reset_sdma_is_supported(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 7dc67a22a7a01..8ddc4df06a1fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1429,7 +1429,8 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(5, 0, 2):
case IP_VERSION(5, 0, 5):
if ((adev->sdma.instance[0].fw_version >= 35) &&
- !amdgpu_sriov_vf(adev))
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index d3b2ac5813383..e163369773adc 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1348,12 +1348,14 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(5, 2, 3):
case IP_VERSION(5, 2, 4):
if ((adev->sdma.instance[0].fw_version >= 76) &&
- !amdgpu_sriov_vf(adev))
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
case IP_VERSION(5, 2, 5):
if ((adev->sdma.instance[0].fw_version >= 34) &&
- !amdgpu_sriov_vf(adev))
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 032cabd8fa8d0..fbe166a4b9b88 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -1356,7 +1356,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(6, 0, 2):
case IP_VERSION(6, 0, 3):
if ((adev->sdma.instance[0].fw_version >= 21) &&
- !amdgpu_sriov_vf(adev))
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index cb5a9daed63ce..007f527d54e7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -1337,7 +1337,8 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
adev->sdma.supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
- if (!amdgpu_sriov_vf(adev))
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
r = amdgpu_sdma_sysfs_reset_mask_init(adev);
--
2.51.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* RE: [PATCH 2/2] drm/amdgpu: move reset debug disable handling
2025-10-23 18:45 ` [PATCH 2/2] drm/amdgpu: move reset debug disable handling Alex Deucher
@ 2025-10-24 1:04 ` Zhang, Jesse(Jie)
0 siblings, 0 replies; 5+ messages in thread
From: Zhang, Jesse(Jie) @ 2025-10-24 1:04 UTC (permalink / raw)
To: Deucher, Alexander, amd-gfx@lists.freedesktop.org; +Cc: Deucher, Alexander
[AMD Official Use Only - AMD Internal Distribution Only]
This series is Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Alex
> Deucher
> Sent: Friday, October 24, 2025 2:45 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> Subject: [PATCH 2/2] drm/amdgpu: move reset debug disable handling
>
> Move everything to the supported reset masks rather than having explicit misc
> checks for this.
>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 8 +++-----
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 3 ---
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 ++-
> drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 6 ++++--
> drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 3 ++-
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 6 ++++--
> drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 8 ++++++--
> drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 3 ++-
> drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 6 ++++--
> drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 3 ++-
> drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 3 ++-
> 12 files changed, 32 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 3842a15e2df8a..3d396ab625f33 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -130,11 +130,9 @@ static enum drm_gpu_sched_stat
> amdgpu_job_timedout(struct drm_sched_job *s_job)
> }
>
> /* attempt a per ring reset */
> - if (unlikely(adev->debug_disable_gpu_ring_reset)) {
> - dev_err(adev->dev, "Ring reset disabled by debug mask\n");
> - } else if (amdgpu_gpu_recovery &&
> - amdgpu_ring_is_reset_type_supported(ring,
> AMDGPU_RESET_TYPE_PER_QUEUE) &&
> - ring->funcs->reset) {
> + if (amdgpu_gpu_recovery &&
> + amdgpu_ring_is_reset_type_supported(ring,
> AMDGPU_RESET_TYPE_PER_QUEUE) &&
> + ring->funcs->reset) {
> dev_err(adev->dev, "Starting %s ring reset\n",
> s_job->sched->name);
> ring->in_ring_reset = true;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 43f769fed810e..bf1b90a341d8d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -468,9 +468,6 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring,
> unsigned int vmid,
> ktime_t deadline;
> bool ret;
>
> - if (unlikely(ring->adev->debug_disable_soft_recovery))
> - return false;
> -
> deadline = ktime_add_us(ktime_get(), 10000);
>
> if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence) diff
> --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 5bbd264f8357c..39b8adf23a9fa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -4959,7 +4959,8 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block
> *ip_block)
> amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
> adev->gfx.compute_supported_reset =
> amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
> - if (!amdgpu_sriov_vf(adev)) {
> + if (!amdgpu_sriov_vf(adev) &&
> + !adev->debug_disable_gpu_ring_reset) {
> adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
> adev->gfx.gfx_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index 6994fb2cbf917..e1785a8984662 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -1821,13 +1821,15 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block
> *ip_block)
> case IP_VERSION(11, 0, 3):
> if ((adev->gfx.me_fw_version >= 2280) &&
> (adev->gfx.mec_fw_version >= 2410) &&
> - !amdgpu_sriov_vf(adev)) {
> + !amdgpu_sriov_vf(adev) &&
> + !adev->debug_disable_gpu_ring_reset) {
> adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
> adev->gfx.gfx_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
> }
> break;
> default:
> - if (!amdgpu_sriov_vf(adev)) {
> + if (!amdgpu_sriov_vf(adev) &&
> + !adev->debug_disable_gpu_ring_reset) {
> adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
> adev->gfx.gfx_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> index 8d6000c7ce26d..b86a40e7c2d3b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> @@ -1548,7 +1548,8 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block
> *ip_block)
> case IP_VERSION(12, 0, 1):
> if ((adev->gfx.me_fw_version >= 2660) &&
> (adev->gfx.mec_fw_version >= 2920) &&
> - !amdgpu_sriov_vf(adev)) {
> + !amdgpu_sriov_vf(adev) &&
> + !adev->debug_disable_gpu_ring_reset) {
> adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
> adev->gfx.gfx_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index f1a2efc2a8d0a..0148d7ff34d99 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -2409,7 +2409,7 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block
> *ip_block)
> amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
> adev->gfx.compute_supported_reset =
> amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
> - if (!amdgpu_sriov_vf(adev))
> + if (!amdgpu_sriov_vf(adev) && !adev->debug_disable_gpu_ring_reset)
> adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>
> r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); diff --git
> a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> index e0b50c690f8cb..c4c551ef6b874 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> @@ -1149,14 +1149,16 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block
> *ip_block)
> case IP_VERSION(9, 4, 3):
> case IP_VERSION(9, 4, 4):
> if ((adev->gfx.mec_fw_version >= 155) &&
> - !amdgpu_sriov_vf(adev)) {
> + !amdgpu_sriov_vf(adev) &&
> + !adev->debug_disable_gpu_ring_reset) {
> adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
> adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_PIPE;
> }
> break;
> case IP_VERSION(9, 5, 0):
> if ((adev->gfx.mec_fw_version >= 21) &&
> - !amdgpu_sriov_vf(adev)) {
> + !amdgpu_sriov_vf(adev) &&
> + !adev->debug_disable_gpu_ring_reset) {
> adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
> adev->gfx.compute_supported_reset |=
> AMDGPU_RESET_TYPE_PER_PIPE;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> index b95afb4afd032..5ec8e28980d5f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> @@ -2361,11 +2361,15 @@ static void sdma_v4_4_2_update_reset_mask(struct
> amdgpu_device *adev)
> switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
> case IP_VERSION(9, 4, 3):
> case IP_VERSION(9, 4, 4):
> - if ((adev->gfx.mec_fw_version >= 0xb0) &&
> amdgpu_dpm_reset_sdma_is_supported(adev))
> + if ((adev->gfx.mec_fw_version >= 0xb0) &&
> + amdgpu_dpm_reset_sdma_is_supported(adev) &&
> + !adev->debug_disable_gpu_ring_reset)
> adev->sdma.supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
> break;
> case IP_VERSION(9, 5, 0):
> - if ((adev->gfx.mec_fw_version >= 0xf) &&
> amdgpu_dpm_reset_sdma_is_supported(adev))
> + if ((adev->gfx.mec_fw_version >= 0xf) &&
> + amdgpu_dpm_reset_sdma_is_supported(adev) &&
> + !adev->debug_disable_gpu_ring_reset)
> adev->sdma.supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
> break;
> default:
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 7dc67a22a7a01..8ddc4df06a1fd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1429,7 +1429,8 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block
> *ip_block)
> case IP_VERSION(5, 0, 2):
> case IP_VERSION(5, 0, 5):
> if ((adev->sdma.instance[0].fw_version >= 35) &&
> - !amdgpu_sriov_vf(adev))
> + !amdgpu_sriov_vf(adev) &&
> + !adev->debug_disable_gpu_ring_reset)
> adev->sdma.supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
> break;
> default:
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index d3b2ac5813383..e163369773adc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -1348,12 +1348,14 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block
> *ip_block)
> case IP_VERSION(5, 2, 3):
> case IP_VERSION(5, 2, 4):
> if ((adev->sdma.instance[0].fw_version >= 76) &&
> - !amdgpu_sriov_vf(adev))
> + !amdgpu_sriov_vf(adev) &&
> + !adev->debug_disable_gpu_ring_reset)
> adev->sdma.supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
> break;
> case IP_VERSION(5, 2, 5):
> if ((adev->sdma.instance[0].fw_version >= 34) &&
> - !amdgpu_sriov_vf(adev))
> + !amdgpu_sriov_vf(adev) &&
> + !adev->debug_disable_gpu_ring_reset)
> adev->sdma.supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
> break;
> default:
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> index 032cabd8fa8d0..fbe166a4b9b88 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> @@ -1356,7 +1356,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block
> *ip_block)
> case IP_VERSION(6, 0, 2):
> case IP_VERSION(6, 0, 3):
> if ((adev->sdma.instance[0].fw_version >= 21) &&
> - !amdgpu_sriov_vf(adev))
> + !amdgpu_sriov_vf(adev) &&
> + !adev->debug_disable_gpu_ring_reset)
> adev->sdma.supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
> break;
> default:
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> index cb5a9daed63ce..007f527d54e7d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> @@ -1337,7 +1337,8 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block
> *ip_block)
>
> adev->sdma.supported_reset =
> amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
> - if (!amdgpu_sriov_vf(adev))
> + if (!amdgpu_sriov_vf(adev) &&
> + !adev->debug_disable_gpu_ring_reset)
> adev->sdma.supported_reset |=
> AMDGPU_RESET_TYPE_PER_QUEUE;
>
> r = amdgpu_sdma_sysfs_reset_mask_init(adev);
> --
> 2.51.0
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2025-10-24 1:04 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-23 18:45 [PATCH 1/2] drm/amdgpu: set default gfx reset masks for gfx6-8 Alex Deucher
2025-10-23 18:45 ` [PATCH 2/2] drm/amdgpu: move reset debug disable handling Alex Deucher
2025-10-24 1:04 ` Zhang, Jesse(Jie)
-- strict thread matches above, loose matches on Subject: below --
2025-10-14 21:17 [PATCH 1/2] drm/amdgpu: set default gfx reset masks for gfx6-8 Alex Deucher
2025-10-20 17:36 ` Alex Deucher
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox