From: Alex Deucher <alexander.deucher@amd.com>
To: <amd-gfx@lists.freedesktop.org>
Cc: Alex Deucher <alexander.deucher@amd.com>
Subject: [PATCH 2/2] drm/amdgpu: move reset debug disable handling
Date: Tue, 14 Oct 2025 17:17:12 -0400 [thread overview]
Message-ID: <20251014211712.2524052-2-alexander.deucher@amd.com> (raw)
In-Reply-To: <20251014211712.2524052-1-alexander.deucher@amd.com>
Move everything to the supported resets masks rather than
having an explicit misc checks for this.
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 8 +++-----
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 3 ---
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 ++-
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 6 ++++--
drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 3 ++-
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +-
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 6 ++++--
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 8 ++++++--
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 3 ++-
drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 6 ++++--
drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 3 ++-
drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 3 ++-
12 files changed, 32 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 3842a15e2df8a..3d396ab625f33 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -130,11 +130,9 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
}
/* attempt a per ring reset */
- if (unlikely(adev->debug_disable_gpu_ring_reset)) {
- dev_err(adev->dev, "Ring reset disabled by debug mask\n");
- } else if (amdgpu_gpu_recovery &&
- amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
- ring->funcs->reset) {
+ if (amdgpu_gpu_recovery &&
+ amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
+ ring->funcs->reset) {
dev_err(adev->dev, "Starting %s ring reset\n",
s_job->sched->name);
ring->in_ring_reset = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 43f769fed810e..bf1b90a341d8d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -468,9 +468,6 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
ktime_t deadline;
bool ret;
- if (unlikely(ring->adev->debug_disable_soft_recovery))
- return false;
-
deadline = ktime_add_us(ktime_get(), 10000);
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index d670cb48c0319..f7357673f13e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4959,7 +4959,8 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
adev->gfx.compute_supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
- if (!amdgpu_sriov_vf(adev)) {
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index a1ce8f9e04ecb..60250503c633d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1821,13 +1821,15 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(11, 0, 3):
if ((adev->gfx.me_fw_version >= 2280) &&
(adev->gfx.mec_fw_version >= 2410) &&
- !amdgpu_sriov_vf(adev)) {
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
break;
default:
- if (!amdgpu_sriov_vf(adev)) {
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index a923f1946fc23..d405ca3c01b66 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -1548,7 +1548,8 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(12, 0, 1):
if ((adev->gfx.me_fw_version >= 2660) &&
(adev->gfx.mec_fw_version >= 2920) &&
- !amdgpu_sriov_vf(adev)) {
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index dd19a97436db9..7d0a2d239b78d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2409,7 +2409,7 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
adev->gfx.compute_supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
- if (!amdgpu_sriov_vf(adev))
+ if (!amdgpu_sriov_vf(adev) && !adev->debug_disable_gpu_ring_reset)
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index fb5585ab52beb..b6e42189eae2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1149,14 +1149,16 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
if ((adev->gfx.mec_fw_version >= 155) &&
- !amdgpu_sriov_vf(adev)) {
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
}
break;
case IP_VERSION(9, 5, 0):
if ((adev->gfx.mec_fw_version >= 21) &&
- !amdgpu_sriov_vf(adev)) {
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 50abf4dd5ef91..6746b0108932e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -2383,11 +2383,15 @@ static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
- if ((adev->gfx.mec_fw_version >= 0xb0) && amdgpu_dpm_reset_sdma_is_supported(adev))
+ if ((adev->gfx.mec_fw_version >= 0xb0) &&
+ amdgpu_dpm_reset_sdma_is_supported(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
case IP_VERSION(9, 5, 0):
- if ((adev->gfx.mec_fw_version >= 0xf) && amdgpu_dpm_reset_sdma_is_supported(adev))
+ if ((adev->gfx.mec_fw_version >= 0xf) &&
+ amdgpu_dpm_reset_sdma_is_supported(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index ab012592a276a..9d99211721953 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1443,7 +1443,8 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(5, 0, 2):
case IP_VERSION(5, 0, 5):
if ((adev->sdma.instance[0].fw_version >= 35) &&
- !amdgpu_sriov_vf(adev))
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index fdc9cc13396a2..dcdb7dbb952c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1362,12 +1362,14 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(5, 2, 3):
case IP_VERSION(5, 2, 4):
if ((adev->sdma.instance[0].fw_version >= 76) &&
- !amdgpu_sriov_vf(adev))
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
case IP_VERSION(5, 2, 5):
if ((adev->sdma.instance[0].fw_version >= 34) &&
- !amdgpu_sriov_vf(adev))
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index aa89d915d3f98..7c5da95de92e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -1370,7 +1370,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(6, 0, 2):
case IP_VERSION(6, 0, 3):
if ((adev->sdma.instance[0].fw_version >= 21) &&
- !amdgpu_sriov_vf(adev))
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index 84d0e1aa4d501..5ba323259f2a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -1351,7 +1351,8 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
adev->sdma.supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
- if (!amdgpu_sriov_vf(adev))
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
r = amdgpu_sdma_sysfs_reset_mask_init(adev);
--
2.51.0
next prev parent reply other threads:[~2025-10-14 21:17 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-14 21:17 [PATCH 1/2] drm/amdgpu: set default gfx reset masks for gfx6-8 Alex Deucher
2025-10-14 21:17 ` Alex Deucher [this message]
2025-10-20 17:36 ` Alex Deucher
-- strict thread matches above, loose matches on Subject: below --
2025-10-23 18:45 Alex Deucher
2025-10-23 18:45 ` [PATCH 2/2] drm/amdgpu: move reset debug disable handling Alex Deucher
2025-10-24 1:04 ` Zhang, Jesse(Jie)
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251014211712.2524052-2-alexander.deucher@amd.com \
--to=alexander.deucher@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox