* [PATCH v2 1/2] drm/amd/pm: use debug port for mode1 reset request on smu 13&14
@ 2026-01-23 10:08 Kenneth Feng
2026-01-23 10:08 ` [PATCH v2 2/2] drm/amd/pm: send unload command to smu during modprobe -r amdgpu Kenneth Feng
0 siblings, 1 reply; 2+ messages in thread
From: Kenneth Feng @ 2026-01-23 10:08 UTC (permalink / raw)
To: amd-gfx; +Cc: KevinYang.Wang, Alexander.Deucher, Kenneth Feng, Yang Wang
use debug port for mode1 reset request so fw can handle mode1 reset
even when it is stuck.
Signed-off-by: Kenneth Feng <kenneth.feng@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
---
.../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 14 ++-----------
.../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 20 ++++++++++++++++++-
2 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index ce52b616b935..d216db3b804b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2617,21 +2617,11 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
- u32 smu_version;
- int ret;
/* SRIOV does not support SMU mode1 reset */
if (amdgpu_sriov_vf(adev))
return false;
- /* PMFW support is available since 78.41 */
- ret = smu_cmn_get_smc_version(smu, NULL, &smu_version);
- if (ret)
- return false;
-
- if (smu_version < 0x004e2900)
- return false;
-
return true;
}
@@ -2830,8 +2820,8 @@ static int smu_v13_0_0_mode1_reset(struct smu_context *smu)
/* SMU 13_0_0 PMFW supports RAS fatal error reset from 78.77 */
smu_v13_0_0_set_mode1_reset_param(smu, 0x004e4d00, ¶m);
- ret = smu_cmn_send_smc_msg_with_param(smu,
- SMU_MSG_Mode1Reset, param, NULL);
+ ret = smu_cmn_send_debug_smc_msg_with_param(smu,
+ DEBUGSMC_MSG_Mode1Reset, param);
break;
case IP_VERSION(13, 0, 10):
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 0375e8484b2a..5a212a4641c3 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -72,6 +72,8 @@
#define MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE 0x4000
+#define DEBUGSMC_MSG_Mode1Reset 2
+
#define PP_OD_FEATURE_GFXCLK_FMIN 0
#define PP_OD_FEATURE_GFXCLK_FMAX 1
#define PP_OD_FEATURE_UCLK_FMIN 2
@@ -2735,6 +2737,22 @@ static int smu_v13_0_7_update_pcie_parameters(struct smu_context *smu,
return ret;
}
+static int smu_v13_0_7_mode1_reset(struct smu_context *smu)
+{
+ int ret;
+
+ ret = smu_cmn_send_debug_smc_msg(smu, DEBUGSMC_MSG_Mode1Reset);
+ if (!ret) {
+ /* disable mmio access while doing mode 1 reset*/
+ smu->adev->no_hw_access = true;
+ /* ensure no_hw_access is globally visible before any MMIO */
+ smp_mb();
+ msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
+ }
+
+ return ret;
+}
+
static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
.set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
@@ -2796,7 +2814,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.baco_enter = smu_v13_0_baco_enter,
.baco_exit = smu_v13_0_baco_exit,
.mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported,
- .mode1_reset = smu_v13_0_mode1_reset,
+ .mode1_reset = smu_v13_0_7_mode1_reset,
.set_mp1_state = smu_v13_0_7_set_mp1_state,
.set_df_cstate = smu_v13_0_7_set_df_cstate,
.gpo_control = smu_v13_0_gpo_control,
--
2.34.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH v2 2/2] drm/amd/pm: send unload command to smu during modprobe -r amdgpu
2026-01-23 10:08 [PATCH v2 1/2] drm/amd/pm: use debug port for mode1 reset request on smu 13&14 Kenneth Feng
@ 2026-01-23 10:08 ` Kenneth Feng
0 siblings, 0 replies; 2+ messages in thread
From: Kenneth Feng @ 2026-01-23 10:08 UTC (permalink / raw)
To: amd-gfx; +Cc: KevinYang.Wang, Alexander.Deucher, Kenneth Feng, Yang Wang
Send unload command to smu during modprobe -r amdgpu for smu 13/14.
1. This can fix the high voltage/temperatue issue after driver is unloaded.
2. Reloading driver could fail but with the debug port based mode1 reset
during driver is reloaded, it is good and safe.
Signed-off-by: Kenneth Feng <kenneth.feng@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ---
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 7 +++----
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 1 -
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 8 +-------
4 files changed, 4 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b2deb6a74eb2..be115c3df370 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4657,9 +4657,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
dev_info(adev->dev, "Pending hive reset.\n");
amdgpu_set_init_level(adev,
AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
- } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
- !amdgpu_device_has_display_hardware(adev)) {
- r = psp_gpu_reset(adev);
} else {
tmp = amdgpu_reset_method;
/* It should do a default reset when loading or reloading the driver,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 6b6b05e8f736..666d2bdd08d9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -820,7 +820,7 @@ static int smu_early_init(struct amdgpu_ip_block *ip_block)
smu->adev = adev;
smu->pm_enabled = !!amdgpu_dpm;
smu->is_apu = false;
- smu->smu_baco.state = SMU_BACO_STATE_NONE;
+ smu->smu_baco.state = SMU_BACO_STATE_EXIT;
smu->smu_baco.platform_support = false;
smu->smu_baco.maco_support = false;
smu->user_dpm_profile.fan_mode = -1;
@@ -2134,9 +2134,8 @@ static int smu_reset_mp1_state(struct smu_context *smu)
int ret = 0;
if ((!adev->in_runpm) && (!adev->in_suspend) &&
- (!amdgpu_in_reset(adev)) && amdgpu_ip_version(adev, MP1_HWIP, 0) ==
- IP_VERSION(13, 0, 10) &&
- !amdgpu_device_has_display_hardware(adev))
+ (!amdgpu_in_reset(adev)) && !smu->is_apu &&
+ amdgpu_ip_version(adev, MP1_HWIP, 0) >= IP_VERSION(13, 0, 0))
ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
return ret;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 1def04826f10..6bd104b7187f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -508,7 +508,6 @@ enum smu_reset_mode {
enum smu_baco_state {
SMU_BACO_STATE_ENTER = 0,
SMU_BACO_STATE_EXIT,
- SMU_BACO_STATE_NONE,
};
struct smu_baco_context {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index d216db3b804b..5a871c9bf43c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2770,13 +2770,7 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu,
switch (mp1_state) {
case PP_MP1_STATE_UNLOAD:
- ret = smu_cmn_send_smc_msg_with_param(smu,
- SMU_MSG_PrepareMp1ForUnload,
- 0x55, NULL);
-
- if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
- ret = smu_v13_0_disable_pmfw_state(smu);
-
+ ret = smu_cmn_set_mp1_state(smu, mp1_state);
break;
default:
/* Ignore others */
--
2.34.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2026-01-23 10:11 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-01-23 10:08 [PATCH v2 1/2] drm/amd/pm: use debug port for mode1 reset request on smu 13&14 Kenneth Feng
2026-01-23 10:08 ` [PATCH v2 2/2] drm/amd/pm: send unload command to smu during modprobe -r amdgpu Kenneth Feng
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox