AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/2] drm/amd/pm: use debug port for mode1 reset request on smu 13&14
@ 2026-01-23 10:08 Kenneth Feng
  2026-01-23 10:08 ` [PATCH v2 2/2] drm/amd/pm: send unload command to smu during modprobe -r amdgpu Kenneth Feng
  0 siblings, 1 reply; 2+ messages in thread
From: Kenneth Feng @ 2026-01-23 10:08 UTC (permalink / raw)
  To: amd-gfx; +Cc: KevinYang.Wang, Alexander.Deucher, Kenneth Feng, Yang Wang

Use the debug port for the mode1 reset request so that the firmware can
handle a mode1 reset even when it is stuck.

Signed-off-by: Kenneth Feng <kenneth.feng@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 14 ++-----------
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  | 20 ++++++++++++++++++-
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index ce52b616b935..d216db3b804b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2617,21 +2617,11 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
 static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
-	u32 smu_version;
-	int ret;
 
 	/* SRIOV does not support SMU mode1 reset */
 	if (amdgpu_sriov_vf(adev))
 		return false;
 
-	/* PMFW support is available since 78.41 */
-	ret = smu_cmn_get_smc_version(smu, NULL, &smu_version);
-	if (ret)
-		return false;
-
-	if (smu_version < 0x004e2900)
-		return false;
-
 	return true;
 }
 
@@ -2830,8 +2820,8 @@ static int smu_v13_0_0_mode1_reset(struct smu_context *smu)
 		/* SMU 13_0_0 PMFW supports RAS fatal error reset from 78.77 */
 		smu_v13_0_0_set_mode1_reset_param(smu, 0x004e4d00, &param);
 
-		ret = smu_cmn_send_smc_msg_with_param(smu,
-						SMU_MSG_Mode1Reset, param, NULL);
+		ret = smu_cmn_send_debug_smc_msg_with_param(smu,
+						DEBUGSMC_MSG_Mode1Reset, param);
 		break;
 
 	case IP_VERSION(13, 0, 10):
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 0375e8484b2a..5a212a4641c3 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -72,6 +72,8 @@
 
 #define MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE	0x4000
 
+#define DEBUGSMC_MSG_Mode1Reset	2
+
 #define PP_OD_FEATURE_GFXCLK_FMIN			0
 #define PP_OD_FEATURE_GFXCLK_FMAX			1
 #define PP_OD_FEATURE_UCLK_FMIN				2
@@ -2735,6 +2737,22 @@ static int smu_v13_0_7_update_pcie_parameters(struct smu_context *smu,
 	return ret;
 }
 
+static int smu_v13_0_7_mode1_reset(struct smu_context *smu)
+{
+	int ret;
+
+	ret = smu_cmn_send_debug_smc_msg(smu, DEBUGSMC_MSG_Mode1Reset);
+	if (!ret) {
+		/* disable mmio access while doing mode 1 reset*/
+		smu->adev->no_hw_access = true;
+		/* ensure no_hw_access is globally visible before any MMIO */
+		smp_mb();
+		msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
+	}
+
+	return ret;
+}
+
 static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
 	.get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
 	.set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
@@ -2796,7 +2814,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
 	.baco_enter = smu_v13_0_baco_enter,
 	.baco_exit = smu_v13_0_baco_exit,
 	.mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported,
-	.mode1_reset = smu_v13_0_mode1_reset,
+	.mode1_reset = smu_v13_0_7_mode1_reset,
 	.set_mp1_state = smu_v13_0_7_set_mp1_state,
 	.set_df_cstate = smu_v13_0_7_set_df_cstate,
 	.gpo_control = smu_v13_0_gpo_control,
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH v2 2/2] drm/amd/pm: send unload command to smu during modprobe -r amdgpu
  2026-01-23 10:08 [PATCH v2 1/2] drm/amd/pm: use debug port for mode1 reset request on smu 13&14 Kenneth Feng
@ 2026-01-23 10:08 ` Kenneth Feng
  0 siblings, 0 replies; 2+ messages in thread
From: Kenneth Feng @ 2026-01-23 10:08 UTC (permalink / raw)
  To: amd-gfx; +Cc: KevinYang.Wang, Alexander.Deucher, Kenneth Feng, Yang Wang

Send unload command to smu during modprobe -r amdgpu for smu 13/14.
1. This can fix the high voltage/temperature issue after the driver is unloaded.
2. Reloading the driver could fail, but with the debug-port-based mode1 reset
performed when the driver is reloaded, it is good and safe.

Signed-off-by: Kenneth Feng <kenneth.feng@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c           | 3 ---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c            | 7 +++----
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h        | 1 -
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 8 +-------
 4 files changed, 4 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b2deb6a74eb2..be115c3df370 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4657,9 +4657,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 			dev_info(adev->dev, "Pending hive reset.\n");
 			amdgpu_set_init_level(adev,
 					      AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
-		} else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
-				   !amdgpu_device_has_display_hardware(adev)) {
-					r = psp_gpu_reset(adev);
 		} else {
 				tmp = amdgpu_reset_method;
 				/* It should do a default reset when loading or reloading the driver,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 6b6b05e8f736..666d2bdd08d9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -820,7 +820,7 @@ static int smu_early_init(struct amdgpu_ip_block *ip_block)
 	smu->adev = adev;
 	smu->pm_enabled = !!amdgpu_dpm;
 	smu->is_apu = false;
-	smu->smu_baco.state = SMU_BACO_STATE_NONE;
+	smu->smu_baco.state = SMU_BACO_STATE_EXIT;
 	smu->smu_baco.platform_support = false;
 	smu->smu_baco.maco_support = false;
 	smu->user_dpm_profile.fan_mode = -1;
@@ -2134,9 +2134,8 @@ static int smu_reset_mp1_state(struct smu_context *smu)
 	int ret = 0;
 
 	if ((!adev->in_runpm) && (!adev->in_suspend) &&
-		(!amdgpu_in_reset(adev)) && amdgpu_ip_version(adev, MP1_HWIP, 0) ==
-									IP_VERSION(13, 0, 10) &&
-		!amdgpu_device_has_display_hardware(adev))
+		(!amdgpu_in_reset(adev)) && !smu->is_apu &&
+			amdgpu_ip_version(adev, MP1_HWIP, 0) >= IP_VERSION(13, 0, 0))
 		ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
 
 	return ret;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 1def04826f10..6bd104b7187f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -508,7 +508,6 @@ enum smu_reset_mode {
 enum smu_baco_state {
 	SMU_BACO_STATE_ENTER = 0,
 	SMU_BACO_STATE_EXIT,
-	SMU_BACO_STATE_NONE,
 };
 
 struct smu_baco_context {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index d216db3b804b..5a871c9bf43c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2770,13 +2770,7 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu,
 
 	switch (mp1_state) {
 	case PP_MP1_STATE_UNLOAD:
-		ret = smu_cmn_send_smc_msg_with_param(smu,
-											  SMU_MSG_PrepareMp1ForUnload,
-											  0x55, NULL);
-
-		if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
-			ret = smu_v13_0_disable_pmfw_state(smu);
-
+		ret = smu_cmn_set_mp1_state(smu, mp1_state);
 		break;
 	default:
 		/* Ignore others */
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-01-23 10:11 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2026-01-23 10:08 [PATCH v2 1/2] drm/amd/pm: use debug port for mode1 reset request on smu 13&14 Kenneth Feng
2026-01-23 10:08 ` [PATCH v2 2/2] drm/amd/pm: send unload command to smu during modprobe -r amdgpu Kenneth Feng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox