amd-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [v3 1/5] drm/amdgpu: Refactor VCN v5.0.1 HW init into separate instance function
@ 2025-08-20  3:03 Jesse.Zhang
  2025-08-20  3:03 ` [v3 2/5] drm/amdgpu: Add ring reset support for VCN v5.0.1 Jesse.Zhang
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Jesse.Zhang @ 2025-08-20  3:03 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alexander.Deucher, Christian Koenig, lijo.lazar, leo.liu,
	sonny.jiang, Jesse.Zhang, Ruili Ji

Split the per-instance initialization code from vcn_v5_0_1_hw_init()
into a new vcn_v5_0_1_hw_init_inst() function. This improves code
organization by:

1. Separating the instance-specific initialization logic
2. Making the main init function more readable
3. Following the pattern used in queue reset

The SR-IOV specific initialization remains in the main function since
it has different requirements.

v2: return directly from amdgpu_ring_test_helper (Lijo)

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
Signed-off-by: Ruili Ji <ruiliji2@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 36 +++++++++++++++----------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
index 7cb21e2b4eb0..1b5d44fa2b57 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
@@ -256,6 +256,26 @@ static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block)
 	return 0;
 }
 
+static int vcn_v5_0_1_hw_init_inst(struct amdgpu_device *adev, int i)
+{
+	struct amdgpu_ring *ring;
+	int vcn_inst;
+
+	vcn_inst = GET_INST(VCN, i);
+	ring = &adev->vcn.inst[i].ring_enc[0];
+
+	if (ring->use_doorbell)
+		adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+			((adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+			11 * vcn_inst),
+			adev->vcn.inst[i].aid_id);
+
+	/* Re-init fw_shared, if required */
+	vcn_v5_0_1_fw_shared_init(adev, i);
+
+	return amdgpu_ring_test_helper(ring);
+}
+
 /**
  * vcn_v5_0_1_hw_init - start and test VCN block
  *
@@ -267,7 +287,7 @@ static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block)
 {
 	struct amdgpu_device *adev = ip_block->adev;
 	struct amdgpu_ring *ring;
-	int i, r, vcn_inst;
+	int i, r;
 
 	if (amdgpu_sriov_vf(adev)) {
 		r = vcn_v5_0_1_start_sriov(adev);
@@ -285,19 +305,7 @@ static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block)
 		if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
 			adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED);
 		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-			vcn_inst = GET_INST(VCN, i);
-			ring = &adev->vcn.inst[i].ring_enc[0];
-
-			if (ring->use_doorbell)
-				adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
-					((adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-					11 * vcn_inst),
-					adev->vcn.inst[i].aid_id);
-
-			/* Re-init fw_shared, if required */
-			vcn_v5_0_1_fw_shared_init(adev, i);
-
-			r = amdgpu_ring_test_helper(ring);
+			r = vcn_v5_0_1_hw_init_inst(adev, i);
 			if (r)
 				return r;
 		}
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [v3 2/5] drm/amdgpu: Add ring reset support for VCN v5.0.1
  2025-08-20  3:03 [v3 1/5] drm/amdgpu: Refactor VCN v5.0.1 HW init into separate instance function Jesse.Zhang
@ 2025-08-20  3:03 ` Jesse.Zhang
  2025-08-21 15:38   ` Lijo Lazar
  2025-08-20  3:03 ` [v3 3/5] drm/amdgpu: Move VCN reset mask setup to late_init for VCN 5.0.1 Jesse.Zhang
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Jesse.Zhang @ 2025-08-20  3:03 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alexander.Deucher, Christian Koenig, lijo.lazar, leo.liu,
	sonny.jiang, Jesse.Zhang, Ruili Ji

Implement the ring reset callback for VCN v5.0.1 to properly handle
hardware recovery when encountering GPU hangs. The new functionality:

1. Adds vcn_v5_0_1_ring_reset() function that:
   - Prepares for reset using amdgpu_ring_reset_helper_begin()
   - Performs VCN instance reset via amdgpu_dpm_reset_vcn()
   - Re-initializes hardware through vcn_v5_0_1_hw_init_inst()
   - Restarts DPG mode with vcn_v5_0_1_start_dpg_mode()
   - Completes reset with amdgpu_ring_reset_helper_end()

2. Hooks the reset function into the unified ring functions via:
   - Adding .reset = vcn_v5_0_1_ring_reset to vcn_v5_0_1_unified_ring_vm_funcs

3. Maintains existing behavior for SR-IOV VF cases by checking RRMT status

This provides proper hardware recovery capabilities for VCN 5.0.1 IP block
during fault conditions, matching functionality available in other VCN versions.

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
Signed-off-by: Ruili Ji <ruiliji2@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 29 +++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
index 1b5d44fa2b57..779043eac827 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
@@ -1284,6 +1284,34 @@ static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring)
 	}
 }
 
+static int vcn_v5_0_1_ring_reset(struct amdgpu_ring *ring,
+				 unsigned int vmid,
+				 struct amdgpu_fence *timedout_fence)
+{
+	int r = 0;
+	int vcn_inst;
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+	vcn_inst = GET_INST(VCN, ring->me);
+	r = amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst);
+
+	if (r) {
+		DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r);
+		return r;
+	}
+
+	/* This flag is not set for VF, assumed to be disabled always */
+	if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
+		adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED);
+	vcn_v5_0_1_hw_init_inst(adev, ring->me);
+	vcn_v5_0_1_start_dpg_mode(vinst, adev->vcn.inst[ring->me].indirect_sram);
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
 static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = {
 	.type = AMDGPU_RING_TYPE_VCN_ENC,
 	.align_mask = 0x3f,
@@ -1312,6 +1340,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = {
 	.emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg,
 	.emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+	.reset = vcn_v5_0_1_ring_reset,
 };
 
 /**
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [v3 3/5] drm/amdgpu: Move VCN reset mask setup to late_init for VCN 5.0.1
  2025-08-20  3:03 [v3 1/5] drm/amdgpu: Refactor VCN v5.0.1 HW init into separate instance function Jesse.Zhang
  2025-08-20  3:03 ` [v3 2/5] drm/amdgpu: Add ring reset support for VCN v5.0.1 Jesse.Zhang
@ 2025-08-20  3:03 ` Jesse.Zhang
  2025-08-20  3:03 ` [v3 4/5] drm/amd/pm: Update SMU v13.0.6 PPT caps initialization Jesse.Zhang
  2025-08-20  3:03 ` [v3 5/5] drm/amd/pm: Add VCN reset message support for SMU v13.0.12 Jesse.Zhang
  3 siblings, 0 replies; 11+ messages in thread
From: Jesse.Zhang @ 2025-08-20  3:03 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alexander.Deucher, Christian Koenig, lijo.lazar, leo.liu,
	sonny.jiang, Jesse.Zhang, Ruili Ji

This patch moves the initialization of the VCN supported_reset mask from
sw_init to a new late_init function for VCN 5.0.1. The change ensures
that all necessary hardware and firmware initialization is complete
before determining the supported reset types.

Key changes:
- Added vcn_v5_0_1_late_init() function to handle late initialization
- Moved supported_reset mask setup from sw_init to late_init
- Added check for per-queue reset support via amdgpu_dpm_reset_vcn_is_supported()
- Updated ip_funcs to use the new late_init function

This change helps ensure proper reset behavior by waiting until all
dependencies are initialized before determining available reset types.

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
Signed-off-by: Ruili Ji <ruiliji2@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
index 779043eac827..0c3f9c87960a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
@@ -113,6 +113,19 @@ static int vcn_v5_0_1_early_init(struct amdgpu_ip_block *ip_block)
 	return 0;
 }
 
+static int vcn_v5_0_1_late_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	adev->vcn.supported_reset =
+		amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+
+	if (amdgpu_dpm_reset_vcn_is_supported(adev))
+		adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+	return 0;
+}
+
 static void vcn_v5_0_1_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
 {
 	struct amdgpu_vcn5_fw_shared *fw_shared;
@@ -187,10 +200,6 @@ static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block)
 		vcn_v5_0_1_fw_shared_init(adev, i);
 	}
 
-	/* TODO: Add queue reset mask when FW fully supports it */
-	adev->vcn.supported_reset =
-		amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
-
 	if (amdgpu_sriov_vf(adev)) {
 		r = amdgpu_virt_alloc_mm_table(adev);
 		if (r)
@@ -1544,7 +1553,7 @@ static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev)
 static const struct amd_ip_funcs vcn_v5_0_1_ip_funcs = {
 	.name = "vcn_v5_0_1",
 	.early_init = vcn_v5_0_1_early_init,
-	.late_init = NULL,
+	.late_init = vcn_v5_0_1_late_init,
 	.sw_init = vcn_v5_0_1_sw_init,
 	.sw_fini = vcn_v5_0_1_sw_fini,
 	.hw_init = vcn_v5_0_1_hw_init,
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [v3 4/5] drm/amd/pm: Update SMU v13.0.6 PPT caps initialization
  2025-08-20  3:03 [v3 1/5] drm/amdgpu: Refactor VCN v5.0.1 HW init into separate instance function Jesse.Zhang
  2025-08-20  3:03 ` [v3 2/5] drm/amdgpu: Add ring reset support for VCN v5.0.1 Jesse.Zhang
  2025-08-20  3:03 ` [v3 3/5] drm/amdgpu: Move VCN reset mask setup to late_init for VCN 5.0.1 Jesse.Zhang
@ 2025-08-20  3:03 ` Jesse.Zhang
  2025-08-20  6:03   ` Wang, Yang(Kevin)
  2025-08-21 15:44   ` Lijo Lazar
  2025-08-20  3:03 ` [v3 5/5] drm/amd/pm: Add VCN reset message support for SMU v13.0.12 Jesse.Zhang
  3 siblings, 2 replies; 11+ messages in thread
From: Jesse.Zhang @ 2025-08-20  3:03 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alexander.Deucher, Christian Koenig, lijo.lazar, leo.liu,
	sonny.jiang, Jesse.Zhang, Alex Deucher

Update the conditions for setting the SMU vcn reset caps in the SMU v13.0.6 PPT
initialization function. Specifically:

- Add support for VCN reset capability for firmware versions 0x00558200 and
  above when the program version is 0.
- Add support for VCN reset capability for firmware versions 0x05551800 and
  above when the program version is 5.

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 627a8188d868..9306bfe808e4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -437,7 +437,9 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu)
 	    ((pgm == 4) && (fw_ver >= 0x4557000)))
 		smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET));
 
-	if ((pgm == 4) && (fw_ver >= 0x04557100))
+	if (((pgm == 0) && (fw_ver >= 0x00558200)) ||
+	    ((pgm == 4) && (fw_ver >= 0x04557100)) ||
+	    ((pgm == 5) && (fw_ver >= 0x05551800)))
 		smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET));
 }
 
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [v3 5/5] drm/amd/pm: Add VCN reset message support for SMU v13.0.12
  2025-08-20  3:03 [v3 1/5] drm/amdgpu: Refactor VCN v5.0.1 HW init into separate instance function Jesse.Zhang
                   ` (2 preceding siblings ...)
  2025-08-20  3:03 ` [v3 4/5] drm/amd/pm: Update SMU v13.0.6 PPT caps initialization Jesse.Zhang
@ 2025-08-20  3:03 ` Jesse.Zhang
  2025-08-21 15:46   ` Lijo Lazar
  3 siblings, 1 reply; 11+ messages in thread
From: Jesse.Zhang @ 2025-08-20  3:03 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alexander.Deucher, Christian Koenig, lijo.lazar, leo.liu,
	sonny.jiang, Jesse.Zhang, Alex Deucher

This commit adds support for VCN reset functionality in SMU v13.0.12 by:

1. Adding two new PPSMC messages in smu_v13_0_12_ppsmc.h and bumping
   PPSMC_Message_Count to 0x60 to account for them:
   - PPSMC_MSG_ResetVCN (0x5E)
   - PPSMC_MSG_CPPipeReset (0x5F)

2. Adding message mapping for ResetVCN in smu_v13_0_12_ppt.c:
   - Maps SMU_MSG_ResetVCN to PPSMC_MSG_ResetVCN

These changes enable proper VCN reset handling through the SMU firmware
interface for compatible AMD GPUs.

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h | 4 +++-
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c         | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h
index aff2776a8b6f..037529eb70bb 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h
@@ -120,7 +120,9 @@
 #define PPSMC_MSG_GetBadPageSeverity                0x5B
 #define PPSMC_MSG_GetSystemMetricsTable             0x5C
 #define PPSMC_MSG_GetSystemMetricsVersion           0x5D
-#define PPSMC_Message_Count                         0x5E
+#define PPSMC_MSG_ResetVCN                          0x5E
+#define PPSMC_MSG_CPPipeReset                       0x5F
+#define PPSMC_Message_Count                         0x60
 
 //PPSMC Reset Types for driver msg argument
 #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET        0x1
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
index 32fd0be05cff..a3eb19f702d0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
@@ -136,6 +136,7 @@ const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] =
 	MSG_MAP(RmaDueToBadPageThreshold,            PPSMC_MSG_RmaDueToBadPageThreshold,        0),
 	MSG_MAP(SetThrottlingPolicy,                 PPSMC_MSG_SetThrottlingPolicy,             0),
 	MSG_MAP(ResetSDMA,                           PPSMC_MSG_ResetSDMA,                       0),
+	MSG_MAP(ResetVCN,                            PPSMC_MSG_ResetVCN,                        0),
 	MSG_MAP(GetStaticMetricsTable,               PPSMC_MSG_GetStaticMetricsTable,           1),
 	MSG_MAP(GetSystemMetricsTable,               PPSMC_MSG_GetSystemMetricsTable,           0),
 };
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* RE: [v3 4/5] drm/amd/pm: Update SMU v13.0.6 PPT caps initialization
  2025-08-20  3:03 ` [v3 4/5] drm/amd/pm: Update SMU v13.0.6 PPT caps initialization Jesse.Zhang
@ 2025-08-20  6:03   ` Wang, Yang(Kevin)
  2025-08-21 15:44   ` Lijo Lazar
  1 sibling, 0 replies; 11+ messages in thread
From: Wang, Yang(Kevin) @ 2025-08-20  6:03 UTC (permalink / raw)
  To: Zhang, Jesse(Jie), amd-gfx@lists.freedesktop.org
  Cc: Deucher, Alexander, Koenig, Christian, Lazar, Lijo, Liu, Leo,
	Jiang, Sonny, Zhang, Jesse(Jie), Deucher, Alexander

[AMD Official Use Only - AMD Internal Distribution Only]

Patches #4 and #5 are

Reviewed-by: Yang Wang <kevinyang.wang@amd.com>

Best Regards,
Kevin

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Jesse.Zhang
Sent: Wednesday, August 20, 2025 11:03
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; Liu, Leo <Leo.Liu@amd.com>; Jiang, Sonny <Sonny.Jiang@amd.com>; Zhang, Jesse(Jie) <Jesse.Zhang@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>
Subject: [v3 4/5] drm/amd/pm: Update SMU v13.0.6 PPT caps initialization

Update the conditions for setting the SMU vcn reset caps in the SMU v13.0.6 PPT initialization function. Specifically:

- Add support for VCN reset capability for firmware versions 0x00558200 and
  above when the program version is 0.
- Add support for VCN reset capability for firmware versions 0x05551800 and
  above when the program version is 5.

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 627a8188d868..9306bfe808e4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -437,7 +437,9 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu)
            ((pgm == 4) && (fw_ver >= 0x4557000)))
                smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET));

-       if ((pgm == 4) && (fw_ver >= 0x04557100))
+       if (((pgm == 0) && (fw_ver >= 0x00558200)) ||
+           ((pgm == 4) && (fw_ver >= 0x04557100)) ||
+           ((pgm == 5) && (fw_ver >= 0x05551800)))
                smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET));  }

--
2.49.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [v3 2/5] drm/amdgpu: Add ring reset support for VCN v5.0.1
  2025-08-20  3:03 ` [v3 2/5] drm/amdgpu: Add ring reset support for VCN v5.0.1 Jesse.Zhang
@ 2025-08-21 15:38   ` Lijo Lazar
  0 siblings, 0 replies; 11+ messages in thread
From: Lijo Lazar @ 2025-08-21 15:38 UTC (permalink / raw)
  To: Jesse.Zhang, amd-gfx
  Cc: Alexander.Deucher, Christian Koenig, leo.liu, sonny.jiang,
	Ruili Ji



On 8/20/2025 8:33 AM, Jesse.Zhang wrote:
> Implement the ring reset callback for VCN v5.0.1 to properly handle
> hardware recovery when encountering GPU hangs. The new functionality:
> 
> 1. Adds vcn_v5_0_1_ring_reset() function that:
>    - Prepares for reset using amdgpu_ring_reset_helper_begin()
>    - Performs VCN instance reset via amdgpu_dpm_reset_vcn()
>    - Re-initializes hardware through vcn_v5_0_1_hw_init_inst()
>    - Restarts DPG mode with vcn_v5_0_1_start_dpg_mode()
>    - Completes reset with amdgpu_ring_reset_helper_end()
> 
> 2. Hooks the reset function into the unified ring functions via:
>    - Adding .reset = vcn_v5_0_1_ring_reset to vcn_v5_0_1_unified_ring_vm_funcs
> 
> 3. Maintains existing behavior for SR-IOV VF cases by checking RRMT status
> 
> This provides proper hardware recovery capabilities for VCN 5.0.1 IP block
> during fault conditions, matching functionality available in other VCN versions.
> 
> Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
> Signed-off-by: Ruili Ji <ruiliji2@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 29 +++++++++++++++++++++++++
>  1 file changed, 29 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
> index 1b5d44fa2b57..779043eac827 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
> @@ -1284,6 +1284,34 @@ static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring)
>  	}
>  }
>  
> +static int vcn_v5_0_1_ring_reset(struct amdgpu_ring *ring,
> +				 unsigned int vmid,
> +				 struct amdgpu_fence *timedout_fence)
> +{
> +	int r = 0;
> +	int vcn_inst;
> +	struct amdgpu_device *adev = ring->adev;
> +	struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
> +
> +	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
> +
> +	vcn_inst = GET_INST(VCN, ring->me);
> +	r = amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst);
> +
> +	if (r) {
> +		DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r);
> +		return r;
> +	}
> +
> +	/* This flag is not set for VF, assumed to be disabled always */
> +	if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
> +		adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED);

This is not required. The assumption is that the setting is common across
all instances, hence only the first instance's setting is taken. So if VCN
instance 2 or 3 is reset, this doesn't matter.

> +	vcn_v5_0_1_hw_init_inst(adev, ring->me);
> +	vcn_v5_0_1_start_dpg_mode(vinst, adev->vcn.inst[ring->me].indirect_sram);

You could use vinst->indirect_sram. That said, it seems there is no need
to pass this as an extra parameter.

Thanks,
Lijo
> +
> +	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
> +}
> +
>  static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = {
>  	.type = AMDGPU_RING_TYPE_VCN_ENC,
>  	.align_mask = 0x3f,
> @@ -1312,6 +1340,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = {
>  	.emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg,
>  	.emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait,
>  	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
> +	.reset = vcn_v5_0_1_ring_reset,
>  };
>  
>  /**


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [v3 4/5] drm/amd/pm: Update SMU v13.0.6 PPT caps initialization
  2025-08-20  3:03 ` [v3 4/5] drm/amd/pm: Update SMU v13.0.6 PPT caps initialization Jesse.Zhang
  2025-08-20  6:03   ` Wang, Yang(Kevin)
@ 2025-08-21 15:44   ` Lijo Lazar
  2025-08-22  1:33     ` Zhang, Jesse(Jie)
  1 sibling, 1 reply; 11+ messages in thread
From: Lijo Lazar @ 2025-08-21 15:44 UTC (permalink / raw)
  To: Jesse.Zhang, amd-gfx
  Cc: Alexander.Deucher, Christian Koenig, leo.liu, sonny.jiang



On 8/20/2025 8:33 AM, Jesse.Zhang wrote:
> Update the conditions for setting the SMU vcn reset caps in the SMU v13.0.6 PPT
> initialization function. Specifically:
> 
> - Add support for VCN reset capability for firmware versions 0x00558200 and
>   above when the program version is 0.
> - Add support for VCN reset capability for firmware versions 0x05551800 and
>   above when the program version is 5.
> 
> Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
> Acked-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> index 627a8188d868..9306bfe808e4 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> @@ -437,7 +437,9 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu)
>  	    ((pgm == 4) && (fw_ver >= 0x4557000)))
>  		smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET));
>  
> -	if ((pgm == 4) && (fw_ver >= 0x04557100))
> +	if (((pgm == 0) && (fw_ver >= 0x00558200)) ||
> +	    ((pgm == 4) && (fw_ver >= 0x04557100)) ||
> +	    ((pgm == 5) && (fw_ver >= 0x05551800)))

pgm = 5 should be under smu_v13_0_14_init_caps().

Thanks,
Lijo

>  		smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET));
>  }
>  


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [v3 5/5] drm/amd/pm: Add VCN reset message support for SMU v13.0.12
  2025-08-20  3:03 ` [v3 5/5] drm/amd/pm: Add VCN reset message support for SMU v13.0.12 Jesse.Zhang
@ 2025-08-21 15:46   ` Lijo Lazar
  0 siblings, 0 replies; 11+ messages in thread
From: Lijo Lazar @ 2025-08-21 15:46 UTC (permalink / raw)
  To: Jesse.Zhang, amd-gfx
  Cc: Alexander.Deucher, Christian Koenig, leo.liu, sonny.jiang



On 8/20/2025 8:33 AM, Jesse.Zhang wrote:
> This commit adds support for VCN reset functionality in SMU v13.0.12 by:
> 
> 1. Adding two new PPSMC messages in smu_v13_0_12_ppsmc.h:
>    - PPSMC_MSG_ResetVCN (0x5E)
>    - PPSMC_MSG_CPPipeReset (0x5F)
>    - Updates PPSMC_Message_Count to 0x60 to account for new messages
> 
> 2. Adding message mapping for ResetVCN in smu_v13_0_12_ppt.c:
>    - Maps SMU_MSG_ResetVCN to PPSMC_MSG_ResetVCN
> 
> These changes enable proper VCN reset handling through the SMU firmware
> interface for compatible AMD GPUs.
> 

Is there a corresponding FW version which enables this? If not yet, it
would be better to add both together in a different series.

Thanks,
Lijo

> Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
> Acked-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>  drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h | 4 +++-
>  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c         | 1 +
>  2 files changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h
> index aff2776a8b6f..037529eb70bb 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h
> @@ -120,7 +120,9 @@
>  #define PPSMC_MSG_GetBadPageSeverity                0x5B
>  #define PPSMC_MSG_GetSystemMetricsTable             0x5C
>  #define PPSMC_MSG_GetSystemMetricsVersion           0x5D
> -#define PPSMC_Message_Count                         0x5E
> +#define PPSMC_MSG_ResetVCN                          0x5E
> +#define PPSMC_MSG_CPPipeReset                       0x5F
> +#define PPSMC_Message_Count                         0x60
>  
>  //PPSMC Reset Types for driver msg argument
>  #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET        0x1
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
> index 32fd0be05cff..a3eb19f702d0 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
> @@ -136,6 +136,7 @@ const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] =
>  	MSG_MAP(RmaDueToBadPageThreshold,            PPSMC_MSG_RmaDueToBadPageThreshold,        0),
>  	MSG_MAP(SetThrottlingPolicy,                 PPSMC_MSG_SetThrottlingPolicy,             0),
>  	MSG_MAP(ResetSDMA,                           PPSMC_MSG_ResetSDMA,                       0),
> +	MSG_MAP(ResetVCN,                            PPSMC_MSG_ResetVCN,                        0),
>  	MSG_MAP(GetStaticMetricsTable,               PPSMC_MSG_GetStaticMetricsTable,           1),
>  	MSG_MAP(GetSystemMetricsTable,               PPSMC_MSG_GetSystemMetricsTable,           0),
>  };


^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [v3 4/5] drm/amd/pm: Update SMU v13.0.6 PPT caps initialization
  2025-08-21 15:44   ` Lijo Lazar
@ 2025-08-22  1:33     ` Zhang, Jesse(Jie)
  2025-08-22  1:47       ` Lazar, Lijo
  0 siblings, 1 reply; 11+ messages in thread
From: Zhang, Jesse(Jie) @ 2025-08-22  1:33 UTC (permalink / raw)
  To: Lazar, Lijo, amd-gfx@lists.freedesktop.org
  Cc: Deucher, Alexander, Koenig, Christian, Liu, Leo, Jiang, Sonny

[AMD Official Use Only - AMD Internal Distribution Only]

-----Original Message-----
From: Lazar, Lijo <Lijo.Lazar@amd.com>
Sent: Thursday, August 21, 2025 11:45 PM
To: Zhang, Jesse(Jie) <Jesse.Zhang@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Liu, Leo <Leo.Liu@amd.com>; Jiang, Sonny <Sonny.Jiang@amd.com>
Subject: Re: [v3 4/5] drm/amd/pm: Update SMU v13.0.6 PPT caps initialization



On 8/20/2025 8:33 AM, Jesse.Zhang wrote:
> Update the conditions for setting the SMU vcn reset caps in the SMU
> v13.0.6 PPT initialization function. Specifically:
>
> - Add support for VCN reset capability for firmware versions 0x00558200 and
>   above when the program version is 0.
> - Add support for VCN reset capability for firmware versions 0x05551800 and
>   above when the program version is 5.
>
> Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
> Acked-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> index 627a8188d868..9306bfe808e4 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> @@ -437,7 +437,9 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu)
>           ((pgm == 4) && (fw_ver >= 0x4557000)))
>               smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET));
>
> -     if ((pgm == 4) && (fw_ver >= 0x04557100))
> +     if (((pgm == 0) && (fw_ver >= 0x00558200)) ||
> +         ((pgm == 4) && (fw_ver >= 0x04557100)) ||
> +         ((pgm == 5) && (fw_ver >= 0x05551800)))

pgm = 5 should be under smu_v13_0_14_init_caps().
 No, Pgm 0, 4, and 5 are all 13.0.6.
13.0.14 is not ready yet and is being tested.

Thanks
Jesse

Thanks,
Lijo

>               smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET));  }
>


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [v3 4/5] drm/amd/pm: Update SMU v13.0.6 PPT caps initialization
  2025-08-22  1:33     ` Zhang, Jesse(Jie)
@ 2025-08-22  1:47       ` Lazar, Lijo
  0 siblings, 0 replies; 11+ messages in thread
From: Lazar, Lijo @ 2025-08-22  1:47 UTC (permalink / raw)
  To: Zhang, Jesse(Jie), amd-gfx@lists.freedesktop.org
  Cc: Deucher, Alexander, Koenig, Christian, Liu, Leo, Jiang, Sonny

[-- Attachment #1: Type: text/plain, Size: 2695 bytes --]

[Public]

Hi Jesse,

As far as I know, the IP version for program 5 is 13.0.14. You probably meant that 13.0.12 is not ready.

Thanks,
Lijo
________________________________
From: Zhang, Jesse(Jie) <Jesse.Zhang@amd.com>
Sent: Friday, August 22, 2025 7:03:05 AM
To: Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Liu, Leo <Leo.Liu@amd.com>; Jiang, Sonny <Sonny.Jiang@amd.com>
Subject: RE: [v3 4/5] drm/amd/pm: Update SMU v13.0.6 PPT caps initialization

[AMD Official Use Only - AMD Internal Distribution Only]

-----Original Message-----
From: Lazar, Lijo <Lijo.Lazar@amd.com>
Sent: Thursday, August 21, 2025 11:45 PM
To: Zhang, Jesse(Jie) <Jesse.Zhang@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Liu, Leo <Leo.Liu@amd.com>; Jiang, Sonny <Sonny.Jiang@amd.com>
Subject: Re: [v3 4/5] drm/amd/pm: Update SMU v13.0.6 PPT caps initialization



On 8/20/2025 8:33 AM, Jesse.Zhang wrote:
> Update the conditions for setting the SMU vcn reset caps in the SMU
> v13.0.6 PPT initialization function. Specifically:
>
> - Add support for VCN reset capability for firmware versions 0x00558200 and
>   above when the program version is 0.
> - Add support for VCN reset capability for firmware versions 0x05551800 and
>   above when the program version is 5.
>
> Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
> Acked-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> index 627a8188d868..9306bfe808e4 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> @@ -437,7 +437,9 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu)
>           ((pgm == 4) && (fw_ver >= 0x4557000)))
>               smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET));
>
> -     if ((pgm == 4) && (fw_ver >= 0x04557100))
> +     if (((pgm == 0) && (fw_ver >= 0x00558200)) ||
> +         ((pgm == 4) && (fw_ver >= 0x04557100)) ||
> +         ((pgm == 5) && (fw_ver >= 0x05551800)))

pgm = 5 should be under smu_v13_0_14_init_caps().
 No, Pgm 0, 4, and 5 are all 13.0.6.
13.0.14 is not ready yet and is being tested.

Thanks
Jesse

Thanks,
Lijo

>               smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET));  }
>


[-- Attachment #2: Type: text/html, Size: 4941 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2025-08-22  1:47 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-08-20  3:03 [v3 1/5] drm/amdgpu: Refactor VCN v5.0.1 HW init into separate instance function Jesse.Zhang
2025-08-20  3:03 ` [v3 2/5] drm/amdgpu: Add ring reset support for VCN v5.0.1 Jesse.Zhang
2025-08-21 15:38   ` Lijo Lazar
2025-08-20  3:03 ` [v3 3/5] drm/amdgpu: Move VCN reset mask setup to late_init for VCN 5.0.1 Jesse.Zhang
2025-08-20  3:03 ` [v3 4/5] drm/amd/pm: Update SMU v13.0.6 PPT caps initialization Jesse.Zhang
2025-08-20  6:03   ` Wang, Yang(Kevin)
2025-08-21 15:44   ` Lijo Lazar
2025-08-22  1:33     ` Zhang, Jesse(Jie)
2025-08-22  1:47       ` Lazar, Lijo
2025-08-20  3:03 ` [v3 5/5] drm/amd/pm: Add VCN reset message support for SMU v13.0.12 Jesse.Zhang
2025-08-21 15:46   ` Lijo Lazar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).