AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] drm/amdgpu: check whether smu is idle in sriov case
@ 2023-07-07 10:17 Danijel Slivka
  2023-07-07 10:51 ` Lazar, Lijo
  0 siblings, 1 reply; 2+ messages in thread
From: Danijel Slivka @ 2023-07-07 10:17 UTC (permalink / raw)
  To: amd-gfx; +Cc: Danijel Slivka, Jingwen Chen, Nikola Prica

Why:
If the register mmMP1_SMN_C2PMSG_90 is programmed to 0x0 before
guest initialization, then modprobe amdgpu will fail at smu hw_init.
(The default value of mmMP1_SMN_C2PMSG_90 in a clean guest
environment is 0x1.)

How to fix:
This patch checks whether the SMU is idle by sending a test
message to it. If the SMU is idle, it will respond.

Signed-off-by: Danijel Slivka <danijel.slivka@amd.com>
Signed-off-by: Nikola Prica <nikola.prica@amd.com>
Signed-off-by: Jingwen Chen <Jingwen.Chen2@amd.com>
Signed-off-by: pengzhou <PengJu.Zhou@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     |  8 ++++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  7 ++++
 .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   |  1 +
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c        | 40 +++++++++++++++++++
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h        |  2 +
 5 files changed, 58 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index ce41a8309582..63ea4cd32ece 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1443,6 +1443,14 @@ static int smu_start_smc_engine(struct smu_context *smu)
 		}
 	}
 
+	if (amdgpu_sriov_vf(adev) && smu->ppt_funcs->wait_smu_idle) {
+		ret = smu->ppt_funcs->wait_smu_idle(smu);
+		if (ret) {
+			dev_err(adev->dev, "SMU is not idle\n");
+			return ret;
+		}
+	}
+
 	/*
 	 * Send msg GetDriverIfVersion to check if the return value is equal
 	 * with DRIVER_IF_VERSION of smc header.
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 6e2069dcb6b9..1bf87ad30d93 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -926,6 +926,13 @@ struct pptable_funcs {
 	 */
 	int (*check_fw_status)(struct smu_context *smu);
 
+	/**
+	 * @wait_smu_idle: wait for SMU idle status.
+	 *
+	 * Return: Zero if check passes, negative errno on failure.
+	 */
+	int (*wait_smu_idle)(struct smu_context *smu);
+
 	/**
 	 * @set_mp1_state: put SMU into a correct state for comming
 	 *                 resume from runpm or gpu reset.
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index c94d825a871b..3745e4f96433 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -3503,6 +3503,7 @@ static const struct pptable_funcs navi10_ppt_funcs = {
 	.init_power = smu_v11_0_init_power,
 	.fini_power = smu_v11_0_fini_power,
 	.check_fw_status = smu_v11_0_check_fw_status,
+	.wait_smu_idle = smu_cmn_wait_smu_idle,
 	.setup_pptable = navi10_setup_pptable,
 	.get_vbios_bootup_values = smu_v11_0_get_vbios_bootup_values,
 	.check_fw_version = smu_v11_0_check_fw_version,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 3ecb900e6ecd..e3c972984b2b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -313,6 +313,46 @@ int smu_cmn_wait_for_response(struct smu_context *smu)
 	return res;
 }
 
+/**
+ * smu_cmn_wait_smu_idle -- wait for smu to become idle
+ * @smu: pointer to an SMU context
+ *
+ * Send SMU_MSG_TestMessage to check whether SMU is idle.
+ * If SMU is idle, it will respond.
+ * The returned parameter will be the param you pass + 1.
+ *
+ * Return 0 on success, -errno on error, indicating the execution
+ * status and result of the message being waited for. See
+ * __smu_cmn_reg2errno() for details of the -errno.
+ */
+int smu_cmn_wait_smu_idle(struct smu_context *smu)
+{
+	u32 reg;
+	u32 param = 0xff00011;
+	uint32_t read_arg;
+	int res, index;
+
+	index = smu_cmn_to_asic_specific_index(smu,
+					       CMN2ASIC_MAPPING_MSG,
+					       SMU_MSG_TestMessage);
+
+	__smu_cmn_send_msg(smu, index, param);
+	reg = __smu_cmn_poll_stat(smu);
+	res = __smu_cmn_reg2errno(smu, reg);
+
+	if (unlikely(smu->adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) &&
+	    res && (res != -ETIME)) {
+		amdgpu_device_halt(smu->adev);
+		WARN_ON(1);
+	}
+
+	smu_cmn_read_arg(smu, &read_arg);
+	if (read_arg == param + 1)
+		return 0;
+	return res;
+}
+
+
 /**
  * smu_cmn_send_smc_msg_with_param -- send a message with parameter
  * @smu: pointer to an SMU context
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
index d7cd358a53bd..65da886d6a8c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
@@ -50,6 +50,8 @@ int smu_cmn_send_debug_smc_msg_with_param(struct smu_context *smu,
 
 int smu_cmn_wait_for_response(struct smu_context *smu);
 
+int smu_cmn_wait_smu_idle(struct smu_context *smu);
+
 int smu_cmn_to_asic_specific_index(struct smu_context *smu,
 				   enum smu_cmn2asic_mapping_type type,
 				   uint32_t index);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v2] drm/amdgpu: check whether smu is idle in sriov case
  2023-07-07 10:17 [PATCH v2] drm/amdgpu: check whether smu is idle in sriov case Danijel Slivka
@ 2023-07-07 10:51 ` Lazar, Lijo
  0 siblings, 0 replies; 2+ messages in thread
From: Lazar, Lijo @ 2023-07-07 10:51 UTC (permalink / raw)
  To: Danijel Slivka, amd-gfx; +Cc: Jingwen Chen, Nikola Prica



On 7/7/2023 3:47 PM, Danijel Slivka wrote:
> Why:
> If the reg mmMP1_SMN_C2PMSG_90 is being programed to 0x0 before
> guest initialization, then modprobe amdgpu will fail at smu hw_init.
> (the default mmMP1_SMN_C2PMSG_90 at a clean guest environment is 0x1).
> 
A response to the FW message doesn't mean the SMU is idle. Probably, this
is only a check that the FW is ready.

Instead of introducing a new ppt function, move this implementation to
check_fw_status(). For the VF case, a test message may be sent to
ascertain that the FW is ready.

Thanks,
Lijo

> How to fix:
> this patch is to check whether smu is idle by sending a test
> message to smu. If smu is idle, it will respond.
> 
> Signed-off-by: Danijel Slivka <danijel.slivka@amd.com>
> Signed-off-by: Nikola Prica <nikola.prica@amd.com>
> Signed-off-by: Jingwen Chen <Jingwen.Chen2@amd.com>
> Signed-off-by: pengzhou <PengJu.Zhou@amd.com>
> ---
>   drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     |  8 ++++
>   drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  7 ++++
>   .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   |  1 +
>   drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c        | 40 +++++++++++++++++++
>   drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h        |  2 +
>   5 files changed, 58 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> index ce41a8309582..63ea4cd32ece 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> @@ -1443,6 +1443,14 @@ static int smu_start_smc_engine(struct smu_context *smu)
>   		}
>   	}
>   
> +	if (amdgpu_sriov_vf(adev) && smu->ppt_funcs->wait_smu_idle) {
> +		ret = smu->ppt_funcs->wait_smu_idle(smu);
> +		if (ret) {
> +			dev_err(adev->dev, "SMU is not idle\n");
> +			return ret;
> +		}
> +	}
> +
>   	/*
>   	 * Send msg GetDriverIfVersion to check if the return value is equal
>   	 * with DRIVER_IF_VERSION of smc header.
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> index 6e2069dcb6b9..1bf87ad30d93 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> @@ -926,6 +926,13 @@ struct pptable_funcs {
>   	 */
>   	int (*check_fw_status)(struct smu_context *smu);
>   
> +	/**
> +	 * @wait_smu_idle: wait for SMU idle status.
> +	 *
> +	 * Return: Zero if check passes, negative errno on failure.
> +	 */
> +	int (*wait_smu_idle)(struct smu_context *smu);
> +
>   	/**
>   	 * @set_mp1_state: put SMU into a correct state for comming
>   	 *                 resume from runpm or gpu reset.
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> index c94d825a871b..3745e4f96433 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> @@ -3503,6 +3503,7 @@ static const struct pptable_funcs navi10_ppt_funcs = {
>   	.init_power = smu_v11_0_init_power,
>   	.fini_power = smu_v11_0_fini_power,
>   	.check_fw_status = smu_v11_0_check_fw_status,
> +	.wait_smu_idle = smu_cmn_wait_smu_idle,
>   	.setup_pptable = navi10_setup_pptable,
>   	.get_vbios_bootup_values = smu_v11_0_get_vbios_bootup_values,
>   	.check_fw_version = smu_v11_0_check_fw_version,
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
> index 3ecb900e6ecd..e3c972984b2b 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
> @@ -313,6 +313,46 @@ int smu_cmn_wait_for_response(struct smu_context *smu)
>   	return res;
>   }
>   
> +/**
> + * smu_cmn_wait_smu_idle -- wait for smu to become idle
> + * @smu: pointer to an SMU context
> + *
> + * Send SMU_MSG_TestMessage to check whether SMU is idle.
> + * If SMU is idle, it will respond.
> + * The returned parameter will be the param you pass + 1.
> + *
> + * Return 0 on success, -errno on error, indicating the execution
> + * status and result of the message being waited for. See
> + * __smu_cmn_reg2errno() for details of the -errno.
> + */
> +int smu_cmn_wait_smu_idle(struct smu_context *smu)
> +{
> +	u32 reg;
> +	u32 param = 0xff00011;
> +	uint32_t read_arg;
> +	int res, index;
> +
> +	index = smu_cmn_to_asic_specific_index(smu,
> +					       CMN2ASIC_MAPPING_MSG,
> +					       SMU_MSG_TestMessage);
> +
> +	__smu_cmn_send_msg(smu, index, param);
> +	reg = __smu_cmn_poll_stat(smu);
> +	res = __smu_cmn_reg2errno(smu, reg);
> +
> +	if (unlikely(smu->adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) &&
> +	    res && (res != -ETIME)) {
> +		amdgpu_device_halt(smu->adev);
> +		WARN_ON(1);
> +	}
> +
> +	smu_cmn_read_arg(smu, &read_arg);
> +	if (read_arg == param + 1)
> +		return 0;
> +	return res;
> +}
> +
> +
>   /**
>    * smu_cmn_send_smc_msg_with_param -- send a message with parameter
>    * @smu: pointer to an SMU context
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
> index d7cd358a53bd..65da886d6a8c 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
> @@ -50,6 +50,8 @@ int smu_cmn_send_debug_smc_msg_with_param(struct smu_context *smu,
>   
>   int smu_cmn_wait_for_response(struct smu_context *smu);
>   
> +int smu_cmn_wait_smu_idle(struct smu_context *smu);
> +
>   int smu_cmn_to_asic_specific_index(struct smu_context *smu,
>   				   enum smu_cmn2asic_mapping_type type,
>   				   uint32_t index);

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-07-07 10:52 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-07 10:17 [PATCH v2] drm/amdgpu: check whether smu is idle in sriov case Danijel Slivka
2023-07-07 10:51 ` Lazar, Lijo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox