From: "Lazar, Lijo" <lijo.lazar@amd.com>
To: Evan Quan <evan.quan@amd.com>, amd-gfx@lists.freedesktop.org
Cc: Alexander.Deucher@amd.com, rui.huang@amd.com
Subject: Re: [PATCH 07/12] drm/amd/pm: correct the checks for granting gpu reset APIs
Date: Mon, 14 Feb 2022 09:34:17 +0530 [thread overview]
Message-ID: <80a49c9c-77f6-5047-6584-da24f49596f7@amd.com> (raw)
In-Reply-To: <20220211075209.894833-7-evan.quan@amd.com>
On 2/11/2022 1:22 PM, Evan Quan wrote:
> Those gpu reset APIs can be granted when:
> - System is up and dpm features are enabled.
> - System is under resuming and dpm features are not yet enabled.
> Under such scenario, the PMFW is already alive and can support
> those gpu reset functionalities.
>
> Signed-off-by: Evan Quan <evan.quan@amd.com>
> Change-Id: I8c2f07138921eb53a2bd7fb94f9b3622af0eacf8
> ---
> .../gpu/drm/amd/include/kgd_pp_interface.h | 1 +
> drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 34 +++++++++++++++
> .../gpu/drm/amd/pm/powerplay/amd_powerplay.c | 42 +++++++++++++++----
> .../drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c | 1 +
> .../drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c | 17 ++++++++
> drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h | 1 +
> drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 32 +++++++-------
> 7 files changed, 101 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> index a4c267f15959..892648a4a353 100644
> --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> @@ -409,6 +409,7 @@ struct amd_pm_funcs {
> struct dpm_clocks *clock_table);
> int (*get_smu_prv_buf_details)(void *handle, void **addr, size_t *size);
> void (*pm_compute_clocks)(void *handle);
> + bool (*is_smc_alive)(void *handle);
> };
>
> struct metrics_table_header {
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> index b46ae0063047..5f1d3342f87b 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> @@ -120,12 +120,25 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
> return ret;
> }
>
> +static bool amdgpu_dpm_is_smc_alive(struct amdgpu_device *adev)
> +{
> + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
> +
> + if (!pp_funcs || !pp_funcs->is_smc_alive)
> + return false;
> +
> + return pp_funcs->is_smc_alive;
> +}
> +
> int amdgpu_dpm_baco_enter(struct amdgpu_device *adev)
> {
> const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
> void *pp_handle = adev->powerplay.pp_handle;
> int ret = 0;
>
> + if (!amdgpu_dpm_is_smc_alive(adev))
> + return -EOPNOTSUPP;
> +
> if (!pp_funcs || !pp_funcs->set_asic_baco_state)
> return -ENOENT;
>
> @@ -145,6 +158,9 @@ int amdgpu_dpm_baco_exit(struct amdgpu_device *adev)
> void *pp_handle = adev->powerplay.pp_handle;
> int ret = 0;
>
> + if (!amdgpu_dpm_is_smc_alive(adev))
> + return -EOPNOTSUPP;
> +
> if (!pp_funcs || !pp_funcs->set_asic_baco_state)
> return -ENOENT;
>
> @@ -164,6 +180,9 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
> int ret = 0;
> const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
>
> + if (!amdgpu_dpm_is_smc_alive(adev))
> + return -EOPNOTSUPP;
> +
> if (pp_funcs && pp_funcs->set_mp1_state) {
> mutex_lock(&adev->pm.mutex);
>
> @@ -184,6 +203,9 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev)
> bool baco_cap;
> int ret = 0;
>
> + if (!amdgpu_dpm_is_smc_alive(adev))
> + return false;
> +
> if (!pp_funcs || !pp_funcs->get_asic_baco_capability)
> return false;
>
> @@ -203,6 +225,9 @@ int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev)
> void *pp_handle = adev->powerplay.pp_handle;
> int ret = 0;
>
> + if (!amdgpu_dpm_is_smc_alive(adev))
> + return -EOPNOTSUPP;
> +
> if (!pp_funcs || !pp_funcs->asic_reset_mode_2)
> return -ENOENT;
>
> @@ -221,6 +246,9 @@ int amdgpu_dpm_baco_reset(struct amdgpu_device *adev)
> void *pp_handle = adev->powerplay.pp_handle;
> int ret = 0;
>
> + if (!amdgpu_dpm_is_smc_alive(adev))
> + return -EOPNOTSUPP;
> +
> if (!pp_funcs || !pp_funcs->set_asic_baco_state)
> return -ENOENT;
>
> @@ -244,6 +272,9 @@ bool amdgpu_dpm_is_mode1_reset_supported(struct amdgpu_device *adev)
> struct smu_context *smu = adev->powerplay.pp_handle;
> bool support_mode1_reset = false;
>
> + if (!amdgpu_dpm_is_smc_alive(adev))
> + return false;
> +
> if (is_support_sw_smu(adev)) {
> mutex_lock(&adev->pm.mutex);
> support_mode1_reset = smu_mode1_reset_is_support(smu);
> @@ -258,6 +289,9 @@ int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev)
> struct smu_context *smu = adev->powerplay.pp_handle;
> int ret = -EOPNOTSUPP;
>
> + if (!amdgpu_dpm_is_smc_alive(adev))
> + return -EOPNOTSUPP;
> +
> if (is_support_sw_smu(adev)) {
> mutex_lock(&adev->pm.mutex);
> ret = smu_mode1_reset(smu);
> diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> index bba923cfe08c..4c709f7bcd51 100644
> --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> @@ -844,9 +844,6 @@ static int pp_dpm_set_mp1_state(void *handle, enum pp_mp1_state mp1_state)
> if (!hwmgr)
> return -EINVAL;
>
> - if (!hwmgr->pm_en)
> - return 0;
> -
> if (hwmgr->hwmgr_func->set_mp1_state)
> return hwmgr->hwmgr_func->set_mp1_state(hwmgr, mp1_state);
>
> @@ -1305,8 +1302,7 @@ static int pp_get_asic_baco_capability(void *handle, bool *cap)
> if (!hwmgr)
> return -EINVAL;
>
> - if (!(hwmgr->not_vf && amdgpu_dpm) ||
> - !hwmgr->hwmgr_func->get_asic_baco_capability)
> + if (!hwmgr->hwmgr_func->get_asic_baco_capability)
> return 0;
>
> hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr, cap);
> @@ -1321,7 +1317,7 @@ static int pp_get_asic_baco_state(void *handle, int *state)
> if (!hwmgr)
> return -EINVAL;
>
> - if (!hwmgr->pm_en || !hwmgr->hwmgr_func->get_asic_baco_state)
> + if (!hwmgr->hwmgr_func->get_asic_baco_state)
> return 0;
>
> hwmgr->hwmgr_func->get_asic_baco_state(hwmgr, (enum BACO_STATE *)state);
> @@ -1336,8 +1332,7 @@ static int pp_set_asic_baco_state(void *handle, int state)
> if (!hwmgr)
> return -EINVAL;
>
> - if (!(hwmgr->not_vf && amdgpu_dpm) ||
> - !hwmgr->hwmgr_func->set_asic_baco_state)
> + if (!hwmgr->hwmgr_func->set_asic_baco_state)
> return 0;
>
> hwmgr->hwmgr_func->set_asic_baco_state(hwmgr, (enum BACO_STATE)state);
> @@ -1379,7 +1374,7 @@ static int pp_asic_reset_mode_2(void *handle)
> {
> struct pp_hwmgr *hwmgr = handle;
>
> - if (!hwmgr || !hwmgr->pm_en)
> + if (!hwmgr)
> return -EINVAL;
>
> if (hwmgr->hwmgr_func->asic_reset == NULL) {
> @@ -1517,6 +1512,34 @@ static void pp_pm_compute_clocks(void *handle)
> NULL);
> }
>
> +/* MP Apertures */
> +#define MP1_Public 0x03b00000
> +#define smnMP1_FIRMWARE_FLAGS 0x3010028
> +#define MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK 0x00000001L
> +
> +static bool pp_is_smc_alive(void *handle)
> +{
> + struct pp_hwmgr *hwmgr = handle;
> + struct amdgpu_device *adev = hwmgr->adev;
> + uint32_t mp1_fw_flags;
> +
> + /*
> + * If some ASIC(e.g. smu7/smu8) needs special handling for
> + * checking smc alive, it should have its own implementation
> + * for ->is_smc_alive.
> + */
> + if (hwmgr->hwmgr_func->is_smc_alive)
> + return hwmgr->hwmgr_func->is_smc_alive(hwmgr);
> +
> + mp1_fw_flags = RREG32_PCIE(MP1_Public |
> + (smnMP1_FIRMWARE_FLAGS & 0xffffffff));
> +
The flags check doesn't tell whether PMFW is hung or not. It is a
minimal thing that is set after PMFW boot. To call the API this
condition is necessary in an implicit way. Driver always check this on
boot, if not driver aborts smu init.
So better thing is to go ahead and send the message without any check,
it will tell the result whether PMFW is really working or not.
In short this API is not needed.
Thanks,
Lijo
> + if (mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK)
> + return true;
> +
> + return false;
> +}
> +
> static const struct amd_pm_funcs pp_dpm_funcs = {
> .load_firmware = pp_dpm_load_fw,
> .wait_for_fw_loading_complete = pp_dpm_fw_loading_complete,
> @@ -1582,4 +1605,5 @@ static const struct amd_pm_funcs pp_dpm_funcs = {
> .gfx_state_change_set = pp_gfx_state_change_set,
> .get_smu_prv_buf_details = pp_get_prv_buffer_details,
> .pm_compute_clocks = pp_pm_compute_clocks,
> + .is_smc_alive = pp_is_smc_alive,
> };
> diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
> index a1e11037831a..118039b96524 100644
> --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
> +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
> @@ -5735,6 +5735,7 @@ static const struct pp_hwmgr_func smu7_hwmgr_funcs = {
> .get_asic_baco_state = smu7_baco_get_state,
> .set_asic_baco_state = smu7_baco_set_state,
> .power_off_asic = smu7_power_off_asic,
> + .is_smc_alive = smu7_is_smc_ram_running,
> };
>
> uint8_t smu7_get_sleep_divider_id_from_clock(uint32_t clock,
> diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c
> index b50fd4a4a3d1..fc4d58329f6d 100644
> --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c
> +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c
> @@ -2015,6 +2015,22 @@ static void smu8_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
> }
> }
>
> +#define ixMP1_FIRMWARE_FLAGS 0x3008210
> +#define MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK 0x00000001L
> +
> +static bool smu8_is_smc_running(struct pp_hwmgr *hwmgr)
> +{
> + struct amdgpu_device *adev = hwmgr->adev;
> + uint32_t mp1_fw_flags;
> +
> + mp1_fw_flags = RREG32_SMC(ixMP1_FIRMWARE_FLAGS);
> +
> + if (mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK)
> + return true;
> +
> + return false;
> +}
> +
> static const struct pp_hwmgr_func smu8_hwmgr_funcs = {
> .backend_init = smu8_hwmgr_backend_init,
> .backend_fini = smu8_hwmgr_backend_fini,
> @@ -2047,6 +2063,7 @@ static const struct pp_hwmgr_func smu8_hwmgr_funcs = {
> .dynamic_state_management_disable = smu8_disable_dpm_tasks,
> .notify_cac_buffer_info = smu8_notify_cac_buffer_info,
> .get_thermal_temperature_range = smu8_get_thermal_temperature_range,
> + .is_smc_alive = smu8_is_smc_running,
> };
>
> int smu8_init_function_pointers(struct pp_hwmgr *hwmgr)
> diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h
> index 4f7f2f455301..790fc387752c 100644
> --- a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h
> +++ b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h
> @@ -364,6 +364,7 @@ struct pp_hwmgr_func {
> bool disable);
> ssize_t (*get_gpu_metrics)(struct pp_hwmgr *hwmgr, void **table);
> int (*gfx_state_change)(struct pp_hwmgr *hwmgr, uint32_t state);
> + bool (*is_smc_alive)(struct pp_hwmgr *hwmgr);
> };
>
> struct pp_table_func {
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> index 8b8feaf7aa0e..27a453fb4db7 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> @@ -1845,9 +1845,6 @@ static int smu_set_mp1_state(void *handle,
> struct smu_context *smu = handle;
> int ret = 0;
>
> - if (!smu->pm_enabled)
> - return -EOPNOTSUPP;
> -
> if (smu->ppt_funcs &&
> smu->ppt_funcs->set_mp1_state)
> ret = smu->ppt_funcs->set_mp1_state(smu, mp1_state);
> @@ -2513,9 +2510,6 @@ static int smu_get_baco_capability(void *handle, bool *cap)
>
> *cap = false;
>
> - if (!smu->pm_enabled)
> - return 0;
> -
> if (smu->ppt_funcs && smu->ppt_funcs->baco_is_support)
> *cap = smu->ppt_funcs->baco_is_support(smu);
>
> @@ -2527,9 +2521,6 @@ static int smu_baco_set_state(void *handle, int state)
> struct smu_context *smu = handle;
> int ret = 0;
>
> - if (!smu->pm_enabled)
> - return -EOPNOTSUPP;
> -
> if (state == 0) {
> if (smu->ppt_funcs->baco_exit)
> ret = smu->ppt_funcs->baco_exit(smu);
> @@ -2551,9 +2542,6 @@ bool smu_mode1_reset_is_support(struct smu_context *smu)
> {
> bool ret = false;
>
> - if (!smu->pm_enabled)
> - return false;
> -
> if (smu->ppt_funcs && smu->ppt_funcs->mode1_reset_is_support)
> ret = smu->ppt_funcs->mode1_reset_is_support(smu);
>
> @@ -2564,9 +2552,6 @@ int smu_mode1_reset(struct smu_context *smu)
> {
> int ret = 0;
>
> - if (!smu->pm_enabled)
> - return -EOPNOTSUPP;
> -
> if (smu->ppt_funcs->mode1_reset)
> ret = smu->ppt_funcs->mode1_reset(smu);
>
> @@ -2578,9 +2563,6 @@ static int smu_mode2_reset(void *handle)
> struct smu_context *smu = handle;
> int ret = 0;
>
> - if (!smu->pm_enabled)
> - return -EOPNOTSUPP;
> -
> if (smu->ppt_funcs->mode2_reset)
> ret = smu->ppt_funcs->mode2_reset(smu);
>
> @@ -2712,6 +2694,19 @@ static int smu_get_prv_buffer_details(void *handle, void **addr, size_t *size)
> return 0;
> }
>
> +static bool smu_is_smc_alive(void *handle)
> +{
> + struct smu_context *smu = handle;
> +
> + if (!smu->ppt_funcs->check_fw_status)
> + return false;
> +
> + if (!smu->ppt_funcs->check_fw_status(smu))
> + return true;
> +
> + return false;
> +}
> +
> static const struct amd_pm_funcs swsmu_pm_funcs = {
> /* export for sysfs */
> .set_fan_control_mode = smu_set_fan_control_mode,
> @@ -2765,6 +2760,7 @@ static const struct amd_pm_funcs swsmu_pm_funcs = {
> .get_uclk_dpm_states = smu_get_uclk_dpm_states,
> .get_dpm_clock_table = smu_get_dpm_clock_table,
> .get_smu_prv_buf_details = smu_get_prv_buffer_details,
> + .is_smc_alive = smu_is_smc_alive,
> };
>
> int smu_wait_for_event(struct smu_context *smu, enum smu_event_type event,
>
next prev parent reply other threads:[~2022-02-14 4:04 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-02-11 7:51 [PATCH 01/12] drm/amd/pm: drop unused structure members Evan Quan
2022-02-11 7:51 ` [PATCH 02/12] drm/amd/pm: drop unused interfaces Evan Quan
2022-02-11 7:52 ` [PATCH 03/12] drm/amd/pm: drop unneeded !smu->pm_enabled check Evan Quan
2022-02-11 7:52 ` [PATCH 04/12] drm/amd/pm: use adev->pm.dpm_enabled for dpm enablement check Evan Quan
2022-02-11 7:52 ` [PATCH 05/12] drm/amd/pm: move the check for dpm enablement to amdgpu_dpm.c Evan Quan
2022-02-11 8:06 ` Chen, Guchun
2022-02-17 1:53 ` Quan, Evan
2022-02-11 13:39 ` Lazar, Lijo
2022-02-17 2:35 ` Quan, Evan
2022-02-17 4:55 ` Lazar, Lijo
2022-02-11 7:52 ` [PATCH 06/12] drm/amd/pm: correct the checks for sriov(pp_one_vf) Evan Quan
2022-02-11 7:52 ` [PATCH 07/12] drm/amd/pm: correct the checks for granting gpu reset APIs Evan Quan
2022-02-14 4:04 ` Lazar, Lijo [this message]
2022-02-17 2:48 ` Quan, Evan
2022-02-17 5:00 ` Lazar, Lijo
2022-02-11 7:52 ` [PATCH 08/12] drm/amd/pm: add proper check for amdgpu_dpm before granting pp_dpm_load_fw Evan Quan
2022-02-11 7:52 ` [PATCH 09/12] drm/amd/pm: drop redundant !pp_funcs check Evan Quan
2022-02-11 7:52 ` [PATCH 10/12] drm/amd/pm: drop nonsense !smu->ppt_funcs check Evan Quan
2022-02-11 7:52 ` [PATCH 11/12] drm/amd/pm: drop extra non-necessary null pointers checks Evan Quan
2022-02-11 7:52 ` [PATCH 12/12] drm/amd/pm: revise the implementations for asic reset Evan Quan
2022-02-11 13:21 ` Lazar, Lijo
2022-02-17 2:53 ` Quan, Evan
2022-02-11 7:55 ` [PATCH 01/12] drm/amd/pm: drop unused structure members Christian König
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=80a49c9c-77f6-5047-6584-da24f49596f7@amd.com \
--to=lijo.lazar@amd.com \
--cc=Alexander.Deucher@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
--cc=evan.quan@amd.com \
--cc=rui.huang@amd.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox