* [PATCH] drm/amd: Don't wake dGPUs while reading sensors
@ 2024-08-20 2:04 Mario Limonciello
2024-08-23 13:44 ` Hamza Mahfooz
2024-08-23 14:09 ` Alex Deucher
0 siblings, 2 replies; 10+ messages in thread
From: Mario Limonciello @ 2024-08-20 2:04 UTC (permalink / raw)
To: amd-gfx; +Cc: Mario Limonciello
From: Mario Limonciello <mario.limonciello@amd.com>
If the dGPU is off, then reading the sysfs files with a sensor monitoring
application will wake it. Change the behavior to return an error when the
dGPU is in D3cold.
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/gpu/drm/amd/pm/amdgpu_pm.c | 90 +++++++++++++++---------------
1 file changed, 45 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index c11952a4389bc..d6e38466fbb82 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -142,7 +142,7 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
@@ -173,7 +173,7 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
if (strncmp("battery", buf, strlen("battery")) == 0)
@@ -270,7 +270,7 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
@@ -309,7 +309,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
if (strncmp("low", buf, strlen("low")) == 0) {
@@ -371,7 +371,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
@@ -409,7 +409,7 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
@@ -448,7 +448,7 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
if (adev->pm.pp_force_state_enabled)
@@ -471,7 +471,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
adev->pm.pp_force_state_enabled = false;
@@ -541,7 +541,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
@@ -577,7 +577,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
@@ -760,7 +760,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
if (count > 127 || count == 0)
@@ -862,7 +862,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
@@ -922,7 +922,7 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = kstrtou64(buf, 0, &featuremask);
@@ -957,7 +957,7 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
@@ -1026,7 +1026,7 @@ static ssize_t amdgpu_get_pp_dpm_clock(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
@@ -1095,7 +1095,7 @@ static ssize_t amdgpu_set_pp_dpm_clock(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = amdgpu_read_mask(buf, count, &mask);
@@ -1280,7 +1280,7 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
@@ -1309,7 +1309,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = kstrtol(buf, 0, &value);
@@ -1342,7 +1342,7 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
@@ -1371,7 +1371,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = kstrtol(buf, 0, &value);
@@ -1424,7 +1424,7 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
@@ -1463,7 +1463,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
tmp[0] = *(buf);
@@ -1517,7 +1517,7 @@ static int amdgpu_hwmon_get_sensor_generic(struct amdgpu_device *adev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -1630,7 +1630,7 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
if (adev->flags & AMD_IS_APU)
@@ -1673,7 +1673,7 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
if (adev->unique_id)
@@ -1846,7 +1846,7 @@ static ssize_t amdgpu_get_pm_metrics(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
@@ -1887,7 +1887,7 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
@@ -2005,7 +2005,7 @@ static ssize_t amdgpu_set_smartshift_bias(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
r = pm_runtime_get_sync(ddev->dev);
@@ -2227,7 +2227,7 @@ static ssize_t amdgpu_get_xgmi_plpd_policy(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
mode = amdgpu_dpm_get_xgmi_plpd_mode(adev, &mode_desc);
@@ -2250,7 +2250,7 @@ static ssize_t amdgpu_set_xgmi_plpd_policy(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = kstrtos32(buf, 0, &mode);
@@ -2652,7 +2652,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -2684,7 +2684,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
err = kstrtoint(buf, 10, &value);
@@ -2742,7 +2742,7 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
err = kstrtou32(buf, 10, &value);
@@ -2787,7 +2787,7 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -2817,7 +2817,7 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -2881,7 +2881,7 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -2912,7 +2912,7 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
err = kstrtou32(buf, 10, &value);
@@ -2956,7 +2956,7 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -2988,7 +2988,7 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
err = kstrtoint(buf, 10, &value);
@@ -3128,7 +3128,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_generic(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -3209,7 +3209,7 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
if (amdgpu_sriov_vf(adev))
@@ -3663,7 +3663,7 @@ static int amdgpu_retrieve_od_settings(struct amdgpu_device *adev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = pm_runtime_get_sync(adev->dev);
@@ -3747,7 +3747,7 @@ amdgpu_distribute_custom_od_settings(struct amdgpu_device *adev,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = parse_input_od_command_lines(in_buf,
@@ -4626,7 +4626,7 @@ static int amdgpu_debugfs_pm_info_show(struct seq_file *m, void *unused)
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
r = pm_runtime_get_sync(dev->dev);
@@ -4671,7 +4671,7 @@ static ssize_t amdgpu_pm_prv_buffer_read(struct file *f, char __user *buf,
if (amdgpu_in_reset(adev))
return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
+ if (adev->in_suspend || adev->in_runpm)
return -EPERM;
ret = amdgpu_dpm_get_smu_prv_buf_details(adev, &smu_prv_buf, &smu_prv_buf_size);
--
2.43.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH] drm/amd: Don't wake dGPUs while reading sensors
2024-08-20 2:04 [PATCH] drm/amd: Don't wake dGPUs while reading sensors Mario Limonciello
@ 2024-08-23 13:44 ` Hamza Mahfooz
2024-08-23 13:58 ` Mario Limonciello
2024-08-23 14:09 ` Alex Deucher
1 sibling, 1 reply; 10+ messages in thread
From: Hamza Mahfooz @ 2024-08-23 13:44 UTC (permalink / raw)
To: Mario Limonciello, amd-gfx; +Cc: Mario Limonciello
On 8/19/24 22:04, Mario Limonciello wrote:
> From: Mario Limonciello <mario.limonciello@amd.com>
>
> If the dGPU is off, then reading the sysfs files with a sensor monitoring
> application will wake it. Change the behavior to return an error when the
> dGPU is in D3cold.
>
> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
> ---
> drivers/gpu/drm/amd/pm/amdgpu_pm.c | 90 +++++++++++++++---------------
> 1 file changed, 45 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> index c11952a4389bc..d6e38466fbb82 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> @@ -142,7 +142,7 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
Seems like this expression is used rather often, so it might be good to
have an inline function for it. Something like the following:
static inline bool amdgpu_is_d3cold(struct amdgpu_dev *adev)
{
return adev->in_suspend || adev->in_runpm;
}
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -173,7 +173,7 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> if (strncmp("battery", buf, strlen("battery")) == 0)
> @@ -270,7 +270,7 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -309,7 +309,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> if (strncmp("low", buf, strlen("low")) == 0) {
> @@ -371,7 +371,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -409,7 +409,7 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -448,7 +448,7 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> if (adev->pm.pp_force_state_enabled)
> @@ -471,7 +471,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> adev->pm.pp_force_state_enabled = false;
> @@ -541,7 +541,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -577,7 +577,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -760,7 +760,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> if (count > 127 || count == 0)
> @@ -862,7 +862,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -922,7 +922,7 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = kstrtou64(buf, 0, &featuremask);
> @@ -957,7 +957,7 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -1026,7 +1026,7 @@ static ssize_t amdgpu_get_pp_dpm_clock(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -1095,7 +1095,7 @@ static ssize_t amdgpu_set_pp_dpm_clock(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = amdgpu_read_mask(buf, count, &mask);
> @@ -1280,7 +1280,7 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -1309,7 +1309,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = kstrtol(buf, 0, &value);
> @@ -1342,7 +1342,7 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -1371,7 +1371,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = kstrtol(buf, 0, &value);
> @@ -1424,7 +1424,7 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -1463,7 +1463,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> tmp[0] = *(buf);
> @@ -1517,7 +1517,7 @@ static int amdgpu_hwmon_get_sensor_generic(struct amdgpu_device *adev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> @@ -1630,7 +1630,7 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> if (adev->flags & AMD_IS_APU)
> @@ -1673,7 +1673,7 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> if (adev->unique_id)
> @@ -1846,7 +1846,7 @@ static ssize_t amdgpu_get_pm_metrics(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -1887,7 +1887,7 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -2005,7 +2005,7 @@ static ssize_t amdgpu_set_smartshift_bias(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> r = pm_runtime_get_sync(ddev->dev);
> @@ -2227,7 +2227,7 @@ static ssize_t amdgpu_get_xgmi_plpd_policy(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> mode = amdgpu_dpm_get_xgmi_plpd_mode(adev, &mode_desc);
> @@ -2250,7 +2250,7 @@ static ssize_t amdgpu_set_xgmi_plpd_policy(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = kstrtos32(buf, 0, &mode);
> @@ -2652,7 +2652,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> @@ -2684,7 +2684,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> err = kstrtoint(buf, 10, &value);
> @@ -2742,7 +2742,7 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> err = kstrtou32(buf, 10, &value);
> @@ -2787,7 +2787,7 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> @@ -2817,7 +2817,7 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> @@ -2881,7 +2881,7 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> @@ -2912,7 +2912,7 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> err = kstrtou32(buf, 10, &value);
> @@ -2956,7 +2956,7 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> @@ -2988,7 +2988,7 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> err = kstrtoint(buf, 10, &value);
> @@ -3128,7 +3128,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_generic(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> @@ -3209,7 +3209,7 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> if (amdgpu_sriov_vf(adev))
> @@ -3663,7 +3663,7 @@ static int amdgpu_retrieve_od_settings(struct amdgpu_device *adev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(adev->dev);
> @@ -3747,7 +3747,7 @@ amdgpu_distribute_custom_od_settings(struct amdgpu_device *adev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = parse_input_od_command_lines(in_buf,
> @@ -4626,7 +4626,7 @@ static int amdgpu_debugfs_pm_info_show(struct seq_file *m, void *unused)
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> r = pm_runtime_get_sync(dev->dev);
> @@ -4671,7 +4671,7 @@ static ssize_t amdgpu_pm_prv_buffer_read(struct file *f, char __user *buf,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = amdgpu_dpm_get_smu_prv_buf_details(adev, &smu_prv_buf, &smu_prv_buf_size);
--
Hamza
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] drm/amd: Don't wake dGPUs while reading sensors
2024-08-23 13:44 ` Hamza Mahfooz
@ 2024-08-23 13:58 ` Mario Limonciello
0 siblings, 0 replies; 10+ messages in thread
From: Mario Limonciello @ 2024-08-23 13:58 UTC (permalink / raw)
To: Hamza Mahfooz, Mario Limonciello, amd-gfx
On 8/23/2024 08:44, Hamza Mahfooz wrote:
> On 8/19/24 22:04, Mario Limonciello wrote:
>> From: Mario Limonciello <mario.limonciello@amd.com>
>>
>> If the dGPU is off, then reading the sysfs files with a sensor monitoring
>> application will wake it. Change the behavior to return an error when the
>> dGPU is in D3cold.
>>
>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
>> ---
>> drivers/gpu/drm/amd/pm/amdgpu_pm.c | 90 +++++++++++++++---------------
>> 1 file changed, 45 insertions(+), 45 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
>> b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
>> index c11952a4389bc..d6e38466fbb82 100644
>> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
>> @@ -142,7 +142,7 @@ static ssize_t amdgpu_get_power_dpm_state(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>
> Seems like this expression is used rather often, so it might be good to
> have an inline function for it. Something like the following:
>
> static inline bool amdgpu_is_d3cold(struct amdgpu_dev *adev)
> {
> return adev->in_suspend || adev->in_runpm;
> }
>
Thanks for the review. Not only in that expression used frequently but
also the check if it's in reset. Considering that I'm thinking to make
a non-inline function for every one of these like this to drop the
boilerplate.
static bool amdgpu_attr_accessible(struct amdgpu_dev *adev)
{
if (amdgpu_in_reset(adev))
return false;
if (adev->in_suspend)
return false;
if (adev->in_runpm)
return false;
return true;
}
and then each use can be:
if (!amdgpu_attr_accessible(adev))
return -EPERM;
>> return -EPERM;
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -173,7 +173,7 @@ static ssize_t amdgpu_set_power_dpm_state(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> if (strncmp("battery", buf, strlen("battery")) == 0)
>> @@ -270,7 +270,7 @@ static ssize_t
>> amdgpu_get_power_dpm_force_performance_level(struct device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -309,7 +309,7 @@ static ssize_t
>> amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> if (strncmp("low", buf, strlen("low")) == 0) {
>> @@ -371,7 +371,7 @@ static ssize_t amdgpu_get_pp_num_states(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -409,7 +409,7 @@ static ssize_t amdgpu_get_pp_cur_state(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -448,7 +448,7 @@ static ssize_t amdgpu_get_pp_force_state(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> if (adev->pm.pp_force_state_enabled)
>> @@ -471,7 +471,7 @@ static ssize_t amdgpu_set_pp_force_state(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> adev->pm.pp_force_state_enabled = false;
>> @@ -541,7 +541,7 @@ static ssize_t amdgpu_get_pp_table(struct device
>> *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -577,7 +577,7 @@ static ssize_t amdgpu_set_pp_table(struct device
>> *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -760,7 +760,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> if (count > 127 || count == 0)
>> @@ -862,7 +862,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -922,7 +922,7 @@ static ssize_t amdgpu_set_pp_features(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = kstrtou64(buf, 0, &featuremask);
>> @@ -957,7 +957,7 @@ static ssize_t amdgpu_get_pp_features(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1026,7 +1026,7 @@ static ssize_t amdgpu_get_pp_dpm_clock(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1095,7 +1095,7 @@ static ssize_t amdgpu_set_pp_dpm_clock(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = amdgpu_read_mask(buf, count, &mask);
>> @@ -1280,7 +1280,7 @@ static ssize_t amdgpu_get_pp_sclk_od(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1309,7 +1309,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = kstrtol(buf, 0, &value);
>> @@ -1342,7 +1342,7 @@ static ssize_t amdgpu_get_pp_mclk_od(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1371,7 +1371,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = kstrtol(buf, 0, &value);
>> @@ -1424,7 +1424,7 @@ static ssize_t
>> amdgpu_get_pp_power_profile_mode(struct device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1463,7 +1463,7 @@ static ssize_t
>> amdgpu_set_pp_power_profile_mode(struct device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> tmp[0] = *(buf);
>> @@ -1517,7 +1517,7 @@ static int
>> amdgpu_hwmon_get_sensor_generic(struct amdgpu_device *adev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> @@ -1630,7 +1630,7 @@ static ssize_t amdgpu_get_pcie_bw(struct device
>> *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> if (adev->flags & AMD_IS_APU)
>> @@ -1673,7 +1673,7 @@ static ssize_t amdgpu_get_unique_id(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> if (adev->unique_id)
>> @@ -1846,7 +1846,7 @@ static ssize_t amdgpu_get_pm_metrics(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1887,7 +1887,7 @@ static ssize_t amdgpu_get_gpu_metrics(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -2005,7 +2005,7 @@ static ssize_t amdgpu_set_smartshift_bias(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> r = pm_runtime_get_sync(ddev->dev);
>> @@ -2227,7 +2227,7 @@ static ssize_t
>> amdgpu_get_xgmi_plpd_policy(struct device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> mode = amdgpu_dpm_get_xgmi_plpd_mode(adev, &mode_desc);
>> @@ -2250,7 +2250,7 @@ static ssize_t
>> amdgpu_set_xgmi_plpd_policy(struct device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = kstrtos32(buf, 0, &mode);
>> @@ -2652,7 +2652,7 @@ static ssize_t
>> amdgpu_hwmon_get_pwm1_enable(struct device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> @@ -2684,7 +2684,7 @@ static ssize_t
>> amdgpu_hwmon_set_pwm1_enable(struct device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> err = kstrtoint(buf, 10, &value);
>> @@ -2742,7 +2742,7 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> err = kstrtou32(buf, 10, &value);
>> @@ -2787,7 +2787,7 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> @@ -2817,7 +2817,7 @@ static ssize_t
>> amdgpu_hwmon_get_fan1_input(struct device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> @@ -2881,7 +2881,7 @@ static ssize_t
>> amdgpu_hwmon_get_fan1_target(struct device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> @@ -2912,7 +2912,7 @@ static ssize_t
>> amdgpu_hwmon_set_fan1_target(struct device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> err = kstrtou32(buf, 10, &value);
>> @@ -2956,7 +2956,7 @@ static ssize_t
>> amdgpu_hwmon_get_fan1_enable(struct device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> @@ -2988,7 +2988,7 @@ static ssize_t
>> amdgpu_hwmon_set_fan1_enable(struct device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> err = kstrtoint(buf, 10, &value);
>> @@ -3128,7 +3128,7 @@ static ssize_t
>> amdgpu_hwmon_show_power_cap_generic(struct device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> @@ -3209,7 +3209,7 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct
>> device *dev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> if (amdgpu_sriov_vf(adev))
>> @@ -3663,7 +3663,7 @@ static int amdgpu_retrieve_od_settings(struct
>> amdgpu_device *adev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = pm_runtime_get_sync(adev->dev);
>> @@ -3747,7 +3747,7 @@ amdgpu_distribute_custom_od_settings(struct
>> amdgpu_device *adev,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = parse_input_od_command_lines(in_buf,
>> @@ -4626,7 +4626,7 @@ static int amdgpu_debugfs_pm_info_show(struct
>> seq_file *m, void *unused)
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> r = pm_runtime_get_sync(dev->dev);
>> @@ -4671,7 +4671,7 @@ static ssize_t amdgpu_pm_prv_buffer_read(struct
>> file *f, char __user *buf,
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>> ret = amdgpu_dpm_get_smu_prv_buf_details(adev, &smu_prv_buf,
>> &smu_prv_buf_size);
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] drm/amd: Don't wake dGPUs while reading sensors
2024-08-20 2:04 [PATCH] drm/amd: Don't wake dGPUs while reading sensors Mario Limonciello
2024-08-23 13:44 ` Hamza Mahfooz
@ 2024-08-23 14:09 ` Alex Deucher
2024-08-23 14:13 ` Mario Limonciello
1 sibling, 1 reply; 10+ messages in thread
From: Alex Deucher @ 2024-08-23 14:09 UTC (permalink / raw)
To: Mario Limonciello; +Cc: amd-gfx, Mario Limonciello
On Mon, Aug 19, 2024 at 10:30 PM Mario Limonciello <superm1@kernel.org> wrote:
>
> From: Mario Limonciello <mario.limonciello@amd.com>
>
> If the dGPU is off, then reading the sysfs files with a sensor monitoring
> application will wake it. Change the behavior to return an error when the
> dGPU is in D3cold.
I'm a little concerned that this will generate a flurry of bug reports
if this now reports an error. One more comment below.
>
> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
> ---
> drivers/gpu/drm/amd/pm/amdgpu_pm.c | 90 +++++++++++++++---------------
> 1 file changed, 45 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> index c11952a4389bc..d6e38466fbb82 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> @@ -142,7 +142,7 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -173,7 +173,7 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> if (strncmp("battery", buf, strlen("battery")) == 0)
> @@ -270,7 +270,7 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -309,7 +309,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> if (strncmp("low", buf, strlen("low")) == 0) {
> @@ -371,7 +371,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -409,7 +409,7 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -448,7 +448,7 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> if (adev->pm.pp_force_state_enabled)
> @@ -471,7 +471,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> adev->pm.pp_force_state_enabled = false;
> @@ -541,7 +541,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -577,7 +577,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -760,7 +760,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> if (count > 127 || count == 0)
> @@ -862,7 +862,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -922,7 +922,7 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = kstrtou64(buf, 0, &featuremask);
> @@ -957,7 +957,7 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -1026,7 +1026,7 @@ static ssize_t amdgpu_get_pp_dpm_clock(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -1095,7 +1095,7 @@ static ssize_t amdgpu_set_pp_dpm_clock(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = amdgpu_read_mask(buf, count, &mask);
> @@ -1280,7 +1280,7 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -1309,7 +1309,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = kstrtol(buf, 0, &value);
> @@ -1342,7 +1342,7 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -1371,7 +1371,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = kstrtol(buf, 0, &value);
> @@ -1424,7 +1424,7 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -1463,7 +1463,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> tmp[0] = *(buf);
> @@ -1517,7 +1517,7 @@ static int amdgpu_hwmon_get_sensor_generic(struct amdgpu_device *adev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> @@ -1630,7 +1630,7 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> if (adev->flags & AMD_IS_APU)
> @@ -1673,7 +1673,7 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> if (adev->unique_id)
> @@ -1846,7 +1846,7 @@ static ssize_t amdgpu_get_pm_metrics(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -1887,7 +1887,7 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(ddev->dev);
> @@ -2005,7 +2005,7 @@ static ssize_t amdgpu_set_smartshift_bias(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> r = pm_runtime_get_sync(ddev->dev);
> @@ -2227,7 +2227,7 @@ static ssize_t amdgpu_get_xgmi_plpd_policy(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> mode = amdgpu_dpm_get_xgmi_plpd_mode(adev, &mode_desc);
> @@ -2250,7 +2250,7 @@ static ssize_t amdgpu_set_xgmi_plpd_policy(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = kstrtos32(buf, 0, &mode);
> @@ -2652,7 +2652,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> @@ -2684,7 +2684,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> err = kstrtoint(buf, 10, &value);
> @@ -2742,7 +2742,7 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> err = kstrtou32(buf, 10, &value);
> @@ -2787,7 +2787,7 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> @@ -2817,7 +2817,7 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> @@ -2881,7 +2881,7 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> @@ -2912,7 +2912,7 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> err = kstrtou32(buf, 10, &value);
> @@ -2956,7 +2956,7 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> @@ -2988,7 +2988,7 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> err = kstrtoint(buf, 10, &value);
> @@ -3128,7 +3128,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_generic(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> @@ -3209,7 +3209,7 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> if (amdgpu_sriov_vf(adev))
> @@ -3663,7 +3663,7 @@ static int amdgpu_retrieve_od_settings(struct amdgpu_device *adev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = pm_runtime_get_sync(adev->dev);
> @@ -3747,7 +3747,7 @@ amdgpu_distribute_custom_od_settings(struct amdgpu_device *adev,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = parse_input_od_command_lines(in_buf,
> @@ -4626,7 +4626,7 @@ static int amdgpu_debugfs_pm_info_show(struct seq_file *m, void *unused)
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
I'd prefer to keep the current behavior for debugfs.
Alex
>
> r = pm_runtime_get_sync(dev->dev);
> @@ -4671,7 +4671,7 @@ static ssize_t amdgpu_pm_prv_buffer_read(struct file *f, char __user *buf,
>
> if (amdgpu_in_reset(adev))
> return -EPERM;
> - if (adev->in_suspend && !adev->in_runpm)
> + if (adev->in_suspend || adev->in_runpm)
> return -EPERM;
>
> ret = amdgpu_dpm_get_smu_prv_buf_details(adev, &smu_prv_buf, &smu_prv_buf_size);
> --
> 2.43.0
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] drm/amd: Don't wake dGPUs while reading sensors
2024-08-23 14:09 ` Alex Deucher
@ 2024-08-23 14:13 ` Mario Limonciello
2024-08-23 14:31 ` Alex Deucher
0 siblings, 1 reply; 10+ messages in thread
From: Mario Limonciello @ 2024-08-23 14:13 UTC (permalink / raw)
To: Alex Deucher, Mario Limonciello; +Cc: amd-gfx
On 8/23/2024 09:09, Alex Deucher wrote:
> On Mon, Aug 19, 2024 at 10:30 PM Mario Limonciello <superm1@kernel.org> wrote:
>>
>> From: Mario Limonciello <mario.limonciello@amd.com>
>>
>> If the dGPU is off, then reading the sysfs files with a sensor monitoring
>> application will wake it. Change the behavior to return an error when the
>> dGPU is in D3cold.
>
> I'm a little concerned that this will generate a flurry of bug reports
> if this now reports an error. One more comment below.
>
Do you have a particular app you're worried about, or just a general
worry? I've had a lot of people reach out to me complaining about
battery life on A+A systems, and it comes down to the use of sensor
monitoring software waking the dGPU which people don't seem to expect.
I did double check that software like 'sensors', 'mission center' and
'nvtop' don't freak out from this change.
Here is what 'sensors' shows on my local workstation with this change.
amdgpu-pci-6100
Adapter: PCI adapter
vddgfx: N/A
ERROR: Can't get value of subfeature fan1_min: Can't read
ERROR: Can't get value of subfeature fan1_max: Can't read
fan1: N/A (min = 0 RPM, max = 0 RPM)
edge: N/A (crit = +97.0°C, hyst = -273.1°C)
ERROR: Can't get value of subfeature power1_cap: Can't read
PPT: N/A (cap = 0.00 W)
>>
>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
>> ---
>> drivers/gpu/drm/amd/pm/amdgpu_pm.c | 90 +++++++++++++++---------------
>> 1 file changed, 45 insertions(+), 45 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
>> index c11952a4389bc..d6e38466fbb82 100644
>> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
>> @@ -142,7 +142,7 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -173,7 +173,7 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> if (strncmp("battery", buf, strlen("battery")) == 0)
>> @@ -270,7 +270,7 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -309,7 +309,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> if (strncmp("low", buf, strlen("low")) == 0) {
>> @@ -371,7 +371,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -409,7 +409,7 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -448,7 +448,7 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> if (adev->pm.pp_force_state_enabled)
>> @@ -471,7 +471,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> adev->pm.pp_force_state_enabled = false;
>> @@ -541,7 +541,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -577,7 +577,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -760,7 +760,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> if (count > 127 || count == 0)
>> @@ -862,7 +862,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -922,7 +922,7 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = kstrtou64(buf, 0, &featuremask);
>> @@ -957,7 +957,7 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1026,7 +1026,7 @@ static ssize_t amdgpu_get_pp_dpm_clock(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1095,7 +1095,7 @@ static ssize_t amdgpu_set_pp_dpm_clock(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = amdgpu_read_mask(buf, count, &mask);
>> @@ -1280,7 +1280,7 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1309,7 +1309,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = kstrtol(buf, 0, &value);
>> @@ -1342,7 +1342,7 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1371,7 +1371,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = kstrtol(buf, 0, &value);
>> @@ -1424,7 +1424,7 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1463,7 +1463,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> tmp[0] = *(buf);
>> @@ -1517,7 +1517,7 @@ static int amdgpu_hwmon_get_sensor_generic(struct amdgpu_device *adev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> @@ -1630,7 +1630,7 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> if (adev->flags & AMD_IS_APU)
>> @@ -1673,7 +1673,7 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> if (adev->unique_id)
>> @@ -1846,7 +1846,7 @@ static ssize_t amdgpu_get_pm_metrics(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1887,7 +1887,7 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = pm_runtime_get_sync(ddev->dev);
>> @@ -2005,7 +2005,7 @@ static ssize_t amdgpu_set_smartshift_bias(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> r = pm_runtime_get_sync(ddev->dev);
>> @@ -2227,7 +2227,7 @@ static ssize_t amdgpu_get_xgmi_plpd_policy(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> mode = amdgpu_dpm_get_xgmi_plpd_mode(adev, &mode_desc);
>> @@ -2250,7 +2250,7 @@ static ssize_t amdgpu_set_xgmi_plpd_policy(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = kstrtos32(buf, 0, &mode);
>> @@ -2652,7 +2652,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> @@ -2684,7 +2684,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> err = kstrtoint(buf, 10, &value);
>> @@ -2742,7 +2742,7 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> err = kstrtou32(buf, 10, &value);
>> @@ -2787,7 +2787,7 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> @@ -2817,7 +2817,7 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> @@ -2881,7 +2881,7 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> @@ -2912,7 +2912,7 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> err = kstrtou32(buf, 10, &value);
>> @@ -2956,7 +2956,7 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> @@ -2988,7 +2988,7 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> err = kstrtoint(buf, 10, &value);
>> @@ -3128,7 +3128,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_generic(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> @@ -3209,7 +3209,7 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> if (amdgpu_sriov_vf(adev))
>> @@ -3663,7 +3663,7 @@ static int amdgpu_retrieve_od_settings(struct amdgpu_device *adev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = pm_runtime_get_sync(adev->dev);
>> @@ -3747,7 +3747,7 @@ amdgpu_distribute_custom_od_settings(struct amdgpu_device *adev,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = parse_input_od_command_lines(in_buf,
>> @@ -4626,7 +4626,7 @@ static int amdgpu_debugfs_pm_info_show(struct seq_file *m, void *unused)
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>
> I'd prefer to keep the current behavior for debugfs.
OK. I'll exclude it for debugfs in the next spin.
>
> Alex
>
>>
>> r = pm_runtime_get_sync(dev->dev);
>> @@ -4671,7 +4671,7 @@ static ssize_t amdgpu_pm_prv_buffer_read(struct file *f, char __user *buf,
>>
>> if (amdgpu_in_reset(adev))
>> return -EPERM;
>> - if (adev->in_suspend && !adev->in_runpm)
>> + if (adev->in_suspend || adev->in_runpm)
>> return -EPERM;
>>
>> ret = amdgpu_dpm_get_smu_prv_buf_details(adev, &smu_prv_buf, &smu_prv_buf_size);
>> --
>> 2.43.0
>>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] drm/amd: Don't wake dGPUs while reading sensors
2024-08-23 14:13 ` Mario Limonciello
@ 2024-08-23 14:31 ` Alex Deucher
2024-08-23 14:39 ` Mario Limonciello
0 siblings, 1 reply; 10+ messages in thread
From: Alex Deucher @ 2024-08-23 14:31 UTC (permalink / raw)
To: Mario Limonciello; +Cc: Mario Limonciello, amd-gfx
On Fri, Aug 23, 2024 at 10:13 AM Mario Limonciello
<mario.limonciello@amd.com> wrote:
>
> On 8/23/2024 09:09, Alex Deucher wrote:
> > On Mon, Aug 19, 2024 at 10:30 PM Mario Limonciello <superm1@kernel.org> wrote:
> >>
> >> From: Mario Limonciello <mario.limonciello@amd.com>
> >>
> >> If the dGPU is off, then reading the sysfs files with a sensor monitoring
> >> application will wake it. Change the behavior to return an error when the
> >> dGPU is in D3cold.
> >
> > I'm a little concerned that this will generate a flurry of bug reports
> > if this now reports an error. One more comment below.
> >
>
> Do you have a particular app you're worried about, or just a general
> worry? I've had a lot of people reach out to me complaining about
> battery life on A+A systems, and it comes down to the use of sensor
> monitoring software waking the dGPU which people don't seem to expect.
Nothing in particular. Just expecting reports of "I checked my GPU
temperature and it returned an error. It was working with the last
kernel."
>
> I did double check that software like 'sensors', 'mission center' and
> 'nvtop' don't freak out from this change.
Do we know what other devices do? I wonder if it would make more
sense to have the userspace tools check the runpm state before trying
to access the device. Of course there are a lot of them and fixing
them all is non-trivial...
Alex
>
> Here is what 'sensors' shows on my local workstation with this change.
>
> amdgpu-pci-6100
> Adapter: PCI adapter
> vddgfx: N/A
> ERROR: Can't get value of subfeature fan1_min: Can't read
> ERROR: Can't get value of subfeature fan1_max: Can't read
> fan1: N/A (min = 0 RPM, max = 0 RPM)
> edge: N/A (crit = +97.0°C, hyst = -273.1°C)
> ERROR: Can't get value of subfeature power1_cap: Can't read
> PPT: N/A (cap = 0.00 W)
>
> >>
> >> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
> >> ---
> >> drivers/gpu/drm/amd/pm/amdgpu_pm.c | 90 +++++++++++++++---------------
> >> 1 file changed, 45 insertions(+), 45 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> >> index c11952a4389bc..d6e38466fbb82 100644
> >> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> >> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> >> @@ -142,7 +142,7 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = pm_runtime_get_sync(ddev->dev);
> >> @@ -173,7 +173,7 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> if (strncmp("battery", buf, strlen("battery")) == 0)
> >> @@ -270,7 +270,7 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = pm_runtime_get_sync(ddev->dev);
> >> @@ -309,7 +309,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> if (strncmp("low", buf, strlen("low")) == 0) {
> >> @@ -371,7 +371,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = pm_runtime_get_sync(ddev->dev);
> >> @@ -409,7 +409,7 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = pm_runtime_get_sync(ddev->dev);
> >> @@ -448,7 +448,7 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> if (adev->pm.pp_force_state_enabled)
> >> @@ -471,7 +471,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> adev->pm.pp_force_state_enabled = false;
> >> @@ -541,7 +541,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = pm_runtime_get_sync(ddev->dev);
> >> @@ -577,7 +577,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = pm_runtime_get_sync(ddev->dev);
> >> @@ -760,7 +760,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> if (count > 127 || count == 0)
> >> @@ -862,7 +862,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = pm_runtime_get_sync(ddev->dev);
> >> @@ -922,7 +922,7 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = kstrtou64(buf, 0, &featuremask);
> >> @@ -957,7 +957,7 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = pm_runtime_get_sync(ddev->dev);
> >> @@ -1026,7 +1026,7 @@ static ssize_t amdgpu_get_pp_dpm_clock(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = pm_runtime_get_sync(ddev->dev);
> >> @@ -1095,7 +1095,7 @@ static ssize_t amdgpu_set_pp_dpm_clock(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = amdgpu_read_mask(buf, count, &mask);
> >> @@ -1280,7 +1280,7 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = pm_runtime_get_sync(ddev->dev);
> >> @@ -1309,7 +1309,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = kstrtol(buf, 0, &value);
> >> @@ -1342,7 +1342,7 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = pm_runtime_get_sync(ddev->dev);
> >> @@ -1371,7 +1371,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = kstrtol(buf, 0, &value);
> >> @@ -1424,7 +1424,7 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = pm_runtime_get_sync(ddev->dev);
> >> @@ -1463,7 +1463,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> tmp[0] = *(buf);
> >> @@ -1517,7 +1517,7 @@ static int amdgpu_hwmon_get_sensor_generic(struct amdgpu_device *adev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >> @@ -1630,7 +1630,7 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> if (adev->flags & AMD_IS_APU)
> >> @@ -1673,7 +1673,7 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> if (adev->unique_id)
> >> @@ -1846,7 +1846,7 @@ static ssize_t amdgpu_get_pm_metrics(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = pm_runtime_get_sync(ddev->dev);
> >> @@ -1887,7 +1887,7 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = pm_runtime_get_sync(ddev->dev);
> >> @@ -2005,7 +2005,7 @@ static ssize_t amdgpu_set_smartshift_bias(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> r = pm_runtime_get_sync(ddev->dev);
> >> @@ -2227,7 +2227,7 @@ static ssize_t amdgpu_get_xgmi_plpd_policy(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> mode = amdgpu_dpm_get_xgmi_plpd_mode(adev, &mode_desc);
> >> @@ -2250,7 +2250,7 @@ static ssize_t amdgpu_set_xgmi_plpd_policy(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = kstrtos32(buf, 0, &mode);
> >> @@ -2652,7 +2652,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >> @@ -2684,7 +2684,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> err = kstrtoint(buf, 10, &value);
> >> @@ -2742,7 +2742,7 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> err = kstrtou32(buf, 10, &value);
> >> @@ -2787,7 +2787,7 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >> @@ -2817,7 +2817,7 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >> @@ -2881,7 +2881,7 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >> @@ -2912,7 +2912,7 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> err = kstrtou32(buf, 10, &value);
> >> @@ -2956,7 +2956,7 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >> @@ -2988,7 +2988,7 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> err = kstrtoint(buf, 10, &value);
> >> @@ -3128,7 +3128,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_generic(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >> @@ -3209,7 +3209,7 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> if (amdgpu_sriov_vf(adev))
> >> @@ -3663,7 +3663,7 @@ static int amdgpu_retrieve_od_settings(struct amdgpu_device *adev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = pm_runtime_get_sync(adev->dev);
> >> @@ -3747,7 +3747,7 @@ amdgpu_distribute_custom_od_settings(struct amdgpu_device *adev,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = parse_input_od_command_lines(in_buf,
> >> @@ -4626,7 +4626,7 @@ static int amdgpu_debugfs_pm_info_show(struct seq_file *m, void *unused)
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >
> > I'd prefer to keep the current behavior for debugfs.
>
> OK. I'll exclude it for debugfs in the next spin.
>
> >
> > Alex
> >
> >>
> >> r = pm_runtime_get_sync(dev->dev);
> >> @@ -4671,7 +4671,7 @@ static ssize_t amdgpu_pm_prv_buffer_read(struct file *f, char __user *buf,
> >>
> >> if (amdgpu_in_reset(adev))
> >> return -EPERM;
> >> - if (adev->in_suspend && !adev->in_runpm)
> >> + if (adev->in_suspend || adev->in_runpm)
> >> return -EPERM;
> >>
> >> ret = amdgpu_dpm_get_smu_prv_buf_details(adev, &smu_prv_buf, &smu_prv_buf_size);
> >> --
> >> 2.43.0
> >>
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] drm/amd: Don't wake dGPUs while reading sensors
2024-08-23 14:31 ` Alex Deucher
@ 2024-08-23 14:39 ` Mario Limonciello
2024-08-24 2:22 ` Luke Jones
0 siblings, 1 reply; 10+ messages in thread
From: Mario Limonciello @ 2024-08-23 14:39 UTC (permalink / raw)
To: Alex Deucher, Luke D. Jones; +Cc: Mario Limonciello, amd-gfx
On 8/23/2024 09:31, Alex Deucher wrote:
> On Fri, Aug 23, 2024 at 10:13 AM Mario Limonciello
> <mario.limonciello@amd.com> wrote:
>>
>> On 8/23/2024 09:09, Alex Deucher wrote:
>>> On Mon, Aug 19, 2024 at 10:30 PM Mario Limonciello <superm1@kernel.org> wrote:
>>>>
>>>> From: Mario Limonciello <mario.limonciello@amd.com>
>>>>
>>>> If the dGPU is off, then reading the sysfs files with a sensor monitoring
>>>> application will wake it. Change the behavior to return an error when the
>>>> dGPU is in D3cold.
>>>
>>> I'm a little concerned that this will generate a flurry of bug reports
>>> if this now reports an error. One more comment below.
>>>
>>
>> Do you have a particular app you're worried about, or just a general
>> worry? I've had a lot of people reach out to me complaining about
>> battery life on A+A systems, and it comes down to the use of sensor
>> monitoring software waking the dGPU which people don't seem to expect.
>
> Nothing in particular. Just expecting reports of "I checked my GPU
> temperature and it returned an error. It was working with the last
> kernel."
>
>>
>> I did double check that software like 'sensors', 'mission center' and
>> 'nvtop' don't freak out from this change.
>
> Do we know what other devices do?
I'm not sure. Let me CC Luke Jones, he might know.
> I wonder if it would make more
> sense to have the userspace tools check the runpm state before trying
> to access the device. Of course there are a lot of them and fixing
> them all is non-trivial...
Yes that's another way to go about it. But I've raised a bug with
mission center folks 8 months ago [1] to tell them to stop querying
dGPUs in runtime PM, and Luke Jones also raised it with them 4 months
earlier [2] but it's still not sorted in their project.
[1] https://gitlab.com/mission-center-devs/mission-center/-/issues/143
[2] https://gitlab.com/mission-center-devs/mission-center/-/issues/30
I suspect we'll hit similar road blocks with the dozens of other
softwares that do this. So that's why I was thinking cut off the
snake's head.
>
> Alex
>
>>
>> Here is what 'sensors' shows on my local workstation with this change.
>>
>> amdgpu-pci-6100
>> Adapter: PCI adapter
>> vddgfx: N/A
>> ERROR: Can't get value of subfeature fan1_min: Can't read
>> ERROR: Can't get value of subfeature fan1_max: Can't read
>> fan1: N/A (min = 0 RPM, max = 0 RPM)
>> edge: N/A (crit = +97.0°C, hyst = -273.1°C)
>> ERROR: Can't get value of subfeature power1_cap: Can't read
>> PPT: N/A (cap = 0.00 W)
>>
>>>>
>>>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/pm/amdgpu_pm.c | 90 +++++++++++++++---------------
>>>> 1 file changed, 45 insertions(+), 45 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
>>>> index c11952a4389bc..d6e38466fbb82 100644
>>>> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
>>>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
>>>> @@ -142,7 +142,7 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = pm_runtime_get_sync(ddev->dev);
>>>> @@ -173,7 +173,7 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> if (strncmp("battery", buf, strlen("battery")) == 0)
>>>> @@ -270,7 +270,7 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = pm_runtime_get_sync(ddev->dev);
>>>> @@ -309,7 +309,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> if (strncmp("low", buf, strlen("low")) == 0) {
>>>> @@ -371,7 +371,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = pm_runtime_get_sync(ddev->dev);
>>>> @@ -409,7 +409,7 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = pm_runtime_get_sync(ddev->dev);
>>>> @@ -448,7 +448,7 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> if (adev->pm.pp_force_state_enabled)
>>>> @@ -471,7 +471,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> adev->pm.pp_force_state_enabled = false;
>>>> @@ -541,7 +541,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = pm_runtime_get_sync(ddev->dev);
>>>> @@ -577,7 +577,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = pm_runtime_get_sync(ddev->dev);
>>>> @@ -760,7 +760,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> if (count > 127 || count == 0)
>>>> @@ -862,7 +862,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = pm_runtime_get_sync(ddev->dev);
>>>> @@ -922,7 +922,7 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = kstrtou64(buf, 0, &featuremask);
>>>> @@ -957,7 +957,7 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = pm_runtime_get_sync(ddev->dev);
>>>> @@ -1026,7 +1026,7 @@ static ssize_t amdgpu_get_pp_dpm_clock(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = pm_runtime_get_sync(ddev->dev);
>>>> @@ -1095,7 +1095,7 @@ static ssize_t amdgpu_set_pp_dpm_clock(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = amdgpu_read_mask(buf, count, &mask);
>>>> @@ -1280,7 +1280,7 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = pm_runtime_get_sync(ddev->dev);
>>>> @@ -1309,7 +1309,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = kstrtol(buf, 0, &value);
>>>> @@ -1342,7 +1342,7 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = pm_runtime_get_sync(ddev->dev);
>>>> @@ -1371,7 +1371,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = kstrtol(buf, 0, &value);
>>>> @@ -1424,7 +1424,7 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = pm_runtime_get_sync(ddev->dev);
>>>> @@ -1463,7 +1463,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> tmp[0] = *(buf);
>>>> @@ -1517,7 +1517,7 @@ static int amdgpu_hwmon_get_sensor_generic(struct amdgpu_device *adev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>>>> @@ -1630,7 +1630,7 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> if (adev->flags & AMD_IS_APU)
>>>> @@ -1673,7 +1673,7 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> if (adev->unique_id)
>>>> @@ -1846,7 +1846,7 @@ static ssize_t amdgpu_get_pm_metrics(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = pm_runtime_get_sync(ddev->dev);
>>>> @@ -1887,7 +1887,7 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = pm_runtime_get_sync(ddev->dev);
>>>> @@ -2005,7 +2005,7 @@ static ssize_t amdgpu_set_smartshift_bias(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> r = pm_runtime_get_sync(ddev->dev);
>>>> @@ -2227,7 +2227,7 @@ static ssize_t amdgpu_get_xgmi_plpd_policy(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> mode = amdgpu_dpm_get_xgmi_plpd_mode(adev, &mode_desc);
>>>> @@ -2250,7 +2250,7 @@ static ssize_t amdgpu_set_xgmi_plpd_policy(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = kstrtos32(buf, 0, &mode);
>>>> @@ -2652,7 +2652,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>>>> @@ -2684,7 +2684,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> err = kstrtoint(buf, 10, &value);
>>>> @@ -2742,7 +2742,7 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> err = kstrtou32(buf, 10, &value);
>>>> @@ -2787,7 +2787,7 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>>>> @@ -2817,7 +2817,7 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>>>> @@ -2881,7 +2881,7 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>>>> @@ -2912,7 +2912,7 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> err = kstrtou32(buf, 10, &value);
>>>> @@ -2956,7 +2956,7 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>>>> @@ -2988,7 +2988,7 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> err = kstrtoint(buf, 10, &value);
>>>> @@ -3128,7 +3128,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_generic(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>>>> @@ -3209,7 +3209,7 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> if (amdgpu_sriov_vf(adev))
>>>> @@ -3663,7 +3663,7 @@ static int amdgpu_retrieve_od_settings(struct amdgpu_device *adev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = pm_runtime_get_sync(adev->dev);
>>>> @@ -3747,7 +3747,7 @@ amdgpu_distribute_custom_od_settings(struct amdgpu_device *adev,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = parse_input_od_command_lines(in_buf,
>>>> @@ -4626,7 +4626,7 @@ static int amdgpu_debugfs_pm_info_show(struct seq_file *m, void *unused)
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>
>>> I'd prefer to keep the current behavior for debugfs.
>>
>> OK. I'll exclude it for debugfs in the next spin.
>>
>>>
>>> Alex
>>>
>>>>
>>>> r = pm_runtime_get_sync(dev->dev);
>>>> @@ -4671,7 +4671,7 @@ static ssize_t amdgpu_pm_prv_buffer_read(struct file *f, char __user *buf,
>>>>
>>>> if (amdgpu_in_reset(adev))
>>>> return -EPERM;
>>>> - if (adev->in_suspend && !adev->in_runpm)
>>>> + if (adev->in_suspend || adev->in_runpm)
>>>> return -EPERM;
>>>>
>>>> ret = amdgpu_dpm_get_smu_prv_buf_details(adev, &smu_prv_buf, &smu_prv_buf_size);
>>>> --
>>>> 2.43.0
>>>>
>>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] drm/amd: Don't wake dGPUs while reading sensors
2024-08-23 14:39 ` Mario Limonciello
@ 2024-08-24 2:22 ` Luke Jones
2024-08-25 20:40 ` Luna Nova
0 siblings, 1 reply; 10+ messages in thread
From: Luke Jones @ 2024-08-24 2:22 UTC (permalink / raw)
To: Mario Limonciello, Alex Deucher; +Cc: Mario Limonciello, amd-gfx
On Sat, 24 Aug 2024, at 2:39 AM, Mario Limonciello wrote:
> On 8/23/2024 09:31, Alex Deucher wrote:
> > On Fri, Aug 23, 2024 at 10:13 AM Mario Limonciello
> > <mario.limonciello@amd.com> wrote:
> >>
> >> On 8/23/2024 09:09, Alex Deucher wrote:
> >>> On Mon, Aug 19, 2024 at 10:30 PM Mario Limonciello <superm1@kernel.org> wrote:
> >>>>
> >>>> From: Mario Limonciello <mario.limonciello@amd.com>
> >>>>
> >>>> If the dGPU is off, then reading the sysfs files with a sensor monitoring
> >>>> application will wake it. Change the behavior to return an error when the
> >>>> dGPU is in D3cold.
> >>>
> >>> I'm a little concerned that this will generate a flurry of bug reports
> >>> if this now reports an error. One more comment below.
> >>>
> >>
> >> Do you have a particular app you're worried about, or just a general
> >> worry? I've had a lot of people reach out to me complaining about
> >> battery life on A+A systems, and it comes down to the use of sensor
> >> monitoring software waking the dGPU which people don't seem to expect.
> >
> > Nothing in particular. Just expecting reports of "I checked my GPU
> > temperature and it returned an error. It was working with the last
> > kernel."
> >
> >>
> >> I did double check that software like 'sensors', 'mission center' and
> >> 'nvtop' don't freak out from this change.
> >
> > Do we know what other devices do?
>
> I'm not sure. Let me CC Luke Jones, he might know.
These apps have been actively working around or finding good ways to avoid waking the dGPU. In some cases the dGPU monitoring widget is disabled until the user requests it.
>
> > I wonder if it would make more
> > sense to have the userspace tools check the runpm state before trying
> > to access the device. Of course there are a lot of them and fixing
> > them all is non-trivial...
>
> Yes that's another way to go about it. But I've raised a bug with
> mission center folks 8 months ago [1] to tell them to stop querying
> dGPUs in runtime PM, and Luke Jones also raised it with them 4 months
> earlier [2] but it's still not sorted in their project.
>
> [1] https://gitlab.com/mission-center-devs/mission-center/-/issues/143
> [2] https://gitlab.com/mission-center-devs/mission-center/-/issues/30
>
> I suspect we'll hit similar road blocks with the dozens of other
> softwares that do this. So that's why I was thinking cut off the
> snake's head.
It is a continuous issue. If I had a dollar for every message in my discord channel asking why the dGPU is not sleeping I'd have a few thousand in savings now.
If the only way to prevent this is to return error if in d3cold then I'm 110% in agreeance. If apps can't handle that possiblity then it will force proper handling and prevent all future issues like this.
Cheers,
Luke.
>
> >
> > Alex
> >
> >>
> >> Here is what 'sensors' shows on my local workstation with this change.
> >>
> >> amdgpu-pci-6100
> >> Adapter: PCI adapter
> >> vddgfx: N/A
> >> ERROR: Can't get value of subfeature fan1_min: Can't read
> >> ERROR: Can't get value of subfeature fan1_max: Can't read
> >> fan1: N/A (min = 0 RPM, max = 0 RPM)
> >> edge: N/A (crit = +97.0°C, hyst = -273.1°C)
> >> ERROR: Can't get value of subfeature power1_cap: Can't read
> >> PPT: N/A (cap = 0.00 W)
> >>
> >>>>
> >>>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
> >>>> ---
> >>>> drivers/gpu/drm/amd/pm/amdgpu_pm.c | 90 +++++++++++++++---------------
> >>>> 1 file changed, 45 insertions(+), 45 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> >>>> index c11952a4389bc..d6e38466fbb82 100644
> >>>> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> >>>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> >>>> @@ -142,7 +142,7 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = pm_runtime_get_sync(ddev->dev);
> >>>> @@ -173,7 +173,7 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> if (strncmp("battery", buf, strlen("battery")) == 0)
> >>>> @@ -270,7 +270,7 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = pm_runtime_get_sync(ddev->dev);
> >>>> @@ -309,7 +309,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> if (strncmp("low", buf, strlen("low")) == 0) {
> >>>> @@ -371,7 +371,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = pm_runtime_get_sync(ddev->dev);
> >>>> @@ -409,7 +409,7 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = pm_runtime_get_sync(ddev->dev);
> >>>> @@ -448,7 +448,7 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> if (adev->pm.pp_force_state_enabled)
> >>>> @@ -471,7 +471,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> adev->pm.pp_force_state_enabled = false;
> >>>> @@ -541,7 +541,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = pm_runtime_get_sync(ddev->dev);
> >>>> @@ -577,7 +577,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = pm_runtime_get_sync(ddev->dev);
> >>>> @@ -760,7 +760,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> if (count > 127 || count == 0)
> >>>> @@ -862,7 +862,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = pm_runtime_get_sync(ddev->dev);
> >>>> @@ -922,7 +922,7 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = kstrtou64(buf, 0, &featuremask);
> >>>> @@ -957,7 +957,7 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = pm_runtime_get_sync(ddev->dev);
> >>>> @@ -1026,7 +1026,7 @@ static ssize_t amdgpu_get_pp_dpm_clock(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = pm_runtime_get_sync(ddev->dev);
> >>>> @@ -1095,7 +1095,7 @@ static ssize_t amdgpu_set_pp_dpm_clock(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = amdgpu_read_mask(buf, count, &mask);
> >>>> @@ -1280,7 +1280,7 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = pm_runtime_get_sync(ddev->dev);
> >>>> @@ -1309,7 +1309,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = kstrtol(buf, 0, &value);
> >>>> @@ -1342,7 +1342,7 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = pm_runtime_get_sync(ddev->dev);
> >>>> @@ -1371,7 +1371,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = kstrtol(buf, 0, &value);
> >>>> @@ -1424,7 +1424,7 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = pm_runtime_get_sync(ddev->dev);
> >>>> @@ -1463,7 +1463,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> tmp[0] = *(buf);
> >>>> @@ -1517,7 +1517,7 @@ static int amdgpu_hwmon_get_sensor_generic(struct amdgpu_device *adev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >>>> @@ -1630,7 +1630,7 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> if (adev->flags & AMD_IS_APU)
> >>>> @@ -1673,7 +1673,7 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> if (adev->unique_id)
> >>>> @@ -1846,7 +1846,7 @@ static ssize_t amdgpu_get_pm_metrics(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = pm_runtime_get_sync(ddev->dev);
> >>>> @@ -1887,7 +1887,7 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = pm_runtime_get_sync(ddev->dev);
> >>>> @@ -2005,7 +2005,7 @@ static ssize_t amdgpu_set_smartshift_bias(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> r = pm_runtime_get_sync(ddev->dev);
> >>>> @@ -2227,7 +2227,7 @@ static ssize_t amdgpu_get_xgmi_plpd_policy(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> mode = amdgpu_dpm_get_xgmi_plpd_mode(adev, &mode_desc);
> >>>> @@ -2250,7 +2250,7 @@ static ssize_t amdgpu_set_xgmi_plpd_policy(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = kstrtos32(buf, 0, &mode);
> >>>> @@ -2652,7 +2652,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >>>> @@ -2684,7 +2684,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> err = kstrtoint(buf, 10, &value);
> >>>> @@ -2742,7 +2742,7 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> err = kstrtou32(buf, 10, &value);
> >>>> @@ -2787,7 +2787,7 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >>>> @@ -2817,7 +2817,7 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >>>> @@ -2881,7 +2881,7 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >>>> @@ -2912,7 +2912,7 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> err = kstrtou32(buf, 10, &value);
> >>>> @@ -2956,7 +2956,7 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >>>> @@ -2988,7 +2988,7 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> err = kstrtoint(buf, 10, &value);
> >>>> @@ -3128,7 +3128,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_generic(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >>>> @@ -3209,7 +3209,7 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> if (amdgpu_sriov_vf(adev))
> >>>> @@ -3663,7 +3663,7 @@ static int amdgpu_retrieve_od_settings(struct amdgpu_device *adev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = pm_runtime_get_sync(adev->dev);
> >>>> @@ -3747,7 +3747,7 @@ amdgpu_distribute_custom_od_settings(struct amdgpu_device *adev,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = parse_input_od_command_lines(in_buf,
> >>>> @@ -4626,7 +4626,7 @@ static int amdgpu_debugfs_pm_info_show(struct seq_file *m, void *unused)
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>
> >>> I'd prefer to keep the current behavior for debugfs.
> >>
> >> OK. I'll exclude it for debugfs in the next spin.
> >>
> >>>
> >>> Alex
> >>>
> >>>>
> >>>> r = pm_runtime_get_sync(dev->dev);
> >>>> @@ -4671,7 +4671,7 @@ static ssize_t amdgpu_pm_prv_buffer_read(struct file *f, char __user *buf,
> >>>>
> >>>> if (amdgpu_in_reset(adev))
> >>>> return -EPERM;
> >>>> - if (adev->in_suspend && !adev->in_runpm)
> >>>> + if (adev->in_suspend || adev->in_runpm)
> >>>> return -EPERM;
> >>>>
> >>>> ret = amdgpu_dpm_get_smu_prv_buf_details(adev, &smu_prv_buf, &smu_prv_buf_size);
> >>>> --
> >>>> 2.43.0
> >>>>
> >>
>
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] drm/amd: Don't wake dGPUs while reading sensors
2024-08-24 2:22 ` Luke Jones
@ 2024-08-25 20:40 ` Luna Nova
2024-08-26 0:19 ` Mario Limonciello
0 siblings, 1 reply; 10+ messages in thread
From: Luna Nova @ 2024-08-25 20:40 UTC (permalink / raw)
To: amd-gfx; +Cc: Mario Limonciello, Luke Jones, Mario Limonciello, Alex Deucher
Raised this as an issue a while back on the bug tracker and it got closed as WONTFIX. https://gitlab.freedesktop.org/drm/amd/-/issues/2229
Been running a patched kernel with a similar patch locally ever since because even figuring out everything on the system that's accidentally waking the GPU was too time consuming.
I'd love if this gets accepted.
I think fundamentally waking the device to ask how much power it is using thus increasing the power usage makes no sense - by trying to measure it we changed it, so if power can't be measured while off it only makes sense to return an error. Same applies for other sensors that currently wake the GPU - most of them are changing the property by waking it.
Because this behavior is odd and it's not obvious on single GPU systems that anything's going wrong app and lib devs are likely to keep making this "mistake" forever.
Luna
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] drm/amd: Don't wake dGPUs while reading sensors
2024-08-25 20:40 ` Luna Nova
@ 2024-08-26 0:19 ` Mario Limonciello
0 siblings, 0 replies; 10+ messages in thread
From: Mario Limonciello @ 2024-08-26 0:19 UTC (permalink / raw)
To: Luna Nova, amd-gfx; +Cc: Luke Jones, Mario Limonciello, Alex Deucher
On 8/25/24 15:40, Luna Nova wrote:
> Raised this as an issue a while back on the bug tracker and it got closed as WONTFIX. https://gitlab.freedesktop.org/drm/amd/-/issues/2229
> Been running a patched kernel with a similar patch locally ever since because even figuring out everything on the system that's accidentally waking the GPU was too time consuming.
>
> I'd love if this gets accepted.
> I think fundamentally waking the device to ask how much power it is using thus increasing the power usage makes no sense - by trying to measure it we changed it, so if power can't be measured while off it only makes sense to return an error. Same applies for other sensors that currently wake the GPU - most of them are changing the property by waking it.
>
> Because this behavior is odd and it's not obvious on single GPU systems that anything's going wrong app and lib devs are likely to keep making this "mistake" forever.
>
> Luna
So FWIW I did file a v2 [1] that "undoes" the debugfs changes.
[1]
https://lore.kernel.org/amd-gfx/20240823145527.150749-1-mario.limonciello@amd.com/
If there is too much push back to an error code another option we can do
is return "0" for this case, which will make "sense" for some sysfs
files specifically if in d3cold. However for d3hot and some sysfs it
isn't fully true.
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2024-08-26 0:19 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-08-20 2:04 [PATCH] drm/amd: Don't wake dGPUs while reading sensors Mario Limonciello
2024-08-23 13:44 ` Hamza Mahfooz
2024-08-23 13:58 ` Mario Limonciello
2024-08-23 14:09 ` Alex Deucher
2024-08-23 14:13 ` Mario Limonciello
2024-08-23 14:31 ` Alex Deucher
2024-08-23 14:39 ` Mario Limonciello
2024-08-24 2:22 ` Luke Jones
2024-08-25 20:40 ` Luna Nova
2024-08-26 0:19 ` Mario Limonciello
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox