linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] pmdomain: governor: Consider CPU latency tolerance from pm_domain_cpu_gov
@ 2025-07-08  5:15 Maulik Shah
  2025-07-08 14:41 ` Ulf Hansson
  0 siblings, 1 reply; 3+ messages in thread
From: Maulik Shah @ 2025-07-08  5:15 UTC (permalink / raw)
  To: Ulf Hansson; +Cc: linux-pm, linux-kernel, linux-arm-msm, Maulik Shah

pm_domain_cpu_gov is selecting a cluster idle state but does not consider
latency tolerance of child CPUs. This results in deeper cluster idle state
whose latency does not meet latency tolerance requirement.

Select deeper idle state only if global and device latency tolerance of all
child CPUs meet.

Test results on SM8750 with 300 usec PM-QoS on CPU0 which is less than
domain idle state entry (2150) + exit (1983) usec latency mentioned in
devicetree, demonstrate the issue.

	# echo 300 > /sys/devices/system/cpu/cpu0/power/pm_qos_resume_latency_us

Before: (Usage is incrementing)
======
	# cat /sys/kernel/debug/pm_genpd/power-domain-cluster0/idle_states
	State          Time Spent(ms) Usage      Rejected   Above      Below
	S0             29817          537        8          270        0

	# cat /sys/kernel/debug/pm_genpd/power-domain-cluster0/idle_states
	State          Time Spent(ms) Usage      Rejected   Above      Below
	S0             30348          542        8          271        0

After: (Usage is not incrementing due to latency tolerance)
======
	# cat /sys/kernel/debug/pm_genpd/power-domain-cluster0/idle_states
	State          Time Spent(ms) Usage      Rejected   Above      Below
	S0             39319          626        14         307        0

	# cat /sys/kernel/debug/pm_genpd/power-domain-cluster0/idle_states
	State          Time Spent(ms) Usage      Rejected   Above      Below
	S0             39319          626        14         307        0

Signed-off-by: Maulik Shah <maulik.shah@oss.qualcomm.com>
---
 drivers/pmdomain/governor.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/pmdomain/governor.c b/drivers/pmdomain/governor.c
index c1e148657c873a6b5b4d9c0f058d54cb020c56e2..d5ac4c1b5b5a432f0072209d17379e58ec891202 100644
--- a/drivers/pmdomain/governor.c
+++ b/drivers/pmdomain/governor.c
@@ -8,6 +8,7 @@
 #include <linux/pm_domain.h>
 #include <linux/pm_qos.h>
 #include <linux/hrtimer.h>
+#include <linux/cpu.h>
 #include <linux/cpuidle.h>
 #include <linux/cpumask.h>
 #include <linux/ktime.h>
@@ -349,6 +350,8 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
 	struct cpuidle_device *dev;
 	ktime_t domain_wakeup, next_hrtimer;
 	ktime_t now = ktime_get();
+	struct device *device;
+	s64 cpu_constraint, global_constraint;
 	s64 idle_duration_ns;
 	int cpu, i;
 
@@ -359,6 +362,7 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
 	if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN))
 		return true;
 
+	global_constraint = cpu_latency_qos_limit();
 	/*
 	 * Find the next wakeup for any of the online CPUs within the PM domain
 	 * and its subdomains. Note, we only need the genpd->cpus, as it already
@@ -372,8 +376,16 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
 			if (ktime_before(next_hrtimer, domain_wakeup))
 				domain_wakeup = next_hrtimer;
 		}
+
+		device = get_cpu_device(cpu);
+		if (device) {
+			cpu_constraint = dev_pm_qos_read_value(device, DEV_PM_QOS_RESUME_LATENCY);
+			if (cpu_constraint < global_constraint)
+				global_constraint = cpu_constraint;
+		}
 	}
 
+	global_constraint *= NSEC_PER_USEC;
 	/* The minimum idle duration is from now - until the next wakeup. */
 	idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, now));
 	if (idle_duration_ns <= 0)
@@ -389,8 +401,10 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
 	 */
 	i = genpd->state_idx;
 	do {
-		if (idle_duration_ns >= (genpd->states[i].residency_ns +
-		    genpd->states[i].power_off_latency_ns)) {
+		if ((idle_duration_ns >= (genpd->states[i].residency_ns +
+		    genpd->states[i].power_off_latency_ns)) &&
+		    (global_constraint >= (genpd->states[i].power_on_latency_ns +
+		    genpd->states[i].power_off_latency_ns))) {
 			genpd->state_idx = i;
 			genpd->gd->last_enter = now;
 			genpd->gd->reflect_residency = true;

---
base-commit: 50c8770a42faf8b1c7abe93e7c114337f580a97d
change-id: 20250703-pmdomain_qos-25b8dbb623ea

Best regards,
-- 
Maulik Shah <maulik.shah@oss.qualcomm.com>


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] pmdomain: governor: Consider CPU latency tolerance from pm_domain_cpu_gov
  2025-07-08  5:15 [PATCH] pmdomain: governor: Consider CPU latency tolerance from pm_domain_cpu_gov Maulik Shah
@ 2025-07-08 14:41 ` Ulf Hansson
  2025-07-09  8:32   ` Maulik Shah (mkshah)
  0 siblings, 1 reply; 3+ messages in thread
From: Ulf Hansson @ 2025-07-08 14:41 UTC (permalink / raw)
  To: Maulik Shah; +Cc: linux-pm, linux-kernel, linux-arm-msm

On Tue, 8 Jul 2025 at 07:15, Maulik Shah <maulik.shah@oss.qualcomm.com> wrote:
>
> pm_domain_cpu_gov is selecting a cluster idle state but does not consider
> latency tolerance of child CPUs. This results in deeper cluster idle state
> whose latency does not meet latency tolerance requirement.
>
> Select deeper idle state only if global and device latency tolerance of all
> child CPUs meet.
>
> Test results on SM8750 with 300 usec PM-QoS on CPU0 which is less than
> domain idle state entry (2150) + exit (1983) usec latency mentioned in
> devicetree, demonstrate the issue.
>
>         # echo 300 > /sys/devices/system/cpu/cpu0/power/pm_qos_resume_latency_us
>
> Before: (Usage is incrementing)
> ======
>         # cat /sys/kernel/debug/pm_genpd/power-domain-cluster0/idle_states
>         State          Time Spent(ms) Usage      Rejected   Above      Below
>         S0             29817          537        8          270        0
>
>         # cat /sys/kernel/debug/pm_genpd/power-domain-cluster0/idle_states
>         State          Time Spent(ms) Usage      Rejected   Above      Below
>         S0             30348          542        8          271        0
>
> After: (Usage is not incrementing due to latency tolerance)
> ======
>         # cat /sys/kernel/debug/pm_genpd/power-domain-cluster0/idle_states
>         State          Time Spent(ms) Usage      Rejected   Above      Below
>         S0             39319          626        14         307        0
>
>         # cat /sys/kernel/debug/pm_genpd/power-domain-cluster0/idle_states
>         State          Time Spent(ms) Usage      Rejected   Above      Below
>         S0             39319          626        14         307        0
>
> Signed-off-by: Maulik Shah <maulik.shah@oss.qualcomm.com>

Thanks for posting this! I am actually carrying a local patch in my
tree for this, just haven't reached the point of posting it. :-)

> ---
>  drivers/pmdomain/governor.c | 18 ++++++++++++++++--
>  1 file changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/pmdomain/governor.c b/drivers/pmdomain/governor.c
> index c1e148657c873a6b5b4d9c0f058d54cb020c56e2..d5ac4c1b5b5a432f0072209d17379e58ec891202 100644
> --- a/drivers/pmdomain/governor.c
> +++ b/drivers/pmdomain/governor.c
> @@ -8,6 +8,7 @@
>  #include <linux/pm_domain.h>
>  #include <linux/pm_qos.h>
>  #include <linux/hrtimer.h>
> +#include <linux/cpu.h>
>  #include <linux/cpuidle.h>
>  #include <linux/cpumask.h>
>  #include <linux/ktime.h>
> @@ -349,6 +350,8 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
>         struct cpuidle_device *dev;
>         ktime_t domain_wakeup, next_hrtimer;
>         ktime_t now = ktime_get();
> +       struct device *device;

Nitpick: Maybe rename to "cpu_dev" to better distinguish between the
cpuidle_device *dev, above?

> +       s64 cpu_constraint, global_constraint;
>         s64 idle_duration_ns;
>         int cpu, i;
>
> @@ -359,6 +362,7 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
>         if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN))
>                 return true;
>
> +       global_constraint = cpu_latency_qos_limit();
>         /*
>          * Find the next wakeup for any of the online CPUs within the PM domain
>          * and its subdomains. Note, we only need the genpd->cpus, as it already
> @@ -372,8 +376,16 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
>                         if (ktime_before(next_hrtimer, domain_wakeup))
>                                 domain_wakeup = next_hrtimer;
>                 }
> +
> +               device = get_cpu_device(cpu);
> +               if (device) {
> +                       cpu_constraint = dev_pm_qos_read_value(device, DEV_PM_QOS_RESUME_LATENCY);

We should be able to use dev_pm_qos_raw_resume_latency() here, similar
to how cpuidle_governor_latency_req() does it. I think it's better as
it avoids acquiring/releasing the spinlock.

> +                       if (cpu_constraint < global_constraint)
> +                               global_constraint = cpu_constraint;
> +               }
>         }
>
> +       global_constraint *= NSEC_PER_USEC;
>         /* The minimum idle duration is from now - until the next wakeup. */
>         idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, now));
>         if (idle_duration_ns <= 0)
> @@ -389,8 +401,10 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
>          */
>         i = genpd->state_idx;
>         do {
> -               if (idle_duration_ns >= (genpd->states[i].residency_ns +
> -                   genpd->states[i].power_off_latency_ns)) {
> +               if ((idle_duration_ns >= (genpd->states[i].residency_ns +
> +                   genpd->states[i].power_off_latency_ns)) &&
> +                   (global_constraint >= (genpd->states[i].power_on_latency_ns +
> +                   genpd->states[i].power_off_latency_ns))) {
>                         genpd->state_idx = i;
>                         genpd->gd->last_enter = now;
>                         genpd->gd->reflect_residency = true;
>
> ---
> base-commit: 50c8770a42faf8b1c7abe93e7c114337f580a97d
> change-id: 20250703-pmdomain_qos-25b8dbb623ea
>
> Best regards,
> --
> Maulik Shah <maulik.shah@oss.qualcomm.com>
>

Besides the minor things above, this looks good to me.

Kind regards
Uffe

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] pmdomain: governor: Consider CPU latency tolerance from pm_domain_cpu_gov
  2025-07-08 14:41 ` Ulf Hansson
@ 2025-07-09  8:32   ` Maulik Shah (mkshah)
  0 siblings, 0 replies; 3+ messages in thread
From: Maulik Shah (mkshah) @ 2025-07-09  8:32 UTC (permalink / raw)
  To: Ulf Hansson; +Cc: linux-pm, linux-kernel, linux-arm-msm



On 7/8/2025 8:11 PM, Ulf Hansson wrote:
> On Tue, 8 Jul 2025 at 07:15, Maulik Shah <maulik.shah@oss.qualcomm.com> wrote:
>>
>>
>> diff --git a/drivers/pmdomain/governor.c b/drivers/pmdomain/governor.c
>> index c1e148657c873a6b5b4d9c0f058d54cb020c56e2..d5ac4c1b5b5a432f0072209d17379e58ec891202 100644
>> --- a/drivers/pmdomain/governor.c
>> +++ b/drivers/pmdomain/governor.c
>> @@ -8,6 +8,7 @@
>>  #include <linux/pm_domain.h>
>>  #include <linux/pm_qos.h>
>>  #include <linux/hrtimer.h>
>> +#include <linux/cpu.h>
>>  #include <linux/cpuidle.h>
>>  #include <linux/cpumask.h>
>>  #include <linux/ktime.h>
>> @@ -349,6 +350,8 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
>>         struct cpuidle_device *dev;
>>         ktime_t domain_wakeup, next_hrtimer;
>>         ktime_t now = ktime_get();
>> +       struct device *device;
> 
> Nitpick: Maybe rename to "cpu_dev" to better distinguish between the
> cpuidle_device *dev, above?

Updated in v2.

> 
>> +       s64 cpu_constraint, global_constraint;
>>         s64 idle_duration_ns;
>>         int cpu, i;
>>
>> @@ -359,6 +362,7 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
>>         if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN))
>>                 return true;
>>
>> +       global_constraint = cpu_latency_qos_limit();
>>         /*
>>          * Find the next wakeup for any of the online CPUs within the PM domain
>>          * and its subdomains. Note, we only need the genpd->cpus, as it already
>> @@ -372,8 +376,16 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd)
>>                         if (ktime_before(next_hrtimer, domain_wakeup))
>>                                 domain_wakeup = next_hrtimer;
>>                 }
>> +
>> +               device = get_cpu_device(cpu);
>> +               if (device) {
>> +                       cpu_constraint = dev_pm_qos_read_value(device, DEV_PM_QOS_RESUME_LATENCY);
> 
> We should be able to use dev_pm_qos_raw_resume_latency() here, similar
> to how cpuidle_governor_latency_req() does it. I think it's better as
> it avoids acquiring/releasing the spinlock.

Yes, updated to use dev_pm_qos_raw_resume_latency() in v2.

Thanks,
Maulik

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-07-09  8:32 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-08  5:15 [PATCH] pmdomain: governor: Consider CPU latency tolerance from pm_domain_cpu_gov Maulik Shah
2025-07-08 14:41 ` Ulf Hansson
2025-07-09  8:32   ` Maulik Shah (mkshah)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).