linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] arm64: topology: Improve cpuinfo_avg_freq for ARM64
@ 2025-11-04  7:55 Bowen Yu
  2025-11-04  7:55 ` [PATCH 1/3] arm64: topology: Improve AMU-based frequency calculation Bowen Yu
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Bowen Yu @ 2025-11-04  7:55 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, catalin.marinas, will,
	beata.michalska, ptsm, linuxarm, jonathan.cameron
  Cc: zhanjie9, prime.zeng, wanghuiqiang, xuwei5, zhenglifeng1,
	yubowen8, zhangpengjie2

This series addresses several issues in CPU avgfreq reporting:

Patch 1:
- Implements direct frequency calculation using AMU counters:
  freq = (core_cycles_delta * timer_freq) / (const_cycles_delta 
  * HZ_PER_KHZ)
- Eliminates precision loss from SCHED_CAPACITY_SHIFT bit-shifting

Patch 2:
- Resolves unreliable cpuinfo_avg_freq behavior during idle periods
- Replaces invalid returns with governor's current frequency as fallback
  value when all CPUs in policy are idle

Patch 3:
- Removes redundant housekeeping_cpu() check

Bowen Yu (3):
  arm64: topology: Improve AMU-based frequency calculation
  arm64: topology: Use current freq in governor for idle cpus in
    cpuinfo_avg_freq
  arm64: topology: Remove redundant housekeeping_cpu() checks in
    arch_freq_get_on_cpu

 arch/arm64/kernel/topology.c | 40 +++++++++++++++---------------------
 1 file changed, 17 insertions(+), 23 deletions(-)

-- 
2.33.0



^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/3] arm64: topology: Improve AMU-based frequency calculation
  2025-11-04  7:55 [PATCH 0/3] arm64: topology: Improve cpuinfo_avg_freq for ARM64 Bowen Yu
@ 2025-11-04  7:55 ` Bowen Yu
  2025-11-06  4:12   ` Jie Zhan
  2025-11-10 17:04   ` Beata Michalska
  2025-11-04  7:55 ` [PATCH 2/3] arm64: topology: Use current freq in governor for idle cpus in cpuinfo_avg_freq Bowen Yu
  2025-11-04  7:55 ` [PATCH 3/3] arm64: topology: Remove redundant housekeeping_cpu() checks in arch_freq_get_on_cpu Bowen Yu
  2 siblings, 2 replies; 9+ messages in thread
From: Bowen Yu @ 2025-11-04  7:55 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, catalin.marinas, will,
	beata.michalska, ptsm, linuxarm, jonathan.cameron
  Cc: zhanjie9, prime.zeng, wanghuiqiang, xuwei5, zhenglifeng1,
	yubowen8, zhangpengjie2

The current approach of reverse-calculating CPU frequency from capacity
values introduces quantization errors due to intermediate scaling of
arch_scale_freq_capacity, which results in the calculated frequency having
only 1/1024 resolution.

This patch:
1. Directly computes frequency using AMU counters in amu_scale_freq_tick():
freq = (core_cycles_delta * timer_freq) / (const_cycles_delta * 1000)
 - core_cycles_delta: Measured CPU cycles
 - timer_freq: Architectural timer frequency
 - const_cycles_delta: Reference cycles from fixed-frequency timer
2. Returns pre-computed avgfreq in arch_freq_get_on_cpu()

examples:
Before change
[root@localhost ~]# cat /sys/devices/system/cpu/cpufreq/policy*/cpuinfo_avg_freq
2297851
2297851
2295312
2297851
2297851
2295312
2297851
2295312
2297851
2297851
2297851
2295312
2295312
2297851
2297851
2297851
2297851
2300390
2297851
2297851
2297851

After change
[root@localhost ~]# cat /sys/devices/system/cpu/cpufreq/policy*/cpuinfo_avg_freq
2299177
2298117
2299188
2297330
2296530
2298817
2298434
2298986
2298596
2299395
2299560
2298446
2299108
2299294
2298707
2298453
2298632
2299218
2297962

Signed-off-by: Bowen Yu <yubowen8@huawei.com>
---
 arch/arm64/kernel/topology.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 5d07ee85bdae..c0dbc27289ea 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -20,6 +20,7 @@
 #include <linux/percpu.h>
 #include <linux/sched/isolation.h>
 #include <linux/xarray.h>
+#include <linux/units.h>
 
 #include <asm/cpu.h>
 #include <asm/cputype.h>
@@ -144,6 +145,8 @@ int __init parse_acpi_topology(void)
  */
 static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) =  1UL << (2 * SCHED_CAPACITY_SHIFT);
 static cpumask_var_t amu_fie_cpus;
+static DEFINE_PER_CPU(unsigned long, core_delta);
+static DEFINE_PER_CPU(unsigned long, const_delta);
 
 struct amu_cntr_sample {
 	u64		arch_const_cycles_prev;
@@ -246,6 +249,7 @@ static void amu_scale_freq_tick(void)
 	 * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
 	 */
 	scale = core_cnt - prev_core_cnt;
+	this_cpu_write(core_delta, scale);
 	scale *= this_cpu_read(arch_max_freq_scale);
 	scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
 			  const_cnt - prev_const_cnt);
@@ -253,6 +257,7 @@ static void amu_scale_freq_tick(void)
 	scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
 	this_cpu_write(arch_freq_scale, (unsigned long)scale);
 
+	this_cpu_write(const_delta, const_cnt - prev_const_cnt);
 	amu_sample->last_scale_update = jiffies;
 }
 
@@ -288,7 +293,7 @@ int arch_freq_get_on_cpu(int cpu)
 	unsigned int start_cpu = cpu;
 	unsigned long last_update;
 	unsigned int freq = 0;
-	u64 scale;
+	u64 delta_core_kHz;
 
 	if (!amu_fie_cpu_supported(cpu) || !arch_scale_freq_ref(cpu))
 		return -EOPNOTSUPP;
@@ -340,14 +345,11 @@ int arch_freq_get_on_cpu(int cpu)
 			break;
 		}
 	}
-	/*
-	 * Reversed computation to the one used to determine
-	 * the arch_freq_scale value
-	 * (see amu_scale_freq_tick for details)
-	 */
-	scale = arch_scale_freq_capacity(cpu);
-	freq = scale * arch_scale_freq_ref(cpu);
-	freq >>= SCHED_CAPACITY_SHIFT;
+
+	if (check_mul_overflow(per_cpu(core_delta, cpu), arch_timer_get_cntfrq(), &delta_core_kHz))
+		return -EINVAL;
+
+	freq = div_u64(delta_core_kHz, per_cpu(const_delta, cpu) * HZ_PER_KHZ);
 	return freq;
 }
 
-- 
2.33.0



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/3] arm64: topology: Use current freq in governor for idle cpus in cpuinfo_avg_freq
  2025-11-04  7:55 [PATCH 0/3] arm64: topology: Improve cpuinfo_avg_freq for ARM64 Bowen Yu
  2025-11-04  7:55 ` [PATCH 1/3] arm64: topology: Improve AMU-based frequency calculation Bowen Yu
@ 2025-11-04  7:55 ` Bowen Yu
  2025-11-10 17:11   ` Beata Michalska
  2025-11-04  7:55 ` [PATCH 3/3] arm64: topology: Remove redundant housekeeping_cpu() checks in arch_freq_get_on_cpu Bowen Yu
  2 siblings, 1 reply; 9+ messages in thread
From: Bowen Yu @ 2025-11-04  7:55 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, catalin.marinas, will,
	beata.michalska, ptsm, linuxarm, jonathan.cameron
  Cc: zhanjie9, prime.zeng, wanghuiqiang, xuwei5, zhenglifeng1,
	yubowen8, zhangpengjie2

The current cpuinfo_avg_freq interface returns an error when all CPUs
under a policy are idle, which is relatively common. To address this, it
is better to use the current frequency stored in the governor. This
implementation is also used on x86 architecture.

Since the current frequency in the governor is the last known frequency,
it should be more user-friendly.

This patch also removes redundant !housekeeping_cpu() check since it is
inherently done when checking jiffies.

Original output when all cpus under a policy are idle:
[root@localhost home]# cat /sys/devices/system/cpu/cpufreq/policy0/
cpuinfo_avg_freq
cat: /sys/devices/system/cpu/cpufreq/policy0/cpuinfo_avg_freq: Resource
 temporarily unavailable

Output after changes:
[root@localhost home]# cat /sys/devices/system/cpu/cpufreq/policy0
/cpuinfo_avg_freq
1200000

Signed-off-by: Bowen Yu <yubowen8@huawei.com>
---
 arch/arm64/kernel/topology.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index c0dbc27289ea..f1370a4a4df9 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -333,14 +333,13 @@ int arch_freq_get_on_cpu(int cpu)
 				if (!idle_cpu(ref_cpu))
 					break;
 			}
+
+			if (ref_cpu >= nr_cpu_ids) {
+				cpufreq_cpu_put(policy);
+				return cpufreq_quick_get(start_cpu);
+			}
 
 			cpufreq_cpu_put(policy);
-
-			if (ref_cpu >= nr_cpu_ids)
-				/* No alternative to pull info from */
-				return -EAGAIN;
-
-			cpu = ref_cpu;
 		} else {
 			break;
 		}
-- 
2.33.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/3] arm64: topology: Remove redundant housekeeping_cpu() checks in arch_freq_get_on_cpu
  2025-11-04  7:55 [PATCH 0/3] arm64: topology: Improve cpuinfo_avg_freq for ARM64 Bowen Yu
  2025-11-04  7:55 ` [PATCH 1/3] arm64: topology: Improve AMU-based frequency calculation Bowen Yu
  2025-11-04  7:55 ` [PATCH 2/3] arm64: topology: Use current freq in governor for idle cpus in cpuinfo_avg_freq Bowen Yu
@ 2025-11-04  7:55 ` Bowen Yu
  2025-11-10 17:15   ` Beata Michalska
  2 siblings, 1 reply; 9+ messages in thread
From: Bowen Yu @ 2025-11-04  7:55 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, catalin.marinas, will,
	beata.michalska, ptsm, linuxarm, jonathan.cameron
  Cc: zhanjie9, prime.zeng, wanghuiqiang, xuwei5, zhenglifeng1,
	yubowen8, zhangpengjie2

This patch removes redundant !housekeeping_cpu() check since it is
inherently done when checking jiffies.

Signed-off-by: Bowen Yu <yubowen8@huawei.com>
---
 arch/arm64/kernel/topology.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index f1370a4a4df9..6981ef3019d3 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -310,20 +310,13 @@ int arch_freq_get_on_cpu(int cpu)
 		 * (and thus freq scale), if available, for given policy: this boils
 		 * down to identifying an active cpu within the same freq domain, if any.
 		 */
-		if (!housekeeping_cpu(cpu, HK_TYPE_TICK) ||
-		    time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) {
+		if (time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) {
 			struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 			int ref_cpu;
 
 			if (!policy)
 				return -EINVAL;
 
-			if (!cpumask_intersects(policy->related_cpus,
-						housekeeping_cpumask(HK_TYPE_TICK))) {
-				cpufreq_cpu_put(policy);
-				return -EOPNOTSUPP;
-			}
-
 			for_each_cpu_wrap(ref_cpu, policy->cpus, cpu + 1) {
 				if (ref_cpu == start_cpu) {
 					/* Prevent verifying same CPU twice */
-- 
2.33.0



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/3] arm64: topology: Improve AMU-based frequency calculation
  2025-11-04  7:55 ` [PATCH 1/3] arm64: topology: Improve AMU-based frequency calculation Bowen Yu
@ 2025-11-06  4:12   ` Jie Zhan
  2025-11-10 17:04   ` Beata Michalska
  1 sibling, 0 replies; 9+ messages in thread
From: Jie Zhan @ 2025-11-06  4:12 UTC (permalink / raw)
  To: Bowen Yu, linux-arm-kernel, linux-kernel, catalin.marinas, will,
	beata.michalska, ptsm, linuxarm, jonathan.cameron
  Cc: prime.zeng, wanghuiqiang, xuwei5, zhenglifeng1, zhangpengjie2



On 11/4/2025 3:55 PM, Bowen Yu wrote:
> The current approach of reverse-calculating CPU frequency from capacity
> values introduces quantization errors due to intermediate scaling of
> arch_scale_freq_capacity, which results in the calculated frequency having
> only 1/1024 resolution.
> 
> This patch:
> 1. Directly computes frequency using AMU counters in amu_scale_freq_tick():
> freq = (core_cycles_delta * timer_freq) / (const_cycles_delta * 1000)
>  - core_cycles_delta: Measured CPU cycles
>  - timer_freq: Architectural timer frequency
>  - const_cycles_delta: Reference cycles from fixed-frequency timer
> 2. Returns pre-computed avgfreq in arch_freq_get_on_cpu()
> 
> examples:
> Before change
> [root@localhost ~]# cat /sys/devices/system/cpu/cpufreq/policy*/cpuinfo_avg_freq
> 2297851
> 2297851
> 2295312
> 2297851
> 2297851
> 2295312
> 2297851
> 2295312
> 2297851
> 2297851
> 2297851
> 2295312
> 2295312
> 2297851
> 2297851
> 2297851
> 2297851
> 2300390
> 2297851
> 2297851
> 2297851
> 
> After change
> [root@localhost ~]# cat /sys/devices/system/cpu/cpufreq/policy*/cpuinfo_avg_freq
> 2299177
> 2298117
> 2299188
> 2297330
> 2296530
> 2298817
> 2298434
> 2298986
> 2298596
> 2299395
> 2299560
> 2298446
> 2299108
> 2299294
> 2298707
> 2298453
> 2298632
> 2299218
> 2297962
> 
> Signed-off-by: Bowen Yu <yubowen8@huawei.com>
> ---
>  arch/arm64/kernel/topology.c | 20 +++++++++++---------
>  1 file changed, 11 insertions(+), 9 deletions(-)
> 
...
> @@ -288,7 +293,7 @@ int arch_freq_get_on_cpu(int cpu)
>  	unsigned int start_cpu = cpu;
>  	unsigned long last_update;
>  	unsigned int freq = 0;
> -	u64 scale;
> +	u64 delta_core_kHz;
>  
>  	if (!amu_fie_cpu_supported(cpu) || !arch_scale_freq_ref(cpu))
>  		return -EOPNOTSUPP;
> @@ -340,14 +345,11 @@ int arch_freq_get_on_cpu(int cpu)
>  			break;
>  		}
>  	}
> -	/*
> -	 * Reversed computation to the one used to determine
> -	 * the arch_freq_scale value
> -	 * (see amu_scale_freq_tick for details)
> -	 */
> -	scale = arch_scale_freq_capacity(cpu);
> -	freq = scale * arch_scale_freq_ref(cpu);
> -	freq >>= SCHED_CAPACITY_SHIFT;
> +
> +	if (check_mul_overflow(per_cpu(core_delta, cpu), arch_timer_get_cntfrq(), &delta_core_kHz))
Hi Bowen,

IIUC, the variable 'delta_core_kHz' doesn't mean its name.
'core_delta * timer_freq' is just a transitional number.
The naming is misleading.

Perhaps consider reusing 'freq'? i.e. define 'freq' as u64 and replace
'delta_core_kHz' with 'freq', then return (int)freq at the end.

Jie
> +		return -EINVAL;
> +
> +	freq = div_u64(delta_core_kHz, per_cpu(const_delta, cpu) * HZ_PER_KHZ);
>  	return freq;
>  }
>  


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/3] arm64: topology: Improve AMU-based frequency calculation
  2025-11-04  7:55 ` [PATCH 1/3] arm64: topology: Improve AMU-based frequency calculation Bowen Yu
  2025-11-06  4:12   ` Jie Zhan
@ 2025-11-10 17:04   ` Beata Michalska
  1 sibling, 0 replies; 9+ messages in thread
From: Beata Michalska @ 2025-11-10 17:04 UTC (permalink / raw)
  To: Bowen Yu
  Cc: linux-arm-kernel, linux-kernel, catalin.marinas, will, ptsm,
	linuxarm, jonathan.cameron, zhanjie9, prime.zeng, wanghuiqiang,
	xuwei5, zhenglifeng1, zhangpengjie2

On Tue, Nov 04, 2025 at 03:55:42PM +0800, Bowen Yu wrote:
> The current approach of reverse-calculating CPU frequency from capacity
> values introduces quantization errors due to intermediate scaling of
> arch_scale_freq_capacity, which results in the calculated frequency having
> only 1/1024 resolution.
> 
> This patch:
> 1. Directly computes frequency using AMU counters in amu_scale_freq_tick():
> freq = (core_cycles_delta * timer_freq) / (const_cycles_delta * 1000)
>  - core_cycles_delta: Measured CPU cycles
>  - timer_freq: Architectural timer frequency
>  - const_cycles_delta: Reference cycles from fixed-frequency timer
> 2. Returns pre-computed avgfreq in arch_freq_get_on_cpu()
> 
> examples:
> Before change
> [root@localhost ~]# cat /sys/devices/system/cpu/cpufreq/policy*/cpuinfo_avg_freq
> 2297851
> 2297851
> 2295312
> 2297851
> 2297851
> 2295312
> 2297851
> 2295312
> 2297851
> 2297851
> 2297851
> 2295312
> 2295312
> 2297851
> 2297851
> 2297851
> 2297851
> 2300390
> 2297851
> 2297851
> 2297851
> 
> After change
> [root@localhost ~]# cat /sys/devices/system/cpu/cpufreq/policy*/cpuinfo_avg_freq
> 2299177
> 2298117
> 2299188
> 2297330
> 2296530
> 2298817
> 2298434
> 2298986
> 2298596
> 2299395
> 2299560
> 2298446
> 2299108
> 2299294
> 2298707
> 2298453
> 2298632
> 2299218
> 2297962
Based on your numbers the shift is on average ~0.055–0.057%.
I'm not entirely convinced it is worth it, especially that this is an average
frequency. What is the use case here if < 0,2% makes a difference ?

---
BR
Beata
> 
> Signed-off-by: Bowen Yu <yubowen8@huawei.com>
> ---
>  arch/arm64/kernel/topology.c | 20 +++++++++++---------
>  1 file changed, 11 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
> index 5d07ee85bdae..c0dbc27289ea 100644
> --- a/arch/arm64/kernel/topology.c
> +++ b/arch/arm64/kernel/topology.c
> @@ -20,6 +20,7 @@
>  #include <linux/percpu.h>
>  #include <linux/sched/isolation.h>
>  #include <linux/xarray.h>
> +#include <linux/units.h>
>  
>  #include <asm/cpu.h>
>  #include <asm/cputype.h>
> @@ -144,6 +145,8 @@ int __init parse_acpi_topology(void)
>   */
>  static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) =  1UL << (2 * SCHED_CAPACITY_SHIFT);
>  static cpumask_var_t amu_fie_cpus;
> +static DEFINE_PER_CPU(unsigned long, core_delta);
> +static DEFINE_PER_CPU(unsigned long, const_delta);
>  
>  struct amu_cntr_sample {
>  	u64		arch_const_cycles_prev;
> @@ -246,6 +249,7 @@ static void amu_scale_freq_tick(void)
>  	 * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
>  	 */
>  	scale = core_cnt - prev_core_cnt;
> +	this_cpu_write(core_delta, scale);
>  	scale *= this_cpu_read(arch_max_freq_scale);
>  	scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
>  			  const_cnt - prev_const_cnt);
> @@ -253,6 +257,7 @@ static void amu_scale_freq_tick(void)
>  	scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
>  	this_cpu_write(arch_freq_scale, (unsigned long)scale);
>  
> +	this_cpu_write(const_delta, const_cnt - prev_const_cnt);
>  	amu_sample->last_scale_update = jiffies;
>  }
>  
> @@ -288,7 +293,7 @@ int arch_freq_get_on_cpu(int cpu)
>  	unsigned int start_cpu = cpu;
>  	unsigned long last_update;
>  	unsigned int freq = 0;
> -	u64 scale;
> +	u64 delta_core_kHz;
>  
>  	if (!amu_fie_cpu_supported(cpu) || !arch_scale_freq_ref(cpu))
>  		return -EOPNOTSUPP;
> @@ -340,14 +345,11 @@ int arch_freq_get_on_cpu(int cpu)
>  			break;
>  		}
>  	}
> -	/*
> -	 * Reversed computation to the one used to determine
> -	 * the arch_freq_scale value
> -	 * (see amu_scale_freq_tick for details)
> -	 */
> -	scale = arch_scale_freq_capacity(cpu);
> -	freq = scale * arch_scale_freq_ref(cpu);
> -	freq >>= SCHED_CAPACITY_SHIFT;
> +
> +	if (check_mul_overflow(per_cpu(core_delta, cpu), arch_timer_get_cntfrq(), &delta_core_kHz))
> +		return -EINVAL;
> +
> +	freq = div_u64(delta_core_kHz, per_cpu(const_delta, cpu) * HZ_PER_KHZ);
>  	return freq;
>  }
>  
> -- 
> 2.33.0
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] arm64: topology: Use current freq in governor for idle cpus in cpuinfo_avg_freq
  2025-11-04  7:55 ` [PATCH 2/3] arm64: topology: Use current freq in governor for idle cpus in cpuinfo_avg_freq Bowen Yu
@ 2025-11-10 17:11   ` Beata Michalska
  2025-11-16  7:46     ` yubowen (H)
  0 siblings, 1 reply; 9+ messages in thread
From: Beata Michalska @ 2025-11-10 17:11 UTC (permalink / raw)
  To: Bowen Yu
  Cc: linux-arm-kernel, linux-kernel, catalin.marinas, will, ptsm,
	linuxarm, jonathan.cameron, zhanjie9, prime.zeng, wanghuiqiang,
	xuwei5, zhenglifeng1, zhangpengjie2

On Tue, Nov 04, 2025 at 03:55:43PM +0800, Bowen Yu wrote:
> The current cpuinfo_avg_freq interface returns an error when all CPUs
> under a policy are idle, which is relatively common. To address this, it
> is better to use the current frequency stored in the governor. This
> implementation is also used on x86 architecture.
Well, the very reason of having this knob was not to mix hardware and software
view of so called 'current frequency'. Note that for x86 there is a dedicated
config option that keeps the behaviour you have mentioned for backward
compatibility.


cpuinfo_avg_freq -> average freq as seen by hw
cpuinfo_cur_freq -> closes one can get to current frequency - hw feedback
scaling_cur_freq -> requested freq, so smth the software believes should be the
		    current freq (often might not be)

The following thread might be useful -> [1]

---
[1] https://lore.kernel.org/all/20240603114811.oio3uemniib5uaa2@vireshk-i7/
---

BR
Beata

> 
> Since the current frequency in the governor is the last known frequency,
> it should be more user-friendly.
> 
> This patch also removes redundant !housekeeping_cpu() check since it is
> inherently done when checking jiffies.
> 
> Original output when all cpus under a policy are idle:
> [root@localhost home]# cat /sys/devices/system/cpu/cpufreq/policy0/
> cpuinfo_avg_freq
> cat: /sys/devices/system/cpu/cpufreq/policy0/cpuinfo_avg_freq: Resource
>  temporarily unavailable
> 
> Output after changes:
> [root@localhost home]# cat /sys/devices/system/cpu/cpufreq/policy0
> /cpuinfo_avg_freq
> 1200000
> 
> Signed-off-by: Bowen Yu <yubowen8@huawei.com>
> ---
>  arch/arm64/kernel/topology.c | 11 +++++------
>  1 file changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
> index c0dbc27289ea..f1370a4a4df9 100644
> --- a/arch/arm64/kernel/topology.c
> +++ b/arch/arm64/kernel/topology.c
> @@ -333,14 +333,13 @@ int arch_freq_get_on_cpu(int cpu)
>  				if (!idle_cpu(ref_cpu))
>  					break;
>  			}
> +
> +			if (ref_cpu >= nr_cpu_ids) {
> +				cpufreq_cpu_put(policy);
> +				return cpufreq_quick_get(start_cpu);
> +			}
>  
>  			cpufreq_cpu_put(policy);
> -
> -			if (ref_cpu >= nr_cpu_ids)
> -				/* No alternative to pull info from */
> -				return -EAGAIN;
> -
> -			cpu = ref_cpu;
>  		} else {
>  			break;
>  		}
> -- 
> 2.33.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/3] arm64: topology: Remove redundant housekeeping_cpu() checks in arch_freq_get_on_cpu
  2025-11-04  7:55 ` [PATCH 3/3] arm64: topology: Remove redundant housekeeping_cpu() checks in arch_freq_get_on_cpu Bowen Yu
@ 2025-11-10 17:15   ` Beata Michalska
  0 siblings, 0 replies; 9+ messages in thread
From: Beata Michalska @ 2025-11-10 17:15 UTC (permalink / raw)
  To: Bowen Yu
  Cc: linux-arm-kernel, linux-kernel, catalin.marinas, will, ptsm,
	linuxarm, jonathan.cameron, zhanjie9, prime.zeng, wanghuiqiang,
	xuwei5, zhenglifeng1, zhangpengjie2

On Tue, Nov 04, 2025 at 03:55:44PM +0800, Bowen Yu wrote:
> This patch removes redundant !housekeeping_cpu() check since it is
> inherently done when checking jiffies.
> 
> Signed-off-by: Bowen Yu <yubowen8@huawei.com>
> ---
>  arch/arm64/kernel/topology.c | 9 +--------
>  1 file changed, 1 insertion(+), 8 deletions(-)
> 
> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
> index f1370a4a4df9..6981ef3019d3 100644
> --- a/arch/arm64/kernel/topology.c
> +++ b/arch/arm64/kernel/topology.c
> @@ -310,20 +310,13 @@ int arch_freq_get_on_cpu(int cpu)
>  		 * (and thus freq scale), if available, for given policy: this boils
>  		 * down to identifying an active cpu within the same freq domain, if any.
>  		 */
> -		if (!housekeeping_cpu(cpu, HK_TYPE_TICK) ||
> -		    time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) {
> +		if (time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) {
>  			struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
>  			int ref_cpu;
>  
>  			if (!policy)
>  				return -EINVAL;
>  
> -			if (!cpumask_intersects(policy->related_cpus,
> -						housekeeping_cpumask(HK_TYPE_TICK))) {
> -				cpufreq_cpu_put(policy);
> -				return -EOPNOTSUPP;
> -			}
Removing this means you will iterate over potentially dynamic-tick CPUs and
running the checks instead of skipping it here (plus other implications).
Is that intentional ?
---
BR
Beata
> -
>  			for_each_cpu_wrap(ref_cpu, policy->cpus, cpu + 1) {
>  				if (ref_cpu == start_cpu) {
>  					/* Prevent verifying same CPU twice */
> -- 
> 2.33.0
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] arm64: topology: Use current freq in governor for idle cpus in cpuinfo_avg_freq
  2025-11-10 17:11   ` Beata Michalska
@ 2025-11-16  7:46     ` yubowen (H)
  0 siblings, 0 replies; 9+ messages in thread
From: yubowen (H) @ 2025-11-16  7:46 UTC (permalink / raw)
  To: Beata Michalska
  Cc: linux-arm-kernel, linux-kernel, catalin.marinas, will, ptsm,
	linuxarm, jonathan.cameron, zhanjie9, prime.zeng, wanghuiqiang,
	xuwei5, zhenglifeng1, zhangpengjie2


在 2025/11/11 1:11, Beata Michalska 写道:
> On Tue, Nov 04, 2025 at 03:55:43PM +0800, Bowen Yu wrote:
>> The current cpuinfo_avg_freq interface returns an error when all CPUs
>> under a policy are idle, which is relatively common. To address this, it
>> is better to use the current frequency stored in the governor. This
>> implementation is also used on x86 architecture.
> Well, the very reason of having this knob was not to mix hardware and software
> view of so called 'current frequency'. Note that for x86 there is a dedicated
> config option that keeps the behaviour you have mentioned for backward
> compatibility.
>
>
> cpuinfo_avg_freq -> average freq as seen by hw
> cpuinfo_cur_freq -> closes one can get to current frequency - hw feedback
> scaling_cur_freq -> requested freq, so smth the software believes should be the
> 		    current freq (often might not be)
>
> The following thread might be useful -> [1]
>
> ---
> [1] https://lore.kernel.org/all/20240603114811.oio3uemniib5uaa2@vireshk-i7/
> ---
>
> BR
> Beata
Hi Beata,

Thank you for pointing out the important distinction between hardware and
software views of current frequency. The motivation behind this patch is
not to blur these boundaries, but rather to avoid returning -EAGAIN when
all CPUs under a policy are idle — a relatively common scenario,
especially in idle or low-utilization systems.

I'm relatively new to x86 architecture, so I might be missing something,
but it appears that on x86, a fallback to software-derived frequency
(e.g., via scaling_cur_freq) is always in effect in arch_freq_get_on_cpu()
when the last hardware update is too stale. However, I couldn't locate the
config option that controls this behavior — could you please clarify which
option you were referring to?

I fully understand the concern about mixing hardware and software
frequency views. That said, when no CPU is active and hardware provides
no real-time feedback, there is no available hardware source for current
frequency. In such cases, falling back to the last known frequency
(as maintained by the cpufreq core) seems like a reasonable compromise
— it provides a stale-but-plausible value instead of an error, which
improves usability for monitoring tools and user-space interfaces.

That said, I'm open to better alternatives. If there's a more appropriate
way to handle this scenario — such as deferring to the governor,
introducing a policy-specific fallback, or adjusting the cpufreq core
logic — I’d be happy to revise the patch accordingly.

Thanks again for the reference thread [1], it's very helpful.

BR
Bowen Yu
>> Since the current frequency in the governor is the last known frequency,
>> it should be more user-friendly.
>>
>> This patch also removes redundant !housekeeping_cpu() check since it is
>> inherently done when checking jiffies.
>>
>> Original output when all cpus under a policy are idle:
>> [root@localhost home]# cat /sys/devices/system/cpu/cpufreq/policy0/
>> cpuinfo_avg_freq
>> cat: /sys/devices/system/cpu/cpufreq/policy0/cpuinfo_avg_freq: Resource
>>   temporarily unavailable
>>
>> Output after changes:
>> [root@localhost home]# cat /sys/devices/system/cpu/cpufreq/policy0
>> /cpuinfo_avg_freq
>> 1200000
>>
>> Signed-off-by: Bowen Yu <yubowen8@huawei.com>
>> ---
>>   arch/arm64/kernel/topology.c | 11 +++++------
>>   1 file changed, 5 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
>> index c0dbc27289ea..f1370a4a4df9 100644
>> --- a/arch/arm64/kernel/topology.c
>> +++ b/arch/arm64/kernel/topology.c
>> @@ -333,14 +333,13 @@ int arch_freq_get_on_cpu(int cpu)
>>   				if (!idle_cpu(ref_cpu))
>>   					break;
>>   			}
>> +
>> +			if (ref_cpu >= nr_cpu_ids) {
>> +				cpufreq_cpu_put(policy);
>> +				return cpufreq_quick_get(start_cpu);
>> +			}
>>   
>>   			cpufreq_cpu_put(policy);
>> -
>> -			if (ref_cpu >= nr_cpu_ids)
>> -				/* No alternative to pull info from */
>> -				return -EAGAIN;
>> -
>> -			cpu = ref_cpu;
>>   		} else {
>>   			break;
>>   		}
>> -- 
>> 2.33.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2025-11-16  7:47 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-11-04  7:55 [PATCH 0/3] arm64: topology: Improve cpuinfo_avg_freq for ARM64 Bowen Yu
2025-11-04  7:55 ` [PATCH 1/3] arm64: topology: Improve AMU-based frequency calculation Bowen Yu
2025-11-06  4:12   ` Jie Zhan
2025-11-10 17:04   ` Beata Michalska
2025-11-04  7:55 ` [PATCH 2/3] arm64: topology: Use current freq in governor for idle cpus in cpuinfo_avg_freq Bowen Yu
2025-11-10 17:11   ` Beata Michalska
2025-11-16  7:46     ` yubowen (H)
2025-11-04  7:55 ` [PATCH 3/3] arm64: topology: Remove redundant housekeeping_cpu() checks in arch_freq_get_on_cpu Bowen Yu
2025-11-10 17:15   ` Beata Michalska

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).