public inbox for stable@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] clocksource/drivers/arm_arch_timer: Update sched_clock when non-boot CPUs need counter workaround
@ 2023-01-13 11:16 Marc Zyngier
  2023-01-13 12:45 ` Mark Rutland
  2023-03-28 13:59 ` Will Deacon
  0 siblings, 2 replies; 6+ messages in thread
From: Marc Zyngier @ 2023-01-13 11:16 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel
  Cc: Mark Rutland, Will Deacon, Daniel Lezcano, Thomas Gleixner,
	stable, Yogesh Lal

When booting on a CPU that has an erratum on the counter read,
we use the arch_counter_get_cnt{v,p}ct_stable() backend which
applies the workaround.

However, we don't do the same thing when an affected CPU is
a secondary CPU, and we're stuck with the standard sched_clock()
backend that knows nothing about the workaround.

Fix it by always indirecting sched_clock(), making arch_timer_read_counter
a function instead of a function pointer. In turn, we update the
pointer (now private to the driver code) when detecting a new
workaround.

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Daniel Lezcano <daniel.lezcano@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Reported-by: Yogesh Lal <quic_ylal@quicinc.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Fixes: 0ea415390cd3 ("clocksource/arm_arch_timer: Use arch_timer_read_counter to access stable counters")
Link: https://lore.kernel.org/r/ca4679a0-7f29-65f4-54b9-c575248192f1@quicinc.com
---
 drivers/clocksource/arm_arch_timer.c | 56 +++++++++++++++++-----------
 include/clocksource/arm_arch_timer.h |  2 +-
 2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index e09d4427f604..5272db86bef5 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -217,7 +217,12 @@ static notrace u64 arch_counter_get_cntvct(void)
  * to exist on arm64. arm doesn't use this before DT is probed so even
  * if we don't have the cp15 accessors we won't have a problem.
  */
-u64 (*arch_timer_read_counter)(void) __ro_after_init = arch_counter_get_cntvct;
+static u64 (*__arch_timer_read_counter)(void) __ro_after_init = arch_counter_get_cntvct;
+
+u64 arch_timer_read_counter(void)
+{
+	return __arch_timer_read_counter();
+}
 EXPORT_SYMBOL_GPL(arch_timer_read_counter);
 
 static u64 arch_counter_read(struct clocksource *cs)
@@ -230,6 +235,28 @@ static u64 arch_counter_read_cc(const struct cyclecounter *cc)
 	return arch_timer_read_counter();
 }
 
+static bool arch_timer_counter_has_wa(void);
+
+static u64 (*arch_counter_get_read_fn(void))(void)
+{
+	u64 (*rd)(void);
+
+	if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) ||
+	    arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) {
+		if (arch_timer_counter_has_wa())
+			rd = arch_counter_get_cntvct_stable;
+		else
+			rd = arch_counter_get_cntvct;
+	} else {
+		if (arch_timer_counter_has_wa())
+			rd = arch_counter_get_cntpct_stable;
+		else
+			rd = arch_counter_get_cntpct;
+	}
+
+	return rd;
+}
+
 static struct clocksource clocksource_counter = {
 	.name	= "arch_sys_counter",
 	.id	= CSID_ARM_ARCH_COUNTER,
@@ -571,8 +598,10 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa
 			per_cpu(timer_unstable_counter_workaround, i) = wa;
 	}
 
-	if (wa->read_cntvct_el0 || wa->read_cntpct_el0)
-		atomic_set(&timer_unstable_counter_workaround_in_use, 1);
+	if (wa->read_cntvct_el0 || wa->read_cntpct_el0) {
+		__arch_timer_read_counter = arch_counter_get_read_fn();
+		atomic_set_release(&timer_unstable_counter_workaround_in_use, 1);
+	}
 
 	/*
 	 * Don't use the vdso fastpath if errata require using the
@@ -641,7 +670,7 @@ static bool arch_timer_counter_has_wa(void)
 #else
 #define arch_timer_check_ool_workaround(t,a)		do { } while(0)
 #define arch_timer_this_cpu_has_cntvct_wa()		({false;})
-#define arch_timer_counter_has_wa()			({false;})
+static bool arch_timer_counter_has_wa(void)		{ return false; }
 #endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */
 
 static __always_inline irqreturn_t timer_handler(const int access,
@@ -1079,25 +1108,10 @@ static void __init arch_counter_register(unsigned type)
 
 	/* Register the CP15 based counter if we have one */
 	if (type & ARCH_TIMER_TYPE_CP15) {
-		u64 (*rd)(void);
-
-		if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) ||
-		    arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) {
-			if (arch_timer_counter_has_wa())
-				rd = arch_counter_get_cntvct_stable;
-			else
-				rd = arch_counter_get_cntvct;
-		} else {
-			if (arch_timer_counter_has_wa())
-				rd = arch_counter_get_cntpct_stable;
-			else
-				rd = arch_counter_get_cntpct;
-		}
-
-		arch_timer_read_counter = rd;
+		__arch_timer_read_counter = arch_counter_get_read_fn();
 		clocksource_counter.vdso_clock_mode = vdso_default;
 	} else {
-		arch_timer_read_counter = arch_counter_get_cntvct_mem;
+		__arch_timer_read_counter = arch_counter_get_cntvct_mem;
 	}
 
 	width = arch_counter_get_width();
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index 057c8964aefb..ec331b65ba23 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -85,7 +85,7 @@ struct arch_timer_mem {
 #ifdef CONFIG_ARM_ARCH_TIMER
 
 extern u32 arch_timer_get_rate(void);
-extern u64 (*arch_timer_read_counter)(void);
+extern u64 arch_timer_read_counter(void);
 extern struct arch_timer_kvm_info *arch_timer_get_kvm_info(void);
 extern bool arch_timer_evtstrm_available(void);
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread
* Re: [PATCH] clocksource/drivers/arm_arch_timer: Update sched_clock when non-boot CPUs need counter workaround
@ 2023-01-19  7:59 刘琦
  2023-01-19  9:52 ` Marc Zyngier
  0 siblings, 1 reply; 6+ messages in thread
From: 刘琦 @ 2023-01-19  7:59 UTC (permalink / raw)
  To: maz
  Cc: daniel.lezcano, linux-arm-kernel, linux-kernel, mark.rutland,
	quic_ylal, stable, tglx, will, 王法杰,
	liurenwang, zhanghui5, liangke1

[Test Report]
Result: Test Pass

A total of two rounds of pending testing
     a. The first round of hanging test
          Number of machines: 200
          Hanging test duration: 48h
          Hanging test results: no walt crash problem
     b. The second round of hanging test
          Number of machines: 200
          Hanging test duration: 72h
          Hanging test results: no walt crash problem

Tested-by: wangfajie <wangfajie@longcheer.com>
Tested-by: liurenwang <liurenwang@longcheer.com>
Tested-by: zhanghui <zhanghui5@longcheer.com>
Tested-by: liangke <liangke1@xiaomi.com>

^ permalink raw reply	[flat|nested] 6+ messages in thread
* Re: [PATCH] clocksource/drivers/arm_arch_timer: Update sched_clock when non-boot CPUs need counter workaround
@ 2023-01-19 13:28 王法杰
  0 siblings, 0 replies; 6+ messages in thread
From: 王法杰 @ 2023-01-19 13:28 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: daniel.lezcano@kernel.org, linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, mark.rutland@arm.com,
	quic_ylal@quicinc.com, stable@vger.kernel.org, will@kernel.org,
	刘仁旺, 张辉, liangke1@xiaomi.com,
	沈兵, 刘琦

Hi Marc Zyngier

We found the APPS Crash issue on MTBF test.
Brief crash information, APPS Crash - Kernel BUG at /sched/walt/sched_avg.c:281! in sched_update_nr_prod flow

[Test equipment]
1. Number of phone:  200 pcs
2. CPU info of phone:  CPU architecture with Quad Cortex-A73 and Quad Cortex-A53

[Preset conditions]
1. Android 13 with kernel 5.15
2. Install application of top 20
3. Connected to Wi-Fi
4. Connect the adapter to charge the phone
5. Test duration 48 hours

[Expected results]
0 crash happened.

[Test results without [PATCH] clocksource/drivers/arm_arch_timer: Update sched_clock when non-boot CPUs need counter workaround ]
1. First round of test, found 3 phones with APPS Crash issue on /sched/walt/sched_avg.c:281! in sched_update_nr_prod flow
2. Second round of test, found 7 phones with APPS Crash issue on /sched/walt/sched_avg.c:281! in sched_update_nr_prod flow

[Test results with [PATCH] clocksource/drivers/arm_arch_timer: Update sched_clock when non-boot CPUs need counter workaround ] 
1. First round of test, 0 crash happened.
2. Second round of test, 0 crash happened.

Tested-by: wangfajie <wangfajie@longcheer.com>
Tested-by: liurenwang <liurenwang@longcheer.com>
Tested-by: zhanghui <zhanghui5@longcheer.com>
Tested-by: liangke <liangke1@xiaomi.com>

So we think the PATCH is working and it can fix the APPS crash issue. Many thanks for your time.

Best regards!
Wangfajie


-----邮件原件-----
发件人: Marc Zyngier [mailto:maz@kernel.org] 
发送时间: 2023年1月19日 17:52
收件人: 刘琦 <liuqi405@icloud.com>
抄送: daniel.lezcano@kernel.org; linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org; mark.rutland@arm.com; quic_ylal@quicinc.com; stable@vger.kernel.org; will@kernel.org; 王法杰 <wangfajie@longcheer.com>; 刘仁旺 <liurenwang@longcheer.com>; 张辉 <zhanghui5@longcheer.com>; liangke1@xiaomi.com
主题: Re: [PATCH] clocksource/drivers/arm_arch_timer: Update sched_clock when non-boot CPUs need counter workaround

On 2023-01-19 07:59, 刘琦 wrote:
> [Test Report]
> Result: Test Pass
> 
> A total of two rounds of pending testing
>      a. The first round of hanging test
>           Number of machines: 200
>           Hanging test duration: 48h
>           Hanging test results: no walt crash problem
>      b. The second round of hanging test
>           Number of machines: 200
>           Hanging test duration: 72h
>           Hanging test results: no walt crash problem
> 
> Tested-by: wangfajie <wangfajie@longcheer.com>
> Tested-by: liurenwang <liurenwang@longcheer.com>
> Tested-by: zhanghui <zhanghui5@longcheer.com>
> Tested-by: liangke <liangke1@xiaomi.com>

Thanks for this.

The only issue here is that you don't explain what you tested, nor how you tested it.

It is also a patch that has known defects (you just have to read the thread for the details)... This makes this testing, no matter how thorough it is, rather ineffective.

         M.
--
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-03-28 14:00 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2023-01-13 11:16 [PATCH] clocksource/drivers/arm_arch_timer: Update sched_clock when non-boot CPUs need counter workaround Marc Zyngier
2023-01-13 12:45 ` Mark Rutland
2023-03-28 13:59 ` Will Deacon
  -- strict thread matches above, loose matches on Subject: below --
2023-01-19  7:59 刘琦
2023-01-19  9:52 ` Marc Zyngier
2023-01-19 13:28 王法杰

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox