* [PATCH 1/5] clocksource/drivers/arm_arch_timer: Add a static key indicating the need for a runtime workaround
2026-03-02 10:29 [PATCH 0/5] arm64: arch_timer: Improve errata handling Marc Zyngier
@ 2026-03-02 10:29 ` Marc Zyngier
2026-03-02 10:29 ` [PATCH 2/5] clocksource/drivers/arm_arch_timer: Convert counter accessors to a static key alternative Marc Zyngier
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Marc Zyngier @ 2026-03-02 10:29 UTC (permalink / raw)
To: linux-arm-kernel, linux-kernel
Cc: Catalin Marinas, Will Deacon, Mark Rutland, Thomas Gleixner,
Ben Horgan, Daniel Lezcano
In order to decide whether we can read the architected counter without
disabling preemption to look up a workaround, introduce a static key
that denotes whether a workaround is required at all.
The behaviour of this new static key is a bit unusual:
- it starts as 'true', indicating that workarounds are required
- each time a new CPU boots, it is added to a cpumask
- when all possible CPUs have booted at least once, and it has
been established that none of them require a workaround,
the key flips to 'false'
Of course, as long as not all the CPUs have booted once, you
may end up with slow accessors, but that's what you get for not
sharing your toys.
Things are made a bit complicated because static keys cannot be
flipped from a CPUHP callback. Instead, schedule deferred work
from there. Yes, this is fun.
Nothing is making use of this stuff yet, but watch this space.
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
drivers/clocksource/arm_arch_timer.c | 33 ++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 90aeff44a2764..c5b42001c9282 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -90,6 +90,8 @@ static int arch_counter_get_width(void)
/*
* Architected system timer support.
*/
+static inline bool arch_counter_broken_accessors(void);
+
static noinstr u64 raw_counter_get_cntpct_stable(void)
{
return __arch_counter_get_cntpct_stable();
@@ -555,10 +557,40 @@ static bool arch_timer_counter_has_wa(void)
{
return atomic_read(&timer_unstable_counter_workaround_in_use);
}
+
+static DEFINE_STATIC_KEY_TRUE(broken_cnt_accessors);
+
+static inline bool arch_counter_broken_accessors(void)
+{
+ return static_branch_unlikely(&broken_cnt_accessors);
+}
+
+static void enable_direct_accessors(struct work_struct *wk)
+{
+ pr_info("Enabling direct accessors\n");
+ static_branch_disable(&broken_cnt_accessors);
+}
+
+static int arch_timer_set_direct_accessors(unsigned int cpu)
+{
+ static DECLARE_WORK(enable_accessors_wk, enable_direct_accessors);
+ static cpumask_t seen_cpus;
+
+ cpumask_set_cpu(cpu, &seen_cpus);
+
+ if (arch_counter_broken_accessors() &&
+ !arch_timer_counter_has_wa() &&
+ cpumask_equal(&seen_cpus, cpu_possible_mask))
+ schedule_work(&enable_accessors_wk);
+
+ return 0;
+}
#else
#define arch_timer_check_ool_workaround(t,a) do { } while(0)
#define arch_timer_this_cpu_has_cntvct_wa() ({false;})
#define arch_timer_counter_has_wa() ({false;})
+static inline bool arch_counter_broken_accessors(void) { return false ; }
+#define arch_timer_set_direct_accessors(c) do { } while(0)
#endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */
static __always_inline irqreturn_t timer_handler(const int access,
@@ -840,6 +872,7 @@ static int arch_timer_starting_cpu(unsigned int cpu)
}
arch_counter_set_user_access();
+ arch_timer_set_direct_accessors(cpu);
return 0;
}
--
2.47.3
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 2/5] clocksource/drivers/arm_arch_timer: Convert counter accessors to a static key alternative
2026-03-02 10:29 [PATCH 0/5] arm64: arch_timer: Improve errata handling Marc Zyngier
2026-03-02 10:29 ` [PATCH 1/5] clocksource/drivers/arm_arch_timer: Add a static key indicating the need for a runtime workaround Marc Zyngier
@ 2026-03-02 10:29 ` Marc Zyngier
2026-03-02 10:29 ` [PATCH 3/5] clocksource/drivers/arm_arch_timer: Drop the arch_counter_get_cnt{p,v}ct_stable() accessors Marc Zyngier
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Marc Zyngier @ 2026-03-02 10:29 UTC (permalink / raw)
To: linux-arm-kernel, linux-kernel
Cc: Catalin Marinas, Will Deacon, Mark Rutland, Thomas Gleixner,
Ben Horgan, Daniel Lezcano
Now that we have a reliable static key to control whether our
counter accessors need to be worked around, use it in these
accessors and simplify the logic that picks which accessor to use.
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
drivers/clocksource/arm_arch_timer.c | 38 +++++++++++++++-------------
1 file changed, 20 insertions(+), 18 deletions(-)
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index c5b42001c9282..723ba698b8c46 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -92,9 +92,12 @@ static int arch_counter_get_width(void)
*/
static inline bool arch_counter_broken_accessors(void);
-static noinstr u64 raw_counter_get_cntpct_stable(void)
+static noinstr u64 raw_counter_get_cntpct(void)
{
- return __arch_counter_get_cntpct_stable();
+ if (arch_counter_broken_accessors())
+ return __arch_counter_get_cntpct_stable();
+
+ return __arch_counter_get_cntpct();
}
static notrace u64 arch_counter_get_cntpct_stable(void)
@@ -108,12 +111,18 @@ static notrace u64 arch_counter_get_cntpct_stable(void)
static noinstr u64 arch_counter_get_cntpct(void)
{
+ if (arch_counter_broken_accessors())
+ return arch_counter_get_cntpct_stable();
+
return __arch_counter_get_cntpct();
}
-static noinstr u64 raw_counter_get_cntvct_stable(void)
+static noinstr u64 raw_counter_get_cntvct(void)
{
- return __arch_counter_get_cntvct_stable();
+ if (arch_counter_broken_accessors())
+ return __arch_counter_get_cntvct_stable();
+
+ return __arch_counter_get_cntvct();
}
static notrace u64 arch_counter_get_cntvct_stable(void)
@@ -127,6 +136,9 @@ static notrace u64 arch_counter_get_cntvct_stable(void)
static noinstr u64 arch_counter_get_cntvct(void)
{
+ if (arch_counter_broken_accessors())
+ return arch_counter_get_cntvct_stable();
+
return __arch_counter_get_cntvct();
}
@@ -946,21 +958,11 @@ static void __init arch_counter_register(void)
if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) ||
arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) {
- if (arch_timer_counter_has_wa()) {
- rd = arch_counter_get_cntvct_stable;
- scr = raw_counter_get_cntvct_stable;
- } else {
- rd = arch_counter_get_cntvct;
- scr = arch_counter_get_cntvct;
- }
+ rd = arch_counter_get_cntvct;
+ scr = raw_counter_get_cntvct;
} else {
- if (arch_timer_counter_has_wa()) {
- rd = arch_counter_get_cntpct_stable;
- scr = raw_counter_get_cntpct_stable;
- } else {
- rd = arch_counter_get_cntpct;
- scr = arch_counter_get_cntpct;
- }
+ rd = arch_counter_get_cntpct;
+ scr = raw_counter_get_cntpct;
}
arch_timer_read_counter = rd;
--
2.47.3
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 3/5] clocksource/drivers/arm_arch_timer: Drop the arch_counter_get_cnt{p,v}ct_stable() accessors
2026-03-02 10:29 [PATCH 0/5] arm64: arch_timer: Improve errata handling Marc Zyngier
2026-03-02 10:29 ` [PATCH 1/5] clocksource/drivers/arm_arch_timer: Add a static key indicating the need for a runtime workaround Marc Zyngier
2026-03-02 10:29 ` [PATCH 2/5] clocksource/drivers/arm_arch_timer: Convert counter accessors to a static key alternative Marc Zyngier
@ 2026-03-02 10:29 ` Marc Zyngier
2026-03-02 10:29 ` [PATCH 4/5] clocksource/drivers/arm_arch_timer: Expose a direct accessor for the virtual counter Marc Zyngier
2026-03-02 10:29 ` [PATCH 5/5] arm64: Convert __delay_cycles() to arch_timer_read_vcounter() Marc Zyngier
4 siblings, 0 replies; 6+ messages in thread
From: Marc Zyngier @ 2026-03-02 10:29 UTC (permalink / raw)
To: linux-arm-kernel, linux-kernel
Cc: Catalin Marinas, Will Deacon, Mark Rutland, Thomas Gleixner,
Ben Horgan, Daniel Lezcano
Further simplify the counter accessors by eliminating the *_stable()
ones, which serve little purpose at this stage.
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
drivers/clocksource/arm_arch_timer.c | 38 +++++++++-------------------
1 file changed, 12 insertions(+), 26 deletions(-)
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 723ba698b8c46..ee21804d6613c 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -100,19 +100,12 @@ static noinstr u64 raw_counter_get_cntpct(void)
return __arch_counter_get_cntpct();
}
-static notrace u64 arch_counter_get_cntpct_stable(void)
+static notrace u64 arch_counter_get_cntpct(void)
{
- u64 val;
- preempt_disable_notrace();
- val = __arch_counter_get_cntpct_stable();
- preempt_enable_notrace();
- return val;
-}
-
-static noinstr u64 arch_counter_get_cntpct(void)
-{
- if (arch_counter_broken_accessors())
- return arch_counter_get_cntpct_stable();
+ if (arch_counter_broken_accessors()) {
+ guard(preempt_notrace)();
+ return __arch_counter_get_cntpct_stable();
+ }
return __arch_counter_get_cntpct();
}
@@ -125,19 +118,12 @@ static noinstr u64 raw_counter_get_cntvct(void)
return __arch_counter_get_cntvct();
}
-static notrace u64 arch_counter_get_cntvct_stable(void)
+static notrace u64 arch_counter_get_cntvct(void)
{
- u64 val;
- preempt_disable_notrace();
- val = __arch_counter_get_cntvct_stable();
- preempt_enable_notrace();
- return val;
-}
-
-static noinstr u64 arch_counter_get_cntvct(void)
-{
- if (arch_counter_broken_accessors())
- return arch_counter_get_cntvct_stable();
+ if (arch_counter_broken_accessors()) {
+ guard(preempt_notrace)();
+ return __arch_counter_get_cntvct_stable();
+ }
return __arch_counter_get_cntvct();
}
@@ -342,10 +328,10 @@ void erratum_set_next_event_generic(const int access, unsigned long evt,
ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
if (access == ARCH_TIMER_PHYS_ACCESS) {
- cval = evt + arch_counter_get_cntpct_stable();
+ cval = evt + arch_counter_get_cntpct();
write_sysreg(cval, cntp_cval_el0);
} else {
- cval = evt + arch_counter_get_cntvct_stable();
+ cval = evt + arch_counter_get_cntvct();
write_sysreg(cval, cntv_cval_el0);
}
--
2.47.3
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 4/5] clocksource/drivers/arm_arch_timer: Expose a direct accessor for the virtual counter
2026-03-02 10:29 [PATCH 0/5] arm64: arch_timer: Improve errata handling Marc Zyngier
` (2 preceding siblings ...)
2026-03-02 10:29 ` [PATCH 3/5] clocksource/drivers/arm_arch_timer: Drop the arch_counter_get_cnt{p,v}ct_stable() accessors Marc Zyngier
@ 2026-03-02 10:29 ` Marc Zyngier
2026-03-02 10:29 ` [PATCH 5/5] arm64: Convert __delay_cycles() to arch_timer_read_vcounter() Marc Zyngier
4 siblings, 0 replies; 6+ messages in thread
From: Marc Zyngier @ 2026-03-02 10:29 UTC (permalink / raw)
To: linux-arm-kernel, linux-kernel
Cc: Catalin Marinas, Will Deacon, Mark Rutland, Thomas Gleixner,
Ben Horgan, Daniel Lezcano
We allow access to the architected counter via arch_timer_read_counter().
However, this accessor can either be the virtual or the physical
view of the counter, depending on how the kernel has been booted.
At the same time, we have some architectural features (such as WFIT,
WFET) that rely on the virtual counter, and nothing else.
If implementations were perfect, we'd rely on reading CNTVCT_EL0,
and be done with it. However, we have a bunch of broken implementations
in the wild, which rely on preemption being disabled and other
costly workarounds.
In order to provide decent performance on non-broken HW while still
supporting the legacy horrors, expose arch_timer_read_vcounter() as
a new helper that hides this complexity. Obviously, this is simply
a global alias of arch_counter_get_cntvct().
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
drivers/clocksource/arm_arch_timer.c | 2 ++
include/clocksource/arm_arch_timer.h | 1 +
2 files changed, 3 insertions(+)
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index ee21804d6613c..6fcd9afad38c2 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -137,6 +137,8 @@ static notrace u64 arch_counter_get_cntvct(void)
u64 (*arch_timer_read_counter)(void) __ro_after_init = arch_counter_get_cntvct;
EXPORT_SYMBOL_GPL(arch_timer_read_counter);
+u64 arch_timer_read_vcounter(void) __attribute__((alias("arch_counter_get_cntvct")));
+
static u64 arch_counter_read(struct clocksource *cs)
{
return arch_timer_read_counter();
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index 2eda895f19f54..587314e584839 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -88,6 +88,7 @@ struct arch_timer_mem {
extern u32 arch_timer_get_rate(void);
extern u64 (*arch_timer_read_counter)(void);
+extern u64 arch_timer_read_vcounter(void);
extern struct arch_timer_kvm_info *arch_timer_get_kvm_info(void);
extern bool arch_timer_evtstrm_available(void);
--
2.47.3
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 5/5] arm64: Convert __delay_cycles() to arch_timer_read_vcounter()
2026-03-02 10:29 [PATCH 0/5] arm64: arch_timer: Improve errata handling Marc Zyngier
` (3 preceding siblings ...)
2026-03-02 10:29 ` [PATCH 4/5] clocksource/drivers/arm_arch_timer: Expose a direct accessor for the virtual counter Marc Zyngier
@ 2026-03-02 10:29 ` Marc Zyngier
4 siblings, 0 replies; 6+ messages in thread
From: Marc Zyngier @ 2026-03-02 10:29 UTC (permalink / raw)
To: linux-arm-kernel, linux-kernel
Cc: Catalin Marinas, Will Deacon, Mark Rutland, Thomas Gleixner,
Ben Horgan, Daniel Lezcano
Relax the need for disabling preemption in __delay_cycles() by
using arch_timer_read_vcounter(), which will disable preemption
only when this is actually required.
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
arch/arm64/lib/delay.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c
index e278e060e78a9..a667df920697d 100644
--- a/arch/arm64/lib/delay.c
+++ b/arch/arm64/lib/delay.c
@@ -32,10 +32,9 @@ static inline unsigned long xloops_to_cycles(unsigned long xloops)
* Note that userspace cannot change the offset behind our back either,
* as the vcpu mutex is held as long as KVM_RUN is in progress.
*/
-static cycles_t notrace __delay_cycles(void)
+static cycles_t __delay_cycles(void)
{
- guard(preempt_notrace)();
- return __arch_counter_get_cntvct_stable();
+ return arch_timer_read_vcounter();
}
void __delay(unsigned long cycles)
--
2.47.3
^ permalink raw reply related [flat|nested] 6+ messages in thread