Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v11 04/14] arm64: support WFET in smp_cond_load_relaxed_timeout()
From: Ankur Arora @ 2026-04-08 12:25 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-arm-kernel, linux-pm, bpf
  Cc: arnd, catalin.marinas, will, peterz, akpm, mark.rutland, harisokn,
	cl, ast, rafael, daniel.lezcano, memxor, zhenglifeng1, xueshuai,
	rdunlap, david.laight.linux, joao.m.martins, boris.ostrovsky,
	konrad.wilk, ashok.bhat, Ankur Arora
In-Reply-To: <20260408122538.3610871-1-ankur.a.arora@oracle.com>

To handle WFET use __cmpwait_timeout() similarly to __cmpwait(). These
call out to the respective __cmpwait_case_timeout_##sz(),
__cmpwait_case_##sz() functions.

Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 arch/arm64/include/asm/barrier.h |  8 +++--
 arch/arm64/include/asm/cmpxchg.h | 62 +++++++++++++++++++++++++-------
 2 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 6190e178db51..fbd71cd4ef4e 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -224,8 +224,8 @@ do {									\
 extern bool arch_timer_evtstrm_available(void);
 
 /*
- * In the common case, cpu_poll_relax() sits waiting in __cmpwait_relaxed()
- * for the ptr value to change.
+ * In the common case, cpu_poll_relax() sits waiting in __cmpwait_relaxed()/
+ * __cmpwait_relaxed_timeout() for the ptr value to change.
  *
  * Since this period is reasonably long, choose SMP_TIMEOUT_POLL_COUNT
  * to be 1, so smp_cond_load_{relaxed,acquire}_timeout() does a
@@ -234,7 +234,9 @@ extern bool arch_timer_evtstrm_available(void);
 #define SMP_TIMEOUT_POLL_COUNT	1
 
 #define cpu_poll_relax(ptr, val, timeout_ns) do {			\
-	if (arch_timer_evtstrm_available())				\
+	if (alternative_has_cap_unlikely(ARM64_HAS_WFXT))		\
+		__cmpwait_relaxed_timeout(ptr, val, timeout_ns);	\
+	else if (arch_timer_evtstrm_available())			\
 		__cmpwait_relaxed(ptr, val);				\
 	else								\
 		cpu_relax();						\
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 6cf3cd6873f5..9e4cdc9e41d1 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -12,6 +12,7 @@
 
 #include <asm/barrier.h>
 #include <asm/lse.h>
+#include <asm/delay-const.h>
 
 /*
  * We need separate acquire parameters for ll/sc and lse, since the full
@@ -212,7 +213,8 @@ __CMPXCHG_GEN(_mb)
 
 #define __CMPWAIT_CASE(w, sfx, sz)					\
 static inline void __cmpwait_case_##sz(volatile void *ptr,		\
-				       unsigned long val)		\
+				       unsigned long val,		\
+				       u64 __maybe_unused timeout_ns)	\
 {									\
 	unsigned long tmp;						\
 									\
@@ -235,20 +237,52 @@ __CMPWAIT_CASE( ,  , 64);
 
 #undef __CMPWAIT_CASE
 
-#define __CMPWAIT_GEN(sfx)						\
-static __always_inline void __cmpwait##sfx(volatile void *ptr,		\
-				  unsigned long val,			\
-				  int size)				\
+#define __CMPWAIT_TIMEOUT_CASE(w, sfx, sz)				\
+static inline void __cmpwait_case_timeout_##sz(volatile void *ptr,	\
+					       unsigned long val,	\
+					       u64 timeout_ns)		\
+{									\
+	unsigned long tmp;						\
+	u64 ecycles = __delay_cycles() +				\
+			NSECS_TO_CYCLES(timeout_ns);			\
+	asm volatile(							\
+	"	sevl\n"							\
+	"	wfe\n"							\
+	"	ldxr" #sfx "\t%" #w "[tmp], %[v]\n"			\
+	"	eor	%" #w "[tmp], %" #w "[tmp], %" #w "[val]\n"	\
+	"	cbnz	%" #w "[tmp], 2f\n"				\
+	"	msr s0_3_c1_c0_0, %[ecycles]\n"				\
+	"2:"								\
+	: [tmp] "=&r" (tmp), [v] "+Q" (*(u##sz *)ptr)			\
+	: [val] "r" (val), [ecycles] "r" (ecycles));			\
+}
+
+__CMPWAIT_TIMEOUT_CASE(w, b, 8);
+__CMPWAIT_TIMEOUT_CASE(w, h, 16);
+__CMPWAIT_TIMEOUT_CASE(w,  , 32);
+__CMPWAIT_TIMEOUT_CASE( ,  , 64);
+
+#undef __CMPWAIT_TIMEOUT_CASE
+
+#define __CMPWAIT_GEN(timeout, sfx)					\
+static __always_inline void __cmpwait##timeout##sfx(volatile void *ptr,	\
+						    unsigned long val,	\
+						    u64 timeout_ns,	\
+						    int size)		\
 {									\
 	switch (size) {							\
 	case 1:								\
-		return __cmpwait_case##sfx##_8(ptr, (u8)val);		\
+		return __cmpwait_case##timeout##sfx##_8(ptr, (u8)val,	\
+							timeout_ns);	\
 	case 2:								\
-		return __cmpwait_case##sfx##_16(ptr, (u16)val);		\
+		return __cmpwait_case##timeout##sfx##_16(ptr, (u16)val,	\
+							 timeout_ns);	\
 	case 4:								\
-		return __cmpwait_case##sfx##_32(ptr, val);		\
+		return __cmpwait_case##timeout##sfx##_32(ptr, val,	\
+							 timeout_ns);	\
 	case 8:								\
-		return __cmpwait_case##sfx##_64(ptr, val);		\
+		return __cmpwait_case##timeout##sfx##_64(ptr, val,	\
+							 timeout_ns);	\
 	default:							\
 		BUILD_BUG();						\
 	}								\
@@ -256,11 +290,15 @@ static __always_inline void __cmpwait##sfx(volatile void *ptr,		\
 	unreachable();							\
 }
 
-__CMPWAIT_GEN()
+__CMPWAIT_GEN(        , )
+__CMPWAIT_GEN(_timeout, )
 
 #undef __CMPWAIT_GEN
 
-#define __cmpwait_relaxed(ptr, val) \
-	__cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
+#define __cmpwait_relaxed_timeout(ptr, val, timeout_ns)			\
+	__cmpwait_timeout((ptr), (unsigned long)(val), timeout_ns, sizeof(*(ptr)))
+
+#define __cmpwait_relaxed(ptr, val)					\
+	__cmpwait((ptr), (unsigned long)(val), 0, sizeof(*(ptr)))
 
 #endif	/* __ASM_CMPXCHG_H */
-- 
2.31.1



^ permalink raw reply related

* [PATCH v11 06/14] asm-generic: barrier: Add smp_cond_load_acquire_timeout()
From: Ankur Arora @ 2026-04-08 12:25 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-arm-kernel, linux-pm, bpf
  Cc: arnd, catalin.marinas, will, peterz, akpm, mark.rutland, harisokn,
	cl, ast, rafael, daniel.lezcano, memxor, zhenglifeng1, xueshuai,
	rdunlap, david.laight.linux, joao.m.martins, boris.ostrovsky,
	konrad.wilk, ashok.bhat, Ankur Arora
In-Reply-To: <20260408122538.3610871-1-ankur.a.arora@oracle.com>

Add the acquire variant of smp_cond_load_relaxed_timeout(). This
reuses the relaxed variant, with additional LOAD->LOAD ordering.

Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-arch@vger.kernel.org
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Haris Okanovic <harisokn@amazon.com>
Tested-by: Haris Okanovic <harisokn@amazon.com>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 include/asm-generic/barrier.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index e5a6a1c04649..68d9e7108f4a 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -342,6 +342,32 @@ do {									\
 })
 #endif
 
+/**
+ * smp_cond_load_acquire_timeout() - (Spin) wait for cond with ACQUIRE ordering
+ * until a timeout expires.
+ * @ptr: pointer to the variable to wait on.
+ * @cond_expr: boolean expression to wait for.
+ * @time_expr_ns: monotonic expression that evaluates to time in ns or,
+ *  on failure, returns a negative value.
+ * @timeout_ns: timeout value in ns
+ * (Both of the above are assumed to be compatible with s64.)
+ *
+ * Equivalent to using smp_cond_load_acquire() on the condition variable with
+ * a timeout.
+ */
+#ifndef smp_cond_load_acquire_timeout
+#define smp_cond_load_acquire_timeout(ptr, cond_expr,			\
+				      time_expr_ns, timeout_ns)		\
+({									\
+	__unqual_scalar_typeof(*ptr) _val;				\
+	_val = smp_cond_load_relaxed_timeout(ptr, cond_expr,		\
+					     time_expr_ns,		\
+					     timeout_ns);		\
+	smp_acquire__after_ctrl_dep();					\
+	(typeof(*ptr))_val;						\
+})
+#endif
+
 /*
  * pmem_wmb() ensures that all stores for which the modification
  * are written to persistent storage by preceding instructions have
-- 
2.31.1



^ permalink raw reply related

* [PATCH v11 07/14] atomic: Add atomic_cond_read_*_timeout()
From: Ankur Arora @ 2026-04-08 12:25 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-arm-kernel, linux-pm, bpf
  Cc: arnd, catalin.marinas, will, peterz, akpm, mark.rutland, harisokn,
	cl, ast, rafael, daniel.lezcano, memxor, zhenglifeng1, xueshuai,
	rdunlap, david.laight.linux, joao.m.martins, boris.ostrovsky,
	konrad.wilk, ashok.bhat, Ankur Arora, Boqun Feng
In-Reply-To: <20260408122538.3610871-1-ankur.a.arora@oracle.com>

Add atomic load wrappers, atomic_cond_read_*_timeout() and
atomic64_cond_read_*_timeout() for the cond-load timeout interfaces.

Also add a short description for the atomic_cond_read_{relaxed,acquire}(),
and the atomic_cond_read_{relaxed,acquire}_timeout() interfaces.

Cc: Will Deacon <will@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 Documentation/atomic_t.txt | 14 +++++++++-----
 include/linux/atomic.h     | 10 ++++++++++
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt
index bee3b1bca9a7..0e53f6ccb558 100644
--- a/Documentation/atomic_t.txt
+++ b/Documentation/atomic_t.txt
@@ -16,6 +16,10 @@ Non-RMW ops:
   atomic_read(), atomic_set()
   atomic_read_acquire(), atomic_set_release()
 
+Non-RMW, non-atomic_t ops:
+
+  atomic_cond_read_{relaxed,acquire}()
+  atomic_cond_read_{relaxed,acquire}_timeout()
 
 RMW atomic operations:
 
@@ -79,11 +83,11 @@ SEMANTICS
 
 Non-RMW ops:
 
-The non-RMW ops are (typically) regular LOADs and STOREs and are canonically
-implemented using READ_ONCE(), WRITE_ONCE(), smp_load_acquire() and
-smp_store_release() respectively. Therefore, if you find yourself only using
-the Non-RMW operations of atomic_t, you do not in fact need atomic_t at all
-and are doing it wrong.
+The non-RMW ops are (typically) regular, or conditional LOADs and STOREs and
+are canonically implemented using READ_ONCE(), WRITE_ONCE(),
+smp_load_acquire() and smp_store_release() respectively. Therefore, if you
+find yourself only using the Non-RMW operations of atomic_t, you do not in
+fact need atomic_t at all and are doing it wrong.
 
 A note for the implementation of atomic_set{}() is that it must not break the
 atomicity of the RMW ops. That is:
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 8dd57c3a99e9..5bcb86e07784 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -31,6 +31,16 @@
 #define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
 #define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c))
 
+#define atomic_cond_read_acquire_timeout(v, c, e, t) \
+	smp_cond_load_acquire_timeout(&(v)->counter, (c), (e), (t))
+#define atomic_cond_read_relaxed_timeout(v, c, e, t) \
+	smp_cond_load_relaxed_timeout(&(v)->counter, (c), (e), (t))
+
+#define atomic64_cond_read_acquire_timeout(v, c, e, t) \
+	smp_cond_load_acquire_timeout(&(v)->counter, (c), (e), (t))
+#define atomic64_cond_read_relaxed_timeout(v, c, e, t) \
+	smp_cond_load_relaxed_timeout(&(v)->counter, (c), (e), (t))
+
 /*
  * The idea here is to build acquire/release variants by adding explicit
  * barriers on top of the relaxed variant. In the case where the relaxed
-- 
2.31.1



^ permalink raw reply related

* [PATCH v11 05/14] arm64: rqspinlock: Remove private copy of smp_cond_load_acquire_timewait()
From: Ankur Arora @ 2026-04-08 12:25 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-arm-kernel, linux-pm, bpf
  Cc: arnd, catalin.marinas, will, peterz, akpm, mark.rutland, harisokn,
	cl, ast, rafael, daniel.lezcano, memxor, zhenglifeng1, xueshuai,
	rdunlap, david.laight.linux, joao.m.martins, boris.ostrovsky,
	konrad.wilk, ashok.bhat, Ankur Arora
In-Reply-To: <20260408122538.3610871-1-ankur.a.arora@oracle.com>

In preparation for defining smp_cond_load_acquire_timeout(), remove
the private copy. Lacking this, the rqspinlock code falls back to using
smp_cond_load_acquire().

Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: bpf@vger.kernel.org
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Haris Okanovic <harisokn@amazon.com>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 arch/arm64/include/asm/rqspinlock.h | 85 -----------------------------
 1 file changed, 85 deletions(-)

diff --git a/arch/arm64/include/asm/rqspinlock.h b/arch/arm64/include/asm/rqspinlock.h
index 9ea0a74e5892..a385603436e9 100644
--- a/arch/arm64/include/asm/rqspinlock.h
+++ b/arch/arm64/include/asm/rqspinlock.h
@@ -3,91 +3,6 @@
 #define _ASM_RQSPINLOCK_H
 
 #include <asm/barrier.h>
-
-/*
- * Hardcode res_smp_cond_load_acquire implementations for arm64 to a custom
- * version based on [0]. In rqspinlock code, our conditional expression involves
- * checking the value _and_ additionally a timeout. However, on arm64, the
- * WFE-based implementation may never spin again if no stores occur to the
- * locked byte in the lock word. As such, we may be stuck forever if
- * event-stream based unblocking is not available on the platform for WFE spin
- * loops (arch_timer_evtstrm_available).
- *
- * Once support for smp_cond_load_acquire_timewait [0] lands, we can drop this
- * copy-paste.
- *
- * While we rely on the implementation to amortize the cost of sampling
- * cond_expr for us, it will not happen when event stream support is
- * unavailable, time_expr check is amortized. This is not the common case, and
- * it would be difficult to fit our logic in the time_expr_ns >= time_limit_ns
- * comparison, hence just let it be. In case of event-stream, the loop is woken
- * up at microsecond granularity.
- *
- * [0]: https://lore.kernel.org/lkml/20250203214911.898276-1-ankur.a.arora@oracle.com
- */
-
-#ifndef smp_cond_load_acquire_timewait
-
-#define smp_cond_time_check_count	200
-
-#define __smp_cond_load_relaxed_spinwait(ptr, cond_expr, time_expr_ns,	\
-					 time_limit_ns) ({		\
-	typeof(ptr) __PTR = (ptr);					\
-	__unqual_scalar_typeof(*ptr) VAL;				\
-	unsigned int __count = 0;					\
-	for (;;) {							\
-		VAL = READ_ONCE(*__PTR);				\
-		if (cond_expr)						\
-			break;						\
-		cpu_relax();						\
-		if (__count++ < smp_cond_time_check_count)		\
-			continue;					\
-		if ((time_expr_ns) >= (time_limit_ns))			\
-			break;						\
-		__count = 0;						\
-	}								\
-	(typeof(*ptr))VAL;						\
-})
-
-#define __smp_cond_load_acquire_timewait(ptr, cond_expr,		\
-					 time_expr_ns, time_limit_ns)	\
-({									\
-	typeof(ptr) __PTR = (ptr);					\
-	__unqual_scalar_typeof(*ptr) VAL;				\
-	for (;;) {							\
-		VAL = smp_load_acquire(__PTR);				\
-		if (cond_expr)						\
-			break;						\
-		__cmpwait_relaxed(__PTR, VAL);				\
-		if ((time_expr_ns) >= (time_limit_ns))			\
-			break;						\
-	}								\
-	(typeof(*ptr))VAL;						\
-})
-
-#define smp_cond_load_acquire_timewait(ptr, cond_expr,			\
-				      time_expr_ns, time_limit_ns)	\
-({									\
-	__unqual_scalar_typeof(*ptr) _val;				\
-	int __wfe = arch_timer_evtstrm_available();			\
-									\
-	if (likely(__wfe)) {						\
-		_val = __smp_cond_load_acquire_timewait(ptr, cond_expr,	\
-							time_expr_ns,	\
-							time_limit_ns);	\
-	} else {							\
-		_val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr,	\
-							time_expr_ns,	\
-							time_limit_ns);	\
-		smp_acquire__after_ctrl_dep();				\
-	}								\
-	(typeof(*ptr))_val;						\
-})
-
-#endif
-
-#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)
-
 #include <asm-generic/rqspinlock.h>
 
 #endif /* _ASM_RQSPINLOCK_H */
-- 
2.31.1



^ permalink raw reply related

* [PATCH v11 08/14] locking/atomic: scripts: build atomic_long_cond_read_*_timeout()
From: Ankur Arora @ 2026-04-08 12:25 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-arm-kernel, linux-pm, bpf
  Cc: arnd, catalin.marinas, will, peterz, akpm, mark.rutland, harisokn,
	cl, ast, rafael, daniel.lezcano, memxor, zhenglifeng1, xueshuai,
	rdunlap, david.laight.linux, joao.m.martins, boris.ostrovsky,
	konrad.wilk, ashok.bhat, Ankur Arora, Boqun Feng
In-Reply-To: <20260408122538.3610871-1-ankur.a.arora@oracle.com>

Add the atomic long wrappers for the cond-load timeout interfaces.

Cc: Will Deacon <will@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 include/linux/atomic/atomic-long.h | 18 +++++++++++-------
 scripts/atomic/gen-atomic-long.sh  | 16 ++++++++++------
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/include/linux/atomic/atomic-long.h b/include/linux/atomic/atomic-long.h
index 6a4e47d2db35..553b6b0e0258 100644
--- a/include/linux/atomic/atomic-long.h
+++ b/include/linux/atomic/atomic-long.h
@@ -11,14 +11,18 @@
 
 #ifdef CONFIG_64BIT
 typedef atomic64_t atomic_long_t;
-#define ATOMIC_LONG_INIT(i)		ATOMIC64_INIT(i)
-#define atomic_long_cond_read_acquire	atomic64_cond_read_acquire
-#define atomic_long_cond_read_relaxed	atomic64_cond_read_relaxed
+#define ATOMIC_LONG_INIT(i)			ATOMIC64_INIT(i)
+#define atomic_long_cond_read_acquire		atomic64_cond_read_acquire
+#define atomic_long_cond_read_relaxed		atomic64_cond_read_relaxed
+#define atomic_long_cond_read_acquire_timeout	atomic64_cond_read_acquire_timeout
+#define atomic_long_cond_read_relaxed_timeout	atomic64_cond_read_relaxed_timeout
 #else
 typedef atomic_t atomic_long_t;
-#define ATOMIC_LONG_INIT(i)		ATOMIC_INIT(i)
-#define atomic_long_cond_read_acquire	atomic_cond_read_acquire
-#define atomic_long_cond_read_relaxed	atomic_cond_read_relaxed
+#define ATOMIC_LONG_INIT(i)			ATOMIC_INIT(i)
+#define atomic_long_cond_read_acquire		atomic_cond_read_acquire
+#define atomic_long_cond_read_relaxed		atomic_cond_read_relaxed
+#define atomic_long_cond_read_acquire_timeout	atomic_cond_read_acquire_timeout
+#define atomic_long_cond_read_relaxed_timeout	atomic_cond_read_relaxed_timeout
 #endif
 
 /**
@@ -1809,4 +1813,4 @@ raw_atomic_long_dec_if_positive(atomic_long_t *v)
 }
 
 #endif /* _LINUX_ATOMIC_LONG_H */
-// 4b882bf19018602c10816c52f8b4ae280adc887b
+// 79c1f4acb5774376ceed559843d5d9ed1348df99
diff --git a/scripts/atomic/gen-atomic-long.sh b/scripts/atomic/gen-atomic-long.sh
index 9826be3ba986..874643dc74bd 100755
--- a/scripts/atomic/gen-atomic-long.sh
+++ b/scripts/atomic/gen-atomic-long.sh
@@ -79,14 +79,18 @@ cat << EOF
 
 #ifdef CONFIG_64BIT
 typedef atomic64_t atomic_long_t;
-#define ATOMIC_LONG_INIT(i)		ATOMIC64_INIT(i)
-#define atomic_long_cond_read_acquire	atomic64_cond_read_acquire
-#define atomic_long_cond_read_relaxed	atomic64_cond_read_relaxed
+#define ATOMIC_LONG_INIT(i)			ATOMIC64_INIT(i)
+#define atomic_long_cond_read_acquire		atomic64_cond_read_acquire
+#define atomic_long_cond_read_relaxed		atomic64_cond_read_relaxed
+#define atomic_long_cond_read_acquire_timeout	atomic64_cond_read_acquire_timeout
+#define atomic_long_cond_read_relaxed_timeout	atomic64_cond_read_relaxed_timeout
 #else
 typedef atomic_t atomic_long_t;
-#define ATOMIC_LONG_INIT(i)		ATOMIC_INIT(i)
-#define atomic_long_cond_read_acquire	atomic_cond_read_acquire
-#define atomic_long_cond_read_relaxed	atomic_cond_read_relaxed
+#define ATOMIC_LONG_INIT(i)			ATOMIC_INIT(i)
+#define atomic_long_cond_read_acquire		atomic_cond_read_acquire
+#define atomic_long_cond_read_relaxed		atomic_cond_read_relaxed
+#define atomic_long_cond_read_acquire_timeout	atomic_cond_read_acquire_timeout
+#define atomic_long_cond_read_relaxed_timeout	atomic_cond_read_relaxed_timeout
 #endif
 
 EOF
-- 
2.31.1



^ permalink raw reply related

* [PATCH v11 10/14] bpf/rqspinlock: Use smp_cond_load_acquire_timeout()
From: Ankur Arora @ 2026-04-08 12:25 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-arm-kernel, linux-pm, bpf
  Cc: arnd, catalin.marinas, will, peterz, akpm, mark.rutland, harisokn,
	cl, ast, rafael, daniel.lezcano, memxor, zhenglifeng1, xueshuai,
	rdunlap, david.laight.linux, joao.m.martins, boris.ostrovsky,
	konrad.wilk, ashok.bhat, Ankur Arora
In-Reply-To: <20260408122538.3610871-1-ankur.a.arora@oracle.com>

Switch out the conditional load interfaces used by rqspinlock
to smp_cond_read_acquire_timeout() and its wrapper,
atomic_cond_read_acquire_timeout().

Both these handle the timeout and amortize as needed, so use the
non-amortized RES_CHECK_TIMEOUT.

RES_CHECK_TIMEOUT does double duty here -- presenting the current
clock value, the timeout/deadlock error from clock_deadlock() to
the cond-load and, returning the error value via ret.

For correctness, we need to ensure that the error case of the
cond-load interface always agrees with that in clock_deadlock().

For the most part, this is fine because there's no independent clock,
or double reads from the clock in cond-load -- either of which could
lead to its internal state going out of sync from that of
clock_deadlock().

There is, however, an edge case where clock_deadlock() checks for:

        if (time > ts->timeout_end)
                return -ETIMEDOUT;

while smp_cond_load_acquire_timeout() checks for:

        __time_now = (time_expr_ns);
        if (__time_now <= 0 || __time_now >= __time_end) {
                VAL = READ_ONCE(*__PTR);
                break;
        }

This runs into a problem when (__time_now == __time_end) since
clock_deadlock() does not treat it as a timeout condition but
the second clause in the conditional above does.
So, add an equality check in clock_deadlock().

Finally, redefine SMP_TIMEOUT_POLL_COUNT to be 16k to be similar to
the spin-count used in the amortized version. We only do this for
non-arm64 as that uses a waiting implementation.

Cc: bpf@vger.kernel.org
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 kernel/bpf/rqspinlock.c | 40 +++++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c
index 0ec17ebb67c1..e5e27266b813 100644
--- a/kernel/bpf/rqspinlock.c
+++ b/kernel/bpf/rqspinlock.c
@@ -215,7 +215,7 @@ static noinline s64 clock_deadlock(rqspinlock_t *lock, u32 mask,
 	}
 
 	time = ktime_get_mono_fast_ns();
-	if (time > ts->timeout_end)
+	if (time >= ts->timeout_end)
 		return -ETIMEDOUT;
 
 	/*
@@ -235,11 +235,10 @@ static noinline s64 clock_deadlock(rqspinlock_t *lock, u32 mask,
 }
 
 /*
- * Do not amortize with spins when res_smp_cond_load_acquire is defined,
- * as the macro does internal amortization for us.
+ * Spin amortized version of RES_CHECK_TIMEOUT. Used when busy-waiting in
+ * atomic_try_cmpxchg().
  */
-#ifndef res_smp_cond_load_acquire
-#define RES_CHECK_TIMEOUT(ts, ret, mask)					\
+#define RES_CHECK_TIMEOUT_AMORTIZED(ts, ret, mask)				\
 	({									\
 		s64 __timeval_err = 0;						\
 		if (!(ts).spin++)						\
@@ -247,7 +246,7 @@ static noinline s64 clock_deadlock(rqspinlock_t *lock, u32 mask,
 		(ret) = __timeval_err < 0 ? __timeval_err : 0;			\
 		__timeval_err;							\
 	})
-#else
+
 #define RES_CHECK_TIMEOUT(ts, ret, mask)					\
 	({									\
 		s64 __timeval_err;						\
@@ -255,7 +254,6 @@ static noinline s64 clock_deadlock(rqspinlock_t *lock, u32 mask,
 		(ret) = __timeval_err < 0 ? __timeval_err : 0;			\
 		__timeval_err;							\
 	})
-#endif
 
 /*
  * Initialize the 'spin' member.
@@ -269,6 +267,17 @@ static noinline s64 clock_deadlock(rqspinlock_t *lock, u32 mask,
  */
 #define RES_RESET_TIMEOUT(ts, _duration) ({ (ts).timeout_end = 0; (ts).duration = _duration; })
 
+/*
+ * Limit how often we invoke clock_deadlock() while spin-waiting in
+ * smp_cond_load_acquire_timeout() or atomic_cond_read_acquire_timeout().
+ *
+ * We only override the default value not superceding ARM64's override.
+ */
+#ifndef CONFIG_ARM64
+#undef SMP_TIMEOUT_POLL_COUNT
+#define SMP_TIMEOUT_POLL_COUNT	(16*1024)
+#endif
+
 /*
  * Provide a test-and-set fallback for cases when queued spin lock support is
  * absent from the architecture.
@@ -296,7 +305,7 @@ int __lockfunc resilient_tas_spin_lock(rqspinlock_t *lock)
 	val = atomic_read(&lock->val);
 
 	if (val || !atomic_try_cmpxchg(&lock->val, &val, 1)) {
-		if (RES_CHECK_TIMEOUT(ts, ret, ~0u) < 0)
+		if (RES_CHECK_TIMEOUT_AMORTIZED(ts, ret, ~0u) < 0)
 			goto out;
 		cpu_relax();
 		goto retry;
@@ -319,12 +328,6 @@ EXPORT_SYMBOL_GPL(resilient_tas_spin_lock);
  */
 static DEFINE_PER_CPU_ALIGNED(struct qnode, rqnodes[_Q_MAX_NODES]);
 
-#ifndef res_smp_cond_load_acquire
-#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire(v, c)
-#endif
-
-#define res_atomic_cond_read_acquire(v, c) res_smp_cond_load_acquire(&(v)->counter, (c))
-
 /**
  * resilient_queued_spin_lock_slowpath - acquire the queued spinlock
  * @lock: Pointer to queued spinlock structure
@@ -421,7 +424,9 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
 	 */
 	if (val & _Q_LOCKED_MASK) {
 		RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT);
-		res_smp_cond_load_acquire(&lock->locked, !VAL || RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_MASK) < 0);
+		smp_cond_load_acquire_timeout(&lock->locked, !VAL,
+					      RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_MASK),
+					      ts.duration);
 	}
 
 	if (ret) {
@@ -582,8 +587,9 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
 	 * us.
 	 */
 	RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT * 2);
-	val = res_atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK) ||
-					   RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_PENDING_MASK) < 0);
+	val = atomic_cond_read_acquire_timeout(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK),
+					       RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_PENDING_MASK),
+					       ts.duration);
 
 	/* Disable queue destruction when we detect deadlocks. */
 	if (ret == -EDEADLK) {
-- 
2.31.1



^ permalink raw reply related

* [PATCH v11 12/14] cpuidle/poll_state: Wait for need-resched via tif_need_resched_relaxed_wait()
From: Ankur Arora @ 2026-04-08 12:25 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-arm-kernel, linux-pm, bpf
  Cc: arnd, catalin.marinas, will, peterz, akpm, mark.rutland, harisokn,
	cl, ast, rafael, daniel.lezcano, memxor, zhenglifeng1, xueshuai,
	rdunlap, david.laight.linux, joao.m.martins, boris.ostrovsky,
	konrad.wilk, ashok.bhat, Ankur Arora
In-Reply-To: <20260408122538.3610871-1-ankur.a.arora@oracle.com>

The inner loop in poll_idle() polls over the thread_info flags,
waiting to see if the thread has TIF_NEED_RESCHED set. The loop
exits once the condition is met, or if the poll time limit has
been exceeded.

To minimize the number of instructions executed in each iteration,
the time check is rate-limited. In addition, each loop iteration
executes cpu_relax() which on certain platforms provides a hint to
the pipeline that the loop busy-waits, allowing the processor to
reduce power consumption.

Switch over to tif_need_resched_relaxed_wait() instead, since that
provides exactly that.

However, since we want to minimize power consumption in idle, building
of cpuidle/poll_state.c continues to depend on CONFIG_ARCH_HAS_CPU_RELAX
as that serves as an indicator that the platform supports an optimized
version of tif_need_resched_relaxed_wait() (via
smp_cond_load_acquire_timeout()).

Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: linux-pm@vger.kernel.org
Suggested-by: Rafael J. Wysocki <rafael@kernel.org>
Acked-by: Rafael J. Wysocki (Intel) <rafael@kernel.org>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 drivers/cpuidle/poll_state.c | 21 +--------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index c7524e4c522a..7443b3e971ba 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -6,41 +6,22 @@
 #include <linux/cpuidle.h>
 #include <linux/export.h>
 #include <linux/irqflags.h>
-#include <linux/sched.h>
-#include <linux/sched/clock.h>
 #include <linux/sched/idle.h>
 #include <linux/sprintf.h>
 #include <linux/types.h>
 
-#define POLL_IDLE_RELAX_COUNT	200
-
 static int __cpuidle poll_idle(struct cpuidle_device *dev,
 			       struct cpuidle_driver *drv, int index)
 {
-	u64 time_start;
-
-	time_start = local_clock_noinstr();
-
 	dev->poll_time_limit = false;
 
 	raw_local_irq_enable();
 	if (!current_set_polling_and_test()) {
-		unsigned int loop_count = 0;
 		u64 limit;
 
 		limit = cpuidle_poll_time(drv, dev);
 
-		while (!need_resched()) {
-			cpu_relax();
-			if (loop_count++ < POLL_IDLE_RELAX_COUNT)
-				continue;
-
-			loop_count = 0;
-			if (local_clock_noinstr() - time_start > limit) {
-				dev->poll_time_limit = true;
-				break;
-			}
-		}
+		dev->poll_time_limit = !tif_need_resched_relaxed_wait(limit);
 	}
 	raw_local_irq_disable();
 
-- 
2.31.1



^ permalink raw reply related

* [PATCH v11 13/14] kunit: enable testing smp_cond_load_relaxed_timeout()
From: Ankur Arora @ 2026-04-08 12:25 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-arm-kernel, linux-pm, bpf
  Cc: arnd, catalin.marinas, will, peterz, akpm, mark.rutland, harisokn,
	cl, ast, rafael, daniel.lezcano, memxor, zhenglifeng1, xueshuai,
	rdunlap, david.laight.linux, joao.m.martins, boris.ostrovsky,
	konrad.wilk, ashok.bhat, Ankur Arora
In-Reply-To: <20260408122538.3610871-1-ankur.a.arora@oracle.com>

This enables the barrier tests to be built as a module.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 arch/arm64/lib/delay.c               | 2 ++
 drivers/clocksource/arm_arch_timer.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c
index c660a7ea26dd..dfb102ce3009 100644
--- a/arch/arm64/lib/delay.c
+++ b/arch/arm64/lib/delay.c
@@ -12,6 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/timex.h>
+#include <kunit/visibility.h>
 #include <asm/delay-const.h>
 
 #include <clocksource/arm_arch_timer.h>
@@ -30,6 +31,7 @@ u64 notrace __delay_cycles(void)
 	guard(preempt_notrace)();
 	return __arch_counter_get_cntvct_stable();
 }
+EXPORT_SYMBOL_IF_KUNIT(__delay_cycles);
 
 void __delay(unsigned long cycles)
 {
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 90aeff44a276..1de63e1a2cd2 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -28,6 +28,7 @@
 #include <linux/acpi.h>
 #include <linux/arm-smccc.h>
 #include <linux/ptp_kvm.h>
+#include <kunit/visibility.h>
 
 #include <asm/arch_timer.h>
 #include <asm/virt.h>
@@ -896,6 +897,7 @@ bool arch_timer_evtstrm_available(void)
 	 */
 	return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available);
 }
+EXPORT_SYMBOL_IF_KUNIT(arch_timer_evtstrm_available);
 
 static struct arch_timer_kvm_info arch_timer_kvm_info;
 
-- 
2.31.1



^ permalink raw reply related

* [PATCH v11 14/14] kunit: add tests for smp_cond_load_relaxed_timeout()
From: Ankur Arora @ 2026-04-08 12:25 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-arm-kernel, linux-pm, bpf
  Cc: arnd, catalin.marinas, will, peterz, akpm, mark.rutland, harisokn,
	cl, ast, rafael, daniel.lezcano, memxor, zhenglifeng1, xueshuai,
	rdunlap, david.laight.linux, joao.m.martins, boris.ostrovsky,
	konrad.wilk, ashok.bhat, Ankur Arora
In-Reply-To: <20260408122538.3610871-1-ankur.a.arora@oracle.com>

Add a success and failure case for smp_cond_load_relaxed_timeout().

Both test cases wait on some state in smp_cond_load_relaxed_timeout().
In the success case we spawn a kthread that pokes the bit.

Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 lib/Kconfig.debug                |  10 +++
 lib/tests/Makefile               |   1 +
 lib/tests/barrier-timeout-test.c | 125 +++++++++++++++++++++++++++++++
 3 files changed, 136 insertions(+)
 create mode 100644 lib/tests/barrier-timeout-test.c

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 93f356d2b3d9..dcd2d60a9391 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2398,6 +2398,16 @@ config FPROBE_SANITY_TEST
 
 	  Say N if you are unsure.
 
+config BARRIER_TIMEOUT_TEST
+	tristate "KUnit tests for smp_cond_load_relaxed_timeout()"
+	depends on KUNIT
+	default KUNIT_ALL_TESTS
+	help
+	  Builds KUnit tests that validate wake-up and timeout handling paths
+	  in smp_cond_load_relaxed_timeout().
+
+	  Say N if you are unsure.
+
 config BACKTRACE_SELF_TEST
 	tristate "Self test for the backtrace code"
 	depends on DEBUG_KERNEL
diff --git a/lib/tests/Makefile b/lib/tests/Makefile
index 05f74edbc62b..3504d677b7b8 100644
--- a/lib/tests/Makefile
+++ b/lib/tests/Makefile
@@ -20,6 +20,7 @@ CFLAGS_fortify_kunit.o += $(DISABLE_STRUCTLEAK_PLUGIN)
 obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o
 CFLAGS_test_fprobe.o += $(CC_FLAGS_FTRACE)
 obj-$(CONFIG_FPROBE_SANITY_TEST) += test_fprobe.o
+obj-$(CONFIG_BARRIER_TIMEOUT_TEST) += barrier-timeout-test.o
 obj-$(CONFIG_GLOB_KUNIT_TEST) += glob_kunit.o
 obj-$(CONFIG_HASHTABLE_KUNIT_TEST) += hashtable_test.o
 obj-$(CONFIG_HASH_KUNIT_TEST) += test_hash.o
diff --git a/lib/tests/barrier-timeout-test.c b/lib/tests/barrier-timeout-test.c
new file mode 100644
index 000000000000..d72200daa0f2
--- /dev/null
+++ b/lib/tests/barrier-timeout-test.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KUnit tests exercising smp_cond_load_relaxed_timeout().
+ *
+ * Copyright (c) 2026, Oracle Corp.
+ * Author: Ankur Arora <ankur.a.arora@oracle.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/sched/clock.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <asm/barrier.h>
+#include <kunit/test.h>
+#include <kunit/visibility.h>
+
+MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING");
+
+struct clock_state {
+	s64 start_time;
+	s64 end_time;
+};
+
+#define TIMEOUT_MSEC	2
+#define TEST_FLAG_VAL	BIT(2)
+static unsigned int flag;
+
+static s64 basic_clock(struct clock_state *clk)
+{
+	clk->end_time = local_clock();
+	return clk->end_time;
+}
+
+static void update_flags(void)
+{
+	WRITE_ONCE(flag, TEST_FLAG_VAL);
+}
+
+static s64 mock_clock(struct clock_state *clk)
+{
+	s64 clk_mid = clk->start_time + (TIMEOUT_MSEC * NSEC_PER_MSEC)/2;
+
+	clk->end_time = local_clock();
+	if (clk->end_time >= clk_mid)
+		update_flags();
+	return clk->end_time;
+}
+
+typedef s64 (*clkfn_t)(struct clock_state *);
+
+static void test_smp_cond_relaxed_timeout(struct kunit *test,
+					  clkfn_t clock, bool succeeds)
+{
+	struct clock_state clk = {
+		.start_time = local_clock(),
+		.end_time = local_clock(),
+	};
+	s64 runtime, timeout_ns = TIMEOUT_MSEC * NSEC_PER_MSEC;
+	unsigned int result;
+
+	result = smp_cond_load_relaxed_timeout(&flag,
+					       (VAL & TEST_FLAG_VAL),
+					       clock(&clk),
+					       timeout_ns);
+
+	runtime = clk.end_time - clk.start_time;
+	KUNIT_EXPECT_EQ(test, (bool)(result & TEST_FLAG_VAL), succeeds);
+	KUNIT_EXPECT_EQ(test, runtime <= timeout_ns, succeeds);
+}
+
+static int smp_cond_threadfn(void *data)
+{
+	udelay(TIMEOUT_MSEC * USEC_PER_MSEC / 4);
+
+	/*
+	 * Update flags after a delay to give smp_cond_relaxed_timeout()
+	 * time to get started.
+	 */
+	update_flags();
+	return 0;
+}
+
+static void smp_cond_relaxed_timeout_succeeds(struct kunit *test)
+{
+	struct task_struct *task;
+
+	flag = 0;
+
+	task = kthread_run(smp_cond_threadfn, &flag, "smp_cond_thread");
+
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, task);
+	test_smp_cond_relaxed_timeout(test, &basic_clock, true);
+
+	kthread_stop(task);
+}
+
+static void smp_cond_relaxed_timeout_mocked(struct kunit *test)
+{
+	flag = 0;
+	test_smp_cond_relaxed_timeout(test, &mock_clock, true);
+}
+
+static void smp_cond_relaxed_timeout_expires(struct kunit *test)
+{
+	flag = 0;
+	test_smp_cond_relaxed_timeout(test, &basic_clock, false);
+}
+
+static struct kunit_case barrier_timeout_test_cases[] = {
+	KUNIT_CASE(smp_cond_relaxed_timeout_mocked),
+	KUNIT_CASE(smp_cond_relaxed_timeout_succeeds),
+	KUNIT_CASE(smp_cond_relaxed_timeout_expires),
+	{}
+};
+
+static struct kunit_suite barrier_timeout_test_suite = {
+	.name = "smp-cond-load-relaxed-timeout",
+	.test_cases = barrier_timeout_test_cases,
+};
+
+kunit_test_suite(barrier_timeout_test_suite);
+
+MODULE_DESCRIPTION("KUnit tests for smp_cond_load_relaxed_timeout()");
+MODULE_LICENSE("GPL");
-- 
2.31.1



^ permalink raw reply related

* Re: [RFC V1 12/16] arm64/mm: Abstract printing of pxd_val()
From: David Hildenbrand (Arm) @ 2026-04-08 12:28 UTC (permalink / raw)
  To: Anshuman Khandual, linux-arm-kernel
  Cc: Catalin Marinas, Will Deacon, Ryan Roberts, Mark Rutland,
	Lorenzo Stoakes, Andrew Morton, Mike Rapoport, Linu Cherian,
	linux-kernel, linux-mm
In-Reply-To: <20260224051153.3150613-13-anshuman.khandual@arm.com>

On 2/24/26 06:11, Anshuman Khandual wrote:

Subject: you probably mean "pxx_val()" ?

> Ahead of adding support for D128 pgtables, refactor places that print
> PTE values to use the new __PRIpte format specifier and __PRIpte_args()
> macro to prepare the argument(s). When using D128 pgtables in future,
> we can simply redefine __PRIpte and __PTIpte_args().
> 
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Ryan Roberts <ryan.roberts@arm.com>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> ---
>  arch/arm64/include/asm/pgtable-types.h |  3 +++
>  arch/arm64/include/asm/pgtable.h       | 22 +++++++++++-----------
>  arch/arm64/mm/fault.c                  | 10 +++++-----
>  3 files changed, 19 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
> index 265e8301d7ba..dc3791dc9f14 100644
> --- a/arch/arm64/include/asm/pgtable-types.h
> +++ b/arch/arm64/include/asm/pgtable-types.h
> @@ -11,6 +11,9 @@
>  
>  #include <asm/types.h>
>  
> +#define __PRIpte		"016llx"
> +#define __PRIpte_args(val)	((u64)val)

Same comment regarding "pte" being misleading.


-- 
Cheers,

David


^ permalink raw reply

* Re: [RFC V1 11/16] arm64/mm: Route all pgtable atomics to central helpers
From: David Hildenbrand (Arm) @ 2026-04-08 12:28 UTC (permalink / raw)
  To: Anshuman Khandual, linux-arm-kernel
  Cc: Catalin Marinas, Will Deacon, Ryan Roberts, Mark Rutland,
	Lorenzo Stoakes, Andrew Morton, Mike Rapoport, Linu Cherian,
	linux-kernel, linux-mm
In-Reply-To: <20260224051153.3150613-12-anshuman.khandual@arm.com>

On 2/24/26 06:11, Anshuman Khandual wrote:
> Route all cmpxchg() operations performed on various page table entries to a
> new ptdesc_cmpxchg_relaxed() helper. Similarly route all xchg() operations
> performed on page table entries to a new ptdesc_xchg_relaxed() helper.
> 
> Currently these helpers just forward to the same APIs that were previously
> called direct, but in future we will change the routing for D128 which is
> too long to use the standard APIs.
> 
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Ryan Roberts <ryan.roberts@arm.com>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> ---
>  arch/arm64/include/asm/pgtable.h | 23 +++++++++++++++++------
>  arch/arm64/mm/fault.c            |  2 +-
>  2 files changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 42124d2f323d..cf69ce68f951 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -87,6 +87,17 @@ static inline void arch_leave_lazy_mmu_mode(void)
>  #define ptdesc_get(x)		READ_ONCE(x)
>  #define ptdesc_set(x, val)	WRITE_ONCE(x, val)
>  
> +static inline ptdesc_t ptdesc_cmpxchg_relaxed(ptdesc_t *ptep, ptdesc_t old,
> +					      ptdesc_t new)
> +{
> +	return cmpxchg_relaxed(ptep, old, new);
> +}
> +
> +static inline ptdesc_t ptdesc_xchg_relaxed(ptdesc_t *ptep, ptdesc_t new)
> +{
> +	return xchg_relaxed(ptep, new);
> +}
> +

We really want the rename of ptdesc_t before this change.

-- 
Cheers,

David


^ permalink raw reply

* Re: [RFC V1 14/16] arm64/mm: Enable fixmap with 5 level page table
From: David Hildenbrand (Arm) @ 2026-04-08 12:29 UTC (permalink / raw)
  To: Anshuman Khandual, linux-arm-kernel
  Cc: Catalin Marinas, Will Deacon, Ryan Roberts, Mark Rutland,
	Lorenzo Stoakes, Andrew Morton, Mike Rapoport, Linu Cherian,
	linux-kernel, linux-mm
In-Reply-To: <20260224051153.3150613-15-anshuman.khandual@arm.com>

On 2/24/26 06:11, Anshuman Khandual wrote:
> Enable fixmap with 5 level page table when required. This creates table
> entries at the PGD level. Add a fallback stub for pgd_page_paddr() when
> (PGTBALE_LEVELS <= 4) which helps in intercepting any unintended usage.

Can you add the "why" ?


-- 
Cheers,

David


^ permalink raw reply

* [PATCH v11 02/14] arm64: barrier: Support smp_cond_load_relaxed_timeout()
From: Ankur Arora @ 2026-04-08 12:25 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-arm-kernel, linux-pm, bpf
  Cc: arnd, catalin.marinas, will, peterz, akpm, mark.rutland, harisokn,
	cl, ast, rafael, daniel.lezcano, memxor, zhenglifeng1, xueshuai,
	rdunlap, david.laight.linux, joao.m.martins, boris.ostrovsky,
	konrad.wilk, ashok.bhat, Ankur Arora
In-Reply-To: <20260408122538.3610871-1-ankur.a.arora@oracle.com>

Support waiting in smp_cond_load_relaxed_timeout() via
__cmpwait_relaxed(). To ensure that we wake from waiting in
WFE periodically and don't block forever if there are no stores
to ptr, this path is only used when the event-stream is enabled.

Note that when using __cmpwait_relaxed() we ignore the timeout
value, allowing an overshoot by up to the event-stream period.
And, in the unlikely event that the event-stream is unavailable,
fallback to spin-waiting.

Also set SMP_TIMEOUT_POLL_COUNT to 1 so we do the time-check in
each iteration of smp_cond_load_relaxed_timeout().

And finally define ARCH_HAS_CPU_RELAX to indicate that we have
an optimized implementation of cpu_poll_relax().

Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Suggested-by: Will Deacon <will@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 arch/arm64/Kconfig               |  3 +++
 arch/arm64/include/asm/barrier.h | 21 +++++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9ea19b74b6c3..e3ce08276e9b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1628,6 +1628,9 @@ config ARCH_SUPPORTS_CRASH_DUMP
 config ARCH_DEFAULT_CRASH_DUMP
 	def_bool y
 
+config ARCH_HAS_CPU_RELAX
+	def_bool y
+
 config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
 	def_bool CRASH_RESERVE
 
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 9495c4441a46..6190e178db51 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -12,6 +12,7 @@
 #include <linux/kasan-checks.h>
 
 #include <asm/alternative-macros.h>
+#include <asm/vdso/processor.h>
 
 #define __nops(n)	".rept	" #n "\nnop\n.endr\n"
 #define nops(n)		asm volatile(__nops(n))
@@ -219,6 +220,26 @@ do {									\
 	(typeof(*ptr))VAL;						\
 })
 
+/* Re-declared here to avoid include dependency. */
+extern bool arch_timer_evtstrm_available(void);
+
+/*
+ * In the common case, cpu_poll_relax() sits waiting in __cmpwait_relaxed()
+ * for the ptr value to change.
+ *
+ * Since this period is reasonably long, choose SMP_TIMEOUT_POLL_COUNT
+ * to be 1, so smp_cond_load_{relaxed,acquire}_timeout() does a
+ * time-check in each iteration.
+ */
+#define SMP_TIMEOUT_POLL_COUNT	1
+
+#define cpu_poll_relax(ptr, val, timeout_ns) do {			\
+	if (arch_timer_evtstrm_available())				\
+		__cmpwait_relaxed(ptr, val);				\
+	else								\
+		cpu_relax();						\
+} while (0)
+
 #include <asm-generic/barrier.h>
 
 #endif	/* __ASSEMBLER__ */
-- 
2.31.1



^ permalink raw reply related

* [PATCH] coresight: etm4x: Correct TRCVMIDCCTLR1 save and restore
From: Leo Yan @ 2026-04-08 12:31 UTC (permalink / raw)
  To: Suzuki K Poulose, Mike Leach, James Clark, Alexander Shishkin,
	Greg Kroah-Hartman
  Cc: coresight, linux-arm-kernel, Leo Yan

It is a typo to use trcvmidcctlr0 to save and restore TRCVMIDCCTLR1.
Use trcvmidcctlr1 instead.

Fixes: f5bd523690d2 ("coresight: etm4x: Convert all register accesses")
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
 drivers/hwtracing/coresight/coresight-etm4x-core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index d565a73f0042e3e0b21fcf9cb94009cc25834d3d..c370b6bf45ef26aff29ee71630f86eae2487069a 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -1979,7 +1979,7 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata)
 
 	state->trcvmidcctlr0 = etm4x_read32(csa, TRCVMIDCCTLR0);
 	if (drvdata->numvmidc > 4)
-		state->trcvmidcctlr0 = etm4x_read32(csa, TRCVMIDCCTLR1);
+		state->trcvmidcctlr1 = etm4x_read32(csa, TRCVMIDCCTLR1);
 
 	state->trcclaimset = etm4x_read32(csa, TRCCLAIMCLR);
 
@@ -2102,7 +2102,7 @@ static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata)
 
 	etm4x_relaxed_write32(csa, state->trcvmidcctlr0, TRCVMIDCCTLR0);
 	if (drvdata->numvmidc > 4)
-		etm4x_relaxed_write32(csa, state->trcvmidcctlr0, TRCVMIDCCTLR1);
+		etm4x_relaxed_write32(csa, state->trcvmidcctlr1, TRCVMIDCCTLR1);
 
 	etm4x_relaxed_write32(csa, state->trcclaimset, TRCCLAIMSET);
 

---
base-commit: 971f3474f8898ae8bbab19a9b547819a5e6fbcf1
change-id: 20260408-arm_cs_fix_trcvmidcctlr1_typo-3382c27d1c28

Best regards,
-- 
Leo Yan <leo.yan@arm.com>



^ permalink raw reply related

* [PATCH v11 09/14] bpf/rqspinlock: switch check_timeout() to a clock interface
From: Ankur Arora @ 2026-04-08 12:25 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-arm-kernel, linux-pm, bpf
  Cc: arnd, catalin.marinas, will, peterz, akpm, mark.rutland, harisokn,
	cl, ast, rafael, daniel.lezcano, memxor, zhenglifeng1, xueshuai,
	rdunlap, david.laight.linux, joao.m.martins, boris.ostrovsky,
	konrad.wilk, ashok.bhat, Ankur Arora
In-Reply-To: <20260408122538.3610871-1-ankur.a.arora@oracle.com>

check_timeout() gets the current time value and depending on how
much time has passed, checks for deadlock or times out, returning 0
or -errno on deadlock or timeout.

Switch this out to a clock style interface, where it functions as a
clock in the "lock-domain", returning the current time until a
deadlock or timeout occurs. Once a deadlock or timeout has occurred,
it stops functioning as a clock and returns error.

Also adjust the RES_CHECK_TIMEOUT macro to discard the clock value
when updating the explicit return status.

Cc: bpf@vger.kernel.org
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 kernel/bpf/rqspinlock.c | 45 +++++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 15 deletions(-)

diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c
index e4e338cdb437..0ec17ebb67c1 100644
--- a/kernel/bpf/rqspinlock.c
+++ b/kernel/bpf/rqspinlock.c
@@ -196,8 +196,12 @@ static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask)
 	return 0;
 }
 
-static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
-				  struct rqspinlock_timeout *ts)
+/*
+ * Returns current monotonic time in ns on success or, negative errno
+ * value on failure due to timeout expiration or detection of deadlock.
+ */
+static noinline s64 clock_deadlock(rqspinlock_t *lock, u32 mask,
+				   struct rqspinlock_timeout *ts)
 {
 	u64 prev = ts->cur;
 	u64 time;
@@ -207,7 +211,7 @@ static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
 			return -EDEADLK;
 		ts->cur = ktime_get_mono_fast_ns();
 		ts->timeout_end = ts->cur + ts->duration;
-		return 0;
+		return (s64)ts->cur;
 	}
 
 	time = ktime_get_mono_fast_ns();
@@ -219,11 +223,15 @@ static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
 	 * checks.
 	 */
 	if (prev + NSEC_PER_MSEC < time) {
+		int ret;
 		ts->cur = time;
-		return check_deadlock_ABBA(lock, mask);
+		ret = check_deadlock_ABBA(lock, mask);
+		if (ret)
+			return ret;
+
 	}
 
-	return 0;
+	return (s64)time;
 }
 
 /*
@@ -231,15 +239,22 @@ static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
  * as the macro does internal amortization for us.
  */
 #ifndef res_smp_cond_load_acquire
-#define RES_CHECK_TIMEOUT(ts, ret, mask)                              \
-	({                                                            \
-		if (!(ts).spin++)                                     \
-			(ret) = check_timeout((lock), (mask), &(ts)); \
-		(ret);                                                \
+#define RES_CHECK_TIMEOUT(ts, ret, mask)					\
+	({									\
+		s64 __timeval_err = 0;						\
+		if (!(ts).spin++)						\
+			__timeval_err = clock_deadlock((lock), (mask), &(ts));	\
+		(ret) = __timeval_err < 0 ? __timeval_err : 0;			\
+		__timeval_err;							\
 	})
 #else
-#define RES_CHECK_TIMEOUT(ts, ret, mask)			      \
-	({ (ret) = check_timeout((lock), (mask), &(ts)); })
+#define RES_CHECK_TIMEOUT(ts, ret, mask)					\
+	({									\
+		s64 __timeval_err;						\
+		__timeval_err = clock_deadlock((lock), (mask), &(ts));		\
+		(ret) = __timeval_err < 0 ? __timeval_err : 0;			\
+		__timeval_err;							\
+	})
 #endif
 
 /*
@@ -281,7 +296,7 @@ int __lockfunc resilient_tas_spin_lock(rqspinlock_t *lock)
 	val = atomic_read(&lock->val);
 
 	if (val || !atomic_try_cmpxchg(&lock->val, &val, 1)) {
-		if (RES_CHECK_TIMEOUT(ts, ret, ~0u))
+		if (RES_CHECK_TIMEOUT(ts, ret, ~0u) < 0)
 			goto out;
 		cpu_relax();
 		goto retry;
@@ -406,7 +421,7 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
 	 */
 	if (val & _Q_LOCKED_MASK) {
 		RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT);
-		res_smp_cond_load_acquire(&lock->locked, !VAL || RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_MASK));
+		res_smp_cond_load_acquire(&lock->locked, !VAL || RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_MASK) < 0);
 	}
 
 	if (ret) {
@@ -568,7 +583,7 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
 	 */
 	RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT * 2);
 	val = res_atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK) ||
-					   RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_PENDING_MASK));
+					   RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_PENDING_MASK) < 0);
 
 	/* Disable queue destruction when we detect deadlocks. */
 	if (ret == -EDEADLK) {
-- 
2.31.1



^ permalink raw reply related

* [PATCH v11 11/14] sched: add need-resched timed wait interface
From: Ankur Arora @ 2026-04-08 12:25 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-arm-kernel, linux-pm, bpf
  Cc: arnd, catalin.marinas, will, peterz, akpm, mark.rutland, harisokn,
	cl, ast, rafael, daniel.lezcano, memxor, zhenglifeng1, xueshuai,
	rdunlap, david.laight.linux, joao.m.martins, boris.ostrovsky,
	konrad.wilk, ashok.bhat, Ankur Arora, Ingo Molnar
In-Reply-To: <20260408122538.3610871-1-ankur.a.arora@oracle.com>

Add tif_bitset_relaxed_wait() (and tif_need_resched_relaxed_wait()
which wraps it) which takes the thread_info bit and timeout duration
as parameters and waits until the bit is set or for the expiration
of the timeout.

The wait is implemented via smp_cond_load_relaxed_timeout().

smp_cond_load_relaxed_timeout() essentially provides the pattern used
in poll_idle() where we spin in a loop waiting for the flag to change
until a timeout occurs.

tif_need_resched_relaxed_wait() allows us to abstract out the internals
of waiting, scheduler specific details etc.

Placed in linux/sched/idle.h instead of linux/thread_info.h to work
around recursive include hell.

Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: linux-pm@vger.kernel.org
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 include/linux/sched/idle.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
index 8465ff1f20d1..ddee9b019895 100644
--- a/include/linux/sched/idle.h
+++ b/include/linux/sched/idle.h
@@ -3,6 +3,7 @@
 #define _LINUX_SCHED_IDLE_H
 
 #include <linux/sched.h>
+#include <linux/sched/clock.h>
 
 enum cpu_idle_type {
 	__CPU_NOT_IDLE = 0,
@@ -113,4 +114,32 @@ static __always_inline void current_clr_polling(void)
 }
 #endif
 
+/*
+ * Caller needs to make sure that the thread context cannot be preempted
+ * or migrated, so current_thread_info() cannot change from under us.
+ *
+ * This also allows us to safely stay in the local_clock domain.
+ */
+static __always_inline bool tif_bitset_relaxed_wait(int tif, u64 timeout_ns)
+{
+	unsigned long flags;
+
+	flags = smp_cond_load_relaxed_timeout(&current_thread_info()->flags,
+					      (VAL & BIT(tif)),
+					      local_clock_noinstr(),
+					      timeout_ns);
+	return flags & BIT(tif);
+}
+
+/**
+ * tif_need_resched_relaxed_wait() - Wait for need-resched being set
+ * with no ordering guarantees until a timeout expires.
+ *
+ * @timeout_ns: timeout value.
+ */
+static __always_inline bool tif_need_resched_relaxed_wait(u64 timeout_ns)
+{
+	return tif_bitset_relaxed_wait(TIF_NEED_RESCHED, timeout_ns);
+}
+
 #endif /* _LINUX_SCHED_IDLE_H */
-- 
2.31.1



^ permalink raw reply related

* Re: [PATCH v12 2/2] arm: dts: aspeed: ventura: add Meta Ventura BMC
From: Andrew Lunn @ 2026-04-08 12:34 UTC (permalink / raw)
  To: P.K. Lee
  Cc: robh+dt, krzysztof.kozlowski+dt, conor+dt, joel, andrew,
	devicetree, linux-arm-kernel, linux-aspeed, linux-kernel,
	Jason-Hsu, p.k.lee
In-Reply-To: <CAK8yEOAYC0iApNHBApt+xu1Fz=+N1wX0XrLGOPzmeRq=OjWnhg@mail.gmail.com>

> > > > If there are no devices on the bus, why enable it?
> > >
> > > We intentionally enable it so user-space tools can access the switch
> > > registers. I have added a comment in v13 to clarify this.
> >
> > Why would user space want to access the switch registers for an
> > unmanaged switch? It sounds like you are using Marvells SDK in
> > userspace to manage the switch, rather than using DSA.
> >
> 
> We do have a custom user-space daemon that configures the switch
> registers for our specific use case. Should I remove the &mdio0 node
> if it is only enabled and has no other configuration in the upstream
> device tree?

Please just be truthful that you have a user space driver, so need the
bus enabled.

I also guess you have some other kernel code that allows you to
actually use the bus from user space? The typical ethernet IOCTL
handler does not work for you, since you don't have an ethernet device
using this bus. Such code is unlikely to be accepted into mainline. We
don't like user space drivers when there is a perfectly good kernel
driver for this switch.

	Andrew


^ permalink raw reply

* Re: [PATCH] coresight: etm4x: Correct TRCVMIDCCTLR1 save and restore
From: James Clark @ 2026-04-08 12:39 UTC (permalink / raw)
  To: Leo Yan
  Cc: coresight, linux-arm-kernel, Suzuki K Poulose, Mike Leach,
	Alexander Shishkin, Greg Kroah-Hartman
In-Reply-To: <20260408-arm_cs_fix_trcvmidcctlr1_typo-v1-1-6a5695363b46@arm.com>



On 08/04/2026 1:31 pm, Leo Yan wrote:
> It is a typo to use trcvmidcctlr0 to save and restore TRCVMIDCCTLR1.
> Use trcvmidcctlr1 instead.
> 
> Fixes: f5bd523690d2 ("coresight: etm4x: Convert all register accesses")
> Signed-off-by: Leo Yan <leo.yan@arm.com>
> ---
>   drivers/hwtracing/coresight/coresight-etm4x-core.c | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
> index d565a73f0042e3e0b21fcf9cb94009cc25834d3d..c370b6bf45ef26aff29ee71630f86eae2487069a 100644
> --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
> +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
> @@ -1979,7 +1979,7 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata)
>   
>   	state->trcvmidcctlr0 = etm4x_read32(csa, TRCVMIDCCTLR0);
>   	if (drvdata->numvmidc > 4)
> -		state->trcvmidcctlr0 = etm4x_read32(csa, TRCVMIDCCTLR1);
> +		state->trcvmidcctlr1 = etm4x_read32(csa, TRCVMIDCCTLR1);
>   
>   	state->trcclaimset = etm4x_read32(csa, TRCCLAIMCLR);
>   
> @@ -2102,7 +2102,7 @@ static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata)
>   
>   	etm4x_relaxed_write32(csa, state->trcvmidcctlr0, TRCVMIDCCTLR0);
>   	if (drvdata->numvmidc > 4)
> -		etm4x_relaxed_write32(csa, state->trcvmidcctlr0, TRCVMIDCCTLR1);
> +		etm4x_relaxed_write32(csa, state->trcvmidcctlr1, TRCVMIDCCTLR1);
>   
>   	etm4x_relaxed_write32(csa, state->trcclaimset, TRCCLAIMSET);
>   
> 
> ---
> base-commit: 971f3474f8898ae8bbab19a9b547819a5e6fbcf1
> change-id: 20260408-arm_cs_fix_trcvmidcctlr1_typo-3382c27d1c28
> 
> Best regards,

Reviewed-by: James Clark <james.clark@linaro.org>



^ permalink raw reply

* Re: [PATCH v2 8/8] mips: dts: Add PCIe to EcoNet EN751221
From: Caleb James DeLisle @ 2026-04-08 12:58 UTC (permalink / raw)
  To: Thomas Bogendoerfer
  Cc: linux-mips, naseefkm, mturquette, sboyd, robh, krzk+dt, conor+dt,
	ryder.lee, jianjun.wang, lpieralisi, kwilczynski, mani, bhelgaas,
	vkoul, neil.armstrong, p.zabel, matthias.bgg,
	angelogioacchino.delregno, nbd, ansuelsmth, linux-clk, devicetree,
	linux-kernel, linux-pci, linux-mediatek, linux-phy,
	linux-arm-kernel
In-Reply-To: <adOo9xZxXT3FkufM@alpha.franken.de>


On 06/04/2026 14:37, Thomas Bogendoerfer wrote:
> On Mon, Mar 09, 2026 at 01:18:18PM +0000, Caleb James DeLisle wrote:
>> Add PCIe based on EN7528 PCIe driver, also add two MT76 wifi devices
>> to SmartFiber XP8421-B.
>>
>> Signed-off-by: Caleb James DeLisle <cjd@cjdns.fr>
>> ---
>>   arch/mips/boot/dts/econet/en751221.dtsi       | 114 ++++++++++++++++++
>>   .../econet/en751221_smartfiber_xp8421-b.dts   |  21 ++++
>>   arch/mips/econet/Kconfig                      |   2 +
>>   3 files changed, 137 insertions(+)
>>
> applied to mips-next

Thank you very much.

Caleb

>
> Thomas.
>


^ permalink raw reply

* BUG: net-next (7.0-rc6 based and later) fails to boot on Jetson Xavier NX
From: Russell King (Oracle) @ 2026-04-08 13:07 UTC (permalink / raw)
  To: netdev, linux-arm-kernel, linux-kernel, iommu, linux-ext4,
	Linus Torvalds
  Cc: Marek Szyprowski, Robin Murphy, Theodore Ts'o, Andreas Dilger

Hi,

Just a heads-up that current net-next (v7.0-rc6 based) fails to boot on
my nVidia Jetson Xavier platform. v7.0-rc5 and v6.14 based net-next both
boot fine. This is an arm64 platform.

The problem appears to be completely random in terms of its symptoms,
and looks like severe memory corruption - every boot seems to produce
a different problem. The common theme is, although the kernel gets to
userspace, it never gets anywhere close to a login prompt before
failing in some way.

The last net-next+ boot (which is currently v7.0-rc6 based) resulted
in:

tegra-mc 2c00000.memory-controller: xusb_hostw: secure write @0x00000003ffffff00: VPR violation ((null))
...
irq 91: nobody cared (try booting with the "irqpoll" option)
...
depmod: ERROR: could not open directory /lib/modules/7.0.0-rc6-net-next+: No such file or directory
...
Unable to handle kernel paging request at virtual address 0003201fd50320cf


A previous boot of the exact same kernel didn't oops, but was unable
to find the block device to mount for /mnt via block UUID.

A previous boot to that resulted in an oops.


The intersting thing is - the depmod error above is incorrect:

root@tegra-ubuntu:~# ls -ld /lib/modules/7.0.0-rc6-net-next+
drwxrwxr-x 3 root root 4096 Apr  8 10:23 /lib/modules/7.0.0-rc6-net-next+

The directory is definitely there, and is readable - checked after
booting back into net-next based on 7.0-rc5. In some of these boots,
stmmac hasn't probed yet, which rules out my changes.

Rootfs is ext4, and it seems there were a lot of ext4 commits merged
between rc5 and rc6, but nothing for rc7.

My current net-next head is dfecb0c5af3b. Merging rc7 on top also
fails, I suspect also randomly, with that I just got:

EXT4-fs (mmcblk0p1): VFS: Can't find ext4 filesystem
mount: /mnt: wrong fs type, bad option, bad superblock on /dev/mmcblk0p1, missing codepage or helper program, or other error.
mount: /mnt/: can't find PARTUUID=741c0777-391a-4bce-a222-455e180ece2a.
Unable to handle kernel paging request at virtual address f9bf0011ac0fb893
Mem abort info:
  ESR = 0x0000000096000004
  EC = 0x25: DABT (current EL), IL = 32 bits
  SET = 0, FnV = 0
  EA = 0, S1PTW = 0
  FSC = 0x04: level 0 translation fault
Data abort info:
  ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
  CM = 0, WnR = 0, TnD = 0, TagAccess = 0
  GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[f9bf0011ac0fb893] address between user and kernel address ranges
Internal error: Oops: 0000000096000004 [#1]  SMP
Modules linked in:
CPU: 1 UID: 0 PID: 936 Comm: mount Not tainted 7.0.0-rc7-net-next+ #649 PREEMPT
Hardware name: NVIDIA NVIDIA Jetson Xavier NX Developer Kit/Jetson, BIOS 6.0-37391689 08/28/2024
pstate: 20400009 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : refill_objects+0x298/0x5ec
lr : refill_objects+0x1f0/0x5ec

...

Call trace:
 refill_objects+0x298/0x5ec (P)
 __pcs_replace_empty_main+0x13c/0x3a8
 kmem_cache_alloc_noprof+0x324/0x3a0
 alloc_iova+0x3c/0x290
 alloc_iova_fast+0x168/0x2d4
 iommu_dma_alloc_iova+0x84/0x154
 iommu_dma_map_sg+0x2c4/0x538
 __dma_map_sg_attrs+0x124/0x2c0
 dma_map_sg_attrs+0x10/0x20
 sdhci_pre_dma_transfer+0xb8/0x164
 sdhci_pre_req+0x38/0x44
 mmc_blk_mq_issue_rq+0x3dc/0x920
 mmc_mq_queue_rq+0x104/0x2b0
 __blk_mq_issue_directly+0x38/0xb0
 blk_mq_request_issue_directly+0x54/0xb4
 blk_mq_issue_direct+0x84/0x180
 blk_mq_dispatch_queue_requests+0x1a8/0x2e0
 blk_mq_flush_plug_list+0x60/0x140
 __blk_flush_plug+0xe0/0x11c
 blk_finish_plug+0x38/0x4c
 read_pages+0x158/0x260
 page_cache_ra_unbounded+0x158/0x3e0
 force_page_cache_ra+0xb0/0xe4
 page_cache_sync_ra+0x88/0x480
 filemap_get_pages+0xd8/0x850
 filemap_read+0xdc/0x3d8
 blkdev_read_iter+0x84/0x198
 vfs_read+0x208/0x2d8
 ksys_read+0x58/0xf4
 __arm64_sys_read+0x1c/0x28
 invoke_syscall.constprop.0+0x50/0xe0
 do_el0_svc+0x40/0xc0
 el0_svc+0x48/0x2a0
 el0t_64_sync_handler+0xa0/0xe4
 el0t_64_sync+0x19c/0x1a0
Code: 54000189 f9000022 aa0203e4 b9402ae3 (f8634840)
---[ end trace 0000000000000000 ]---
Kernel panic - not syncing: Oops: Fatal exception

Looking at the changes between rc5 and rc6, there's one drivers/block
change for zram (which is used on this platform), one change in
drivers/base for regmap, nothing for drivers/mmc, but plenty for
fs/ext4. There are five DMA API changes.

Now building straight -rc7. If that also fails, my plan is to start
bisecting rc5..rc6, which will likely take most of the rest of the
day. So, in the mean time I'm sending this as a heads-up that rc6
and onwards has a problem.

I'll update when I have a potential commit located.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!


^ permalink raw reply

* Re: [PATCH v2 2/3] mailbox: exynos: Add support for Exynos850 mailbox
From: Alexey Klimov @ 2026-04-08 13:08 UTC (permalink / raw)
  To: Tudor Ambarus
  Cc: Krzysztof Kozlowski, Sylwester Nawrocki, Chanwoo Choi,
	Alim Akhtar, Sam Protsenko, Michael Turquette, Stephen Boyd,
	Rob Herring, Conor Dooley, Jassi Brar, Krzysztof Kozlowski,
	Peter Griffin, linux-samsung-soc, linux-arm-kernel, linux-clk,
	devicetree, linux-kernel
In-Reply-To: <a02a693e-b06e-43bf-ac5f-8253f298c83d@linaro.org>

Hi Tudor,

On Thu Apr 2, 2026 at 9:42 AM BST, Tudor Ambarus wrote:
> Hi, Alexey,
>
> On 4/2/26 5:20 AM, Alexey Klimov wrote:
>> Exynos850-based platforms support ACPM and has similar workflow
>> of communicating with ACPM via mailbox, however mailbox controller
>> registers are located at different offsets and writes/reads could be
>> different. To distinguish between such different behaviours,
>> the registers offsets for Exynos850 and the platform-specific data
>> structs are introduced and configuration is described in such structs
>> for gs101 and exynos850 based SoCs. Probe routine now selects the
>> corresponding platform-specific data via device_get_match_data().
>> 
>> Signed-off-by: Alexey Klimov <alexey.klimov@linaro.org>
>> ---
>>  drivers/mailbox/exynos-mailbox.c | 67 ++++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 64 insertions(+), 3 deletions(-)
>> 
>> diff --git a/drivers/mailbox/exynos-mailbox.c b/drivers/mailbox/exynos-mailbox.c
>> index d2355b128ba4..f9c59c07558a 100644
>> --- a/drivers/mailbox/exynos-mailbox.c
>> +++ b/drivers/mailbox/exynos-mailbox.c
>> @@ -31,14 +31,61 @@
>>  
>>  #define EXYNOS_MBOX_CHAN_COUNT		HWEIGHT32(EXYNOS_MBOX_INTGR1_MASK)
>>  
>> +#define EXYNOS850_MBOX_MCUCTRL		0x0	/* Mailbox Control Register		*/
>> +#define EXYNOS850_MBOX_INTGR0		0x8	/* Interrupt Generation Register 0	*/
>> +#define EXYNOS850_MBOX_INTCR0		0x0C	/* Interrupt Clear Register 0		*/
>> +#define EXYNOS850_MBOX_INTMR0		0x10	/* Interrupt Mask Register 0		*/
>> +#define EXYNOS850_MBOX_INTSR0		0x14	/* Interrupt Status Register 0		*/
>> +#define EXYNOS850_MBOX_INTMSR0		0x18	/* Interrupt Mask Status Register 0	*/
>> +#define EXYNOS850_MBOX_INTGR1		0x1C	/* Interrupt Generation Register 1	*/
>> +#define EXYNOS850_MBOX_INTMR1		0x24	/* Interrupt Mask Register 1		*/
>> +#define EXYNOS850_MBOX_INTSR1		0x28	/* Interrupt Status Register 1		*/
>> +#define EXYNOS850_MBOX_INTMSR1		0x2C	/* Interrupt Mask Status Register 1	*/
>> +#define EXYNOS850_MBOX_VERSION		0x70
>
> Please consider defining just the registers that are used, to not
> pollute the driver. You may drop the unused gs101 definitions too. 

Sure. Thanks. I was surprised how many unused defines were introduced
by gs101 SoC in the first place all over.

>> +#define EXYNOS850_MBOX_INTMR1_MASK	GENMASK(15, 0)
>> +
>> +/**
>> + * struct exynos_mbox_driver_data - platform-specific mailbox configuration.
>> + * @irq_doorbell_offset:	offset to the IRQ generation register, doorbell
>> + *				to APM co-processor.
>> + * @irq_doorbell_shift:		shift to apply to the value written to IRQ
>> + *				generation register.
>> + * @irq_mask_offset:		offset to the IRQ mask register.
>> + * @irq_mask_value:		value to right to the mask register to mask out
>> + *				all interrupts.
>> + */
>> +struct exynos_mbox_driver_data {
>> +	u16 irq_doorbell_offset;
>> +	u16 irq_doorbell_shift;
>> +	u16 irq_mask_offset;
>> +	u16 irq_mask_value;
>> +};
>> +
>>  /**
>>   * struct exynos_mbox - driver's private data.
>>   * @regs:	mailbox registers base address.
>>   * @mbox:	pointer to the mailbox controller.
>> + * @data:	pointer to driver platform-specific data.
>>   */
>>  struct exynos_mbox {
>>  	void __iomem *regs;
>>  	struct mbox_controller *mbox;
>> +	const struct exynos_mbox_driver_data *data;
>> +};
>> +
>> +static const struct exynos_mbox_driver_data exynos850_mbox_data = {
>> +	.irq_doorbell_offset = EXYNOS850_MBOX_INTGR0,
>> +	.irq_doorbell_shift = 16,
>> +	.irq_mask_offset = EXYNOS850_MBOX_INTMR1,
>> +	.irq_mask_value = EXYNOS850_MBOX_INTMR1_MASK,
>> +};
>> +
>> +static const struct exynos_mbox_driver_data exynos_gs101_mbox_data = {
>> +	.irq_doorbell_offset = EXYNOS_MBOX_INTGR1,
>> +	.irq_doorbell_shift = 0,
>> +	.irq_mask_offset = EXYNOS_MBOX_INTMR0,
>> +	.irq_mask_value = EXYNOS_MBOX_INTMR0_MASK,
>>  };
>
> I find it strange that the SoCs use different registers. Are you sure you're
> using the right direction? i.e. ring the doorbell to APM and not to AP?

Well, I am not sure I correctly understood the questions and comment. So,
this all was tested with ACPM TMU code with 3 temp sensors and it seems
to work and sensors react in the right way.

Downstream clearly does the following (see also [1],[2]) when sending
ACPM msg:

static void apm_interrupt_gen(unsigned int id)
{
	/* APM NVIC INTERRUPT GENERATE */
	writel((1 << id) << 16, acpm_ipc->intr + INTGR0);
}

I am aware that gs101 downstream uses INTGR1 in apm_interrupt_gen().

When I use INTGR1 for e850 then I observe acpm timeouts. Hence, out of
curiosity, what's the expected behaviour when/if I ring the doorbell to
AP (to itself as far as I understand)? My understanding that it won't
work at all in such case unless APM firmware does some very fast
polling.


[1]: https://gitlab.com/Linaro/96boards/e850-96/kernel/-/blob/android-exynos-4.14-linaro/drivers/soc/samsung/acpm/acpm_ipc.c?ref_type=heads#L423
[2]: https://github.com/samsungexynos850/android_kernel_samsung_exynos850/blob/0af517be2336bf8e09c59d576c4c314446713101/drivers/soc/samsung/acpm/acpm_ipc.c#L426

>>  static int exynos_mbox_send_data(struct mbox_chan *chan, void *data)
>> @@ -57,7 +104,8 @@ static int exynos_mbox_send_data(struct mbox_chan *chan, void *data)
>>  		return -EINVAL;
>>  	}
>>  
>> -	writel(BIT(msg->chan_id), exynos_mbox->regs + EXYNOS_MBOX_INTGR1);
>> +	writel(BIT(msg->chan_id) << exynos_mbox->data->irq_doorbell_shift,
>> +	       exynos_mbox->regs + exynos_mbox->data->irq_doorbell_offset);
>
> Use FIELD_PREP from <linux/bitfield.h> please. You will use a mask instead of
> a shift.
>
> I would rename irq_doorbell_offset to intgr. It aligns with the register name
> from the datasheet. You won't need to prepend _offset to the name, we already
> see it's an offset when doing the writel().

Sure. Thanks. Let's use FIELD_PREP.

"doorbell" naming was chosen for readability and maintainability reasons.
It seems to be more generic enough name that better reflects the workflow
of what's going on in ACPM+mailbox machinery. We can rename it to just
"doorbell" for instance.

From platform data it will be clear to which register it is set, INTGR0
or INTGR1, to align it with datasheet (which is closed anyway).

Regarding intgr vs doorbell name, the intgr is a bit unclear for a
reader if it means interrupt generation register or something else.
But if you prefer, I can go with "intgr".

One more option is add a comment, smth like /* Ring the doorbell */
before that writel().

[..]

>> @@ -133,7 +194,7 @@ static int exynos_mbox_probe(struct platform_device *pdev)
>>  	platform_set_drvdata(pdev, exynos_mbox);
>>  
>>  	/* Mask out all interrupts. We support just polling channels for now. */
>> -	writel(EXYNOS_MBOX_INTMR0_MASK, exynos_mbox->regs + EXYNOS_MBOX_INTMR0);
>> +	writel(data->irq_mask_value, exynos_mbox->regs + data->irq_mask_offset);
>>  
>
> and here I would s/irq_mask_value/intmr_mask and irq_mask_offset/intmr.

Ack. Thanks.

Best regards,
Alexey


^ permalink raw reply

* [PATCH] iommu/arm-smmu-qcom: Fix fastrpc compatible string in ACTLR client match table
From: bibek.patro @ 2026-04-08 13:08 UTC (permalink / raw)
  To: Rob Clark, Will Deacon, Robin Murphy, Joerg Roedel
  Cc: Dmitry Baryshkov, iommu, linux-arm-msm, linux-arm-kernel,
	linux-kernel, srinivas.kandagatla, Bibek Kumar Patro

From: Bibek Kumar Patro <bibek.patro@oss.qualcomm.com>

The qcom_smmu_actlr_client_of_match table contained "qcom,fastrpc" as
the compatible string for applying ACTLR prefetch settings to FastRPC
devices. However, "qcom,fastrpc" is the compatible string for the parent
rpmsg channel node, which is not an IOMMU client — it carries no
"iommus" property in the device tree and is never attached to an SMMU
context bank.

The actual IOMMU clients are the compute context bank (CB) child nodes,
which use the compatible string "qcom,fastrpc-compute-cb". These nodes
carry the "iommus" property and are probed by fastrpc_cb_driver via
fastrpc_cb_probe(), which sets up the DMA mask and IOMMU mappings for
each FastRPC session. The device tree structure is:

  fastrpc {
      compatible = "qcom,fastrpc";        /* rpmsg channel, no iommus */
      ...
      compute-cb@3 {
          compatible = "qcom,fastrpc-compute-cb";
          iommus = <&apps_smmu 0x1823 0x0>;  /* actual IOMMU client */
      };
  };

Since qcom_smmu_set_actlr_dev() calls of_match_device() against the
device being attached to the SMMU context bank, the "qcom,fastrpc"
entry was never matching any device. As a result, the ACTLR prefetch
settings (PREFETCH_DEEP | CPRE | CMTLB) were silently never applied
for FastRPC compute context banks.

Fix this by replacing "qcom,fastrpc" with "qcom,fastrpc-compute-cb"
in the match table so that the ACTLR settings are correctly applied
to the compute CB devices that are the true IOMMU clients.

Assisted-by: Anthropic:claude-4-6-sonnet
Fixes: 3e35c3e725de ("iommu/arm-smmu: Add ACTLR data and support for qcom_smmu_500")
Signed-off-by: Bibek Kumar Patro <bibek.patro@oss.qualcomm.com>
---

While there is an ongoing discussion [1] on how to differentiate ACTLR
prefetch settings between compute DSP and audio DSP fastrpc devices, it
is necessary to first fix the compatible string to "qcom,fastrpc-compute-cb".
Both compute DSP and audio DSP fastrpc nodes use this compatible string,
so both will receive the ACTLR settings after this fix. However, for
audio DSP devices the effect remains the same as the current
state since they do not actively use prefetch — the write is effectively
a NOP for them. The fix is meaningful for compute DSP devices, which
actively use prefetch and were previously being silently skipped.

[1]: https://lore.kernel.org/all/9b4c895a-c822-40e6-bb92-8fdcd09c82d3@oss.qualcomm.com/

 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index edd41b5a3b6a..2d006049dd61 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -39,7 +39,7 @@ static const struct of_device_id qcom_smmu_actlr_client_of_match[] = {
 			.data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
 	{ .compatible = "qcom,adreno-smmu",
 			.data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
-	{ .compatible = "qcom,fastrpc",
+	{ .compatible = "qcom,fastrpc-compute-cb",
 			.data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
 	{ .compatible = "qcom,qcm2290-mdss",
 			.data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
--
2.34.1



^ permalink raw reply related

* [PATCH] coresight: tpda: fix race between refcnt check and register access
From: Jie Gan @ 2026-04-08 13:23 UTC (permalink / raw)
  To: Suzuki K Poulose, Mike Leach, James Clark, Leo Yan,
	Alexander Shishkin, Tingwei Zhang, Tao Zhang
  Cc: coresight, linux-arm-kernel, linux-kernel, Jie Gan

Several sysfs show/store handlers check csdev->refcnt before acquiring
the spinlock, then access hardware registers inside the lock.  This is
a race: between the check and the lock acquisition, the following can
occur on another CPU:

  CPU 0 (sysfs reader)              CPU 1 (disable path)
  ─────────────────────────────     ──────────────────────────────────
  refcnt == 1, check passes
  [waiting for spinlock]            tpda_disable(): refcnt → 0
                                    debug clock disabled
  spinlock acquired
  readl_relaxed()  ← FAULT/hang

tpda_disable() decrements csdev->refcnt under the same spinlock, so
moving the refcnt check inside the lock closes the race.

Fix all six affected sysfs handlers: global_flush_req_show/store,
syncr_mode_show, syncr_count_show, and port_flush_req_show/store.

Fixes: 8e1c358a3b0e ("coresight: tpda: add global_flush_req sysfs node")
Fixes: 33f04ead7c49 ("coresight: tpda: add logic to configure TPDA_SYNCR register")
Fixes: a089d585a7f4 ("coresight: tpda: add sysfs node to flush specific port")
Signed-off-by: Jie Gan <jie.gan@oss.qualcomm.com>
---
 drivers/hwtracing/coresight/coresight-tpda.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-tpda.c b/drivers/hwtracing/coresight/coresight-tpda.c
index 89c8f71f0aff..63a979312463 100644
--- a/drivers/hwtracing/coresight/coresight-tpda.c
+++ b/drivers/hwtracing/coresight/coresight-tpda.c
@@ -360,10 +360,10 @@ static ssize_t global_flush_req_show(struct device *dev,
 	struct tpda_drvdata *drvdata = dev_get_drvdata(dev->parent);
 	unsigned long val;
 
+	guard(spinlock)(&drvdata->spinlock);
 	if (!drvdata->csdev->refcnt)
 		return -EINVAL;
 
-	guard(spinlock)(&drvdata->spinlock);
 	val = readl_relaxed(drvdata->base + TPDA_CR);
 	/* read global_flush_req bit */
 	val &= TPDA_CR_FLREQ;
@@ -382,10 +382,13 @@ static ssize_t global_flush_req_store(struct device *dev,
 	if (kstrtoul(buf, 0, &val))
 		return -EINVAL;
 
-	if (!drvdata->csdev->refcnt || !val)
+	if (!val)
 		return -EINVAL;
 
 	guard(spinlock)(&drvdata->spinlock);
+	if (!drvdata->csdev->refcnt)
+		return -EINVAL;
+
 	val = readl_relaxed(drvdata->base + TPDA_CR);
 	/* set global_flush_req bit */
 	val |= TPDA_CR_FLREQ;
@@ -404,10 +407,10 @@ static ssize_t syncr_mode_show(struct device *dev,
 	struct tpda_drvdata *drvdata = dev_get_drvdata(dev->parent);
 	unsigned long val, syncr_val;
 
+	guard(spinlock)(&drvdata->spinlock);
 	if (!drvdata->csdev->refcnt)
 		return -EINVAL;
 
-	guard(spinlock)(&drvdata->spinlock);
 	syncr_val = readl_relaxed(drvdata->base + TPDA_SYNCR);
 	val = FIELD_GET(TPDA_SYNCR_MODE_CTRL_MASK, syncr_val);
 
@@ -440,10 +443,10 @@ static ssize_t syncr_count_show(struct device *dev,
 	struct tpda_drvdata *drvdata = dev_get_drvdata(dev->parent);
 	unsigned long val;
 
+	guard(spinlock)(&drvdata->spinlock);
 	if (!drvdata->csdev->refcnt)
 		return -EINVAL;
 
-	guard(spinlock)(&drvdata->spinlock);
 	val = readl_relaxed(drvdata->base + TPDA_SYNCR);
 	val &= TPDA_SYNCR_COUNT_MASK;
 
@@ -478,10 +481,10 @@ static ssize_t port_flush_req_show(struct device *dev,
 	struct tpda_drvdata *drvdata = dev_get_drvdata(dev->parent);
 	unsigned long val;
 
+	guard(spinlock)(&drvdata->spinlock);
 	if (!drvdata->csdev->refcnt)
 		return -EINVAL;
 
-	guard(spinlock)(&drvdata->spinlock);
 	val = readl_relaxed(drvdata->base + TPDA_FLUSH_CR);
 
 	return sysfs_emit(buf, "0x%lx\n", val);
@@ -498,10 +501,13 @@ static ssize_t port_flush_req_store(struct device *dev,
 	if (kstrtou32(buf, 0, &val))
 		return -EINVAL;
 
-	if (!drvdata->csdev->refcnt || !val)
+	if (!val)
 		return -EINVAL;
 
 	guard(spinlock)(&drvdata->spinlock);
+	if (!drvdata->csdev->refcnt)
+		return -EINVAL;
+
 	CS_UNLOCK(drvdata->base);
 	writel_relaxed(val, drvdata->base + TPDA_FLUSH_CR);
 	CS_LOCK(drvdata->base);

---
base-commit: f3e6330d7fe42b204af05a2dbc68b379e0ad179e
change-id: 20260408-fix-race-condition-issue-d44203254b87

Best regards,
-- 
Jie Gan <jie.gan@oss.qualcomm.com>



^ permalink raw reply related

* Re: [PATCH] coresight: tpda: fix race between refcnt check and register access
From: James Clark @ 2026-04-08 13:27 UTC (permalink / raw)
  To: Jie Gan
  Cc: coresight, linux-arm-kernel, linux-kernel, Suzuki K Poulose,
	Mike Leach, Leo Yan, Alexander Shishkin, Tingwei Zhang, Tao Zhang
In-Reply-To: <20260408-fix-race-condition-issue-v1-1-9a148d07e08f@oss.qualcomm.com>



On 08/04/2026 2:23 pm, Jie Gan wrote:
> Several sysfs show/store handlers check csdev->refcnt before acquiring
> the spinlock, then access hardware registers inside the lock.  This is
> a race: between the check and the lock acquisition, the following can
> occur on another CPU:
> 
>    CPU 0 (sysfs reader)              CPU 1 (disable path)
>    ─────────────────────────────     ──────────────────────────────────
>    refcnt == 1, check passes
>    [waiting for spinlock]            tpda_disable(): refcnt → 0
>                                      debug clock disabled
>    spinlock acquired
>    readl_relaxed()  ← FAULT/hang
> 
> tpda_disable() decrements csdev->refcnt under the same spinlock, so
> moving the refcnt check inside the lock closes the race.
> 
> Fix all six affected sysfs handlers: global_flush_req_show/store,
> syncr_mode_show, syncr_count_show, and port_flush_req_show/store.
> 
> Fixes: 8e1c358a3b0e ("coresight: tpda: add global_flush_req sysfs node")
> Fixes: 33f04ead7c49 ("coresight: tpda: add logic to configure TPDA_SYNCR register")
> Fixes: a089d585a7f4 ("coresight: tpda: add sysfs node to flush specific port")
> Signed-off-by: Jie Gan <jie.gan@oss.qualcomm.com>

Reviewed-by: James Clark <james.clark@linaro.org>

> ---
>   drivers/hwtracing/coresight/coresight-tpda.c | 18 ++++++++++++------
>   1 file changed, 12 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/hwtracing/coresight/coresight-tpda.c b/drivers/hwtracing/coresight/coresight-tpda.c
> index 89c8f71f0aff..63a979312463 100644
> --- a/drivers/hwtracing/coresight/coresight-tpda.c
> +++ b/drivers/hwtracing/coresight/coresight-tpda.c
> @@ -360,10 +360,10 @@ static ssize_t global_flush_req_show(struct device *dev,
>   	struct tpda_drvdata *drvdata = dev_get_drvdata(dev->parent);
>   	unsigned long val;
>   
> +	guard(spinlock)(&drvdata->spinlock);
>   	if (!drvdata->csdev->refcnt)
>   		return -EINVAL;
>   
> -	guard(spinlock)(&drvdata->spinlock);
>   	val = readl_relaxed(drvdata->base + TPDA_CR);
>   	/* read global_flush_req bit */
>   	val &= TPDA_CR_FLREQ;
> @@ -382,10 +382,13 @@ static ssize_t global_flush_req_store(struct device *dev,
>   	if (kstrtoul(buf, 0, &val))
>   		return -EINVAL;
>   
> -	if (!drvdata->csdev->refcnt || !val)
> +	if (!val)
>   		return -EINVAL;
>   
>   	guard(spinlock)(&drvdata->spinlock);
> +	if (!drvdata->csdev->refcnt)
> +		return -EINVAL;
> +
>   	val = readl_relaxed(drvdata->base + TPDA_CR);
>   	/* set global_flush_req bit */
>   	val |= TPDA_CR_FLREQ;
> @@ -404,10 +407,10 @@ static ssize_t syncr_mode_show(struct device *dev,
>   	struct tpda_drvdata *drvdata = dev_get_drvdata(dev->parent);
>   	unsigned long val, syncr_val;
>   
> +	guard(spinlock)(&drvdata->spinlock);
>   	if (!drvdata->csdev->refcnt)
>   		return -EINVAL;
>   
> -	guard(spinlock)(&drvdata->spinlock);
>   	syncr_val = readl_relaxed(drvdata->base + TPDA_SYNCR);
>   	val = FIELD_GET(TPDA_SYNCR_MODE_CTRL_MASK, syncr_val);
>   
> @@ -440,10 +443,10 @@ static ssize_t syncr_count_show(struct device *dev,
>   	struct tpda_drvdata *drvdata = dev_get_drvdata(dev->parent);
>   	unsigned long val;
>   
> +	guard(spinlock)(&drvdata->spinlock);
>   	if (!drvdata->csdev->refcnt)
>   		return -EINVAL;
>   
> -	guard(spinlock)(&drvdata->spinlock);
>   	val = readl_relaxed(drvdata->base + TPDA_SYNCR);
>   	val &= TPDA_SYNCR_COUNT_MASK;
>   
> @@ -478,10 +481,10 @@ static ssize_t port_flush_req_show(struct device *dev,
>   	struct tpda_drvdata *drvdata = dev_get_drvdata(dev->parent);
>   	unsigned long val;
>   
> +	guard(spinlock)(&drvdata->spinlock);
>   	if (!drvdata->csdev->refcnt)
>   		return -EINVAL;
>   
> -	guard(spinlock)(&drvdata->spinlock);
>   	val = readl_relaxed(drvdata->base + TPDA_FLUSH_CR);
>   
>   	return sysfs_emit(buf, "0x%lx\n", val);
> @@ -498,10 +501,13 @@ static ssize_t port_flush_req_store(struct device *dev,
>   	if (kstrtou32(buf, 0, &val))
>   		return -EINVAL;
>   
> -	if (!drvdata->csdev->refcnt || !val)
> +	if (!val)
>   		return -EINVAL;
>   
>   	guard(spinlock)(&drvdata->spinlock);
> +	if (!drvdata->csdev->refcnt)
> +		return -EINVAL;
> +
>   	CS_UNLOCK(drvdata->base);
>   	writel_relaxed(val, drvdata->base + TPDA_FLUSH_CR);
>   	CS_LOCK(drvdata->base);
> 
> ---
> base-commit: f3e6330d7fe42b204af05a2dbc68b379e0ad179e
> change-id: 20260408-fix-race-condition-issue-d44203254b87
> 
> Best regards,



^ permalink raw reply

* Patch "net: phy: allow MDIO bus PM ops to start/stop state machine for phylink-controlled PHY" has been added to the 6.1-stable tree
From: gregkh @ 2026-04-08 13:30 UTC (permalink / raw)
  To: 681739313, Z-1tiW9zjcoFkhwc, andrew, davem, edumazet, f.fainelli,
	gregkh, hkallweit1, kuba, linux-arm-kernel, linux-mediatek, linux,
	matthias.bgg, pabeni, patches, vladimir.oltean, wei.fang
  Cc: stable-commits
In-Reply-To: <20260327015237.1713008-1-681739313@139.com>


This is a note to let you know that I've just added the patch titled

    net: phy: allow MDIO bus PM ops to start/stop state machine for phylink-controlled PHY

to the 6.1-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     net-phy-allow-mdio-bus-pm-ops-to-start-stop-state-machine-for-phylink-controlled-phy.patch
and it can be found in the queue-6.1 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


From 681739313@139.com Fri Mar 27 02:52:59 2026
From: Rajani Kantha <681739313@139.com>
Date: Fri, 27 Mar 2026 09:52:37 +0800
Subject: net: phy: allow MDIO bus PM ops to start/stop state machine for phylink-controlled PHY
To: gregkh@linuxfoundation.org, stable@vger.kernel.org, vladimir.oltean@nxp.com
Cc: patches@lists.linux.dev, linux-kernel@vger.kernel.org, andrew@lunn.ch, hkallweit1@gmail.com, linux@armlinux.org.uk, davem@davemloft.net, edumazet@google.com, kuba@kernel.org, pabeni@redhat.com, matthias.bgg@gmail.com, f.fainelli@gmail.com, netdev@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-mediatek@lists.infradead.org, wei.fang@nxp.com
Message-ID: <20260327015237.1713008-1-681739313@139.com>

From: Vladimir Oltean <vladimir.oltean@nxp.com>

[ Upstream commit fc75ea20ffb452652f0d4033f38fe88d7cfdae35 ]

DSA has 2 kinds of drivers:

1. Those who call dsa_switch_suspend() and dsa_switch_resume() from
   their device PM ops: qca8k-8xxx, bcm_sf2, microchip ksz
2. Those who don't: all others. The above methods should be optional.

For type 1, dsa_switch_suspend() calls dsa_user_suspend() -> phylink_stop(),
and dsa_switch_resume() calls dsa_user_resume() -> phylink_start().
These seem good candidates for setting mac_managed_pm = true because
that is essentially its definition [1], but that does not seem to be the
biggest problem for now, and is not what this change focuses on.

Talking strictly about the 2nd category of DSA drivers here (which
do not have MAC managed PM, meaning that for their attached PHYs,
mdio_bus_phy_suspend() and mdio_bus_phy_resume() should run in full),
I have noticed that the following warning from mdio_bus_phy_resume() is
triggered:

	WARN_ON(phydev->state != PHY_HALTED && phydev->state != PHY_READY &&
		phydev->state != PHY_UP);

because the PHY state machine is running.

It's running as a result of a previous dsa_user_open() -> ... ->
phylink_start() -> phy_start() having been initiated by the user.

The previous mdio_bus_phy_suspend() was supposed to have called
phy_stop_machine(), but it didn't. So this is why the PHY is in state
PHY_NOLINK by the time mdio_bus_phy_resume() runs.

mdio_bus_phy_suspend() did not call phy_stop_machine() because for
phylink, the phydev->adjust_link function pointer is NULL. This seems a
technicality introduced by commit fddd91016d16 ("phylib: fix PAL state
machine restart on resume"). That commit was written before phylink
existed, and was intended to avoid crashing with consumer drivers which
don't use the PHY state machine - phylink always does, when using a PHY.
But phylink itself has historically not been developed with
suspend/resume in mind, and apparently not tested too much in that
scenario, allowing this bug to exist unnoticed for so long. Plus, prior
to the WARN_ON(), it would have likely been invisible.

This issue is not in fact restricted to type 2 DSA drivers (according to
the above ad-hoc classification), but can be extrapolated to any MAC
driver with phylink and MDIO-bus-managed PHY PM ops. DSA is just where
the issue was reported. Assuming mac_managed_pm is set correctly, a
quick search indicates the following other drivers might be affected:

$ grep -Zlr PHYLINK_NETDEV drivers/ | xargs -0 grep -L mac_managed_pm
drivers/net/ethernet/atheros/ag71xx.c
drivers/net/ethernet/microchip/sparx5/sparx5_main.c
drivers/net/ethernet/microchip/lan966x/lan966x_main.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
drivers/net/ethernet/freescale/ucc_geth.c
drivers/net/ethernet/freescale/enetc/enetc_pf_common.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/prestera/prestera_main.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/altera/altera_tse_main.c
drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
drivers/net/ethernet/meta/fbnic/fbnic_phylink.c
drivers/net/ethernet/tehuti/tn40_phy.c
drivers/net/ethernet/mscc/ocelot_net.c

Make the existing conditions dependent on the PHY device having a
phydev->phy_link_change() implementation equal to the default
phy_link_change() provided by phylib. Otherwise, we implicitly know that
the phydev has the phylink-provided phylink_phy_change() callback, and
when phylink is used, the PHY state machine always needs to be stopped/
started on the suspend/resume path. The code is structured as such that
if phydev->phy_link_change() is absent, it is a matter of time until the
kernel will crash - no need to further complicate the test.

Thus, for the situation where the PM is not managed by the MAC, we will
make the MDIO bus PM ops treat identically the phylink-controlled PHYs
with the phylib-controlled PHYs where an adjust_link() callback is
supplied. In both cases, the MDIO bus PM ops should stop and restart the
PHY state machine.

[1] https://lore.kernel.org/netdev/Z-1tiW9zjcoFkhwc@shell.armlinux.org.uk/

Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state")
Reported-by: Wei Fang <wei.fang@nxp.com>
Tested-by: Wei Fang <wei.fang@nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20250407094042.2155633-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Rajani Kantha <681739313@139.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/phy/phy_device.c |   31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -247,6 +247,33 @@ static void phy_link_change(struct phy_d
 		phydev->mii_ts->link_state(phydev->mii_ts, phydev);
 }
 
+/**
+ * phy_uses_state_machine - test whether consumer driver uses PAL state machine
+ * @phydev: the target PHY device structure
+ *
+ * Ultimately, this aims to indirectly determine whether the PHY is attached
+ * to a consumer which uses the state machine by calling phy_start() and
+ * phy_stop().
+ *
+ * When the PHY driver consumer uses phylib, it must have previously called
+ * phy_connect_direct() or one of its derivatives, so that phy_prepare_link()
+ * has set up a hook for monitoring state changes.
+ *
+ * When the PHY driver is used by the MAC driver consumer through phylink (the
+ * only other provider of a phy_link_change() method), using the PHY state
+ * machine is not optional.
+ *
+ * Return: true if consumer calls phy_start() and phy_stop(), false otherwise.
+ */
+static bool phy_uses_state_machine(struct phy_device *phydev)
+{
+	if (phydev->phy_link_change == phy_link_change)
+		return phydev->attached_dev && phydev->adjust_link;
+
+	/* phydev->phy_link_change is implicitly phylink_phy_change() */
+	return true;
+}
+
 static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
 {
 	struct device_driver *drv = phydev->mdio.dev.driver;
@@ -307,7 +334,7 @@ static __maybe_unused int mdio_bus_phy_s
 	 * may call phy routines that try to grab the same lock, and that may
 	 * lead to a deadlock.
 	 */
-	if (phydev->attached_dev && phydev->adjust_link)
+	if (phy_uses_state_machine(phydev))
 		phy_stop_machine(phydev);
 
 	if (!mdio_bus_phy_may_suspend(phydev))
@@ -361,7 +388,7 @@ no_resume:
 		}
 	}
 
-	if (phydev->attached_dev && phydev->adjust_link)
+	if (phy_uses_state_machine(phydev))
 		phy_start_machine(phydev);
 
 	return 0;


Patches currently in stable-queue which might be from 681739313@139.com are

queue-6.1/net-phy-move-phy_link_change-prior-to-mdio_bus_phy_may_suspend.patch
queue-6.1/net-phy-fix-phy_uses_state_machine.patch
queue-6.1/net-phy-allow-mdio-bus-pm-ops-to-start-stop-state-machine-for-phylink-controlled-phy.patch


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox