public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
@ 2024-12-25  8:24 Aleksandar Rikalo
  2025-01-10  3:23 ` Charlie Jenkins
  0 siblings, 1 reply; 14+ messages in thread
From: Aleksandar Rikalo @ 2024-12-25  8:24 UTC (permalink / raw)
  To: linux-riscv
  Cc: Paul Walmsley, Palmer Dabbelt, Albert Ou, Will Deacon,
	Peter Zijlstra, Boqun Feng, Mark Rutland, Yury Norov,
	Rasmus Villemoes, Andrea Parri, Leonardo Bras, Guo Ren,
	Samuel Holland, Eric Chan, Aleksandar Rikalo, linux-kernel,
	Djordje Todorovic

From: Chao-ying Fu <cfu@mips.com>

Use only LR/SC instructions to implement atomic functions.

Add config RISCV_AMO_USE_ZALRSC.

Signed-off-by: Chao-ying Fu <cfu@mips.com>
Signed-off-by: Aleksandar Rikalo <arikalo@gmail.com>
---
 arch/riscv/Kconfig               | 11 +++++++
 arch/riscv/include/asm/atomic.h  | 52 +++++++++++++++++++++++++++++++-
 arch/riscv/include/asm/bitops.h  | 45 +++++++++++++++++++++++++++
 arch/riscv/include/asm/cmpxchg.h | 16 ++++++++++
 arch/riscv/include/asm/futex.h   | 46 ++++++++++++++++++++++++++++
 5 files changed, 169 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index cc63aef41e94..9fb020b49408 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -715,6 +715,17 @@ config RISCV_ISA_ZACAS
 
 	  If you don't know what to do here, say Y.
 
+config RISCV_AMO_USE_ZALRSC
+	bool "Use Zalrsc extension to implement atomic functions"
+	help
+	   Kernel uses only LR/SC instructions to implement atomic functions.
+
+	   It makes sense to enable this option if your platform only
+	   implements the Zalrsc extension (a subset of the A extension),
+	   and not the complete A extension.
+
+	   If you don't know what to do here, say N.
+
 config TOOLCHAIN_HAS_ZBB
 	bool
 	default y
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 5b96c2f61adb..88f62e33a545 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -50,6 +50,7 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
  * have the AQ or RL bits set.  These don't return anything, so there's only
  * one version to worry about.
  */
+#ifndef RISCV_AMO_USE_ZALRSC
 #define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)		\
 static __always_inline							\
 void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)	\
@@ -59,7 +60,23 @@ void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)	\
 		: "+A" (v->counter)					\
 		: "r" (I)						\
 		: "memory");						\
-}									\
+}
+#else
+#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)		\
+static __always_inline							\
+void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)	\
+{									\
+	register c_type ret, temp;					\
+	__asm__ __volatile__ (						\
+		"1:	lr." #asm_type " %1, %0\n"			\
+		"	" #asm_op " %2, %1, %3\n"			\
+		"	sc." #asm_type " %2, %2, %0\n"			\
+		"	bnez %2, 1b\n"					\
+		: "+A" (v->counter), "=&r" (ret), "=&r" (temp)		\
+		: "r" (I)						\
+		: "memory");						\
+}
+#endif
 
 #ifdef CONFIG_GENERIC_ATOMIC64
 #define ATOMIC_OPS(op, asm_op, I)					\
@@ -84,6 +101,7 @@ ATOMIC_OPS(xor, xor,  i)
  * There's two flavors of these: the arithmatic ops have both fetch and return
  * versions, while the logical ops only have fetch versions.
  */
+#ifndef RISCV_AMO_USE_ZALRSC
 #define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)	\
 static __always_inline							\
 c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i,		\
@@ -108,6 +126,38 @@ c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v)	\
 		: "memory");						\
 	return ret;							\
 }
+#else
+#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)	\
+static __always_inline							\
+c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i,		\
+					     atomic##prefix##_t *v)	\
+{									\
+	register c_type ret, temp;					\
+	__asm__ __volatile__ (						\
+		"1:	lr." #asm_type " %1, %0\n"			\
+		"	" #asm_op " %2, %1, %3\n"			\
+		"	sc." #asm_type " %2, %2, %0\n"			\
+		"	bnez %2, 1b\n"					\
+		: "+A" (v->counter), "=&r" (ret), "=&r" (temp)		\
+		: "r" (I)						\
+		: "memory");						\
+	return ret;							\
+}									\
+static __always_inline							\
+c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v)	\
+{									\
+	register c_type ret, temp;					\
+	__asm__ __volatile__ (						\
+		"1:	lr." #asm_type ".aqrl %1, %0\n"			\
+		"	" #asm_op " %2, %1, %3\n"			\
+		"	sc." #asm_type ".aqrl %2, %2, %0\n"		\
+		"	bnez %2, 1b\n"					\
+		: "+A" (v->counter), "=&r" (ret), "=&r" (temp)		\
+		: "r" (I)						\
+		: "memory");						\
+	return ret;							\
+}
+#endif
 
 #define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix)	\
 static __always_inline							\
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index fae152ea0508..0051de1cf471 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -187,12 +187,17 @@ static __always_inline int variable_fls(unsigned int x)
 
 #if (BITS_PER_LONG == 64)
 #define __AMO(op)	"amo" #op ".d"
+#define __LR	"lr.d"
+#define __SC	"sc.d"
 #elif (BITS_PER_LONG == 32)
 #define __AMO(op)	"amo" #op ".w"
+#define __LR	"lr.w"
+#define __SC	"sc.w"
 #else
 #error "Unexpected BITS_PER_LONG"
 #endif
 
+#ifndef RISCV_AMO_USE_ZALRSC
 #define __test_and_op_bit_ord(op, mod, nr, addr, ord)		\
 ({								\
 	unsigned long __res, __mask;				\
@@ -211,6 +216,33 @@ static __always_inline int variable_fls(unsigned int x)
 		: "+A" (addr[BIT_WORD(nr)])			\
 		: "r" (mod(BIT_MASK(nr)))			\
 		: "memory");
+#else
+#define __test_and_op_bit_ord(op, mod, nr, addr, ord)		\
+({								\
+	unsigned long __res, __mask, __temp;			\
+	__mask = BIT_MASK(nr);					\
+	__asm__ __volatile__ (					\
+		"1: " __LR #ord " %0, %1\n"			\
+		#op " %2, %0, %3\n"				\
+		__SC #ord " %2, %2, %1\n"			\
+		"bnez %2, 1b\n"					\
+		: "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp)	\
+		: "r" (mod(__mask))				\
+		: "memory");					\
+	((__res & __mask) != 0);				\
+})
+
+#define __op_bit_ord(op, mod, nr, addr, ord)			\
+	unsigned long __res, __temp;				\
+	__asm__ __volatile__ (					\
+		"1: " __LR #ord " %0, %1\n"			\
+		#op " %2, %0, %3\n"				\
+		__SC #ord " %2, %2, %1\n"			\
+		"bnez %2, 1b\n"					\
+		: "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp)	\
+		: "r" (mod(BIT_MASK(nr)))			\
+		: "memory")
+#endif
 
 #define __test_and_op_bit(op, mod, nr, addr) 			\
 	__test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
@@ -354,12 +386,25 @@ static inline void arch___clear_bit_unlock(
 static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
 		volatile unsigned long *addr)
 {
+#ifndef RISCV_AMO_USE_ZALRSC
 	unsigned long res;
 	__asm__ __volatile__ (
 		__AMO(xor) ".rl %0, %2, %1"
 		: "=r" (res), "+A" (*addr)
 		: "r" (__NOP(mask))
 		: "memory");
+#else
+	unsigned long res, temp;
+
+	__asm__ __volatile__ (
+		"1: " __LR ".rl %0, %1\n"
+		"xor %2, %0, %3\n"
+		__SC ".rl %2, %2, %1\n"
+		"bnez %2, 1b\n"
+		: "=&r" (res), "+A" (*addr), "=&r" (temp)
+		: "r" (__NOP(mask))
+		: "memory");
+#endif
 	return (res & BIT(7)) != 0;
 }
 
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 4cadc56220fe..aba60f427060 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -51,6 +51,7 @@
 	}									\
 })
 
+#ifndef RISCV_AMO_USE_ZALRSC
 #define __arch_xchg(sfx, prepend, append, r, p, n)			\
 ({									\
 	__asm__ __volatile__ (						\
@@ -61,6 +62,21 @@
 		: "r" (n)						\
 		: "memory");						\
 })
+#else
+#define __arch_xchg(sfx, prepend, append, r, p, n)			\
+({									\
+	__typeof__(*(__ptr)) temp;					\
+	__asm__ __volatile__ (						\
+		prepend							\
+		"1:	lr" sfx " %0, %1\n"				\
+		"	sc" sfx " %2, %3, %1\n"				\
+		"	bnez %2, 1b\n"					\
+		append							\
+		: "=&r" (r), "+A" (*(p)), "=&r" (temp)			\
+		: "r" (n)						\
+		: "memory");						\
+})
+#endif
 
 #define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,			\
 		   sc_append, swap_append)				\
diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
index fc8130f995c1..dc63065e707e 100644
--- a/arch/riscv/include/asm/futex.h
+++ b/arch/riscv/include/asm/futex.h
@@ -19,6 +19,7 @@
 #define __disable_user_access()		do { } while (0)
 #endif
 
+#ifndef RISCV_AMO_USE_ZALRSC
 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)	\
 {								\
 	__enable_user_access();					\
@@ -32,16 +33,39 @@
 	: "memory");						\
 	__disable_user_access();				\
 }
+#else
+#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)	\
+{								\
+	__enable_user_access();					\
+	__asm__ __volatile__ (					\
+	"1:	lr.w.aqrl %[ov], %[u]\n"			\
+	"	" insn "\n"					\
+	"	sc.w.aqrl %[t], %[t], %[u]\n"			\
+	"	bnez %[t], 1b\n"				\
+	"2:\n"							\
+	_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %[r])			\
+	: [r] "+r" (ret), [ov] "=&r" (oldval),			\
+	  [t] "=&r" (temp), [u] "+m" (*uaddr)			\
+	: [op] "Jr" (oparg)					\
+	: "memory");						\
+	__disable_user_access();				\
+}
+#endif
 
 static inline int
 arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
+#ifndef RISCV_AMO_USE_ZALRSC
 	int oldval = 0, ret = 0;
+#else
+	int oldval = 0, ret = 0, temp = 0;
+#endif
 
 	if (!access_ok(uaddr, sizeof(u32)))
 		return -EFAULT;
 
 	switch (op) {
+#ifndef RISCV_AMO_USE_ZALRSC
 	case FUTEX_OP_SET:
 		__futex_atomic_op("amoswap.w.aqrl %[ov],%z[op],%[u]",
 				  ret, oldval, uaddr, oparg);
@@ -62,6 +86,28 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 		__futex_atomic_op("amoxor.w.aqrl %[ov],%z[op],%[u]",
 				  ret, oldval, uaddr, oparg);
 		break;
+#else
+	case FUTEX_OP_SET:
+		__futex_atomic_op("mv %[t], %z[op]",
+				  ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:
+		__futex_atomic_op("add %[t], %[ov], %z[op]",
+				  ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_atomic_op("or %[t], %[ov], %z[op]",
+				  ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		__futex_atomic_op("and %[t], %[ov], %z[op]",
+				  ret, oldval, uaddr, ~oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_atomic_op("xor %[t], %[ov], %z[op]",
+				  ret, oldval, uaddr, oparg);
+		break;
+#endif
 	default:
 		ret = -ENOSYS;
 	}
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
  2024-12-25  8:24 [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions Aleksandar Rikalo
@ 2025-01-10  3:23 ` Charlie Jenkins
  2025-02-01 12:04   ` Aleksandar Rikalo
  0 siblings, 1 reply; 14+ messages in thread
From: Charlie Jenkins @ 2025-01-10  3:23 UTC (permalink / raw)
  To: Aleksandar Rikalo
  Cc: linux-riscv, Paul Walmsley, Palmer Dabbelt, Albert Ou,
	Will Deacon, Peter Zijlstra, Boqun Feng, Mark Rutland, Yury Norov,
	Rasmus Villemoes, Andrea Parri, Leonardo Bras, Guo Ren,
	Samuel Holland, Eric Chan, linux-kernel, Djordje Todorovic

On Wed, Dec 25, 2024 at 09:24:12AM +0100, Aleksandar Rikalo wrote:
> From: Chao-ying Fu <cfu@mips.com>
> 
> Use only LR/SC instructions to implement atomic functions.

In the previous patch you mention that this is to support MIPS P8700. Can
you expand on why this change is required? The datasheet at [1] says:

"The P8700 core is configured to support the RV64GCZba_Zbb (G = IMAFD)
Standard ISA. It includes the RV64I base ISA, Multiply (M), Atomic (A),
Single-Precision Floating Point (F), Double (D), Compressed (C) RISC-V
extensions, as well as the bit-manipulation extensions
(Zba) and (Zbb)"

The "A" extension is a part of "G" which is mostly assumed to exist in
the kernel. Additionally, having this be a compilation flag will cause
traps on generic kernels. We generally try to push everything we can
into runtime feature detection since there are so many possible variants
of riscv.

Expressing not being able to perform a feature like this is normally
better expressed as an errata. Then generic kernels will be able to
include this, and anybody who doesn't want to have the extra nops
introduced can disable the errata. A similar approach to what I pointed
out last time should work here too (but with more places to replace)
[2].

[1] https://mips.com/wp-content/uploads/2024/11/P8700_Data_Sheet.pdf
[2] https://lore.kernel.org/lkml/Z2-UNfwcAQYZqVBU@ghost/T/

> 
> Add config RISCV_AMO_USE_ZALRSC.
> 
> Signed-off-by: Chao-ying Fu <cfu@mips.com>
> Signed-off-by: Aleksandar Rikalo <arikalo@gmail.com>
> ---
>  arch/riscv/Kconfig               | 11 +++++++
>  arch/riscv/include/asm/atomic.h  | 52 +++++++++++++++++++++++++++++++-
>  arch/riscv/include/asm/bitops.h  | 45 +++++++++++++++++++++++++++
>  arch/riscv/include/asm/cmpxchg.h | 16 ++++++++++
>  arch/riscv/include/asm/futex.h   | 46 ++++++++++++++++++++++++++++
>  5 files changed, 169 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index cc63aef41e94..9fb020b49408 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -715,6 +715,17 @@ config RISCV_ISA_ZACAS
>  
>  	  If you don't know what to do here, say Y.
>  
> +config RISCV_AMO_USE_ZALRSC
> +	bool "Use Zalrsc extension to implement atomic functions"
> +	help
> +	   Kernel uses only LR/SC instructions to implement atomic functions.
> +
> +	   It makes sense to enable this option if your platform only
> +	   implements the Zalrsc extension (a subset of the A extension),
> +	   and not the complete A extension.
> +
> +	   If you don't know what to do here, say N.
> +
>  config TOOLCHAIN_HAS_ZBB
>  	bool
>  	default y
> diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
> index 5b96c2f61adb..88f62e33a545 100644
> --- a/arch/riscv/include/asm/atomic.h
> +++ b/arch/riscv/include/asm/atomic.h
> @@ -50,6 +50,7 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
>   * have the AQ or RL bits set.  These don't return anything, so there's only
>   * one version to worry about.
>   */
> +#ifndef RISCV_AMO_USE_ZALRSC
>  #define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)		\
>  static __always_inline							\
>  void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)	\
> @@ -59,7 +60,23 @@ void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)	\
>  		: "+A" (v->counter)					\
>  		: "r" (I)						\
>  		: "memory");						\
> -}									\
> +}
> +#else
> +#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)		\
> +static __always_inline							\
> +void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)	\
> +{									\
> +	register c_type ret, temp;					\
> +	__asm__ __volatile__ (						\
> +		"1:	lr." #asm_type " %1, %0\n"			\
> +		"	" #asm_op " %2, %1, %3\n"			\
> +		"	sc." #asm_type " %2, %2, %0\n"			\
> +		"	bnez %2, 1b\n"					\
> +		: "+A" (v->counter), "=&r" (ret), "=&r" (temp)		\
> +		: "r" (I)						\
> +		: "memory");						\
> +}
> +#endif
>  
>  #ifdef CONFIG_GENERIC_ATOMIC64
>  #define ATOMIC_OPS(op, asm_op, I)					\
> @@ -84,6 +101,7 @@ ATOMIC_OPS(xor, xor,  i)
>   * There's two flavors of these: the arithmatic ops have both fetch and return
>   * versions, while the logical ops only have fetch versions.
>   */
> +#ifndef RISCV_AMO_USE_ZALRSC
>  #define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)	\
>  static __always_inline							\
>  c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i,		\
> @@ -108,6 +126,38 @@ c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v)	\
>  		: "memory");						\
>  	return ret;							\
>  }
> +#else
> +#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)	\
> +static __always_inline							\
> +c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i,		\
> +					     atomic##prefix##_t *v)	\
> +{									\
> +	register c_type ret, temp;					\
> +	__asm__ __volatile__ (						\
> +		"1:	lr." #asm_type " %1, %0\n"			\
> +		"	" #asm_op " %2, %1, %3\n"			\
> +		"	sc." #asm_type " %2, %2, %0\n"			\
> +		"	bnez %2, 1b\n"					\
> +		: "+A" (v->counter), "=&r" (ret), "=&r" (temp)		\
> +		: "r" (I)						\
> +		: "memory");						\
> +	return ret;							\
> +}									\
> +static __always_inline							\
> +c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v)	\
> +{									\
> +	register c_type ret, temp;					\
> +	__asm__ __volatile__ (						\
> +		"1:	lr." #asm_type ".aqrl %1, %0\n"			\
> +		"	" #asm_op " %2, %1, %3\n"			\
> +		"	sc." #asm_type ".aqrl %2, %2, %0\n"		\
> +		"	bnez %2, 1b\n"					\
> +		: "+A" (v->counter), "=&r" (ret), "=&r" (temp)		\
> +		: "r" (I)						\
> +		: "memory");						\
> +	return ret;							\
> +}
> +#endif
>  
>  #define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix)	\
>  static __always_inline							\
> diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
> index fae152ea0508..0051de1cf471 100644
> --- a/arch/riscv/include/asm/bitops.h
> +++ b/arch/riscv/include/asm/bitops.h
> @@ -187,12 +187,17 @@ static __always_inline int variable_fls(unsigned int x)
>  
>  #if (BITS_PER_LONG == 64)
>  #define __AMO(op)	"amo" #op ".d"
> +#define __LR	"lr.d"
> +#define __SC	"sc.d"
>  #elif (BITS_PER_LONG == 32)
>  #define __AMO(op)	"amo" #op ".w"
> +#define __LR	"lr.w"
> +#define __SC	"sc.w"
>  #else
>  #error "Unexpected BITS_PER_LONG"
>  #endif
>  
> +#ifndef RISCV_AMO_USE_ZALRSC
>  #define __test_and_op_bit_ord(op, mod, nr, addr, ord)		\
>  ({								\
>  	unsigned long __res, __mask;				\
> @@ -211,6 +216,33 @@ static __always_inline int variable_fls(unsigned int x)
>  		: "+A" (addr[BIT_WORD(nr)])			\
>  		: "r" (mod(BIT_MASK(nr)))			\
>  		: "memory");
> +#else
> +#define __test_and_op_bit_ord(op, mod, nr, addr, ord)		\
> +({								\
> +	unsigned long __res, __mask, __temp;			\
> +	__mask = BIT_MASK(nr);					\
> +	__asm__ __volatile__ (					\
> +		"1: " __LR #ord " %0, %1\n"			\
> +		#op " %2, %0, %3\n"				\
> +		__SC #ord " %2, %2, %1\n"			\
> +		"bnez %2, 1b\n"					\
> +		: "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp)	\
> +		: "r" (mod(__mask))				\
> +		: "memory");					\
> +	((__res & __mask) != 0);				\
> +})
> +
> +#define __op_bit_ord(op, mod, nr, addr, ord)			\
> +	unsigned long __res, __temp;				\
> +	__asm__ __volatile__ (					\
> +		"1: " __LR #ord " %0, %1\n"			\
> +		#op " %2, %0, %3\n"				\
> +		__SC #ord " %2, %2, %1\n"			\
> +		"bnez %2, 1b\n"					\
> +		: "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp)	\
> +		: "r" (mod(BIT_MASK(nr)))			\
> +		: "memory")
> +#endif
>  
>  #define __test_and_op_bit(op, mod, nr, addr) 			\
>  	__test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
> @@ -354,12 +386,25 @@ static inline void arch___clear_bit_unlock(
>  static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
>  		volatile unsigned long *addr)
>  {
> +#ifndef RISCV_AMO_USE_ZALRSC
>  	unsigned long res;
>  	__asm__ __volatile__ (
>  		__AMO(xor) ".rl %0, %2, %1"
>  		: "=r" (res), "+A" (*addr)
>  		: "r" (__NOP(mask))
>  		: "memory");
> +#else
> +	unsigned long res, temp;
> +
> +	__asm__ __volatile__ (
> +		"1: " __LR ".rl %0, %1\n"
> +		"xor %2, %0, %3\n"
> +		__SC ".rl %2, %2, %1\n"
> +		"bnez %2, 1b\n"
> +		: "=&r" (res), "+A" (*addr), "=&r" (temp)
> +		: "r" (__NOP(mask))
> +		: "memory");
> +#endif
>  	return (res & BIT(7)) != 0;
>  }
>  
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index 4cadc56220fe..aba60f427060 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -51,6 +51,7 @@
>  	}									\
>  })
>  
> +#ifndef RISCV_AMO_USE_ZALRSC
>  #define __arch_xchg(sfx, prepend, append, r, p, n)			\
>  ({									\
>  	__asm__ __volatile__ (						\
> @@ -61,6 +62,21 @@
>  		: "r" (n)						\
>  		: "memory");						\
>  })
> +#else
> +#define __arch_xchg(sfx, prepend, append, r, p, n)			\
> +({									\
> +	__typeof__(*(__ptr)) temp;					\
> +	__asm__ __volatile__ (						\
> +		prepend							\
> +		"1:	lr" sfx " %0, %1\n"				\
> +		"	sc" sfx " %2, %3, %1\n"				\
> +		"	bnez %2, 1b\n"					\
> +		append							\
> +		: "=&r" (r), "+A" (*(p)), "=&r" (temp)			\
> +		: "r" (n)						\
> +		: "memory");						\
> +})
> +#endif
>  
>  #define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,			\
>  		   sc_append, swap_append)				\
> diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
> index fc8130f995c1..dc63065e707e 100644
> --- a/arch/riscv/include/asm/futex.h
> +++ b/arch/riscv/include/asm/futex.h
> @@ -19,6 +19,7 @@
>  #define __disable_user_access()		do { } while (0)
>  #endif
>  
> +#ifndef RISCV_AMO_USE_ZALRSC
>  #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)	\
>  {								\
>  	__enable_user_access();					\
> @@ -32,16 +33,39 @@
>  	: "memory");						\
>  	__disable_user_access();				\
>  }
> +#else
> +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)	\
> +{								\
> +	__enable_user_access();					\
> +	__asm__ __volatile__ (					\
> +	"1:	lr.w.aqrl %[ov], %[u]\n"			\
> +	"	" insn "\n"					\
> +	"	sc.w.aqrl %[t], %[t], %[u]\n"			\
> +	"	bnez %[t], 1b\n"				\
> +	"2:\n"							\
> +	_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %[r])			\
> +	: [r] "+r" (ret), [ov] "=&r" (oldval),			\
> +	  [t] "=&r" (temp), [u] "+m" (*uaddr)			\
> +	: [op] "Jr" (oparg)					\
> +	: "memory");						\
> +	__disable_user_access();				\
> +}
> +#endif
>  
>  static inline int
>  arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
>  {
> +#ifndef RISCV_AMO_USE_ZALRSC
>  	int oldval = 0, ret = 0;
> +#else
> +	int oldval = 0, ret = 0, temp = 0;

I think it's better to define this temp variable inside of
__futex_atomic_op() instead of requiring it to be defined in the scope
where the macro is called.

- Charlie

> +#endif
>  
>  	if (!access_ok(uaddr, sizeof(u32)))
>  		return -EFAULT;
>  
>  	switch (op) {
> +#ifndef RISCV_AMO_USE_ZALRSC
>  	case FUTEX_OP_SET:
>  		__futex_atomic_op("amoswap.w.aqrl %[ov],%z[op],%[u]",
>  				  ret, oldval, uaddr, oparg);
> @@ -62,6 +86,28 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
>  		__futex_atomic_op("amoxor.w.aqrl %[ov],%z[op],%[u]",
>  				  ret, oldval, uaddr, oparg);
>  		break;
> +#else
> +	case FUTEX_OP_SET:
> +		__futex_atomic_op("mv %[t], %z[op]",
> +				  ret, oldval, uaddr, oparg);
> +		break;
> +	case FUTEX_OP_ADD:
> +		__futex_atomic_op("add %[t], %[ov], %z[op]",
> +				  ret, oldval, uaddr, oparg);
> +		break;
> +	case FUTEX_OP_OR:
> +		__futex_atomic_op("or %[t], %[ov], %z[op]",
> +				  ret, oldval, uaddr, oparg);
> +		break;
> +	case FUTEX_OP_ANDN:
> +		__futex_atomic_op("and %[t], %[ov], %z[op]",
> +				  ret, oldval, uaddr, ~oparg);
> +		break;
> +	case FUTEX_OP_XOR:
> +		__futex_atomic_op("xor %[t], %[ov], %z[op]",
> +				  ret, oldval, uaddr, oparg);
> +		break;
> +#endif
>  	default:
>  		ret = -ENOSYS;
>  	}
> -- 
> 2.25.1
> 
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
  2025-01-10  3:23 ` Charlie Jenkins
@ 2025-02-01 12:04   ` Aleksandar Rikalo
  2025-02-02 20:08     ` Conor Dooley
  0 siblings, 1 reply; 14+ messages in thread
From: Aleksandar Rikalo @ 2025-02-01 12:04 UTC (permalink / raw)
  To: Charlie Jenkins
  Cc: linux-riscv, Paul Walmsley, Palmer Dabbelt, Albert Ou,
	Will Deacon, Peter Zijlstra, Boqun Feng, Mark Rutland, Yury Norov,
	Rasmus Villemoes, Andrea Parri, Leonardo Bras, Guo Ren,
	Samuel Holland, Eric Chan, linux-kernel, Djordje Todorovic

On Fri, Jan 10, 2025 at 4:23 AM Charlie Jenkins <charlie@rivosinc.com> wrote:

> > From: Chao-ying Fu <cfu@mips.com>
> >
> > Use only LR/SC instructions to implement atomic functions.
>
> In the previous patch you mention that this is to support MIPS P8700. Can
> you expand on why this change is required? The datasheet at [1] says:
>
> "The P8700 core is configured to support the RV64GCZba_Zbb (G = IMAFD)
> Standard ISA. It includes the RV64I base ISA, Multiply (M), Atomic (A),
> Single-Precision Floating Point (F), Double (D), Compressed (C) RISC-V
> extensions, as well as the bit-manipulation extensions
> (Zba) and (Zbb)"
>
> The "A" extension is a part of "G" which is mostly assumed to exist in
> the kernel. Additionally, having this be a compilation flag will cause
> traps on generic kernels. We generally try to push everything we can
> into runtime feature detection since there are so many possible variants
> of riscv.
>
> Expressing not being able to perform a feature like this is normally
> better expressed as an errata. Then generic kernels will be able to
> include this, and anybody who doesn't want to have the extra nops
> introduced can disable the errata. A similar approach to what I pointed
> out last time should work here too (but with more places to replace)
> [2].
>
> [1] https://mips.com/wp-content/uploads/2024/11/P8700_Data_Sheet.pdf
> [2] https://lore.kernel.org/lkml/Z2-UNfwcAQYZqVBU@ghost/T/

So far we haven't found a way to do this using errata. There's no way
to patch one instruction with multiple ones, and we also need an extra
(temporary) register.

A CPU can implement the Zalrsc extension [1] instead of "complete A"
(which P8700 does).
From "Zaamo and Zalrsc Extensions" spec:

"The fetch-and-op style atomic primitives provided by the A extension
support better scaling for highly parallel systems. Simpler
implementations that do not have such scaling requirements may
implement the Zalrsc subset of the A extension to support atomic
primitives."

Therefore, we believe this would be a useful option for wider use.

[1] https://github.com/riscv/riscv-zaamo-zalrsc/releases/download/v1.0-rc2/riscv-zaamo-zalrsc.pdf

> >
> > Add config RISCV_AMO_USE_ZALRSC.
> >
> > Signed-off-by: Chao-ying Fu <cfu@mips.com>
> > Signed-off-by: Aleksandar Rikalo <arikalo@gmail.com>
> > ---
> >  arch/riscv/Kconfig               | 11 +++++++
> >  arch/riscv/include/asm/atomic.h  | 52 +++++++++++++++++++++++++++++++-
> >  arch/riscv/include/asm/bitops.h  | 45 +++++++++++++++++++++++++++
> >  arch/riscv/include/asm/cmpxchg.h | 16 ++++++++++
> >  arch/riscv/include/asm/futex.h   | 46 ++++++++++++++++++++++++++++
> >  5 files changed, 169 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index cc63aef41e94..9fb020b49408 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -715,6 +715,17 @@ config RISCV_ISA_ZACAS
> >
> >         If you don't know what to do here, say Y.
> >
> > +config RISCV_AMO_USE_ZALRSC
> > +     bool "Use Zalrsc extension to implement atomic functions"
> > +     help
> > +        Kernel uses only LR/SC instructions to implement atomic functions.
> > +
> > +        It makes sense to enable this option if your platform only
> > +        implements the Zalrsc extension (a subset of the A extension),
> > +        and not the complete A extension.
> > +
> > +        If you don't know what to do here, say N.
> > +
> >  config TOOLCHAIN_HAS_ZBB
> >       bool
> >       default y
> > diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
> > index 5b96c2f61adb..88f62e33a545 100644
> > --- a/arch/riscv/include/asm/atomic.h
> > +++ b/arch/riscv/include/asm/atomic.h
> > @@ -50,6 +50,7 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
> >   * have the AQ or RL bits set.  These don't return anything, so there's only
> >   * one version to worry about.
> >   */
> > +#ifndef RISCV_AMO_USE_ZALRSC
> >  #define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)           \
> >  static __always_inline                                                       \
> >  void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)     \
> > @@ -59,7 +60,23 @@ void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)   \
> >               : "+A" (v->counter)                                     \
> >               : "r" (I)                                               \
> >               : "memory");                                            \
> > -}                                                                    \
> > +}
> > +#else
> > +#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)           \
> > +static __always_inline                                                       \
> > +void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)     \
> > +{                                                                    \
> > +     register c_type ret, temp;                                      \
> > +     __asm__ __volatile__ (                                          \
> > +             "1:     lr." #asm_type " %1, %0\n"                      \
> > +             "       " #asm_op " %2, %1, %3\n"                       \
> > +             "       sc." #asm_type " %2, %2, %0\n"                  \
> > +             "       bnez %2, 1b\n"                                  \
> > +             : "+A" (v->counter), "=&r" (ret), "=&r" (temp)          \
> > +             : "r" (I)                                               \
> > +             : "memory");                                            \
> > +}
> > +#endif
> >
> >  #ifdef CONFIG_GENERIC_ATOMIC64
> >  #define ATOMIC_OPS(op, asm_op, I)                                    \
> > @@ -84,6 +101,7 @@ ATOMIC_OPS(xor, xor,  i)
> >   * There's two flavors of these: the arithmatic ops have both fetch and return
> >   * versions, while the logical ops only have fetch versions.
> >   */
> > +#ifndef RISCV_AMO_USE_ZALRSC
> >  #define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)     \
> >  static __always_inline                                                       \
> >  c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i,          \
> > @@ -108,6 +126,38 @@ c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
> >               : "memory");                                            \
> >       return ret;                                                     \
> >  }
> > +#else
> > +#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)     \
> > +static __always_inline                                                       \
> > +c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i,          \
> > +                                          atomic##prefix##_t *v)     \
> > +{                                                                    \
> > +     register c_type ret, temp;                                      \
> > +     __asm__ __volatile__ (                                          \
> > +             "1:     lr." #asm_type " %1, %0\n"                      \
> > +             "       " #asm_op " %2, %1, %3\n"                       \
> > +             "       sc." #asm_type " %2, %2, %0\n"                  \
> > +             "       bnez %2, 1b\n"                                  \
> > +             : "+A" (v->counter), "=&r" (ret), "=&r" (temp)          \
> > +             : "r" (I)                                               \
> > +             : "memory");                                            \
> > +     return ret;                                                     \
> > +}                                                                    \
> > +static __always_inline                                                       \
> > +c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v)     \
> > +{                                                                    \
> > +     register c_type ret, temp;                                      \
> > +     __asm__ __volatile__ (                                          \
> > +             "1:     lr." #asm_type ".aqrl %1, %0\n"                 \
> > +             "       " #asm_op " %2, %1, %3\n"                       \
> > +             "       sc." #asm_type ".aqrl %2, %2, %0\n"             \
> > +             "       bnez %2, 1b\n"                                  \
> > +             : "+A" (v->counter), "=&r" (ret), "=&r" (temp)          \
> > +             : "r" (I)                                               \
> > +             : "memory");                                            \
> > +     return ret;                                                     \
> > +}
> > +#endif
> >
> >  #define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix)      \
> >  static __always_inline                                                       \
> > diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
> > index fae152ea0508..0051de1cf471 100644
> > --- a/arch/riscv/include/asm/bitops.h
> > +++ b/arch/riscv/include/asm/bitops.h
> > @@ -187,12 +187,17 @@ static __always_inline int variable_fls(unsigned int x)
> >
> >  #if (BITS_PER_LONG == 64)
> >  #define __AMO(op)    "amo" #op ".d"
> > +#define __LR "lr.d"
> > +#define __SC "sc.d"
> >  #elif (BITS_PER_LONG == 32)
> >  #define __AMO(op)    "amo" #op ".w"
> > +#define __LR "lr.w"
> > +#define __SC "sc.w"
> >  #else
> >  #error "Unexpected BITS_PER_LONG"
> >  #endif
> >
> > +#ifndef RISCV_AMO_USE_ZALRSC
> >  #define __test_and_op_bit_ord(op, mod, nr, addr, ord)                \
> >  ({                                                           \
> >       unsigned long __res, __mask;                            \
> > @@ -211,6 +216,33 @@ static __always_inline int variable_fls(unsigned int x)
> >               : "+A" (addr[BIT_WORD(nr)])                     \
> >               : "r" (mod(BIT_MASK(nr)))                       \
> >               : "memory");
> > +#else
> > +#define __test_and_op_bit_ord(op, mod, nr, addr, ord)                \
> > +({                                                           \
> > +     unsigned long __res, __mask, __temp;                    \
> > +     __mask = BIT_MASK(nr);                                  \
> > +     __asm__ __volatile__ (                                  \
> > +             "1: " __LR #ord " %0, %1\n"                     \
> > +             #op " %2, %0, %3\n"                             \
> > +             __SC #ord " %2, %2, %1\n"                       \
> > +             "bnez %2, 1b\n"                                 \
> > +             : "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp)      \
> > +             : "r" (mod(__mask))                             \
> > +             : "memory");                                    \
> > +     ((__res & __mask) != 0);                                \
> > +})
> > +
> > +#define __op_bit_ord(op, mod, nr, addr, ord)                 \
> > +     unsigned long __res, __temp;                            \
> > +     __asm__ __volatile__ (                                  \
> > +             "1: " __LR #ord " %0, %1\n"                     \
> > +             #op " %2, %0, %3\n"                             \
> > +             __SC #ord " %2, %2, %1\n"                       \
> > +             "bnez %2, 1b\n"                                 \
> > +             : "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp)      \
> > +             : "r" (mod(BIT_MASK(nr)))                       \
> > +             : "memory")
> > +#endif
> >
> >  #define __test_and_op_bit(op, mod, nr, addr)                         \
> >       __test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
> > @@ -354,12 +386,25 @@ static inline void arch___clear_bit_unlock(
> >  static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
> >               volatile unsigned long *addr)
> >  {
> > +#ifndef RISCV_AMO_USE_ZALRSC
> >       unsigned long res;
> >       __asm__ __volatile__ (
> >               __AMO(xor) ".rl %0, %2, %1"
> >               : "=r" (res), "+A" (*addr)
> >               : "r" (__NOP(mask))
> >               : "memory");
> > +#else
> > +     unsigned long res, temp;
> > +
> > +     __asm__ __volatile__ (
> > +             "1: " __LR ".rl %0, %1\n"
> > +             "xor %2, %0, %3\n"
> > +             __SC ".rl %2, %2, %1\n"
> > +             "bnez %2, 1b\n"
> > +             : "=&r" (res), "+A" (*addr), "=&r" (temp)
> > +             : "r" (__NOP(mask))
> > +             : "memory");
> > +#endif
> >       return (res & BIT(7)) != 0;
> >  }
> >
> > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> > index 4cadc56220fe..aba60f427060 100644
> > --- a/arch/riscv/include/asm/cmpxchg.h
> > +++ b/arch/riscv/include/asm/cmpxchg.h
> > @@ -51,6 +51,7 @@
> >       }                                                                       \
> >  })
> >
> > +#ifndef RISCV_AMO_USE_ZALRSC
> >  #define __arch_xchg(sfx, prepend, append, r, p, n)                   \
> >  ({                                                                   \
> >       __asm__ __volatile__ (                                          \
> > @@ -61,6 +62,21 @@
> >               : "r" (n)                                               \
> >               : "memory");                                            \
> >  })
> > +#else
> > +#define __arch_xchg(sfx, prepend, append, r, p, n)                   \
> > +({                                                                   \
> > +     __typeof__(*(__ptr)) temp;                                      \
> > +     __asm__ __volatile__ (                                          \
> > +             prepend                                                 \
> > +             "1:     lr" sfx " %0, %1\n"                             \
> > +             "       sc" sfx " %2, %3, %1\n"                         \
> > +             "       bnez %2, 1b\n"                                  \
> > +             append                                                  \
> > +             : "=&r" (r), "+A" (*(p)), "=&r" (temp)                  \
> > +             : "r" (n)                                               \
> > +             : "memory");                                            \
> > +})
> > +#endif
> >
> >  #define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,                      \
> >                  sc_append, swap_append)                              \
> > diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
> > index fc8130f995c1..dc63065e707e 100644
> > --- a/arch/riscv/include/asm/futex.h
> > +++ b/arch/riscv/include/asm/futex.h
> > @@ -19,6 +19,7 @@
> >  #define __disable_user_access()              do { } while (0)
> >  #endif
> >
> > +#ifndef RISCV_AMO_USE_ZALRSC
> >  #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)   \
> >  {                                                            \
> >       __enable_user_access();                                 \
> > @@ -32,16 +33,39 @@
> >       : "memory");                                            \
> >       __disable_user_access();                                \
> >  }
> > +#else
> > +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)   \
> > +{                                                            \
> > +     __enable_user_access();                                 \
> > +     __asm__ __volatile__ (                                  \
> > +     "1:     lr.w.aqrl %[ov], %[u]\n"                        \
> > +     "       " insn "\n"                                     \
> > +     "       sc.w.aqrl %[t], %[t], %[u]\n"                   \
> > +     "       bnez %[t], 1b\n"                                \
> > +     "2:\n"                                                  \
> > +     _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %[r])                  \
> > +     : [r] "+r" (ret), [ov] "=&r" (oldval),                  \
> > +       [t] "=&r" (temp), [u] "+m" (*uaddr)                   \
> > +     : [op] "Jr" (oparg)                                     \
> > +     : "memory");                                            \
> > +     __disable_user_access();                                \
> > +}
> > +#endif
> >
> >  static inline int
> >  arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
> >  {
> > +#ifndef RISCV_AMO_USE_ZALRSC
> >       int oldval = 0, ret = 0;
> > +#else
> > +     int oldval = 0, ret = 0, temp = 0;
>
> I think it's better to define this temp variable inside of
> __futex_atomic_op() instead of requiring it to be defined in the scope
> where the macro is called.
>
> - Charlie
>
> > +#endif
> >
> >       if (!access_ok(uaddr, sizeof(u32)))
> >               return -EFAULT;
> >
> >       switch (op) {
> > +#ifndef RISCV_AMO_USE_ZALRSC
> >       case FUTEX_OP_SET:
> >               __futex_atomic_op("amoswap.w.aqrl %[ov],%z[op],%[u]",
> >                                 ret, oldval, uaddr, oparg);
> > @@ -62,6 +86,28 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
> >               __futex_atomic_op("amoxor.w.aqrl %[ov],%z[op],%[u]",
> >                                 ret, oldval, uaddr, oparg);
> >               break;
> > +#else
> > +     case FUTEX_OP_SET:
> > +             __futex_atomic_op("mv %[t], %z[op]",
> > +                               ret, oldval, uaddr, oparg);
> > +             break;
> > +     case FUTEX_OP_ADD:
> > +             __futex_atomic_op("add %[t], %[ov], %z[op]",
> > +                               ret, oldval, uaddr, oparg);
> > +             break;
> > +     case FUTEX_OP_OR:
> > +             __futex_atomic_op("or %[t], %[ov], %z[op]",
> > +                               ret, oldval, uaddr, oparg);
> > +             break;
> > +     case FUTEX_OP_ANDN:
> > +             __futex_atomic_op("and %[t], %[ov], %z[op]",
> > +                               ret, oldval, uaddr, ~oparg);
> > +             break;
> > +     case FUTEX_OP_XOR:
> > +             __futex_atomic_op("xor %[t], %[ov], %z[op]",
> > +                               ret, oldval, uaddr, oparg);
> > +             break;
> > +#endif
> >       default:
> >               ret = -ENOSYS;
> >       }
> > --

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
  2025-02-01 12:04   ` Aleksandar Rikalo
@ 2025-02-02 20:08     ` Conor Dooley
  2025-02-03  6:59       ` Jessica Clarke
  2025-02-03 19:12       ` Charlie Jenkins
  0 siblings, 2 replies; 14+ messages in thread
From: Conor Dooley @ 2025-02-02 20:08 UTC (permalink / raw)
  To: Aleksandar Rikalo
  Cc: Charlie Jenkins, linux-riscv, Paul Walmsley, Palmer Dabbelt,
	Albert Ou, Will Deacon, Peter Zijlstra, Boqun Feng, Mark Rutland,
	Yury Norov, Rasmus Villemoes, Andrea Parri, Leonardo Bras,
	Guo Ren, Samuel Holland, Eric Chan, linux-kernel,
	Djordje Todorovic

[-- Attachment #1: Type: text/plain, Size: 19888 bytes --]

On Sat, Feb 01, 2025 at 01:04:25PM +0100, Aleksandar Rikalo wrote:
> On Fri, Jan 10, 2025 at 4:23 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
> 
> > > From: Chao-ying Fu <cfu@mips.com>
> > >
> > > Use only LR/SC instructions to implement atomic functions.
> >
> > In the previous patch you mention that this is to support MIPS P8700. Can
> > you expand on why this change is required? The datasheet at [1] says:
> >
> > "The P8700 core is configured to support the RV64GCZba_Zbb (G = IMAFD)
> > Standard ISA. It includes the RV64I base ISA, Multiply (M), Atomic (A),
> > Single-Precision Floating Point (F), Double (D), Compressed (C) RISC-V
> > extensions, as well as the as well as the bit-manipulation extensions
> > (Zba) and (Zbb)"
> >
> > The "A" extension is a part of "G" which is mostly assumed to exist in
> > the kernel. Additionally, having this be a compilation flag will cause
> > traps on generic kernels. We generally try to push everything we can
> > into runtime feature detection since there are so many possible variants
> > of riscv.
> >
> > Expressing not being able to perform a feature like this is normally
> > better expressed as an errata. Then generic kernels will be able to
> > include this, and anybody who doesn't want to have the extra nops
> > introduced can disable the errata. A similar approach to what I pointed
> > out last time should work here too (but with more places to replace)
> > [2].
> >
> > [1] https://mips.com/wp-content/uploads/2024/11/P8700_Data_Sheet.pdf
> > [2] https://lore.kernel.org/lkml/Z2-UNfwcAQYZqVBU@ghost/T/
> 
> So far we haven't found a way to do this using errata.

You mean using alternatives? Not implementing A, but instead
implementing Zalrsc, is not an erratum. It's a design decision.

> There's no way
> to patch one instruction with multiple ones,

Have you looked at how the alternatives work? There's no requirement
that the sequences have the same number of instructions, padding is
allowed.

> and we also need an extra
> (temporary) register.
> 
> A CPU can implement the Zalrsc extension [1] instead of "complete A"
> (which P8700 does).
> From "Zaamo and Zalrsc Extensions" spec:

Why does this statement differ from the P8700 datasheet linked above?

Cheers,
Conor.

> 
> "The fetch-and-op style atomic primitives provided by the A extension
> support better scaling for highly parallel systems. Simpler
> implementations that do not have such scaling requirements may
> implement the Zalrsc subset of the A extension to support atomic
> primitives."
> 
> Therefore, we believe this would be a useful option for wider use.
> 
> [1] https://github.com/riscv/riscv-zaamo-zalrsc/releases/download/v1.0-rc2/riscv-zaamo-zalrsc.pdf
> 
> > >
> > > Add config RISCV_AMO_USE_ZALRSC.
> > >
> > > Signed-off-by: Chao-ying Fu <cfu@mips.com>
> > > Signed-off-by: Aleksandar Rikalo <arikalo@gmail.com>
> > > ---
> > >  arch/riscv/Kconfig               | 11 +++++++
> > >  arch/riscv/include/asm/atomic.h  | 52 +++++++++++++++++++++++++++++++-
> > >  arch/riscv/include/asm/bitops.h  | 45 +++++++++++++++++++++++++++
> > >  arch/riscv/include/asm/cmpxchg.h | 16 ++++++++++
> > >  arch/riscv/include/asm/futex.h   | 46 ++++++++++++++++++++++++++++
> > >  5 files changed, 169 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > index cc63aef41e94..9fb020b49408 100644
> > > --- a/arch/riscv/Kconfig
> > > +++ b/arch/riscv/Kconfig
> > > @@ -715,6 +715,17 @@ config RISCV_ISA_ZACAS
> > >
> > >         If you don't know what to do here, say Y.
> > >
> > > +config RISCV_AMO_USE_ZALRSC
> > > +     bool "Use Zalrsc extension to implement atomic functions"
> > > +     help
> > > +        Kernel uses only LR/SC instructions to implement atomic functions.
> > > +
> > > +        It makes sense to enable this option if your platform only
> > > +        implements the Zalrsc extension (a subset of the A extension),
> > > +        and not the complete A extension.
> > > +
> > > +        If you don't know what to do here, say N.
> > > +
> > >  config TOOLCHAIN_HAS_ZBB
> > >       bool
> > >       default y
> > > diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
> > > index 5b96c2f61adb..88f62e33a545 100644
> > > --- a/arch/riscv/include/asm/atomic.h
> > > +++ b/arch/riscv/include/asm/atomic.h
> > > @@ -50,6 +50,7 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
> > >   * have the AQ or RL bits set.  These don't return anything, so there's only
> > >   * one version to worry about.
> > >   */
> > > +#ifndef RISCV_AMO_USE_ZALRSC
> > >  #define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)           \
> > >  static __always_inline                                                       \
> > >  void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)     \
> > > @@ -59,7 +60,23 @@ void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)   \
> > >               : "+A" (v->counter)                                     \
> > >               : "r" (I)                                               \
> > >               : "memory");                                            \
> > > -}                                                                    \
> > > +}
> > > +#else
> > > +#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)           \
> > > +static __always_inline                                                       \
> > > +void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)     \
> > > +{                                                                    \
> > > +     register c_type ret, temp;                                      \
> > > +     __asm__ __volatile__ (                                          \
> > > +             "1:     lr." #asm_type " %1, %0\n"                      \
> > > +             "       " #asm_op " %2, %1, %3\n"                       \
> > > +             "       sc." #asm_type " %2, %2, %0\n"                  \
> > > +             "       bnez %2, 1b\n"                                  \
> > > +             : "+A" (v->counter), "=&r" (ret), "=&r" (temp)          \
> > > +             : "r" (I)                                               \
> > > +             : "memory");                                            \
> > > +}
> > > +#endif
> > >
> > >  #ifdef CONFIG_GENERIC_ATOMIC64
> > >  #define ATOMIC_OPS(op, asm_op, I)                                    \
> > > @@ -84,6 +101,7 @@ ATOMIC_OPS(xor, xor,  i)
> > >   * There's two flavors of these: the arithmatic ops have both fetch and return
> > >   * versions, while the logical ops only have fetch versions.
> > >   */
> > > +#ifndef RISCV_AMO_USE_ZALRSC
> > >  #define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)     \
> > >  static __always_inline                                                       \
> > >  c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i,          \
> > > @@ -108,6 +126,38 @@ c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
> > >               : "memory");                                            \
> > >       return ret;                                                     \
> > >  }
> > > +#else
> > > +#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)     \
> > > +static __always_inline                                                       \
> > > +c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i,          \
> > > +                                          atomic##prefix##_t *v)     \
> > > +{                                                                    \
> > > +     register c_type ret, temp;                                      \
> > > +     __asm__ __volatile__ (                                          \
> > > +             "1:     lr." #asm_type " %1, %0\n"                      \
> > > +             "       " #asm_op " %2, %1, %3\n"                       \
> > > +             "       sc." #asm_type " %2, %2, %0\n"                  \
> > > +             "       bnez %2, 1b\n"                                  \
> > > +             : "+A" (v->counter), "=&r" (ret), "=&r" (temp)          \
> > > +             : "r" (I)                                               \
> > > +             : "memory");                                            \
> > > +     return ret;                                                     \
> > > +}                                                                    \
> > > +static __always_inline                                                       \
> > > +c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v)     \
> > > +{                                                                    \
> > > +     register c_type ret, temp;                                      \
> > > +     __asm__ __volatile__ (                                          \
> > > +             "1:     lr." #asm_type ".aqrl %1, %0\n"                 \
> > > +             "       " #asm_op " %2, %1, %3\n"                       \
> > > +             "       sc." #asm_type ".aqrl %2, %2, %0\n"             \
> > > +             "       bnez %2, 1b\n"                                  \
> > > +             : "+A" (v->counter), "=&r" (ret), "=&r" (temp)          \
> > > +             : "r" (I)                                               \
> > > +             : "memory");                                            \
> > > +     return ret;                                                     \
> > > +}
> > > +#endif
> > >
> > >  #define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix)      \
> > >  static __always_inline                                                       \
> > > diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
> > > index fae152ea0508..0051de1cf471 100644
> > > --- a/arch/riscv/include/asm/bitops.h
> > > +++ b/arch/riscv/include/asm/bitops.h
> > > @@ -187,12 +187,17 @@ static __always_inline int variable_fls(unsigned int x)
> > >
> > >  #if (BITS_PER_LONG == 64)
> > >  #define __AMO(op)    "amo" #op ".d"
> > > +#define __LR "lr.d"
> > > +#define __SC "sc.d"
> > >  #elif (BITS_PER_LONG == 32)
> > >  #define __AMO(op)    "amo" #op ".w"
> > > +#define __LR "lr.w"
> > > +#define __SC "sc.w"
> > >  #else
> > >  #error "Unexpected BITS_PER_LONG"
> > >  #endif
> > >
> > > +#ifndef RISCV_AMO_USE_ZALRSC
> > >  #define __test_and_op_bit_ord(op, mod, nr, addr, ord)                \
> > >  ({                                                           \
> > >       unsigned long __res, __mask;                            \
> > > @@ -211,6 +216,33 @@ static __always_inline int variable_fls(unsigned int x)
> > >               : "+A" (addr[BIT_WORD(nr)])                     \
> > >               : "r" (mod(BIT_MASK(nr)))                       \
> > >               : "memory");
> > > +#else
> > > +#define __test_and_op_bit_ord(op, mod, nr, addr, ord)                \
> > > +({                                                           \
> > > +     unsigned long __res, __mask, __temp;                    \
> > > +     __mask = BIT_MASK(nr);                                  \
> > > +     __asm__ __volatile__ (                                  \
> > > +             "1: " __LR #ord " %0, %1\n"                     \
> > > +             #op " %2, %0, %3\n"                             \
> > > +             __SC #ord " %2, %2, %1\n"                       \
> > > +             "bnez %2, 1b\n"                                 \
> > > +             : "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp)      \
> > > +             : "r" (mod(__mask))                             \
> > > +             : "memory");                                    \
> > > +     ((__res & __mask) != 0);                                \
> > > +})
> > > +
> > > +#define __op_bit_ord(op, mod, nr, addr, ord)                 \
> > > +     unsigned long __res, __temp;                            \
> > > +     __asm__ __volatile__ (                                  \
> > > +             "1: " __LR #ord " %0, %1\n"                     \
> > > +             #op " %2, %0, %3\n"                             \
> > > +             __SC #ord " %2, %2, %1\n"                       \
> > > +             "bnez %2, 1b\n"                                 \
> > > +             : "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp)      \
> > > +             : "r" (mod(BIT_MASK(nr)))                       \
> > > +             : "memory")
> > > +#endif
> > >
> > >  #define __test_and_op_bit(op, mod, nr, addr)                         \
> > >       __test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
> > > @@ -354,12 +386,25 @@ static inline void arch___clear_bit_unlock(
> > >  static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
> > >               volatile unsigned long *addr)
> > >  {
> > > +#ifndef RISCV_AMO_USE_ZALRSC
> > >       unsigned long res;
> > >       __asm__ __volatile__ (
> > >               __AMO(xor) ".rl %0, %2, %1"
> > >               : "=r" (res), "+A" (*addr)
> > >               : "r" (__NOP(mask))
> > >               : "memory");
> > > +#else
> > > +     unsigned long res, temp;
> > > +
> > > +     __asm__ __volatile__ (
> > > +             "1: " __LR ".rl %0, %1\n"
> > > +             "xor %2, %0, %3\n"
> > > +             __SC ".rl %2, %2, %1\n"
> > > +             "bnez %2, 1b\n"
> > > +             : "=&r" (res), "+A" (*addr), "=&r" (temp)
> > > +             : "r" (__NOP(mask))
> > > +             : "memory");
> > > +#endif
> > >       return (res & BIT(7)) != 0;
> > >  }
> > >
> > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> > > index 4cadc56220fe..aba60f427060 100644
> > > --- a/arch/riscv/include/asm/cmpxchg.h
> > > +++ b/arch/riscv/include/asm/cmpxchg.h
> > > @@ -51,6 +51,7 @@
> > >       }                                                                       \
> > >  })
> > >
> > > +#ifndef RISCV_AMO_USE_ZALRSC
> > >  #define __arch_xchg(sfx, prepend, append, r, p, n)                   \
> > >  ({                                                                   \
> > >       __asm__ __volatile__ (                                          \
> > > @@ -61,6 +62,21 @@
> > >               : "r" (n)                                               \
> > >               : "memory");                                            \
> > >  })
> > > +#else
> > > +#define __arch_xchg(sfx, prepend, append, r, p, n)                   \
> > > +({                                                                   \
> > > +     __typeof__(*(__ptr)) temp;                                      \
> > > +     __asm__ __volatile__ (                                          \
> > > +             prepend                                                 \
> > > +             "1:     lr" sfx " %0, %1\n"                             \
> > > +             "       sc" sfx " %2, %3, %1\n"                         \
> > > +             "       bnez %2, 1b\n"                                  \
> > > +             append                                                  \
> > > +             : "=&r" (r), "+A" (*(p)), "=&r" (temp)                  \
> > > +             : "r" (n)                                               \
> > > +             : "memory");                                            \
> > > +})
> > > +#endif
> > >
> > >  #define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,                      \
> > >                  sc_append, swap_append)                              \
> > > diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
> > > index fc8130f995c1..dc63065e707e 100644
> > > --- a/arch/riscv/include/asm/futex.h
> > > +++ b/arch/riscv/include/asm/futex.h
> > > @@ -19,6 +19,7 @@
> > >  #define __disable_user_access()              do { } while (0)
> > >  #endif
> > >
> > > +#ifndef RISCV_AMO_USE_ZALRSC
> > >  #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)   \
> > >  {                                                            \
> > >       __enable_user_access();                                 \
> > > @@ -32,16 +33,39 @@
> > >       : "memory");                                            \
> > >       __disable_user_access();                                \
> > >  }
> > > +#else
> > > +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)   \
> > > +{                                                            \
> > > +     __enable_user_access();                                 \
> > > +     __asm__ __volatile__ (                                  \
> > > +     "1:     lr.w.aqrl %[ov], %[u]\n"                        \
> > > +     "       " insn "\n"                                     \
> > > +     "       sc.w.aqrl %[t], %[t], %[u]\n"                   \
> > > +     "       bnez %[t], 1b\n"                                \
> > > +     "2:\n"                                                  \
> > > +     _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %[r])                  \
> > > +     : [r] "+r" (ret), [ov] "=&r" (oldval),                  \
> > > +       [t] "=&r" (temp), [u] "+m" (*uaddr)                   \
> > > +     : [op] "Jr" (oparg)                                     \
> > > +     : "memory");                                            \
> > > +     __disable_user_access();                                \
> > > +}
> > > +#endif
> > >
> > >  static inline int
> > >  arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
> > >  {
> > > +#ifndef RISCV_AMO_USE_ZALRSC
> > >       int oldval = 0, ret = 0;
> > > +#else
> > > +     int oldval = 0, ret = 0, temp = 0;
> >
> > I think it's better to define this temp variable inside of
> > __futex_atomic_op() instead of requiring it to be defined in the scope
> > where the macro is called.
> >
> > - Charlie
> >
> > > +#endif
> > >
> > >       if (!access_ok(uaddr, sizeof(u32)))
> > >               return -EFAULT;
> > >
> > >       switch (op) {
> > > +#ifndef RISCV_AMO_USE_ZALRSC
> > >       case FUTEX_OP_SET:
> > >               __futex_atomic_op("amoswap.w.aqrl %[ov],%z[op],%[u]",
> > >                                 ret, oldval, uaddr, oparg);
> > > @@ -62,6 +86,28 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
> > >               __futex_atomic_op("amoxor.w.aqrl %[ov],%z[op],%[u]",
> > >                                 ret, oldval, uaddr, oparg);
> > >               break;
> > > +#else
> > > +     case FUTEX_OP_SET:
> > > +             __futex_atomic_op("mv %[t], %z[op]",
> > > +                               ret, oldval, uaddr, oparg);
> > > +             break;
> > > +     case FUTEX_OP_ADD:
> > > +             __futex_atomic_op("add %[t], %[ov], %z[op]",
> > > +                               ret, oldval, uaddr, oparg);
> > > +             break;
> > > +     case FUTEX_OP_OR:
> > > +             __futex_atomic_op("or %[t], %[ov], %z[op]",
> > > +                               ret, oldval, uaddr, oparg);
> > > +             break;
> > > +     case FUTEX_OP_ANDN:
> > > +             __futex_atomic_op("and %[t], %[ov], %z[op]",
> > > +                               ret, oldval, uaddr, ~oparg);
> > > +             break;
> > > +     case FUTEX_OP_XOR:
> > > +             __futex_atomic_op("xor %[t], %[ov], %z[op]",
> > > +                               ret, oldval, uaddr, oparg);
> > > +             break;
> > > +#endif
> > >       default:
> > >               ret = -ENOSYS;
> > >       }
> > > --

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
  2025-02-02 20:08     ` Conor Dooley
@ 2025-02-03  6:59       ` Jessica Clarke
  2025-02-03 19:12       ` Charlie Jenkins
  1 sibling, 0 replies; 14+ messages in thread
From: Jessica Clarke @ 2025-02-03  6:59 UTC (permalink / raw)
  To: Conor Dooley
  Cc: Aleksandar Rikalo, Charlie Jenkins, linux-riscv, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Will Deacon, Peter Zijlstra,
	Boqun Feng, Mark Rutland, Yury Norov, Rasmus Villemoes,
	Andrea Parri, Leonardo Bras, Guo Ren, Samuel Holland, Eric Chan,
	linux-kernel, Djordje Todorovic

On 2 Feb 2025, at 20:08, Conor Dooley <conor@kernel.org> wrote:
> 
> On Sat, Feb 01, 2025 at 01:04:25PM +0100, Aleksandar Rikalo wrote:
>> On Fri, Jan 10, 2025 at 4:23 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>> 
>>>> From: Chao-ying Fu <cfu@mips.com>
>>>> 
>>>> Use only LR/SC instructions to implement atomic functions.
>>> 
>>> In the previous patch you mention that this is to support MIPS P8700. Can
>>> you expand on why this change is required? The datasheet at [1] says:
>>> 
>>> "The P8700 core is configured to support the RV64GCZba_Zbb (G = IMAFD)
>>> Standard ISA. It includes the RV64I base ISA, Multiply (M), Atomic (A),
>>> Single-Precision Floating Point (F), Double (D), Compressed (C) RISC-V
>>> extensions, as well as the as well as the bit-manipulation extensions
>>> (Zba) and (Zbb)"
>>> 
>>> The "A" extension is a part of "G" which is mostly assumed to exist in
>>> the kernel. Additionally, having this be a compilation flag will cause
>>> traps on generic kernels. We generally try to push everything we can
>>> into runtime feature detection since there are so many possible variants
>>> of riscv.
>>> 
>>> Expressing not being able to perform a feature like this is normally
>>> better expressed as an errata. Then generic kernels will be able to
>>> include this, and anybody who doesn't want to have the extra nops
>>> introduced can disable the errata. A similar approach to what I pointed
>>> out last time should work here too (but with more places to replace)
>>> [2].
>>> 
>>> [1] https://mips.com/wp-content/uploads/2024/11/P8700_Data_Sheet.pdf
>>> [2] https://lore.kernel.org/lkml/Z2-UNfwcAQYZqVBU@ghost/T/
>> 
>> So far we haven't found a way to do this using errata.
> 
> You mean using alternatives? Not implementing A, but instead
> implementing Zalrsc, is not an erratum. It's a design decision.
> 
>> There's no way
>> to patch one instruction with multiple ones,
> 
> Have you looked at how the alternatives work? There's no requirement
> that the sequences have the same number of instructions, padding is
> allowed.
> 
>> and we also need an extra
>> (temporary) register.
>> 
>> A CPU can implement the Zalrsc extension [1] instead of "complete A"
>> (which P8700 does).
>> From "Zaamo and Zalrsc Extensions" spec:
> 
> Why does this statement differ from the P8700 datasheet linked above?

In an LLVM patch several weeks ago it was said that Zaamo is
implemented in firmware by trapping and emulating with LR/SC.

Jess


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
  2025-02-02 20:08     ` Conor Dooley
  2025-02-03  6:59       ` Jessica Clarke
@ 2025-02-03 19:12       ` Charlie Jenkins
  2025-02-03 19:30         ` Samuel Holland
  1 sibling, 1 reply; 14+ messages in thread
From: Charlie Jenkins @ 2025-02-03 19:12 UTC (permalink / raw)
  To: Conor Dooley
  Cc: Aleksandar Rikalo, linux-riscv, Paul Walmsley, Palmer Dabbelt,
	Albert Ou, Will Deacon, Peter Zijlstra, Boqun Feng, Mark Rutland,
	Yury Norov, Rasmus Villemoes, Andrea Parri, Leonardo Bras,
	Guo Ren, Samuel Holland, Eric Chan, linux-kernel,
	Djordje Todorovic

On Sun, Feb 02, 2025 at 08:08:50PM +0000, Conor Dooley wrote:
> On Sat, Feb 01, 2025 at 01:04:25PM +0100, Aleksandar Rikalo wrote:
> > On Fri, Jan 10, 2025 at 4:23 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
> > 
> > > > From: Chao-ying Fu <cfu@mips.com>
> > > >
> > > > Use only LR/SC instructions to implement atomic functions.
> > >
> > > In the previous patch you mention that this is to support MIPS P8700. Can
> > > you expand on why this change is required? The datasheet at [1] says:
> > >
> > > "The P8700 core is configured to support the RV64GCZba_Zbb (G = IMAFD)
> > > Standard ISA. It includes the RV64I base ISA, Multiply (M), Atomic (A),
> > > Single-Precision Floating Point (F), Double (D), Compressed (C) RISC-V
> > > extensions, as well as the as well as the bit-manipulation extensions
> > > (Zba) and (Zbb)"
> > >
> > > The "A" extension is a part of "G" which is mostly assumed to exist in
> > > the kernel. Additionally, having this be a compilation flag will cause
> > > traps on generic kernels. We generally try to push everything we can
> > > into runtime feature detection since there are so many possible variants
> > > of riscv.
> > >
> > > Expressing not being able to perform a feature like this is normally
> > > better expressed as an errata. Then generic kernels will be able to
> > > include this, and anybody who doesn't want to have the extra nops
> > > introduced can disable the errata. A similar approach to what I pointed
> > > out last time should work here too (but with more places to replace)
> > > [2].
> > >
> > > [1] https://mips.com/wp-content/uploads/2024/11/P8700_Data_Sheet.pdf
> > > [2] https://lore.kernel.org/lkml/Z2-UNfwcAQYZqVBU@ghost/T/
> > 
> > So far we haven't found a way to do this using errata.
> 
> You mean using alternatives? Not implementing A, but instead
> implementing Zalrsc, is not an erratum. It's a design decision.

We could do the same thing we do with misaligned access detection and
run some instructions to determine if these instructions are being
emulated.  If they are being emulated, patch all of the places to use
zalrsc.

- Charlie

> 
> > There's no way
> > to patch one instruction with multiple ones,
> 
> Have you looked at how the alternatives work? There's no requirement
> that the sequences have the same number of instructions, padding is
> allowed.
> 
> > and we also need an extra
> > (temporary) register.
> > 
> > A CPU can implement the Zalrsc extension [1] instead of "complete A"
> > (which P8700 does).
> > From "Zaamo and Zalrsc Extensions" spec:
> 
> Why does this statement differ from the P8700 datasheet linked above?
> 
> Cheers,
> Conor.
> 
> > 
> > "The fetch-and-op style atomic primitives provided by the A extension
> > support better scaling for highly parallel systems. Simpler
> > implementations that do not have such scaling requirements may
> > implement the Zalrsc subset of the A extension to support atomic
> > primitives."
> > 
> > Therefore, we believe this would be a useful option for wider use.
> > 
> > [1] https://github.com/riscv/riscv-zaamo-zalrsc/releases/download/v1.0-rc2/riscv-zaamo-zalrsc.pdf
> > 
> > > >
> > > > Add config RISCV_AMO_USE_ZALRSC.
> > > >
> > > > Signed-off-by: Chao-ying Fu <cfu@mips.com>
> > > > Signed-off-by: Aleksandar Rikalo <arikalo@gmail.com>
> > > > ---
> > > >  arch/riscv/Kconfig               | 11 +++++++
> > > >  arch/riscv/include/asm/atomic.h  | 52 +++++++++++++++++++++++++++++++-
> > > >  arch/riscv/include/asm/bitops.h  | 45 +++++++++++++++++++++++++++
> > > >  arch/riscv/include/asm/cmpxchg.h | 16 ++++++++++
> > > >  arch/riscv/include/asm/futex.h   | 46 ++++++++++++++++++++++++++++
> > > >  5 files changed, 169 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > > index cc63aef41e94..9fb020b49408 100644
> > > > --- a/arch/riscv/Kconfig
> > > > +++ b/arch/riscv/Kconfig
> > > > @@ -715,6 +715,17 @@ config RISCV_ISA_ZACAS
> > > >
> > > >         If you don't know what to do here, say Y.
> > > >
> > > > +config RISCV_AMO_USE_ZALRSC
> > > > +     bool "Use Zalrsc extension to implement atomic functions"
> > > > +     help
> > > > +        Kernel uses only LR/SC instructions to implement atomic functions.
> > > > +
> > > > +        It makes sense to enable this option if your platform only
> > > > +        implements the Zalrsc extension (a subset of the A extension),
> > > > +        and not the complete A extension.
> > > > +
> > > > +        If you don't know what to do here, say N.
> > > > +
> > > >  config TOOLCHAIN_HAS_ZBB
> > > >       bool
> > > >       default y
> > > > diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
> > > > index 5b96c2f61adb..88f62e33a545 100644
> > > > --- a/arch/riscv/include/asm/atomic.h
> > > > +++ b/arch/riscv/include/asm/atomic.h
> > > > @@ -50,6 +50,7 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
> > > >   * have the AQ or RL bits set.  These don't return anything, so there's only
> > > >   * one version to worry about.
> > > >   */
> > > > +#ifndef RISCV_AMO_USE_ZALRSC
> > > >  #define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)           \
> > > >  static __always_inline                                                       \
> > > >  void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)     \
> > > > @@ -59,7 +60,23 @@ void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)   \
> > > >               : "+A" (v->counter)                                     \
> > > >               : "r" (I)                                               \
> > > >               : "memory");                                            \
> > > > -}                                                                    \
> > > > +}
> > > > +#else
> > > > +#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)           \
> > > > +static __always_inline                                                       \
> > > > +void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)     \
> > > > +{                                                                    \
> > > > +     register c_type ret, temp;                                      \
> > > > +     __asm__ __volatile__ (                                          \
> > > > +             "1:     lr." #asm_type " %1, %0\n"                      \
> > > > +             "       " #asm_op " %2, %1, %3\n"                       \
> > > > +             "       sc." #asm_type " %2, %2, %0\n"                  \
> > > > +             "       bnez %2, 1b\n"                                  \
> > > > +             : "+A" (v->counter), "=&r" (ret), "=&r" (temp)          \
> > > > +             : "r" (I)                                               \
> > > > +             : "memory");                                            \
> > > > +}
> > > > +#endif
> > > >
> > > >  #ifdef CONFIG_GENERIC_ATOMIC64
> > > >  #define ATOMIC_OPS(op, asm_op, I)                                    \
> > > > @@ -84,6 +101,7 @@ ATOMIC_OPS(xor, xor,  i)
> > > >   * There's two flavors of these: the arithmatic ops have both fetch and return
> > > >   * versions, while the logical ops only have fetch versions.
> > > >   */
> > > > +#ifndef RISCV_AMO_USE_ZALRSC
> > > >  #define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)     \
> > > >  static __always_inline                                                       \
> > > >  c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i,          \
> > > > @@ -108,6 +126,38 @@ c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
> > > >               : "memory");                                            \
> > > >       return ret;                                                     \
> > > >  }
> > > > +#else
> > > > +#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)     \
> > > > +static __always_inline                                                       \
> > > > +c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i,          \
> > > > +                                          atomic##prefix##_t *v)     \
> > > > +{                                                                    \
> > > > +     register c_type ret, temp;                                      \
> > > > +     __asm__ __volatile__ (                                          \
> > > > +             "1:     lr." #asm_type " %1, %0\n"                      \
> > > > +             "       " #asm_op " %2, %1, %3\n"                       \
> > > > +             "       sc." #asm_type " %2, %2, %0\n"                  \
> > > > +             "       bnez %2, 1b\n"                                  \
> > > > +             : "+A" (v->counter), "=&r" (ret), "=&r" (temp)          \
> > > > +             : "r" (I)                                               \
> > > > +             : "memory");                                            \
> > > > +     return ret;                                                     \
> > > > +}                                                                    \
> > > > +static __always_inline                                                       \
> > > > +c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v)     \
> > > > +{                                                                    \
> > > > +     register c_type ret, temp;                                      \
> > > > +     __asm__ __volatile__ (                                          \
> > > > +             "1:     lr." #asm_type ".aqrl %1, %0\n"                 \
> > > > +             "       " #asm_op " %2, %1, %3\n"                       \
> > > > +             "       sc." #asm_type ".aqrl %2, %2, %0\n"             \
> > > > +             "       bnez %2, 1b\n"                                  \
> > > > +             : "+A" (v->counter), "=&r" (ret), "=&r" (temp)          \
> > > > +             : "r" (I)                                               \
> > > > +             : "memory");                                            \
> > > > +     return ret;                                                     \
> > > > +}
> > > > +#endif
> > > >
> > > >  #define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix)      \
> > > >  static __always_inline                                                       \
> > > > diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
> > > > index fae152ea0508..0051de1cf471 100644
> > > > --- a/arch/riscv/include/asm/bitops.h
> > > > +++ b/arch/riscv/include/asm/bitops.h
> > > > @@ -187,12 +187,17 @@ static __always_inline int variable_fls(unsigned int x)
> > > >
> > > >  #if (BITS_PER_LONG == 64)
> > > >  #define __AMO(op)    "amo" #op ".d"
> > > > +#define __LR "lr.d"
> > > > +#define __SC "sc.d"
> > > >  #elif (BITS_PER_LONG == 32)
> > > >  #define __AMO(op)    "amo" #op ".w"
> > > > +#define __LR "lr.w"
> > > > +#define __SC "sc.w"
> > > >  #else
> > > >  #error "Unexpected BITS_PER_LONG"
> > > >  #endif
> > > >
> > > > +#ifndef RISCV_AMO_USE_ZALRSC
> > > >  #define __test_and_op_bit_ord(op, mod, nr, addr, ord)                \
> > > >  ({                                                           \
> > > >       unsigned long __res, __mask;                            \
> > > > @@ -211,6 +216,33 @@ static __always_inline int variable_fls(unsigned int x)
> > > >               : "+A" (addr[BIT_WORD(nr)])                     \
> > > >               : "r" (mod(BIT_MASK(nr)))                       \
> > > >               : "memory");
> > > > +#else
> > > > +#define __test_and_op_bit_ord(op, mod, nr, addr, ord)                \
> > > > +({                                                           \
> > > > +     unsigned long __res, __mask, __temp;                    \
> > > > +     __mask = BIT_MASK(nr);                                  \
> > > > +     __asm__ __volatile__ (                                  \
> > > > +             "1: " __LR #ord " %0, %1\n"                     \
> > > > +             #op " %2, %0, %3\n"                             \
> > > > +             __SC #ord " %2, %2, %1\n"                       \
> > > > +             "bnez %2, 1b\n"                                 \
> > > > +             : "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp)      \
> > > > +             : "r" (mod(__mask))                             \
> > > > +             : "memory");                                    \
> > > > +     ((__res & __mask) != 0);                                \
> > > > +})
> > > > +
> > > > +#define __op_bit_ord(op, mod, nr, addr, ord)                 \
> > > > +     unsigned long __res, __temp;                            \
> > > > +     __asm__ __volatile__ (                                  \
> > > > +             "1: " __LR #ord " %0, %1\n"                     \
> > > > +             #op " %2, %0, %3\n"                             \
> > > > +             __SC #ord " %2, %2, %1\n"                       \
> > > > +             "bnez %2, 1b\n"                                 \
> > > > +             : "=&r" (__res), "+A" (addr[BIT_WORD(nr)]), "=&r" (__temp)      \
> > > > +             : "r" (mod(BIT_MASK(nr)))                       \
> > > > +             : "memory")
> > > > +#endif
> > > >
> > > >  #define __test_and_op_bit(op, mod, nr, addr)                         \
> > > >       __test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
> > > > @@ -354,12 +386,25 @@ static inline void arch___clear_bit_unlock(
> > > >  static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
> > > >               volatile unsigned long *addr)
> > > >  {
> > > > +#ifndef RISCV_AMO_USE_ZALRSC
> > > >       unsigned long res;
> > > >       __asm__ __volatile__ (
> > > >               __AMO(xor) ".rl %0, %2, %1"
> > > >               : "=r" (res), "+A" (*addr)
> > > >               : "r" (__NOP(mask))
> > > >               : "memory");
> > > > +#else
> > > > +     unsigned long res, temp;
> > > > +
> > > > +     __asm__ __volatile__ (
> > > > +             "1: " __LR ".rl %0, %1\n"
> > > > +             "xor %2, %0, %3\n"
> > > > +             __SC ".rl %2, %2, %1\n"
> > > > +             "bnez %2, 1b\n"
> > > > +             : "=&r" (res), "+A" (*addr), "=&r" (temp)
> > > > +             : "r" (__NOP(mask))
> > > > +             : "memory");
> > > > +#endif
> > > >       return (res & BIT(7)) != 0;
> > > >  }
> > > >
> > > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> > > > index 4cadc56220fe..aba60f427060 100644
> > > > --- a/arch/riscv/include/asm/cmpxchg.h
> > > > +++ b/arch/riscv/include/asm/cmpxchg.h
> > > > @@ -51,6 +51,7 @@
> > > >       }                                                                       \
> > > >  })
> > > >
> > > > +#ifndef RISCV_AMO_USE_ZALRSC
> > > >  #define __arch_xchg(sfx, prepend, append, r, p, n)                   \
> > > >  ({                                                                   \
> > > >       __asm__ __volatile__ (                                          \
> > > > @@ -61,6 +62,21 @@
> > > >               : "r" (n)                                               \
> > > >               : "memory");                                            \
> > > >  })
> > > > +#else
> > > > +#define __arch_xchg(sfx, prepend, append, r, p, n)                   \
> > > > +({                                                                   \
> > > > +     __typeof__(*(__ptr)) temp;                                      \
> > > > +     __asm__ __volatile__ (                                          \
> > > > +             prepend                                                 \
> > > > +             "1:     lr" sfx " %0, %1\n"                             \
> > > > +             "       sc" sfx " %2, %3, %1\n"                         \
> > > > +             "       bnez %2, 1b\n"                                  \
> > > > +             append                                                  \
> > > > +             : "=&r" (r), "+A" (*(p)), "=&r" (temp)                  \
> > > > +             : "r" (n)                                               \
> > > > +             : "memory");                                            \
> > > > +})
> > > > +#endif
> > > >
> > > >  #define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,                      \
> > > >                  sc_append, swap_append)                              \
> > > > diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
> > > > index fc8130f995c1..dc63065e707e 100644
> > > > --- a/arch/riscv/include/asm/futex.h
> > > > +++ b/arch/riscv/include/asm/futex.h
> > > > @@ -19,6 +19,7 @@
> > > >  #define __disable_user_access()              do { } while (0)
> > > >  #endif
> > > >
> > > > +#ifndef RISCV_AMO_USE_ZALRSC
> > > >  #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)   \
> > > >  {                                                            \
> > > >       __enable_user_access();                                 \
> > > > @@ -32,16 +33,39 @@
> > > >       : "memory");                                            \
> > > >       __disable_user_access();                                \
> > > >  }
> > > > +#else
> > > > +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)   \
> > > > +{                                                            \
> > > > +     __enable_user_access();                                 \
> > > > +     __asm__ __volatile__ (                                  \
> > > > +     "1:     lr.w.aqrl %[ov], %[u]\n"                        \
> > > > +     "       " insn "\n"                                     \
> > > > +     "       sc.w.aqrl %[t], %[t], %[u]\n"                   \
> > > > +     "       bnez %[t], 1b\n"                                \
> > > > +     "2:\n"                                                  \
> > > > +     _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %[r])                  \
> > > > +     : [r] "+r" (ret), [ov] "=&r" (oldval),                  \
> > > > +       [t] "=&r" (temp), [u] "+m" (*uaddr)                   \
> > > > +     : [op] "Jr" (oparg)                                     \
> > > > +     : "memory");                                            \
> > > > +     __disable_user_access();                                \
> > > > +}
> > > > +#endif
> > > >
> > > >  static inline int
> > > >  arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
> > > >  {
> > > > +#ifndef RISCV_AMO_USE_ZALRSC
> > > >       int oldval = 0, ret = 0;
> > > > +#else
> > > > +     int oldval = 0, ret = 0, temp = 0;
> > >
> > > I think it's better to define this temp variable inside of
> > > __futex_atomic_op() instead of requiring it to be defined in the scope
> > > where the macro is called.
> > >
> > > - Charlie
> > >
> > > > +#endif
> > > >
> > > >       if (!access_ok(uaddr, sizeof(u32)))
> > > >               return -EFAULT;
> > > >
> > > >       switch (op) {
> > > > +#ifndef RISCV_AMO_USE_ZALRSC
> > > >       case FUTEX_OP_SET:
> > > >               __futex_atomic_op("amoswap.w.aqrl %[ov],%z[op],%[u]",
> > > >                                 ret, oldval, uaddr, oparg);
> > > > @@ -62,6 +86,28 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
> > > >               __futex_atomic_op("amoxor.w.aqrl %[ov],%z[op],%[u]",
> > > >                                 ret, oldval, uaddr, oparg);
> > > >               break;
> > > > +#else
> > > > +     case FUTEX_OP_SET:
> > > > +             __futex_atomic_op("mv %[t], %z[op]",
> > > > +                               ret, oldval, uaddr, oparg);
> > > > +             break;
> > > > +     case FUTEX_OP_ADD:
> > > > +             __futex_atomic_op("add %[t], %[ov], %z[op]",
> > > > +                               ret, oldval, uaddr, oparg);
> > > > +             break;
> > > > +     case FUTEX_OP_OR:
> > > > +             __futex_atomic_op("or %[t], %[ov], %z[op]",
> > > > +                               ret, oldval, uaddr, oparg);
> > > > +             break;
> > > > +     case FUTEX_OP_ANDN:
> > > > +             __futex_atomic_op("and %[t], %[ov], %z[op]",
> > > > +                               ret, oldval, uaddr, ~oparg);
> > > > +             break;
> > > > +     case FUTEX_OP_XOR:
> > > > +             __futex_atomic_op("xor %[t], %[ov], %z[op]",
> > > > +                               ret, oldval, uaddr, oparg);
> > > > +             break;
> > > > +#endif
> > > >       default:
> > > >               ret = -ENOSYS;
> > > >       }
> > > > --



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
  2025-02-03 19:12       ` Charlie Jenkins
@ 2025-02-03 19:30         ` Samuel Holland
  2025-02-03 19:50           ` Charlie Jenkins
  0 siblings, 1 reply; 14+ messages in thread
From: Samuel Holland @ 2025-02-03 19:30 UTC (permalink / raw)
  To: Charlie Jenkins, Conor Dooley
  Cc: Aleksandar Rikalo, linux-riscv, Paul Walmsley, Palmer Dabbelt,
	Albert Ou, Will Deacon, Peter Zijlstra, Boqun Feng, Mark Rutland,
	Yury Norov, Rasmus Villemoes, Andrea Parri, Leonardo Bras,
	Guo Ren, Eric Chan, linux-kernel, Djordje Todorovic

Hi Charlie,

On 2025-02-03 1:12 PM, Charlie Jenkins wrote:
> On Sun, Feb 02, 2025 at 08:08:50PM +0000, Conor Dooley wrote:
>> On Sat, Feb 01, 2025 at 01:04:25PM +0100, Aleksandar Rikalo wrote:
>>> On Fri, Jan 10, 2025 at 4:23 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>>>
>>>>> From: Chao-ying Fu <cfu@mips.com>
>>>>>
>>>>> Use only LR/SC instructions to implement atomic functions.
>>>>
>>>> In the previous patch you mention that this is to support MIPS P8700. Can
>>>> you expand on why this change is required? The datasheet at [1] says:
>>>>
>>>> "The P8700 core is configured to support the RV64GCZba_Zbb (G = IMAFD)
>>>> Standard ISA. It includes the RV64I base ISA, Multiply (M), Atomic (A),
>>>> Single-Precision Floating Point (F), Double (D), Compressed (C) RISC-V
>>>> extensions, as well as the as well as the bit-manipulation extensions
>>>> (Zba) and (Zbb)"
>>>>
>>>> The "A" extension is a part of "G" which is mostly assumed to exist in
>>>> the kernel. Additionally, having this be a compilation flag will cause
>>>> traps on generic kernels. We generally try to push everything we can
>>>> into runtime feature detection since there are so many possible variants
>>>> of riscv.
>>>>
>>>> Expressing not being able to perform a feature like this is normally
>>>> better expressed as an errata. Then generic kernels will be able to
>>>> include this, and anybody who doesn't want to have the extra nops
>>>> introduced can disable the errata. A similar approach to what I pointed
>>>> out last time should work here too (but with more places to replace)
>>>> [2].
>>>>
>>>> [1] https://mips.com/wp-content/uploads/2024/11/P8700_Data_Sheet.pdf
>>>> [2] https://lore.kernel.org/lkml/Z2-UNfwcAQYZqVBU@ghost/T/
>>>
>>> So far we haven't found a way to do this using errata.
>>
>> You mean using alternatives? Not implementing A, but instead
>> implementing Zalrsc, is not an erratum. It's a design decision.
> 
> We could do the same thing we do with misaligned access detection and
> run some instructions to determine if these instructions are being
> emulated.  If they are being emulated, patch all of the places to use
> zalrsc.

Is the implication here that the riscv,isa-extensions list passed to the kernel
will contain "a", even though the hardware does not support it, because AMOs are
emulated in M-mode?

If that is not the case, there is no need for runtime detection. The alternative
entry can check RISCV_ISA_EXT_ZAAMO (which would be implied by RISCV_ISA_EXT_a)
in the ISA bitmap like normal.

Regards,
Samuel


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
  2025-02-03 19:30         ` Samuel Holland
@ 2025-02-03 19:50           ` Charlie Jenkins
  2025-02-03 21:34             ` Jessica Clarke
  0 siblings, 1 reply; 14+ messages in thread
From: Charlie Jenkins @ 2025-02-03 19:50 UTC (permalink / raw)
  To: Samuel Holland
  Cc: Conor Dooley, Aleksandar Rikalo, linux-riscv, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Will Deacon, Peter Zijlstra,
	Boqun Feng, Mark Rutland, Yury Norov, Rasmus Villemoes,
	Andrea Parri, Leonardo Bras, Guo Ren, Eric Chan, linux-kernel,
	Djordje Todorovic

On Mon, Feb 03, 2025 at 01:30:48PM -0600, Samuel Holland wrote:
> Hi Charlie,
> 
> On 2025-02-03 1:12 PM, Charlie Jenkins wrote:
> > On Sun, Feb 02, 2025 at 08:08:50PM +0000, Conor Dooley wrote:
> >> On Sat, Feb 01, 2025 at 01:04:25PM +0100, Aleksandar Rikalo wrote:
> >>> On Fri, Jan 10, 2025 at 4:23 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
> >>>
> >>>>> From: Chao-ying Fu <cfu@mips.com>
> >>>>>
> >>>>> Use only LR/SC instructions to implement atomic functions.
> >>>>
> >>>> In the previous patch you mention that this is to support MIPS P8700. Can
> >>>> you expand on why this change is required? The datasheet at [1] says:
> >>>>
> >>>> "The P8700 core is configured to support the RV64GCZba_Zbb (G = IMAFD)
> >>>> Standard ISA. It includes the RV64I base ISA, Multiply (M), Atomic (A),
> >>>> Single-Precision Floating Point (F), Double (D), Compressed (C) RISC-V
> >>>> extensions, as well as the as well as the bit-manipulation extensions
> >>>> (Zba) and (Zbb)"
> >>>>
> >>>> The "A" extension is a part of "G" which is mostly assumed to exist in
> >>>> the kernel. Additionally, having this be a compilation flag will cause
> >>>> traps on generic kernels. We generally try to push everything we can
> >>>> into runtime feature detection since there are so many possible variants
> >>>> of riscv.
> >>>>
> >>>> Expressing not being able to perform a feature like this is normally
> >>>> better expressed as an errata. Then generic kernels will be able to
> >>>> include this, and anybody who doesn't want to have the extra nops
> >>>> introduced can disable the errata. A similar approach to what I pointed
> >>>> out last time should work here too (but with more places to replace)
> >>>> [2].
> >>>>
> >>>> [1] https://mips.com/wp-content/uploads/2024/11/P8700_Data_Sheet.pdf
> >>>> [2] https://lore.kernel.org/lkml/Z2-UNfwcAQYZqVBU@ghost/T/
> >>>
> >>> So far we haven't found a way to do this using errata.
> >>
> >> You mean using alternatives? Not implementing A, but instead
> >> implementing Zalrsc, is not an erratum. It's a design decision.
> > 
> > We could do the same thing we do with misaligned access detection and
> > run some instructions to determine if these instructions are being
> > emulated.  If they are being emulated, patch all of the places to use
> > zalrsc.
> 
> Is the implication here that the riscv,isa-extensions list passed to the kernel
> will contain "a", even though the hardware does not support it, because AMOs are
> emulated in M-mode?
> 
> If that is not the case, there is no need for runtime detection. The alternative
> entry can check RISCV_ISA_EXT_ZAAMO (which would be implied by RISCV_ISA_EXT_a)
> in the ISA bitmap like normal.

That would be much better! I was under the assumption that the usecase
for this patch was that they were passing in "a" and wanting to only get
zalrsc. We should be able to check
RISCV_ISA_EXT_ZAAMO/RISCV_ISA_EXT_ZALRSC to get the information without
runtime detection.

- Charlie

> 
> Regards,
> Samuel
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
  2025-02-03 19:50           ` Charlie Jenkins
@ 2025-02-03 21:34             ` Jessica Clarke
  2025-02-05 15:49               ` Palmer Dabbelt
  0 siblings, 1 reply; 14+ messages in thread
From: Jessica Clarke @ 2025-02-03 21:34 UTC (permalink / raw)
  To: Charlie Jenkins
  Cc: Samuel Holland, Conor Dooley, Aleksandar Rikalo, linux-riscv,
	Paul Walmsley, Palmer Dabbelt, Albert Ou, Will Deacon,
	Peter Zijlstra, Boqun Feng, Mark Rutland, Yury Norov,
	Rasmus Villemoes, Andrea Parri, Leonardo Bras, Guo Ren, Eric Chan,
	linux-kernel, Djordje Todorovic

On 3 Feb 2025, at 19:50, Charlie Jenkins <charlie@rivosinc.com> wrote:
> 
> On Mon, Feb 03, 2025 at 01:30:48PM -0600, Samuel Holland wrote:
>> Hi Charlie,
>> 
>> On 2025-02-03 1:12 PM, Charlie Jenkins wrote:
>>> On Sun, Feb 02, 2025 at 08:08:50PM +0000, Conor Dooley wrote:
>>>> On Sat, Feb 01, 2025 at 01:04:25PM +0100, Aleksandar Rikalo wrote:
>>>>> On Fri, Jan 10, 2025 at 4:23 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>>>>> 
>>>>>>> From: Chao-ying Fu <cfu@mips.com>
>>>>>>> 
>>>>>>> Use only LR/SC instructions to implement atomic functions.
>>>>>> 
>>>>>> In the previous patch you mention that this is to support MIPS P8700. Can
>>>>>> you expand on why this change is required? The datasheet at [1] says:
>>>>>> 
>>>>>> "The P8700 core is configured to support the RV64GCZba_Zbb (G = IMAFD)
>>>>>> Standard ISA. It includes the RV64I base ISA, Multiply (M), Atomic (A),
>>>>>> Single-Precision Floating Point (F), Double (D), Compressed (C) RISC-V
>>>>>> extensions, as well as the as well as the bit-manipulation extensions
>>>>>> (Zba) and (Zbb)"
>>>>>> 
>>>>>> The "A" extension is a part of "G" which is mostly assumed to exist in
>>>>>> the kernel. Additionally, having this be a compilation flag will cause
>>>>>> traps on generic kernels. We generally try to push everything we can
>>>>>> into runtime feature detection since there are so many possible variants
>>>>>> of riscv.
>>>>>> 
>>>>>> Expressing not being able to perform a feature like this is normally
>>>>>> better expressed as an errata. Then generic kernels will be able to
>>>>>> include this, and anybody who doesn't want to have the extra nops
>>>>>> introduced can disable the errata. A similar approach to what I pointed
>>>>>> out last time should work here too (but with more places to replace)
>>>>>> [2].
>>>>>> 
>>>>>> [1] https://mips.com/wp-content/uploads/2024/11/P8700_Data_Sheet.pdf
>>>>>> [2] https://lore.kernel.org/lkml/Z2-UNfwcAQYZqVBU@ghost/T/
>>>>> 
>>>>> So far we haven't found a way to do this using errata.
>>>> 
>>>> You mean using alternatives? Not implementing A, but instead
>>>> implementing Zalrsc, is not an erratum. It's a design decision.
>>> 
>>> We could do the same thing we do with misaligned access detection and
>>> run some instructions to determine if these instructions are being
>>> emulated.  If they are being emulated, patch all of the places to use
>>> zalrsc.
>> 
>> Is the implication here that the riscv,isa-extensions list passed to the kernel
>> will contain "a", even though the hardware does not support it, because AMOs are
>> emulated in M-mode?
>> 
>> If that is not the case, there is no need for runtime detection. The alternative
>> entry can check RISCV_ISA_EXT_ZAAMO (which would be implied by RISCV_ISA_EXT_a)
>> in the ISA bitmap like normal.
> 
> That would be much better! I was under the assumption that the usecase
> for this patch was that they were passing in "a" and wanting to only get
> zalrsc. We should be able to check
> RISCV_ISA_EXT_ZAAMO/RISCV_ISA_EXT_ZALRSC to get the information without
> runtime detection.

In LLVM at least -mcpu=mips-p8700 enables the full A extension...

Jess


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
  2025-02-03 21:34             ` Jessica Clarke
@ 2025-02-05 15:49               ` Palmer Dabbelt
  2025-02-12 10:22                 ` Djordje Todorovic
  0 siblings, 1 reply; 14+ messages in thread
From: Palmer Dabbelt @ 2025-02-05 15:49 UTC (permalink / raw)
  To: jrtc27
  Cc: Charlie Jenkins, samuel.holland, Conor Dooley, arikalo,
	linux-riscv, Paul Walmsley, aou, Will Deacon, peterz, boqun.feng,
	Mark Rutland, yury.norov, linux, parri.andrea, leobras, guoren,
	ericchancf, linux-kernel, djordje.todorovic

On Mon, 03 Feb 2025 13:34:48 PST (-0800), jrtc27@jrtc27.com wrote:
> On 3 Feb 2025, at 19:50, Charlie Jenkins <charlie@rivosinc.com> wrote:
>> 
>> On Mon, Feb 03, 2025 at 01:30:48PM -0600, Samuel Holland wrote:
>>> Hi Charlie,
>>> 
>>> On 2025-02-03 1:12 PM, Charlie Jenkins wrote:
>>>> On Sun, Feb 02, 2025 at 08:08:50PM +0000, Conor Dooley wrote:
>>>>> On Sat, Feb 01, 2025 at 01:04:25PM +0100, Aleksandar Rikalo wrote:
>>>>>> On Fri, Jan 10, 2025 at 4:23 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>>>>>> 
>>>>>>>> From: Chao-ying Fu <cfu@mips.com>
>>>>>>>> 
>>>>>>>> Use only LR/SC instructions to implement atomic functions.
>>>>>>> 
>>>>>>> In the previous patch you mention that this is to support MIPS P8700. Can
>>>>>>> you expand on why this change is required? The datasheet at [1] says:
>>>>>>> 
>>>>>>> "The P8700 core is configured to support the RV64GCZba_Zbb (G = IMAFD)
>>>>>>> Standard ISA. It includes the RV64I base ISA, Multiply (M), Atomic (A),
>>>>>>> Single-Precision Floating Point (F), Double (D), Compressed (C) RISC-V
>>>>>>> extensions, as well as the as well as the bit-manipulation extensions
>>>>>>> (Zba) and (Zbb)"
>>>>>>> 
>>>>>>> The "A" extension is a part of "G" which is mostly assumed to exist in
>>>>>>> the kernel. Additionally, having this be a compilation flag will cause
>>>>>>> traps on generic kernels. We generally try to push everything we can
>>>>>>> into runtime feature detection since there are so many possible variants
>>>>>>> of riscv.
>>>>>>> 
>>>>>>> Expressing not being able to perform a feature like this is normally
>>>>>>> better expressed as an errata. Then generic kernels will be able to
>>>>>>> include this, and anybody who doesn't want to have the extra nops
>>>>>>> introduced can disable the errata. A similar approach to what I pointed
>>>>>>> out last time should work here too (but with more places to replace)
>>>>>>> [2].
>>>>>>> 
>>>>>>> [1] https://mips.com/wp-content/uploads/2024/11/P8700_Data_Sheet.pdf
>>>>>>> [2] https://lore.kernel.org/lkml/Z2-UNfwcAQYZqVBU@ghost/T/
>>>>>> 
>>>>>> So far we haven't found a way to do this using errata.
>>>>> 
>>>>> You mean using alternatives? Not implementing A, but instead
>>>>> implementing Zalrsc, is not an erratum. It's a design decision.
>>>> 
>>>> We could do the same thing we do with misaligned access detection and
>>>> run some instructions to determine if these instructions are being
>>>> emulated.  If they are being emulated, patch all of the places to use
>>>> zalrsc.
>>> 
>>> Is the implication here that the riscv,isa-extensions list passed to the kernel
>>> will contain "a", even though the hardware does not support it, because AMOs are
>>> emulated in M-mode?
>>> 
>>> If that is not the case, there is no need for runtime detection. The alternative
>>> entry can check RISCV_ISA_EXT_ZAAMO (which would be implied by RISCV_ISA_EXT_a)
>>> in the ISA bitmap like normal.
>> 
>> That would be much better! I was under the assumption that the usecase
>> for this patch was that they were passing in "a" and wanting to only get
>> zalrsc. We should be able to check
>> RISCV_ISA_EXT_ZAAMO/RISCV_ISA_EXT_ZALRSC to get the information without
>> runtime detection.
>
> In LLVM at least -mcpu=mips-p8700 enables the full A extension...

So then I think we need some help from the HW vendor here.  
Specifically: do these systems implement A via M-mode traps (ie, with a 
performance penalty) or is there some functional erratum in the A 
implementation.

If it's just a performance thing then we need some benchmark justifying 
the extra work, which means some hardware we can point at to run the 
benchmark.  Probably also best to shim this in via alternatives, so we 
can keep multi-vendor kernels working.

If it's a functional issue then we need to know what's actually broken.

Either way this should be exposed to userspace.

>
> Jess

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
  2025-02-05 15:49               ` Palmer Dabbelt
@ 2025-02-12 10:22                 ` Djordje Todorovic
  2025-03-19 12:28                   ` Djordje Todorovic
  0 siblings, 1 reply; 14+ messages in thread
From: Djordje Todorovic @ 2025-02-12 10:22 UTC (permalink / raw)
  To: Palmer Dabbelt, jrtc27@jrtc27.com
  Cc: Charlie Jenkins, samuel.holland@sifive.com, Conor Dooley,
	arikalo@gmail.com, linux-riscv@lists.infradead.org, Paul Walmsley,
	aou@eecs.berkeley.edu, Will Deacon, peterz@infradead.org,
	boqun.feng@gmail.com, Mark Rutland, yury.norov@gmail.com,
	linux@rasmusvillemoes.dk, parri.andrea@gmail.com,
	leobras@redhat.com, guoren@kernel.org, ericchancf@google.com,
	linux-kernel@vger.kernel.org

HTEC Public

Hi,

The official product page is located here:
https://mips.com/products/hardware/p8700

The mechanism we use to handle AMO is described in:
"7.3.10 MIPS Configuration 6 Register (mipsconfig6)"

Best,
Djordje

________________________________________
From: Palmer Dabbelt <palmer@dabbelt.com>
Sent: Wednesday, February 5, 2025 4:49 PM
To: jrtc27@jrtc27.com <jrtc27@jrtc27.com>
Cc: Charlie Jenkins <charlie@rivosinc.com>; samuel.holland@sifive.com <samuel.holland@sifive.com>; Conor Dooley <conor@kernel.org>; arikalo@gmail.com <arikalo@gmail.com>; linux-riscv@lists.infradead.org <linux-riscv@lists.infradead.org>; Paul Walmsley <paul.walmsley@sifive.com>; aou@eecs.berkeley.edu <aou@eecs.berkeley.edu>; Will Deacon <will@kernel.org>; peterz@infradead.org <peterz@infradead.org>; boqun.feng@gmail.com <boqun.feng@gmail.com>; Mark Rutland <mark.rutland@arm.com>; yury.norov@gmail.com <yury.norov@gmail.com>; linux@rasmusvillemoes.dk <linux@rasmusvillemoes.dk>; parri.andrea@gmail.com <parri.andrea@gmail.com>; leobras@redhat.com <leobras@redhat.com>; guoren@kernel.org <guoren@kernel.org>; ericchancf@google.com <ericchancf@google.com>; linux-kernel@vger.kernel.org <linux-kernel@vger.kernel.org>; Djordje Todorovic <djordje.todorovic@htecgroup.com>
Subject: Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions

[You don't often get email from palmer@dabbelt.com. Learn why this is important at https://aka.ms/LearnAboutSenderIdentification ]

CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you recognize the sender and know the content is safe.


On Mon, 03 Feb 2025 13:34:48 PST (-0800), jrtc27@jrtc27.com wrote:
> On 3 Feb 2025, at 19:50, Charlie Jenkins <charlie@rivosinc.com> wrote:
>>
>> On Mon, Feb 03, 2025 at 01:30:48PM -0600, Samuel Holland wrote:
>>> Hi Charlie,
>>>
>>> On 2025-02-03 1:12 PM, Charlie Jenkins wrote:
>>>> On Sun, Feb 02, 2025 at 08:08:50PM +0000, Conor Dooley wrote:
>>>>> On Sat, Feb 01, 2025 at 01:04:25PM +0100, Aleksandar Rikalo wrote:
>>>>>> On Fri, Jan 10, 2025 at 4:23 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>>>>>>
>>>>>>>> From: Chao-ying Fu <cfu@mips.com>
>>>>>>>>
>>>>>>>> Use only LR/SC instructions to implement atomic functions.
>>>>>>>
>>>>>>> In the previous patch you mention that this is to support MIPS P8700. Can
>>>>>>> you expand on why this change is required? The datasheet at [1] says:
>>>>>>>
>>>>>>> "The P8700 core is configured to support the RV64GCZba_Zbb (G = IMAFD)
>>>>>>> Standard ISA. It includes the RV64I base ISA, Multiply (M), Atomic (A),
>>>>>>> Single-Precision Floating Point (F), Double (D), Compressed (C) RISC-V
>>>>>>> extensions, as well as the as well as the bit-manipulation extensions
>>>>>>> (Zba) and (Zbb)"
>>>>>>>
>>>>>>> The "A" extension is a part of "G" which is mostly assumed to exist in
>>>>>>> the kernel. Additionally, having this be a compilation flag will cause
>>>>>>> traps on generic kernels. We generally try to push everything we can
>>>>>>> into runtime feature detection since there are so many possible variants
>>>>>>> of riscv.
>>>>>>>
>>>>>>> Expressing not being able to perform a feature like this is normally
>>>>>>> better expressed as an errata. Then generic kernels will be able to
>>>>>>> include this, and anybody who doesn't want to have the extra nops
>>>>>>> introduced can disable the errata. A similar approach to what I pointed
>>>>>>> out last time should work here too (but with more places to replace)
>>>>>>> [2].
>>>>>>>
>>>>>>> [1] https://mips.com/wp-content/uploads/2024/11/P8700_Data_Sheet.pdf
>>>>>>> [2] https://lore.kernel.org/lkml/Z2-UNfwcAQYZqVBU@ghost/T/
>>>>>>
>>>>>> So far we haven't found a way to do this using errata.
>>>>>
>>>>> You mean using alternatives? Not implementing A, but instead
>>>>> implementing Zalrsc, is not an erratum. It's a design decision.
>>>>
>>>> We could do the same thing we do with misaligned access detection and
>>>> run some instructions to determine if these instructions are being
>>>> emulated.  If they are being emulated, patch all of the places to use
>>>> zalrsc.
>>>
>>> Is the implication here that the riscv,isa-extensions list passed to the kernel
>>> will contain "a", even though the hardware does not support it, because AMOs are
>>> emulated in M-mode?
>>>
>>> If that is not the case, there is no need for runtime detection. The alternative
>>> entry can check RISCV_ISA_EXT_ZAAMO (which would be implied by RISCV_ISA_EXT_a)
>>> in the ISA bitmap like normal.
>>
>> That would be much better! I was under the assumption that the usecase
>> for this patch was that they were passing in "a" and wanting to only get
>> zalrsc. We should be able to check
>> RISCV_ISA_EXT_ZAAMO/RISCV_ISA_EXT_ZALRSC to get the information without
>> runtime detection.
>
> In LLVM at least -mcpu=mips-p8700 enables the full A extension...

So then I think we need some help from the HW vendor here.
Specifically: do these systems implement A via M-mode traps (ie, with a
performance penalty) or is there something functional erratum in the A
implementation.

If it's just a performance thing then we need some benchmark justifying
the extra work, which means some hardware we can point at to run the
benchmark.  Probably also best to shim this in via alternatives, so we
can keep multi-vendor kernels working.

If it's a funtional issue then we need to know what's actually broken.

Either way this should be exposed to userspace.

>
> Jess

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
  2025-02-12 10:22                 ` Djordje Todorovic
@ 2025-03-19 12:28                   ` Djordje Todorovic
  2025-03-25  8:58                     ` Alexandre Ghiti
  0 siblings, 1 reply; 14+ messages in thread
From: Djordje Todorovic @ 2025-03-19 12:28 UTC (permalink / raw)
  To: Palmer Dabbelt, jrtc27@jrtc27.com
  Cc: Charlie Jenkins, samuel.holland@sifive.com, Conor Dooley,
	arikalo@gmail.com, linux-riscv@lists.infradead.org, Paul Walmsley,
	aou@eecs.berkeley.edu, Will Deacon, peterz@infradead.org,
	boqun.feng@gmail.com, Mark Rutland, yury.norov@gmail.com,
	linux@rasmusvillemoes.dk, parri.andrea@gmail.com,
	leobras@redhat.com, guoren@kernel.org, ericchancf@google.com,
	linux-kernel@vger.kernel.org

HTEC Public

Hi everyone,

Thank you a lot for your comments.
I am wondering how we should proceed from here.

Best,
Djordje

________________________________________
From: Djordje Todorovic <Djordje.Todorovic@htecgroup.com>
Sent: Wednesday, February 12, 2025 11:22 AM
To: Palmer Dabbelt <palmer@dabbelt.com>; jrtc27@jrtc27.com <jrtc27@jrtc27.com>
Cc: Charlie Jenkins <charlie@rivosinc.com>; samuel.holland@sifive.com <samuel.holland@sifive.com>; Conor Dooley <conor@kernel.org>; arikalo@gmail.com <arikalo@gmail.com>; linux-riscv@lists.infradead.org <linux-riscv@lists.infradead.org>; Paul Walmsley <paul.walmsley@sifive.com>; aou@eecs.berkeley.edu <aou@eecs.berkeley.edu>; Will Deacon <will@kernel.org>; peterz@infradead.org <peterz@infradead.org>; boqun.feng@gmail.com <boqun.feng@gmail.com>; Mark Rutland <mark.rutland@arm.com>; yury.norov@gmail.com <yury.norov@gmail.com>; linux@rasmusvillemoes.dk <linux@rasmusvillemoes.dk>; parri.andrea@gmail.com <parri.andrea@gmail.com>; leobras@redhat.com <leobras@redhat.com>; guoren@kernel.org <guoren@kernel.org>; ericchancf@google.com <ericchancf@google.com>; linux-kernel@vger.kernel.org <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions

Hi,

The official product page is located here:
https://mips.com/products/hardware/p8700

The mechanism we use to handle AMO is described in:
"7.3.10 MIPS Configuration 6 Register (mipsconfig6)"

Best,
Djordje

________________________________________
From: Palmer Dabbelt <palmer@dabbelt.com>
Sent: Wednesday, February 5, 2025 4:49 PM
To: jrtc27@jrtc27.com <jrtc27@jrtc27.com>
Cc: Charlie Jenkins <charlie@rivosinc.com>; samuel.holland@sifive.com <samuel.holland@sifive.com>; Conor Dooley <conor@kernel.org>; arikalo@gmail.com <arikalo@gmail.com>; linux-riscv@lists.infradead.org <linux-riscv@lists.infradead.org>; Paul Walmsley <paul.walmsley@sifive.com>; aou@eecs.berkeley.edu <aou@eecs.berkeley.edu>; Will Deacon <will@kernel.org>; peterz@infradead.org <peterz@infradead.org>; boqun.feng@gmail.com <boqun.feng@gmail.com>; Mark Rutland <mark.rutland@arm.com>; yury.norov@gmail.com <yury.norov@gmail.com>; linux@rasmusvillemoes.dk <linux@rasmusvillemoes.dk>; parri.andrea@gmail.com <parri.andrea@gmail.com>; leobras@redhat.com <leobras@redhat.com>; guoren@kernel.org <guoren@kernel.org>; ericchancf@google.com <ericchancf@google.com>; linux-kernel@vger.kernel.org <linux-kernel@vger.kernel.org>; Djordje Todorovic <djordje.todorovic@htecgroup.com>
Subject: Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions

[You don't often get email from palmer@dabbelt.com. Learn why this is important at https://aka.ms/LearnAboutSenderIdentification ]

CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you recognize the sender and know the content is safe.


On Mon, 03 Feb 2025 13:34:48 PST (-0800), jrtc27@jrtc27.com wrote:
> On 3 Feb 2025, at 19:50, Charlie Jenkins <charlie@rivosinc.com> wrote:
>>
>> On Mon, Feb 03, 2025 at 01:30:48PM -0600, Samuel Holland wrote:
>>> Hi Charlie,
>>>
>>> On 2025-02-03 1:12 PM, Charlie Jenkins wrote:
>>>> On Sun, Feb 02, 2025 at 08:08:50PM +0000, Conor Dooley wrote:
>>>>> On Sat, Feb 01, 2025 at 01:04:25PM +0100, Aleksandar Rikalo wrote:
>>>>>> On Fri, Jan 10, 2025 at 4:23 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>>>>>>
>>>>>>>> From: Chao-ying Fu <cfu@mips.com>
>>>>>>>>
>>>>>>>> Use only LR/SC instructions to implement atomic functions.
>>>>>>>
>>>>>>> In the previous patch you mention that this is to support MIPS P8700. Can
>>>>>>> you expand on why this change is required? The datasheet at [1] says:
>>>>>>>
>>>>>>> "The P8700 core is configured to support the RV64GCZba_Zbb (G = IMAFD)
>>>>>>> Standard ISA. It includes the RV64I base ISA, Multiply (M), Atomic (A),
>>>>>>> Single-Precision Floating Point (F), Double (D), Compressed (C) RISC-V
>>>>>>> extensions, as well as the as well as the bit-manipulation extensions
>>>>>>> (Zba) and (Zbb)"
>>>>>>>
>>>>>>> The "A" extension is a part of "G" which is mostly assumed to exist in
>>>>>>> the kernel. Additionally, having this be a compilation flag will cause
>>>>>>> traps on generic kernels. We generally try to push everything we can
>>>>>>> into runtime feature detection since there are so many possible variants
>>>>>>> of riscv.
>>>>>>>
>>>>>>> Expressing not being able to perform a feature like this is normally
>>>>>>> better expressed as an errata. Then generic kernels will be able to
>>>>>>> include this, and anybody who doesn't want to have the extra nops
>>>>>>> introduced can disable the errata. A similar approach to what I pointed
>>>>>>> out last time should work here too (but with more places to replace)
>>>>>>> [2].
>>>>>>>
>>>>>>> [1] https://mips.com/wp-content/uploads/2024/11/P8700_Data_Sheet.pdf
>>>>>>> [2] https://lore.kernel.org/lkml/Z2-UNfwcAQYZqVBU@ghost/T/
>>>>>>
>>>>>> So far we haven't found a way to do this using errata.
>>>>>
>>>>> You mean using alternatives? Not implementing A, but instead
>>>>> implementing Zalrsc, is not an erratum. It's a design decision.
>>>>
>>>> We could do the same thing we do with misaligned access detection and
>>>> run some instructions to determine if these instructions are being
>>>> emulated.  If they are being emulated, patch all of the places to use
>>>> zalrsc.
>>>
>>> Is the implication here that the riscv,isa-extensions list passed to the kernel
>>> will contain "a", even though the hardware does not support it, because AMOs are
>>> emulated in M-mode?
>>>
>>> If that is not the case, there is no need for runtime detection. The alternative
>>> entry can check RISCV_ISA_EXT_ZAAMO (which would be implied by RISCV_ISA_EXT_a)
>>> in the ISA bitmap like normal.
>>
>> That would be much better! I was under the assumption that the usecase
>> for this patch was that they were passing in "a" and wanting to only get
>> zalrsc. We should be able to check
>> RISCV_ISA_EXT_ZAAMO/RISCV_ISA_EXT_ZALRSC to get the information without
>> runtime detection.
>
> In LLVM at least -mcpu=mips-p8700 enables the full A extension...

So then I think we need some help from the HW vendor here.
Specifically: do these systems implement A via M-mode traps (ie, with a
performance penalty) or is there something functional erratum in the A
implementation.

If it's just a performance thing then we need some benchmark justifying
the extra work, which means some hardware we can point at to run the
benchmark.  Probably also best to shim this in via alternatives, so we
can keep multi-vendor kernels working.

If it's a funtional issue then we need to know what's actually broken.

Either way this should be exposed to userspace.

>
> Jess

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
  2025-03-19 12:28                   ` Djordje Todorovic
@ 2025-03-25  8:58                     ` Alexandre Ghiti
  2025-03-26 12:33                       ` Djordje Todorovic
  0 siblings, 1 reply; 14+ messages in thread
From: Alexandre Ghiti @ 2025-03-25  8:58 UTC (permalink / raw)
  To: Djordje Todorovic, Palmer Dabbelt, jrtc27@jrtc27.com
  Cc: Charlie Jenkins, samuel.holland@sifive.com, Conor Dooley,
	arikalo@gmail.com, linux-riscv@lists.infradead.org, Paul Walmsley,
	aou@eecs.berkeley.edu, Will Deacon, peterz@infradead.org,
	boqun.feng@gmail.com, Mark Rutland, yury.norov@gmail.com,
	linux@rasmusvillemoes.dk, parri.andrea@gmail.com,
	leobras@redhat.com, guoren@kernel.org, ericchancf@google.com,
	linux-kernel@vger.kernel.org

Hi Djordje,

On 19/03/2025 13:28, Djordje Todorovic wrote:
> HTEC Public
>
> Hi everyone,
>
> Thank you a lot for your comments.
> I am wondering how we should proceed from here.


I read the previous comments and everyone seems to agree that an 
implementation using an alternative is the way to go, so can you come 
with a new version doing that?

Thanks,

Alex


>
> Best,
> Djordje
>
> ________________________________________
> From: Djordje Todorovic <Djordje.Todorovic@htecgroup.com>
> Sent: Wednesday, February 12, 2025 11:22 AM
> To: Palmer Dabbelt <palmer@dabbelt.com>; jrtc27@jrtc27.com <jrtc27@jrtc27.com>
> Cc: Charlie Jenkins <charlie@rivosinc.com>; samuel.holland@sifive.com <samuel.holland@sifive.com>; Conor Dooley <conor@kernel.org>; arikalo@gmail.com <arikalo@gmail.com>; linux-riscv@lists.infradead.org <linux-riscv@lists.infradead.org>; Paul Walmsley <paul.walmsley@sifive.com>; aou@eecs.berkeley.edu <aou@eecs.berkeley.edu>; Will Deacon <will@kernel.org>; peterz@infradead.org <peterz@infradead.org>; boqun.feng@gmail.com <boqun.feng@gmail.com>; Mark Rutland <mark.rutland@arm.com>; yury.norov@gmail.com <yury.norov@gmail.com>; linux@rasmusvillemoes.dk <linux@rasmusvillemoes.dk>; parri.andrea@gmail.com <parri.andrea@gmail.com>; leobras@redhat.com <leobras@redhat.com>; guoren@kernel.org <guoren@kernel.org>; ericchancf@google.com <ericchancf@google.com>; linux-kernel@vger.kernel.org <linux-kernel@vger.kernel.org>
> Subject: Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
>
> Hi,
>
> The official product page is located here:
> https://mips.com/products/hardware/p8700
>
> The mechanism we use to handle AMO is described in:
> "7.3.10 MIPS Configuration 6 Register (mipsconfig6)"
>
> Best,
> Djordje
>
> ________________________________________
> From: Palmer Dabbelt <palmer@dabbelt.com>
> Sent: Wednesday, February 5, 2025 4:49 PM
> To: jrtc27@jrtc27.com <jrtc27@jrtc27.com>
> Cc: Charlie Jenkins <charlie@rivosinc.com>; samuel.holland@sifive.com <samuel.holland@sifive.com>; Conor Dooley <conor@kernel.org>; arikalo@gmail.com <arikalo@gmail.com>; linux-riscv@lists.infradead.org <linux-riscv@lists.infradead.org>; Paul Walmsley <paul.walmsley@sifive.com>; aou@eecs.berkeley.edu <aou@eecs.berkeley.edu>; Will Deacon <will@kernel.org>; peterz@infradead.org <peterz@infradead.org>; boqun.feng@gmail.com <boqun.feng@gmail.com>; Mark Rutland <mark.rutland@arm.com>; yury.norov@gmail.com <yury.norov@gmail.com>; linux@rasmusvillemoes.dk <linux@rasmusvillemoes.dk>; parri.andrea@gmail.com <parri.andrea@gmail.com>; leobras@redhat.com <leobras@redhat.com>; guoren@kernel.org <guoren@kernel.org>; ericchancf@google.com <ericchancf@google.com>; linux-kernel@vger.kernel.org <linux-kernel@vger.kernel.org>; Djordje Todorovic <djordje.todorovic@htecgroup.com>
> Subject: Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
>
> [You don't often get email from palmer@dabbelt.com. Learn why this is important at https://aka.ms/LearnAboutSenderIdentification ]
>
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you recognize the sender and know the content is safe.
>
>
> On Mon, 03 Feb 2025 13:34:48 PST (-0800), jrtc27@jrtc27.com wrote:
>> On 3 Feb 2025, at 19:50, Charlie Jenkins <charlie@rivosinc.com> wrote:
>>> On Mon, Feb 03, 2025 at 01:30:48PM -0600, Samuel Holland wrote:
>>>> Hi Charlie,
>>>>
>>>> On 2025-02-03 1:12 PM, Charlie Jenkins wrote:
>>>>> On Sun, Feb 02, 2025 at 08:08:50PM +0000, Conor Dooley wrote:
>>>>>> On Sat, Feb 01, 2025 at 01:04:25PM +0100, Aleksandar Rikalo wrote:
>>>>>>> On Fri, Jan 10, 2025 at 4:23 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>>>>>>>
>>>>>>>>> From: Chao-ying Fu <cfu@mips.com>
>>>>>>>>>
>>>>>>>>> Use only LR/SC instructions to implement atomic functions.
>>>>>>>> In the previous patch you mention that this is to support MIPS P8700. Can
>>>>>>>> you expand on why this change is required? The datasheet at [1] says:
>>>>>>>>
>>>>>>>> "The P8700 core is configured to support the RV64GCZba_Zbb (G = IMAFD)
>>>>>>>> Standard ISA. It includes the RV64I base ISA, Multiply (M), Atomic (A),
>>>>>>>> Single-Precision Floating Point (F), Double (D), Compressed (C) RISC-V
>>>>>>>> extensions, as well as the as well as the bit-manipulation extensions
>>>>>>>> (Zba) and (Zbb)"
>>>>>>>>
>>>>>>>> The "A" extension is a part of "G" which is mostly assumed to exist in
>>>>>>>> the kernel. Additionally, having this be a compilation flag will cause
>>>>>>>> traps on generic kernels. We generally try to push everything we can
>>>>>>>> into runtime feature detection since there are so many possible variants
>>>>>>>> of riscv.
>>>>>>>>
>>>>>>>> Expressing not being able to perform a feature like this is normally
>>>>>>>> better expressed as an errata. Then generic kernels will be able to
>>>>>>>> include this, and anybody who doesn't want to have the extra nops
>>>>>>>> introduced can disable the errata. A similar approach to what I pointed
>>>>>>>> out last time should work here too (but with more places to replace)
>>>>>>>> [2].
>>>>>>>>
>>>>>>>> [1] https://mips.com/wp-content/uploads/2024/11/P8700_Data_Sheet.pdf
>>>>>>>> [2] https://lore.kernel.org/lkml/Z2-UNfwcAQYZqVBU@ghost/T/
>>>>>>> So far we haven't found a way to do this using errata.
>>>>>> You mean using alternatives? Not implementing A, but instead
>>>>>> implementing Zalrsc, is not an erratum. It's a design decision.
>>>>> We could do the same thing we do with misaligned access detection and
>>>>> run some instructions to determine if these instructions are being
>>>>> emulated.  If they are being emulated, patch all of the places to use
>>>>> zalrsc.
>>>> Is the implication here that the riscv,isa-extensions list passed to the kernel
>>>> will contain "a", even though the hardware does not support it, because AMOs are
>>>> emulated in M-mode?
>>>>
>>>> If that is not the case, there is no need for runtime detection. The alternative
>>>> entry can check RISCV_ISA_EXT_ZAAMO (which would be implied by RISCV_ISA_EXT_a)
>>>> in the ISA bitmap like normal.
>>> That would be much better! I was under the assumption that the usecase
>>> for this patch was that they were passing in "a" and wanting to only get
>>> zalrsc. We should be able to check
>>> RISCV_ISA_EXT_ZAAMO/RISCV_ISA_EXT_ZALRSC to get the information without
>>> runtime detection.
>> In LLVM at least -mcpu=mips-p8700 enables the full A extension...
> So then I think we need some help from the HW vendor here.
> Specifically: do these systems implement A via M-mode traps (ie, with a
> performance penalty) or is there something functional erratum in the A
> implementation.
>
> If it's just a performance thing then we need some benchmark justifying
> the extra work, which means some hardware we can point at to run the
> benchmark.  Probably also best to shim this in via alternatives, so we
> can keep multi-vendor kernels working.
>
> If it's a funtional issue then we need to know what's actually broken.
>
> Either way this should be exposed to userspace.
>
>> Jess
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
  2025-03-25  8:58                     ` Alexandre Ghiti
@ 2025-03-26 12:33                       ` Djordje Todorovic
  0 siblings, 0 replies; 14+ messages in thread
From: Djordje Todorovic @ 2025-03-26 12:33 UTC (permalink / raw)
  To: Alexandre Ghiti, Palmer Dabbelt, jrtc27@jrtc27.com
  Cc: Charlie Jenkins, samuel.holland@sifive.com, Conor Dooley,
	arikalo@gmail.com, linux-riscv@lists.infradead.org, Paul Walmsley,
	aou@eecs.berkeley.edu, Will Deacon, peterz@infradead.org,
	boqun.feng@gmail.com, Mark Rutland, yury.norov@gmail.com,
	linux@rasmusvillemoes.dk, parri.andrea@gmail.com,
	leobras@redhat.com, guoren@kernel.org, ericchancf@google.com,
	linux-kernel@vger.kernel.org

HTEC Proprietary

Hi Alex,

> I read the previous comments and everyone seems to agree that an
> implementation using an alternative is the way to go, so can you come
> with a new version doing that?

Yes, sure, we are working on that. I wanted to confirm that it is the way
to go.

Thanks,
Djordje

________________________________________
From: Alexandre Ghiti <alex@ghiti.fr>
Sent: Tuesday, March 25, 2025 9:58 AM
To: Djordje Todorovic <Djordje.Todorovic@htecgroup.com>; Palmer Dabbelt <palmer@dabbelt.com>; jrtc27@jrtc27.com <jrtc27@jrtc27.com>
Cc: Charlie Jenkins <charlie@rivosinc.com>; samuel.holland@sifive.com <samuel.holland@sifive.com>; Conor Dooley <conor@kernel.org>; arikalo@gmail.com <arikalo@gmail.com>; linux-riscv@lists.infradead.org <linux-riscv@lists.infradead.org>; Paul Walmsley <paul.walmsley@sifive.com>; aou@eecs.berkeley.edu <aou@eecs.berkeley.edu>; Will Deacon <will@kernel.org>; peterz@infradead.org <peterz@infradead.org>; boqun.feng@gmail.com <boqun.feng@gmail.com>; Mark Rutland <mark.rutland@arm.com>; yury.norov@gmail.com <yury.norov@gmail.com>; linux@rasmusvillemoes.dk <linux@rasmusvillemoes.dk>; parri.andrea@gmail.com <parri.andrea@gmail.com>; leobras@redhat.com <leobras@redhat.com>; guoren@kernel.org <guoren@kernel.org>; ericchancf@google.com <ericchancf@google.com>; linux-kernel@vger.kernel.org <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions

[You don't often get email from alex@ghiti.fr. Learn why this is important at https://aka.ms/LearnAboutSenderIdentification ]

CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you recognize the sender and know the content is safe.


Hi Djordje,

On 19/03/2025 13:28, Djordje Todorovic wrote:
> HTEC Public
>
> Hi everyone,
>
> Thank you a lot for your comments.
> I am wondering how we should proceed from here.


I read the previous comments and everyone seems to agree that an
implementation using an alternative is the way to go, so can you come
with a new version doing that?

Thanks,

Alex


>
> Best,
> Djordje
>
> ________________________________________
> From: Djordje Todorovic <Djordje.Todorovic@htecgroup.com>
> Sent: Wednesday, February 12, 2025 11:22 AM
> To: Palmer Dabbelt <palmer@dabbelt.com>; jrtc27@jrtc27.com <jrtc27@jrtc27.com>
> Cc: Charlie Jenkins <charlie@rivosinc.com>; samuel.holland@sifive.com <samuel.holland@sifive.com>; Conor Dooley <conor@kernel.org>; arikalo@gmail.com <arikalo@gmail.com>; linux-riscv@lists.infradead.org <linux-riscv@lists.infradead.org>; Paul Walmsley <paul.walmsley@sifive.com>; aou@eecs.berkeley.edu <aou@eecs.berkeley.edu>; Will Deacon <will@kernel.org>; peterz@infradead.org <peterz@infradead.org>; boqun.feng@gmail.com <boqun.feng@gmail.com>; Mark Rutland <mark.rutland@arm.com>; yury.norov@gmail.com <yury.norov@gmail.com>; linux@rasmusvillemoes.dk <linux@rasmusvillemoes.dk>; parri.andrea@gmail.com <parri.andrea@gmail.com>; leobras@redhat.com <leobras@redhat.com>; guoren@kernel.org <guoren@kernel.org>; ericchancf@google.com <ericchancf@google.com>; linux-kernel@vger.kernel.org <linux-kernel@vger.kernel.org>
> Subject: Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
>
> Hi,
>
> The official product page is located here:
> https://mips.com/products/hardware/p8700
>
> The mechanism we use to handle AMO is described in:
> "7.3.10 MIPS Configuration 6 Register (mipsconfig6)"
>
> Best,
> Djordje
>
> ________________________________________
> From: Palmer Dabbelt <palmer@dabbelt.com>
> Sent: Wednesday, February 5, 2025 4:49 PM
> To: jrtc27@jrtc27.com <jrtc27@jrtc27.com>
> Cc: Charlie Jenkins <charlie@rivosinc.com>; samuel.holland@sifive.com <samuel.holland@sifive.com>; Conor Dooley <conor@kernel.org>; arikalo@gmail.com <arikalo@gmail.com>; linux-riscv@lists.infradead.org <linux-riscv@lists.infradead.org>; Paul Walmsley <paul.walmsley@sifive.com>; aou@eecs.berkeley.edu <aou@eecs.berkeley.edu>; Will Deacon <will@kernel.org>; peterz@infradead.org <peterz@infradead.org>; boqun.feng@gmail.com <boqun.feng@gmail.com>; Mark Rutland <mark.rutland@arm.com>; yury.norov@gmail.com <yury.norov@gmail.com>; linux@rasmusvillemoes.dk <linux@rasmusvillemoes.dk>; parri.andrea@gmail.com <parri.andrea@gmail.com>; leobras@redhat.com <leobras@redhat.com>; guoren@kernel.org <guoren@kernel.org>; ericchancf@google.com <ericchancf@google.com>; linux-kernel@vger.kernel.org <linux-kernel@vger.kernel.org>; Djordje Todorovic <djordje.todorovic@htecgroup.com>
> Subject: Re: [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions
>
> [You don't often get email from palmer@dabbelt.com. Learn why this is important at https://aka.ms/LearnAboutSenderIdentification ]
>
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you recognize the sender and know the content is safe.
>
>
> On Mon, 03 Feb 2025 13:34:48 PST (-0800), jrtc27@jrtc27.com wrote:
>> On 3 Feb 2025, at 19:50, Charlie Jenkins <charlie@rivosinc.com> wrote:
>>> On Mon, Feb 03, 2025 at 01:30:48PM -0600, Samuel Holland wrote:
>>>> Hi Charlie,
>>>>
>>>> On 2025-02-03 1:12 PM, Charlie Jenkins wrote:
>>>>> On Sun, Feb 02, 2025 at 08:08:50PM +0000, Conor Dooley wrote:
>>>>>> On Sat, Feb 01, 2025 at 01:04:25PM +0100, Aleksandar Rikalo wrote:
>>>>>>> On Fri, Jan 10, 2025 at 4:23 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>>>>>>>
>>>>>>>>> From: Chao-ying Fu <cfu@mips.com>
>>>>>>>>>
>>>>>>>>> Use only LR/SC instructions to implement atomic functions.
>>>>>>>> In the previous patch you mention that this is to support MIPS P8700. Can
>>>>>>>> you expand on why this change is required? The datasheet at [1] says:
>>>>>>>>
>>>>>>>> "The P8700 core is configured to support the RV64GCZba_Zbb (G = IMAFD)
>>>>>>>> Standard ISA. It includes the RV64I base ISA, Multiply (M), Atomic (A),
>>>>>>>> Single-Precision Floating Point (F), Double (D), Compressed (C) RISC-V
>>>>>>>> extensions, as well as the bit-manipulation extensions
>>>>>>>> (Zba) and (Zbb)"
>>>>>>>>
>>>>>>>> The "A" extension is a part of "G" which is mostly assumed to exist in
>>>>>>>> the kernel. Additionally, having this be a compilation flag will cause
>>>>>>>> traps on generic kernels. We generally try to push everything we can
>>>>>>>> into runtime feature detection since there are so many possible variants
>>>>>>>> of riscv.
>>>>>>>>
>>>>>>>> Expressing not being able to perform a feature like this is normally
>>>>>>>> better expressed as an errata. Then generic kernels will be able to
>>>>>>>> include this, and anybody who doesn't want to have the extra nops
>>>>>>>> introduced can disable the errata. A similar approach to what I pointed
>>>>>>>> out last time should work here too (but with more places to replace)
>>>>>>>> [2].
>>>>>>>>
>>>>>>>> [1] https://mips.com/wp-content/uploads/2024/11/P8700_Data_Sheet.pdf
>>>>>>>> [2] https://lore.kernel.org/lkml/Z2-UNfwcAQYZqVBU@ghost/T/
>>>>>>> So far we haven't found a way to do this using errata.
>>>>>> You mean using alternatives? Not implementing A, but instead
>>>>>> implementing Zalrsc, is not an erratum. It's a design decision.
>>>>> We could do the same thing we do with misaligned access detection and
>>>>> run some instructions to determine if these instructions are being
>>>>> emulated.  If they are being emulated, patch all of the places to use
>>>>> zalrsc.
>>>> Is the implication here that the riscv,isa-extensions list passed to the kernel
>>>> will contain "a", even though the hardware does not support it, because AMOs are
>>>> emulated in M-mode?
>>>>
>>>> If that is not the case, there is no need for runtime detection. The alternative
>>>> entry can check RISCV_ISA_EXT_ZAAMO (which would be implied by RISCV_ISA_EXT_a)
>>>> in the ISA bitmap like normal.
>>> That would be much better! I was under the assumption that the usecase
>>> for this patch was that they were passing in "a" and wanting to only get
>>> zalrsc. We should be able to check
>>> RISCV_ISA_EXT_ZAAMO/RISCV_ISA_EXT_ZALRSC to get the information without
>>> runtime detection.
>> In LLVM at least -mcpu=mips-p8700 enables the full A extension...
> So then I think we need some help from the HW vendor here.
> Specifically: do these systems implement A via M-mode traps (ie, with a
> performance penalty) or is there some functional erratum in the A
> implementation.
>
> If it's just a performance thing then we need some benchmark justifying
> the extra work, which means some hardware we can point at to run the
> benchmark.  Probably also best to shim this in via alternatives, so we
> can keep multi-vendor kernels working.
>
> If it's a functional issue then we need to know what's actually broken.
>
> Either way this should be exposed to userspace.
>
>> Jess
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2025-03-26 12:34 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-12-25  8:24 [PATCH v2] riscv: Use Zalrsc extension to implement atomic functions Aleksandar Rikalo
2025-01-10  3:23 ` Charlie Jenkins
2025-02-01 12:04   ` Aleksandar Rikalo
2025-02-02 20:08     ` Conor Dooley
2025-02-03  6:59       ` Jessica Clarke
2025-02-03 19:12       ` Charlie Jenkins
2025-02-03 19:30         ` Samuel Holland
2025-02-03 19:50           ` Charlie Jenkins
2025-02-03 21:34             ` Jessica Clarke
2025-02-05 15:49               ` Palmer Dabbelt
2025-02-12 10:22                 ` Djordje Todorovic
2025-03-19 12:28                   ` Djordje Todorovic
2025-03-25  8:58                     ` Alexandre Ghiti
2025-03-26 12:33                       ` Djordje Todorovic

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox