All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Add RISCV_ISA_ZALRSC_ONLY to rewrite amo instructions via lr and sc
@ 2025-01-10  0:24 Chao-ying Fu
  2025-01-10  1:14 ` Bo Gan
  2025-02-12 12:26 ` Anup Patel
  0 siblings, 2 replies; 8+ messages in thread
From: Chao-ying Fu @ 2025-01-10  0:24 UTC (permalink / raw)
  To: opensbi

Some platforms implement lr and sc in hardware, and emulate amo
instructions via the exception handler. To get better performance,
we use lr and sc only, when the config is defined.
---
 firmware/fw_base.S       |  8 ++++++
 lib/sbi/riscv_atomic.c   | 54 ++++++++++++++++++++++++++++++++++++++++
 lib/sbi/riscv_locks.c    |  7 ++++++
 platform/generic/Kconfig |  4 +++
 4 files changed, 73 insertions(+)

diff --git a/firmware/fw_base.S b/firmware/fw_base.S
index d027e5e..835d64a 100644
--- a/firmware/fw_base.S
+++ b/firmware/fw_base.S
@@ -59,8 +59,16 @@ _try_lottery:
 	/* Jump to relocation wait loop if we don't get relocation lottery */
 	lla	a6, _boot_lottery
 	li	a7, BOOT_LOTTERY_ACQUIRED
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
 	amoswap.w a6, a7, (a6)
 	bnez	a6, _wait_for_boot_hart
+#else
+_sc_fail:
+	lr.w	t0, (a6)
+	sc.w	t1, a7, (a6)
+	bnez	t1, _sc_fail
+	bnez	t0, _wait_for_boot_hart
+#endif
 
 	/* relocate the global table content */
 	li	t0, FW_TEXT_START	/* link start */
diff --git a/lib/sbi/riscv_atomic.c b/lib/sbi/riscv_atomic.c
index 32cf3f0..c5f47eb 100644
--- a/lib/sbi/riscv_atomic.c
+++ b/lib/sbi/riscv_atomic.c
@@ -31,6 +31,7 @@ void atomic_write(atomic_t *atom, long value)
 
 long atomic_add_return(atomic_t *atom, long value)
 {
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
 	long ret;
 #if __SIZEOF_LONG__ == 4
 	__asm__ __volatile__("	amoadd.w.aqrl  %1, %2, %0"
@@ -43,6 +44,27 @@ long atomic_add_return(atomic_t *atom, long value)
 			     : "r"(value)
 			     : "memory");
 #endif
+#else
+	long ret, temp;
+#if __SIZEOF_LONG__ == 4
+	__asm__ __volatile__("1:lr.w.aqrl	%1,%0\n"
+			     "  add	%2,%1,%3\n"
+			     "  sc.w.aqrl	%2,%2,%0\n"
+			     "  bnez	%2,1b"
+			     : "+A"(atom->counter), "=&r"(ret), "=&r"(temp)
+			     : "r"(value)
+			     : "memory");
+#elif __SIZEOF_LONG__ == 8
+	__asm__ __volatile__("1:lr.d.aqrl	%1,%0\n"
+			     "  add	%2,%1,%3\n"
+			     "  sc.d.aqrl	%2,%2,%0\n"
+			     "  bnez	%2,1b"
+			     : "+A"(atom->counter), "=&r"(ret), "=&r"(temp)
+			     : "r"(value)
+			     : "memory");
+#endif
+#endif
+
 	return ret + value;
 }
 
@@ -51,6 +73,7 @@ long atomic_sub_return(atomic_t *atom, long value)
 	return atomic_add_return(atom, -value);
 }
 
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
 #define __axchg(ptr, new, size)							\
 	({									\
 		__typeof__(ptr) __ptr = (ptr);					\
@@ -76,6 +99,37 @@ long atomic_sub_return(atomic_t *atom, long value)
 		}								\
 		__ret;								\
 	})
+#else
+#define __axchg(ptr, new, size)							\
+	({									\
+		__typeof__(ptr) __ptr = (ptr);					\
+		__typeof__(new) __new = (new);					\
+		__typeof__(*(ptr)) __ret, __temp;					\
+		switch (size) {							\
+		case 4:								\
+			__asm__ __volatile__ (					\
+				"1:	lr.w.aqrl %0, %1\n"			\
+				"	sc.w.aqrl %2, %3, %1\n"			\
+				"	bnez	  %2, 1b\n"			\
+				: "=&r" (__ret), "+A" (*__ptr), "=&r" (__temp)	\
+				: "r" (__new)					\
+				: "memory");					\
+			break;							\
+		case 8:								\
+			__asm__ __volatile__ (					\
+				"1:	lr.d.aqrl %0, %1\n"			\
+				"	sc.d.aqrl %2, %3, %1\n"			\
+				"	bnez	  %2, 1b\n"			\
+				: "=&r" (__ret), "+A" (*__ptr), "=&r" (__temp)	\
+				: "r" (__new)					\
+				: "memory");					\
+			break;							\
+		default:							\
+			break;							\
+		}								\
+		__ret;								\
+	})
+#endif
 
 #define axchg(ptr, x)								\
 	({									\
diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c
index acab776..11f87bf 100644
--- a/lib/sbi/riscv_locks.c
+++ b/lib/sbi/riscv_locks.c
@@ -53,7 +53,14 @@ void spin_lock(spinlock_t *lock)
 
 	__asm__ __volatile__(
 		/* Atomically increment the next ticket. */
+#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
 		"	amoadd.w.aqrl	%0, %4, %3\n"
+#else
+		"3:	lr.w.aqrl	%0, %3\n"
+		"	add	%1, %0, %4\n"
+		"	sc.w.aqrl	%1, %1, %3\n"
+		"	bnez	%1, 3b\n"
+#endif
 
 		/* Did we get the lock? */
 		"	srli	%1, %0, %6\n"
diff --git a/platform/generic/Kconfig b/platform/generic/Kconfig
index 688da3f..a5ecbd5 100644
--- a/platform/generic/Kconfig
+++ b/platform/generic/Kconfig
@@ -7,6 +7,10 @@ config PLATFORM_GENERIC
 	select FDT_PMU
 	default y
 
+config RISCV_ISA_ZALRSC_ONLY
+	bool "Use lr/sc only"
+	default n
+
 if PLATFORM_GENERIC
 
 config PLATFORM_GENERIC_NAME
-- 
2.47.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH] Add RISCV_ISA_ZALRSC_ONLY to rewrite amo instructions via lr and sc
  2025-01-10  0:24 [PATCH] Add RISCV_ISA_ZALRSC_ONLY to rewrite amo instructions via lr and sc Chao-ying Fu
@ 2025-01-10  1:14 ` Bo Gan
  2025-01-10  2:33   ` Chao-ying Fu
  2025-02-12 12:26 ` Anup Patel
  1 sibling, 1 reply; 8+ messages in thread
From: Bo Gan @ 2025-01-10  1:14 UTC (permalink / raw)
  To: opensbi

Hi Chaoying,

On 1/9/25 16:24, Chao-ying Fu wrote:
> Some platforms implement lr and sc in hardware, and emulate amo
> instructions via the exception handler. To get better performance,
> we use lr and sc only, when the config is defined.

This comment seems to be self-contradictory. If the core does not support
amo, then there's no way currently in opensbi to trap/emulate it through
exception handler. Thus, from your description, it's not only performance,
but correctness instead. Also do you have a concrete example of such HW?
I wonder if it's worthwhile to maintain for such special HW. I know the
S7 hart in hifive/unmatched and starfive/jh7110 works the other way around:
it only supports AMO, but not lr/sc.

> ---
>   firmware/fw_base.S       |  8 ++++++
>   lib/sbi/riscv_atomic.c   | 54 ++++++++++++++++++++++++++++++++++++++++
>   lib/sbi/riscv_locks.c    |  7 ++++++
>   platform/generic/Kconfig |  4 +++
>   4 files changed, 73 insertions(+)
> 
> diff --git a/firmware/fw_base.S b/firmware/fw_base.S
> index d027e5e..835d64a 100644
> --- a/firmware/fw_base.S
> +++ b/firmware/fw_base.S
> @@ -59,8 +59,16 @@ _try_lottery:
>   	/* Jump to relocation wait loop if we don't get relocation lottery */
>   	lla	a6, _boot_lottery
>   	li	a7, BOOT_LOTTERY_ACQUIRED
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>   	amoswap.w a6, a7, (a6)
>   	bnez	a6, _wait_for_boot_hart
> +#else
> +_sc_fail:
> +	lr.w	t0, (a6)
> +	sc.w	t1, a7, (a6)
> +	bnez	t1, _sc_fail
> +	bnez	t0, _wait_for_boot_hart
> +#endif
>   
>   	/* relocate the global table content */
>   	li	t0, FW_TEXT_START	/* link start */
> diff --git a/lib/sbi/riscv_atomic.c b/lib/sbi/riscv_atomic.c
> index 32cf3f0..c5f47eb 100644
> --- a/lib/sbi/riscv_atomic.c
> +++ b/lib/sbi/riscv_atomic.c
> @@ -31,6 +31,7 @@ void atomic_write(atomic_t *atom, long value)
>   
>   long atomic_add_return(atomic_t *atom, long value)
>   {
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>   	long ret;
>   #if __SIZEOF_LONG__ == 4
>   	__asm__ __volatile__("	amoadd.w.aqrl  %1, %2, %0"
> @@ -43,6 +44,27 @@ long atomic_add_return(atomic_t *atom, long value)
>   			     : "r"(value)
>   			     : "memory");
>   #endif
> +#else
> +	long ret, temp;
> +#if __SIZEOF_LONG__ == 4
> +	__asm__ __volatile__("1:lr.w.aqrl	%1,%0\n"
> +			     "  add	%2,%1,%3\n"
> +			     "  sc.w.aqrl	%2,%2,%0\n"
> +			     "  bnez	%2,1b"
> +			     : "+A"(atom->counter), "=&r"(ret), "=&r"(temp)
> +			     : "r"(value)
> +			     : "memory");
> +#elif __SIZEOF_LONG__ == 8
> +	__asm__ __volatile__("1:lr.d.aqrl	%1,%0\n"
> +			     "  add	%2,%1,%3\n"
> +			     "  sc.d.aqrl	%2,%2,%0\n"
> +			     "  bnez	%2,1b"
> +			     : "+A"(atom->counter), "=&r"(ret), "=&r"(temp)
> +			     : "r"(value)
> +			     : "memory");
> +#endif
> +#endif
> +
>   	return ret + value;
>   }
>   
> @@ -51,6 +73,7 @@ long atomic_sub_return(atomic_t *atom, long value)
>   	return atomic_add_return(atom, -value);
>   }
>   
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>   #define __axchg(ptr, new, size)							\
>   	({									\
>   		__typeof__(ptr) __ptr = (ptr);					\
> @@ -76,6 +99,37 @@ long atomic_sub_return(atomic_t *atom, long value)
>   		}								\
>   		__ret;								\
>   	})
> +#else
> +#define __axchg(ptr, new, size)							\
> +	({									\
> +		__typeof__(ptr) __ptr = (ptr);					\
> +		__typeof__(new) __new = (new);					\
> +		__typeof__(*(ptr)) __ret, __temp;					\
> +		switch (size) {							\
> +		case 4:								\
> +			__asm__ __volatile__ (					\
> +				"1:	lr.w.aqrl %0, %1\n"			\
> +				"	sc.w.aqrl %2, %3, %1\n"			\
> +				"	bnez	  %2, 1b\n"			\
> +				: "=&r" (__ret), "+A" (*__ptr), "=&r" (__temp)	\
> +				: "r" (__new)					\
> +				: "memory");					\
> +			break;							\
> +		case 8:								\
> +			__asm__ __volatile__ (					\
> +				"1:	lr.d.aqrl %0, %1\n"			\
> +				"	sc.d.aqrl %2, %3, %1\n"			\
> +				"	bnez	  %2, 1b\n"			\
> +				: "=&r" (__ret), "+A" (*__ptr), "=&r" (__temp)	\
> +				: "r" (__new)					\
> +				: "memory");					\
> +			break;							\
> +		default:							\
> +			break;							\
> +		}								\
> +		__ret;								\
> +	})
> +#endif
>   
>   #define axchg(ptr, x)								\
>   	({									\
> diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c
> index acab776..11f87bf 100644
> --- a/lib/sbi/riscv_locks.c
> +++ b/lib/sbi/riscv_locks.c
> @@ -53,7 +53,14 @@ void spin_lock(spinlock_t *lock)
>   
>   	__asm__ __volatile__(
>   		/* Atomically increment the next ticket. */
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>   		"	amoadd.w.aqrl	%0, %4, %3\n"
> +#else
> +		"3:	lr.w.aqrl	%0, %3\n"
> +		"	add	%1, %0, %4\n"
> +		"	sc.w.aqrl	%1, %1, %3\n"
> +		"	bnez	%1, 3b\n"
> +#endif
>   
>   		/* Did we get the lock? */
>   		"	srli	%1, %0, %6\n"
> diff --git a/platform/generic/Kconfig b/platform/generic/Kconfig
> index 688da3f..a5ecbd5 100644
> --- a/platform/generic/Kconfig
> +++ b/platform/generic/Kconfig
> @@ -7,6 +7,10 @@ config PLATFORM_GENERIC
>   	select FDT_PMU
>   	default y
>   
> +config RISCV_ISA_ZALRSC_ONLY
> +	bool "Use lr/sc only"
> +	default n
> +
>   if PLATFORM_GENERIC
>   
>   config PLATFORM_GENERIC_NAME

Bo


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH] Add RISCV_ISA_ZALRSC_ONLY to rewrite amo instructions via lr and sc
  2025-01-10  1:14 ` Bo Gan
@ 2025-01-10  2:33   ` Chao-ying Fu
  0 siblings, 0 replies; 8+ messages in thread
From: Chao-ying Fu @ 2025-01-10  2:33 UTC (permalink / raw)
  To: opensbi

On Thu, Jan 9, 2025 at 5:12?PM Bo Gan <ganboing@gmail.com> wrote:
>
> Hi Chaoying,
>
> On 1/9/25 16:24, Chao-ying Fu wrote:
> > Some platforms implement lr and sc in hardware, and emulate amo
> > instructions via the exception handler. To get better performance,
> > we use lr and sc only, when the config is defined.
>
> This comment seems to be self-contradictory. If the core does not support
> amo, then there's no way currently in opensbi to trap/emulate it through
> exception handler. Thus, from your description, it's not only performance,
> but correctness instead. Also do you have a concrete example of such HW?
> I wonder if it's worthwhile to maintain for such special HW. I know the
> S7 hart in hifive/unmatched and starfive/jh7110 works the other way around:
> it only supports AMO, but not lr/sc.

Our core implements extra custom illegal instruction exceptions for
amo instructions and has custom exception vector address. We have
assembly source code to emulate amo via lr and sc in our platform
code. We will post our platform patch to OpenSBI later.
Note that without this patch, the original code still can run.
However, if we just replace amo with lr and sc directly in OpenSBI, we
don't need to run through our custom exception handler.
Thanks a lot!

Regards,
Chao-ying


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH] Add RISCV_ISA_ZALRSC_ONLY to rewrite amo instructions via lr and sc
  2025-01-10  0:24 [PATCH] Add RISCV_ISA_ZALRSC_ONLY to rewrite amo instructions via lr and sc Chao-ying Fu
  2025-01-10  1:14 ` Bo Gan
@ 2025-02-12 12:26 ` Anup Patel
  2025-02-25 23:00   ` [PATCH v2] Emit lr and sc instructions based on -march flags Chao-ying Fu
  2025-02-26  1:47   ` [PATCH v3] " Chao-ying Fu
  1 sibling, 2 replies; 8+ messages in thread
From: Anup Patel @ 2025-02-12 12:26 UTC (permalink / raw)
  To: opensbi

On Fri, Jan 10, 2025 at 6:28?AM Chao-ying Fu <icebergfu@gmail.com> wrote:
>
> Some platforms implement lr and sc in hardware, and emulate amo
> instructions via the exception handler. To get better performance,
> we use lr and sc only, when the config is defined.

Currently, A-extension (Zaamo + Zalrsc) is mandatory for OpenSBI.
(Refer, docs/platform_requirements.md) so supporting



> ---
>  firmware/fw_base.S       |  8 ++++++
>  lib/sbi/riscv_atomic.c   | 54 ++++++++++++++++++++++++++++++++++++++++
>  lib/sbi/riscv_locks.c    |  7 ++++++
>  platform/generic/Kconfig |  4 +++
>  4 files changed, 73 insertions(+)
>
> diff --git a/firmware/fw_base.S b/firmware/fw_base.S
> index d027e5e..835d64a 100644
> --- a/firmware/fw_base.S
> +++ b/firmware/fw_base.S
> @@ -59,8 +59,16 @@ _try_lottery:
>         /* Jump to relocation wait loop if we don't get relocation lottery */
>         lla     a6, _boot_lottery
>         li      a7, BOOT_LOTTERY_ACQUIRED
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY
>         amoswap.w a6, a7, (a6)
>         bnez    a6, _wait_for_boot_hart
> +#else
> +_sc_fail:
> +       lr.w    t0, (a6)
> +       sc.w    t1, a7, (a6)
> +       bnez    t1, _sc_fail
> +       bnez    t0, _wait_for_boot_hart
> +#endif
>
>         /* relocate the global table content */
>         li      t0, FW_TEXT_START       /* link start */
> diff --git a/lib/sbi/riscv_atomic.c b/lib/sbi/riscv_atomic.c
> index 32cf3f0..c5f47eb 100644
> --- a/lib/sbi/riscv_atomic.c
> +++ b/lib/sbi/riscv_atomic.c
> @@ -31,6 +31,7 @@ void atomic_write(atomic_t *atom, long value)
>
>  long atomic_add_return(atomic_t *atom, long value)
>  {
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY

This should be "#ifdef __riscv_atomic"

>         long ret;
>  #if __SIZEOF_LONG__ == 4
>         __asm__ __volatile__("  amoadd.w.aqrl  %1, %2, %0"
> @@ -43,6 +44,27 @@ long atomic_add_return(atomic_t *atom, long value)
>                              : "r"(value)
>                              : "memory");
>  #endif
> +#else

This should be "#elif __riscv_zalrsc"

> +       long ret, temp;
> +#if __SIZEOF_LONG__ == 4
> +       __asm__ __volatile__("1:lr.w.aqrl       %1,%0\n"
> +                            "  add     %2,%1,%3\n"
> +                            "  sc.w.aqrl       %2,%2,%0\n"
> +                            "  bnez    %2,1b"
> +                            : "+A"(atom->counter), "=&r"(ret), "=&r"(temp)
> +                            : "r"(value)
> +                            : "memory");
> +#elif __SIZEOF_LONG__ == 8
> +       __asm__ __volatile__("1:lr.d.aqrl       %1,%0\n"
> +                            "  add     %2,%1,%3\n"
> +                            "  sc.d.aqrl       %2,%2,%0\n"
> +                            "  bnez    %2,1b"
> +                            : "+A"(atom->counter), "=&r"(ret), "=&r"(temp)
> +                            : "r"(value)
> +                            : "memory");
> +#endif

There should be "#else error" here.

> +#endif
> +
>         return ret + value;
>  }
>
> @@ -51,6 +73,7 @@ long atomic_sub_return(atomic_t *atom, long value)
>         return atomic_add_return(atom, -value);
>  }
>
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY

Same as above.

>  #define __axchg(ptr, new, size)                                                        \
>         ({                                                                      \
>                 __typeof__(ptr) __ptr = (ptr);                                  \
> @@ -76,6 +99,37 @@ long atomic_sub_return(atomic_t *atom, long value)
>                 }                                                               \
>                 __ret;                                                          \
>         })
> +#else

Same as above.

> +#define __axchg(ptr, new, size)                                                        \
> +       ({                                                                      \
> +               __typeof__(ptr) __ptr = (ptr);                                  \
> +               __typeof__(new) __new = (new);                                  \
> +               __typeof__(*(ptr)) __ret, __temp;                                       \
> +               switch (size) {                                                 \
> +               case 4:                                                         \
> +                       __asm__ __volatile__ (                                  \
> +                               "1:     lr.w.aqrl %0, %1\n"                     \
> +                               "       sc.w.aqrl %2, %3, %1\n"                 \
> +                               "       bnez      %2, 1b\n"                     \
> +                               : "=&r" (__ret), "+A" (*__ptr), "=&r" (__temp)  \
> +                               : "r" (__new)                                   \
> +                               : "memory");                                    \
> +                       break;                                                  \
> +               case 8:                                                         \
> +                       __asm__ __volatile__ (                                  \
> +                               "1:     lr.d.aqrl %0, %1\n"                     \
> +                               "       sc.d.aqrl %2, %3, %1\n"                 \
> +                               "       bnez      %2, 1b\n"                     \
> +                               : "=&r" (__ret), "+A" (*__ptr), "=&r" (__temp)  \
> +                               : "r" (__new)                                   \
> +                               : "memory");                                    \
> +                       break;                                                  \
> +               default:                                                        \
> +                       break;                                                  \
> +               }                                                               \
> +               __ret;                                                          \
> +       })
> +#endif
>
>  #define axchg(ptr, x)                                                          \
>         ({                                                                      \
> diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c
> index acab776..11f87bf 100644
> --- a/lib/sbi/riscv_locks.c
> +++ b/lib/sbi/riscv_locks.c
> @@ -53,7 +53,14 @@ void spin_lock(spinlock_t *lock)
>
>         __asm__ __volatile__(
>                 /* Atomically increment the next ticket. */
> +#ifndef CONFIG_RISCV_ISA_ZALRSC_ONLY

Same as above.

>                 "       amoadd.w.aqrl   %0, %4, %3\n"
> +#else

Same as above.

> +               "3:     lr.w.aqrl       %0, %3\n"
> +               "       add     %1, %0, %4\n"
> +               "       sc.w.aqrl       %1, %1, %3\n"
> +               "       bnez    %1, 3b\n"
> +#endif
>
>                 /* Did we get the lock? */
>                 "       srli    %1, %0, %6\n"
> diff --git a/platform/generic/Kconfig b/platform/generic/Kconfig
> index 688da3f..a5ecbd5 100644
> --- a/platform/generic/Kconfig
> +++ b/platform/generic/Kconfig
> @@ -7,6 +7,10 @@ config PLATFORM_GENERIC
>         select FDT_PMU
>         default y
>
> +config RISCV_ISA_ZALRSC_ONLY
> +       bool "Use lr/sc only"
> +       default n
> +

Drop this config option.

Platform without proper A-extension should compile
OpenSBI with PLATFORM_RISCV_ISA=rv64im_zalrsc_zicsr.

>  if PLATFORM_GENERIC
>
>  config PLATFORM_GENERIC_NAME
> --
> 2.47.1
>
>
> --
> opensbi mailing list
> opensbi at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/opensbi

Regards,
Anup


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2] Emit lr and sc instructions based on -march flags
  2025-02-12 12:26 ` Anup Patel
@ 2025-02-25 23:00   ` Chao-ying Fu
  2025-02-26 13:22     ` Xiang W
  2025-02-26  1:47   ` [PATCH v3] " Chao-ying Fu
  1 sibling, 1 reply; 8+ messages in thread
From: Chao-ying Fu @ 2025-02-25 23:00 UTC (permalink / raw)
  To: opensbi

When -march=rv64im_zalrsc_zicsr is used, we provide implementation
that uses lr and sc instructions only.
When -march=rv64ima_zicsr is used, we have the original implementation.

Signed-off-by: Chao-ying Fu <cfu@mips.com>
---
Changes in v2:
  Remove the config flag that controls lr/sc.
  Use addw to implement amoadd.w.
---
 firmware/fw_base.S            | 10 ++++++
 firmware/payloads/test_head.S | 11 +++++++
 lib/sbi/riscv_atomic.c        | 60 ++++++++++++++++++++++++++++++++++-
 lib/sbi/riscv_locks.c         |  9 ++++++
 4 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/firmware/fw_base.S b/firmware/fw_base.S
index 536bcd2..2498797 100644
--- a/firmware/fw_base.S
+++ b/firmware/fw_base.S
@@ -59,8 +59,18 @@ _try_lottery:
 	/* Jump to relocation wait loop if we don't get relocation lottery */
 	lla	a6, _boot_lottery
 	li	a7, BOOT_LOTTERY_ACQUIRED
+#ifdef __riscv_atomic
 	amoswap.w a6, a7, (a6)
 	bnez	a6, _wait_for_boot_hart
+#elif __riscv_zalrsc
+_sc_fail:
+	lr.w	t0, (a6)
+	sc.w	t1, a7, (a6)
+	bnez	t1, _sc_fail
+	bnez	t0, _wait_for_boot_hart
+#else
+#error "need a or zalrsc"
+#endif
 
 	/* relocate the global table content */
 	li	t0, FW_TEXT_START	/* link start */
diff --git a/firmware/payloads/test_head.S b/firmware/payloads/test_head.S
index 7a2ac12..7d25e07 100644
--- a/firmware/payloads/test_head.S
+++ b/firmware/payloads/test_head.S
@@ -30,7 +30,18 @@ _start:
 	/* Pick one hart to run the main boot sequence */
 	lla	a3, _hart_lottery
 	li	a2, 1
+#ifdef __riscv_atomic
 	amoadd.w a3, a2, (a3)
+#elif __riscv_zalrsc
+_sc_fail:
+	lr.w	t0, (a3)
+	addw	t1, t0, a2
+	sc.w	t1, t1, (a3)
+	bnez	t1, _sc_fail
+	move	a3, t0
+#else
+#error "need a or zalrsc"
+#endif
 	bnez	a3, _start_hang
 
 	/* Save a0 and a1 */
diff --git a/lib/sbi/riscv_atomic.c b/lib/sbi/riscv_atomic.c
index 32cf3f0..df16a2e 100644
--- a/lib/sbi/riscv_atomic.c
+++ b/lib/sbi/riscv_atomic.c
@@ -12,7 +12,7 @@
 #include <sbi/riscv_atomic.h>
 #include <sbi/riscv_barrier.h>
 
-#ifndef __riscv_atomic
+#if !defined(__riscv_atomic) && !defined(__riscv_zalrsc)
 #error "opensbi strongly relies on the A extension of RISC-V"
 #endif
 
@@ -31,6 +31,7 @@ void atomic_write(atomic_t *atom, long value)
 
 long atomic_add_return(atomic_t *atom, long value)
 {
+#ifdef __riscv_atomic
 	long ret;
 #if __SIZEOF_LONG__ == 4
 	__asm__ __volatile__("	amoadd.w.aqrl  %1, %2, %0"
@@ -43,6 +44,29 @@ long atomic_add_return(atomic_t *atom, long value)
 			     : "r"(value)
 			     : "memory");
 #endif
+#elif __riscv_zalrsc
+	long ret, temp;
+#if __SIZEOF_LONG__ == 4
+	__asm__ __volatile__("1:lr.w.aqrl	%1,%0\n"
+			     "  addw	%2,%1,%3\n"
+			     "  sc.w.aqrl	%2,%2,%0\n"
+			     "  bnez	%2,1b"
+			     : "+A"(atom->counter), "=&r"(ret), "=&r"(temp)
+			     : "r"(value)
+			     : "memory");
+#elif __SIZEOF_LONG__ == 8
+	__asm__ __volatile__("1:lr.d.aqrl	%1,%0\n"
+			     "  add	%2,%1,%3\n"
+			     "  sc.d.aqrl	%2,%2,%0\n"
+			     "  bnez	%2,1b"
+			     : "+A"(atom->counter), "=&r"(ret), "=&r"(temp)
+			     : "r"(value)
+			     : "memory");
+#endif
+#else
+#error "need a or zalrsc"
+#endif
+
 	return ret + value;
 }
 
@@ -51,6 +75,7 @@ long atomic_sub_return(atomic_t *atom, long value)
 	return atomic_add_return(atom, -value);
 }
 
+#ifdef __riscv_atomic
 #define __axchg(ptr, new, size)							\
 	({									\
 		__typeof__(ptr) __ptr = (ptr);					\
@@ -76,6 +101,39 @@ long atomic_sub_return(atomic_t *atom, long value)
 		}								\
 		__ret;								\
 	})
+#elif __riscv_zalrsc
+#define __axchg(ptr, new, size)							\
+	({									\
+		__typeof__(ptr) __ptr = (ptr);					\
+		__typeof__(new) __new = (new);					\
+		__typeof__(*(ptr)) __ret, __temp;					\
+		switch (size) {							\
+		case 4:								\
+			__asm__ __volatile__ (					\
+				"1:	lr.w.aqrl %0, %1\n"			\
+				"	sc.w.aqrl %2, %3, %1\n"			\
+				"	bnez	  %2, 1b\n"			\
+				: "=&r" (__ret), "+A" (*__ptr), "=&r" (__temp)	\
+				: "r" (__new)					\
+				: "memory");					\
+			break;							\
+		case 8:								\
+			__asm__ __volatile__ (					\
+				"1:	lr.d.aqrl %0, %1\n"			\
+				"	sc.d.aqrl %2, %3, %1\n"			\
+				"	bnez	  %2, 1b\n"			\
+				: "=&r" (__ret), "+A" (*__ptr), "=&r" (__temp)	\
+				: "r" (__new)					\
+				: "memory");					\
+			break;							\
+		default:							\
+			break;							\
+		}								\
+		__ret;								\
+	})
+#else
+#error "need a or zalrsc"
+#endif
 
 #define axchg(ptr, x)								\
 	({									\
diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c
index acab776..41e8fab 100644
--- a/lib/sbi/riscv_locks.c
+++ b/lib/sbi/riscv_locks.c
@@ -53,7 +53,16 @@ void spin_lock(spinlock_t *lock)
 
 	__asm__ __volatile__(
 		/* Atomically increment the next ticket. */
+#ifdef __riscv_atomic
 		"	amoadd.w.aqrl	%0, %4, %3\n"
+#elif __riscv_zalrsc
+		"3:	lr.w.aqrl	%0, %3\n"
+		"	addw	%1, %0, %4\n"
+		"	sc.w.aqrl	%1, %1, %3\n"
+		"	bnez	%1, 3b\n"
+#else
+#error "need a or zalrsc"
+#endif
 
 		/* Did we get the lock? */
 		"	srli	%1, %0, %6\n"
-- 
2.47.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v3] Emit lr and sc instructions based on -march flags
  2025-02-12 12:26 ` Anup Patel
  2025-02-25 23:00   ` [PATCH v2] Emit lr and sc instructions based on -march flags Chao-ying Fu
@ 2025-02-26  1:47   ` Chao-ying Fu
  2025-03-28 12:42     ` Anup Patel
  1 sibling, 1 reply; 8+ messages in thread
From: Chao-ying Fu @ 2025-02-26  1:47 UTC (permalink / raw)
  To: opensbi

When -march=rv64im_zalrsc_zicsr is used, we provide implementation
that uses lr and sc instructions only.
When -march=rv64ima_zicsr is used, we have the original implementation.

Signed-off-by: Chao-ying Fu <cfu@mips.com>
---
Changes in v3:
  Update the new requirement.
Changes in v2:
  Remove the config flag that controls lr/sc.
  Use addw to implement amoadd.w.
---
---
 docs/platform_requirements.md |  4 +++
 firmware/fw_base.S            | 10 ++++++
 firmware/payloads/test_head.S | 11 +++++++
 lib/sbi/riscv_atomic.c        | 60 ++++++++++++++++++++++++++++++++++-
 lib/sbi/riscv_locks.c         |  9 ++++++
 5 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/docs/platform_requirements.md b/docs/platform_requirements.md
index a843feb..19b38fd 100644
--- a/docs/platform_requirements.md
+++ b/docs/platform_requirements.md
@@ -19,6 +19,10 @@ Base Platform Requirements
 The base RISC-V platform requirements for OpenSBI are as follows:
 
 1. At least rv32ima_zicsr or rv64ima_zicsr required on all HARTs
+
+     * We may restrict the usage of atomic instructions to lr/sc via
+       rv32im_zalrsc_zicsr or rv64im_zalrsc_zicsr if preferred
+
 2. At least one HART should have S-mode support because:
 
      * SBI calls are meant for RISC-V S-mode (Supervisor mode)
diff --git a/firmware/fw_base.S b/firmware/fw_base.S
index d027e5e..2fa98d4 100644
--- a/firmware/fw_base.S
+++ b/firmware/fw_base.S
@@ -59,8 +59,18 @@ _try_lottery:
 	/* Jump to relocation wait loop if we don't get relocation lottery */
 	lla	a6, _boot_lottery
 	li	a7, BOOT_LOTTERY_ACQUIRED
+#ifdef __riscv_atomic
 	amoswap.w a6, a7, (a6)
 	bnez	a6, _wait_for_boot_hart
+#elif __riscv_zalrsc
+_sc_fail:
+	lr.w	t0, (a6)
+	sc.w	t1, a7, (a6)
+	bnez	t1, _sc_fail
+	bnez	t0, _wait_for_boot_hart
+#else
+#error "need a or zalrsc"
+#endif
 
 	/* relocate the global table content */
 	li	t0, FW_TEXT_START	/* link start */
diff --git a/firmware/payloads/test_head.S b/firmware/payloads/test_head.S
index 7a2ac12..7d25e07 100644
--- a/firmware/payloads/test_head.S
+++ b/firmware/payloads/test_head.S
@@ -30,7 +30,18 @@ _start:
 	/* Pick one hart to run the main boot sequence */
 	lla	a3, _hart_lottery
 	li	a2, 1
+#ifdef __riscv_atomic
 	amoadd.w a3, a2, (a3)
+#elif __riscv_zalrsc
+_sc_fail:
+	lr.w	t0, (a3)
+	addw	t1, t0, a2
+	sc.w	t1, t1, (a3)
+	bnez	t1, _sc_fail
+	move	a3, t0
+#else
+#error "need a or zalrsc"
+#endif
 	bnez	a3, _start_hang
 
 	/* Save a0 and a1 */
diff --git a/lib/sbi/riscv_atomic.c b/lib/sbi/riscv_atomic.c
index 32cf3f0..df16a2e 100644
--- a/lib/sbi/riscv_atomic.c
+++ b/lib/sbi/riscv_atomic.c
@@ -12,7 +12,7 @@
 #include <sbi/riscv_atomic.h>
 #include <sbi/riscv_barrier.h>
 
-#ifndef __riscv_atomic
+#if !defined(__riscv_atomic) && !defined(__riscv_zalrsc)
 #error "opensbi strongly relies on the A extension of RISC-V"
 #endif
 
@@ -31,6 +31,7 @@ void atomic_write(atomic_t *atom, long value)
 
 long atomic_add_return(atomic_t *atom, long value)
 {
+#ifdef __riscv_atomic
 	long ret;
 #if __SIZEOF_LONG__ == 4
 	__asm__ __volatile__("	amoadd.w.aqrl  %1, %2, %0"
@@ -43,6 +44,29 @@ long atomic_add_return(atomic_t *atom, long value)
 			     : "r"(value)
 			     : "memory");
 #endif
+#elif __riscv_zalrsc
+	long ret, temp;
+#if __SIZEOF_LONG__ == 4
+	__asm__ __volatile__("1:lr.w.aqrl	%1,%0\n"
+			     "  addw	%2,%1,%3\n"
+			     "  sc.w.aqrl	%2,%2,%0\n"
+			     "  bnez	%2,1b"
+			     : "+A"(atom->counter), "=&r"(ret), "=&r"(temp)
+			     : "r"(value)
+			     : "memory");
+#elif __SIZEOF_LONG__ == 8
+	__asm__ __volatile__("1:lr.d.aqrl	%1,%0\n"
+			     "  add	%2,%1,%3\n"
+			     "  sc.d.aqrl	%2,%2,%0\n"
+			     "  bnez	%2,1b"
+			     : "+A"(atom->counter), "=&r"(ret), "=&r"(temp)
+			     : "r"(value)
+			     : "memory");
+#endif
+#else
+#error "need a or zalrsc"
+#endif
+
 	return ret + value;
 }
 
@@ -51,6 +75,7 @@ long atomic_sub_return(atomic_t *atom, long value)
 	return atomic_add_return(atom, -value);
 }
 
+#ifdef __riscv_atomic
 #define __axchg(ptr, new, size)							\
 	({									\
 		__typeof__(ptr) __ptr = (ptr);					\
@@ -76,6 +101,39 @@ long atomic_sub_return(atomic_t *atom, long value)
 		}								\
 		__ret;								\
 	})
+#elif __riscv_zalrsc
+#define __axchg(ptr, new, size)							\
+	({									\
+		__typeof__(ptr) __ptr = (ptr);					\
+		__typeof__(new) __new = (new);					\
+		__typeof__(*(ptr)) __ret, __temp;					\
+		switch (size) {							\
+		case 4:								\
+			__asm__ __volatile__ (					\
+				"1:	lr.w.aqrl %0, %1\n"			\
+				"	sc.w.aqrl %2, %3, %1\n"			\
+				"	bnez	  %2, 1b\n"			\
+				: "=&r" (__ret), "+A" (*__ptr), "=&r" (__temp)	\
+				: "r" (__new)					\
+				: "memory");					\
+			break;							\
+		case 8:								\
+			__asm__ __volatile__ (					\
+				"1:	lr.d.aqrl %0, %1\n"			\
+				"	sc.d.aqrl %2, %3, %1\n"			\
+				"	bnez	  %2, 1b\n"			\
+				: "=&r" (__ret), "+A" (*__ptr), "=&r" (__temp)	\
+				: "r" (__new)					\
+				: "memory");					\
+			break;							\
+		default:							\
+			break;							\
+		}								\
+		__ret;								\
+	})
+#else
+#error "need a or zalrsc"
+#endif
 
 #define axchg(ptr, x)								\
 	({									\
diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c
index acab776..41e8fab 100644
--- a/lib/sbi/riscv_locks.c
+++ b/lib/sbi/riscv_locks.c
@@ -53,7 +53,16 @@ void spin_lock(spinlock_t *lock)
 
 	__asm__ __volatile__(
 		/* Atomically increment the next ticket. */
+#ifdef __riscv_atomic
 		"	amoadd.w.aqrl	%0, %4, %3\n"
+#elif __riscv_zalrsc
+		"3:	lr.w.aqrl	%0, %3\n"
+		"	addw	%1, %0, %4\n"
+		"	sc.w.aqrl	%1, %1, %3\n"
+		"	bnez	%1, 3b\n"
+#else
+#error "need a or zalrsc"
+#endif
 
 		/* Did we get the lock? */
 		"	srli	%1, %0, %6\n"
-- 
2.47.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2] Emit lr and sc instructions based on -march flags
  2025-02-25 23:00   ` [PATCH v2] Emit lr and sc instructions based on -march flags Chao-ying Fu
@ 2025-02-26 13:22     ` Xiang W
  0 siblings, 0 replies; 8+ messages in thread
From: Xiang W @ 2025-02-26 13:22 UTC (permalink / raw)
  To: opensbi

? 2025-02-25?? 15:00 -0800?Chao-ying Fu???
> When -march=rv64im_zalrsc_zicsr is used, we provide implementation
> that uses lr and sc instructions only.
> When -march=rv64ima_zicsr is used, we have the original implementation.
> 
> Signed-off-by: Chao-ying Fu <cfu@mips.com>

docs/platform_requirements.md needs to be updated

Regards,
Xiang W
> ---
> Changes in v2:
> ? Remove the config flag that controls lr/sc.
> ? Use addw to implement amoadd.w.
> ---
> ?firmware/fw_base.S??????????? | 10 ++++++
> ?firmware/payloads/test_head.S | 11 +++++++
> ?lib/sbi/riscv_atomic.c??????? | 60 ++++++++++++++++++++++++++++++++++-
> ?lib/sbi/riscv_locks.c???????? |? 9 ++++++
> ?4 files changed, 89 insertions(+), 1 deletion(-)
> 
> diff --git a/firmware/fw_base.S b/firmware/fw_base.S
> index 536bcd2..2498797 100644
> --- a/firmware/fw_base.S
> +++ b/firmware/fw_base.S
> @@ -59,8 +59,18 @@ _try_lottery:
> ?	/* Jump to relocation wait loop if we don't get relocation lottery */
> ?	lla	a6, _boot_lottery
> ?	li	a7, BOOT_LOTTERY_ACQUIRED
> +#ifdef __riscv_atomic
> ?	amoswap.w a6, a7, (a6)
> ?	bnez	a6, _wait_for_boot_hart
> +#elif __riscv_zalrsc
> +_sc_fail:
> +	lr.w	t0, (a6)
> +	sc.w	t1, a7, (a6)
> +	bnez	t1, _sc_fail
> +	bnez	t0, _wait_for_boot_hart
> +#else
> +#error "need a or zalrsc"
> +#endif
> ?
> ?	/* relocate the global table content */
> ?	li	t0, FW_TEXT_START	/* link start */
> diff --git a/firmware/payloads/test_head.S b/firmware/payloads/test_head.S
> index 7a2ac12..7d25e07 100644
> --- a/firmware/payloads/test_head.S
> +++ b/firmware/payloads/test_head.S
> @@ -30,7 +30,18 @@ _start:
> ?	/* Pick one hart to run the main boot sequence */
> ?	lla	a3, _hart_lottery
> ?	li	a2, 1
> +#ifdef __riscv_atomic
> ?	amoadd.w a3, a2, (a3)
> +#elif __riscv_zalrsc
> +_sc_fail:
> +	lr.w	t0, (a3)
> +	addw	t1, t0, a2
> +	sc.w	t1, t1, (a3)
> +	bnez	t1, _sc_fail
> +	move	a3, t0
> +#else
> +#error "need a or zalrsc"
> +#endif
> ?	bnez	a3, _start_hang
> ?
> ?	/* Save a0 and a1 */
> diff --git a/lib/sbi/riscv_atomic.c b/lib/sbi/riscv_atomic.c
> index 32cf3f0..df16a2e 100644
> --- a/lib/sbi/riscv_atomic.c
> +++ b/lib/sbi/riscv_atomic.c
> @@ -12,7 +12,7 @@
> ?#include <sbi/riscv_atomic.h>
> ?#include <sbi/riscv_barrier.h>
> ?
> -#ifndef __riscv_atomic
> +#if !defined(__riscv_atomic) && !defined(__riscv_zalrsc)
> ?#error "opensbi strongly relies on the A extension of RISC-V"
> ?#endif
> ?
> @@ -31,6 +31,7 @@ void atomic_write(atomic_t *atom, long value)
> ?
> ?long atomic_add_return(atomic_t *atom, long value)
> ?{
> +#ifdef __riscv_atomic
> ?	long ret;
> ?#if __SIZEOF_LONG__ == 4
> ?	__asm__ __volatile__("	amoadd.w.aqrl? %1, %2, %0"
> @@ -43,6 +44,29 @@ long atomic_add_return(atomic_t *atom, long value)
> ?			???? : "r"(value)
> ?			???? : "memory");
> ?#endif
> +#elif __riscv_zalrsc
> +	long ret, temp;
> +#if __SIZEOF_LONG__ == 4
> +	__asm__ __volatile__("1:lr.w.aqrl	%1,%0\n"
> +			???? "? addw	%2,%1,%3\n"
> +			???? "? sc.w.aqrl	%2,%2,%0\n"
> +			???? "? bnez	%2,1b"
> +			???? : "+A"(atom->counter), "=&r"(ret), "=&r"(temp)
> +			???? : "r"(value)
> +			???? : "memory");
> +#elif __SIZEOF_LONG__ == 8
> +	__asm__ __volatile__("1:lr.d.aqrl	%1,%0\n"
> +			???? "? add	%2,%1,%3\n"
> +			???? "? sc.d.aqrl	%2,%2,%0\n"
> +			???? "? bnez	%2,1b"
> +			???? : "+A"(atom->counter), "=&r"(ret), "=&r"(temp)
> +			???? : "r"(value)
> +			???? : "memory");
> +#endif
> +#else
> +#error "need a or zalrsc"
> +#endif
> +
> ?	return ret + value;
> ?}
> ?
> @@ -51,6 +75,7 @@ long atomic_sub_return(atomic_t *atom, long value)
> ?	return atomic_add_return(atom, -value);
> ?}
> ?
> +#ifdef __riscv_atomic
> ?#define __axchg(ptr, new, size)							\
> ?	({									\
> ?		__typeof__(ptr) __ptr = (ptr);					\
> @@ -76,6 +101,39 @@ long atomic_sub_return(atomic_t *atom, long value)
> ?		}								\
> ?		__ret;								\
> ?	})
> +#elif __riscv_zalrsc
> +#define __axchg(ptr, new, size)							\
> +	({									\
> +		__typeof__(ptr) __ptr = (ptr);					\
> +		__typeof__(new) __new = (new);					\
> +		__typeof__(*(ptr)) __ret, __temp;					\
> +		switch (size) {							\
> +		case 4:								\
> +			__asm__ __volatile__ (					\
> +				"1:	lr.w.aqrl %0, %1\n"			\
> +				"	sc.w.aqrl %2, %3, %1\n"			\
> +				"	bnez	? %2, 1b\n"			\
> +				: "=&r" (__ret), "+A" (*__ptr), "=&r" (__temp)	\
> +				: "r" (__new)					\
> +				: "memory");					\
> +			break;							\
> +		case 8:								\
> +			__asm__ __volatile__ (					\
> +				"1:	lr.d.aqrl %0, %1\n"			\
> +				"	sc.d.aqrl %2, %3, %1\n"			\
> +				"	bnez	? %2, 1b\n"			\
> +				: "=&r" (__ret), "+A" (*__ptr), "=&r" (__temp)	\
> +				: "r" (__new)					\
> +				: "memory");					\
> +			break;							\
> +		default:							\
> +			break;							\
> +		}								\
> +		__ret;								\
> +	})
> +#else
> +#error "need a or zalrsc"
> +#endif
> ?
> ?#define axchg(ptr, x)								\
> ?	({									\
> diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c
> index acab776..41e8fab 100644
> --- a/lib/sbi/riscv_locks.c
> +++ b/lib/sbi/riscv_locks.c
> @@ -53,7 +53,16 @@ void spin_lock(spinlock_t *lock)
> ?
> ?	__asm__ __volatile__(
> ?		/* Atomically increment the next ticket. */
> +#ifdef __riscv_atomic
> ?		"	amoadd.w.aqrl	%0, %4, %3\n"
> +#elif __riscv_zalrsc
> +		"3:	lr.w.aqrl	%0, %3\n"
> +		"	addw	%1, %0, %4\n"
> +		"	sc.w.aqrl	%1, %1, %3\n"
> +		"	bnez	%1, 3b\n"
> +#else
> +#error "need a or zalrsc"
> +#endif
> ?
> ?		/* Did we get the lock? */
> ?		"	srli	%1, %0, %6\n"
> -- 
> 2.47.1
> 
> 



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3] Emit lr and sc instructions based on -march flags
  2025-02-26  1:47   ` [PATCH v3] " Chao-ying Fu
@ 2025-03-28 12:42     ` Anup Patel
  0 siblings, 0 replies; 8+ messages in thread
From: Anup Patel @ 2025-03-28 12:42 UTC (permalink / raw)
  To: Chao-ying Fu; +Cc: opensbi, Chao-ying Fu

On Thu, Feb 27, 2025 at 6:18 AM Chao-ying Fu <icebergfu@gmail.com> wrote:
>
> When -march=rv64im_zalrsc_zicsr is used, we provide implementation
> that uses lr and sc instructions only.
> When -march=rv64ima_zicsr is used, we have the original implementation.
>
> Signed-off-by: Chao-ying Fu <cfu@mips.com>

LGTM. I have simplified the commit description at the time
of merging this patch.

Reviewed-by: Anup Patel <anup@brainfault.org>

Applied this patch to the riscv/opensbi repo.

Thanks,
Anup

> ---
> Changes in v3:
>   Update the new requirement.
> Changes in v2:
>   Remove the config flag that controls lr/sc.
>   Use addw to implement amoadd.w.
> ---
> ---
>  docs/platform_requirements.md |  4 +++
>  firmware/fw_base.S            | 10 ++++++
>  firmware/payloads/test_head.S | 11 +++++++
>  lib/sbi/riscv_atomic.c        | 60 ++++++++++++++++++++++++++++++++++-
>  lib/sbi/riscv_locks.c         |  9 ++++++
>  5 files changed, 93 insertions(+), 1 deletion(-)
>
> diff --git a/docs/platform_requirements.md b/docs/platform_requirements.md
> index a843feb..19b38fd 100644
> --- a/docs/platform_requirements.md
> +++ b/docs/platform_requirements.md
> @@ -19,6 +19,10 @@ Base Platform Requirements
>  The base RISC-V platform requirements for OpenSBI are as follows:
>
>  1. At least rv32ima_zicsr or rv64ima_zicsr required on all HARTs
> +
> +     * We may restrict the usage of atomic instructions to lr/sc via
> +       rv32im_zalrsc_zicsr or rv64im_zalrsc_zicsr if preferred
> +
>  2. At least one HART should have S-mode support because:
>
>       * SBI calls are meant for RISC-V S-mode (Supervisor mode)
> diff --git a/firmware/fw_base.S b/firmware/fw_base.S
> index d027e5e..2fa98d4 100644
> --- a/firmware/fw_base.S
> +++ b/firmware/fw_base.S
> @@ -59,8 +59,18 @@ _try_lottery:
>         /* Jump to relocation wait loop if we don't get relocation lottery */
>         lla     a6, _boot_lottery
>         li      a7, BOOT_LOTTERY_ACQUIRED
> +#ifdef __riscv_atomic
>         amoswap.w a6, a7, (a6)
>         bnez    a6, _wait_for_boot_hart
> +#elif __riscv_zalrsc
> +_sc_fail:
> +       lr.w    t0, (a6)
> +       sc.w    t1, a7, (a6)
> +       bnez    t1, _sc_fail
> +       bnez    t0, _wait_for_boot_hart
> +#else
> +#error "need a or zalrsc"
> +#endif
>
>         /* relocate the global table content */
>         li      t0, FW_TEXT_START       /* link start */
> diff --git a/firmware/payloads/test_head.S b/firmware/payloads/test_head.S
> index 7a2ac12..7d25e07 100644
> --- a/firmware/payloads/test_head.S
> +++ b/firmware/payloads/test_head.S
> @@ -30,7 +30,18 @@ _start:
>         /* Pick one hart to run the main boot sequence */
>         lla     a3, _hart_lottery
>         li      a2, 1
> +#ifdef __riscv_atomic
>         amoadd.w a3, a2, (a3)
> +#elif __riscv_zalrsc
> +_sc_fail:
> +       lr.w    t0, (a3)
> +       addw    t1, t0, a2
> +       sc.w    t1, t1, (a3)
> +       bnez    t1, _sc_fail
> +       move    a3, t0
> +#else
> +#error "need a or zalrsc"
> +#endif
>         bnez    a3, _start_hang
>
>         /* Save a0 and a1 */
> diff --git a/lib/sbi/riscv_atomic.c b/lib/sbi/riscv_atomic.c
> index 32cf3f0..df16a2e 100644
> --- a/lib/sbi/riscv_atomic.c
> +++ b/lib/sbi/riscv_atomic.c
> @@ -12,7 +12,7 @@
>  #include <sbi/riscv_atomic.h>
>  #include <sbi/riscv_barrier.h>
>
> -#ifndef __riscv_atomic
> +#if !defined(__riscv_atomic) && !defined(__riscv_zalrsc)
>  #error "opensbi strongly relies on the A extension of RISC-V"
>  #endif
>
> @@ -31,6 +31,7 @@ void atomic_write(atomic_t *atom, long value)
>
>  long atomic_add_return(atomic_t *atom, long value)
>  {
> +#ifdef __riscv_atomic
>         long ret;
>  #if __SIZEOF_LONG__ == 4
>         __asm__ __volatile__("  amoadd.w.aqrl  %1, %2, %0"
> @@ -43,6 +44,29 @@ long atomic_add_return(atomic_t *atom, long value)
>                              : "r"(value)
>                              : "memory");
>  #endif
> +#elif __riscv_zalrsc
> +       long ret, temp;
> +#if __SIZEOF_LONG__ == 4
> +       __asm__ __volatile__("1:lr.w.aqrl       %1,%0\n"
> +                            "  addw    %2,%1,%3\n"
> +                            "  sc.w.aqrl       %2,%2,%0\n"
> +                            "  bnez    %2,1b"
> +                            : "+A"(atom->counter), "=&r"(ret), "=&r"(temp)
> +                            : "r"(value)
> +                            : "memory");
> +#elif __SIZEOF_LONG__ == 8
> +       __asm__ __volatile__("1:lr.d.aqrl       %1,%0\n"
> +                            "  add     %2,%1,%3\n"
> +                            "  sc.d.aqrl       %2,%2,%0\n"
> +                            "  bnez    %2,1b"
> +                            : "+A"(atom->counter), "=&r"(ret), "=&r"(temp)
> +                            : "r"(value)
> +                            : "memory");
> +#endif
> +#else
> +#error "need a or zalrsc"
> +#endif
> +
>         return ret + value;
>  }
>
> @@ -51,6 +75,7 @@ long atomic_sub_return(atomic_t *atom, long value)
>         return atomic_add_return(atom, -value);
>  }
>
> +#ifdef __riscv_atomic
>  #define __axchg(ptr, new, size)                                                        \
>         ({                                                                      \
>                 __typeof__(ptr) __ptr = (ptr);                                  \
> @@ -76,6 +101,39 @@ long atomic_sub_return(atomic_t *atom, long value)
>                 }                                                               \
>                 __ret;                                                          \
>         })
> +#elif __riscv_zalrsc
> +#define __axchg(ptr, new, size)                                                        \
> +       ({                                                                      \
> +               __typeof__(ptr) __ptr = (ptr);                                  \
> +               __typeof__(new) __new = (new);                                  \
> +               __typeof__(*(ptr)) __ret, __temp;                                       \
> +               switch (size) {                                                 \
> +               case 4:                                                         \
> +                       __asm__ __volatile__ (                                  \
> +                               "1:     lr.w.aqrl %0, %1\n"                     \
> +                               "       sc.w.aqrl %2, %3, %1\n"                 \
> +                               "       bnez      %2, 1b\n"                     \
> +                               : "=&r" (__ret), "+A" (*__ptr), "=&r" (__temp)  \
> +                               : "r" (__new)                                   \
> +                               : "memory");                                    \
> +                       break;                                                  \
> +               case 8:                                                         \
> +                       __asm__ __volatile__ (                                  \
> +                               "1:     lr.d.aqrl %0, %1\n"                     \
> +                               "       sc.d.aqrl %2, %3, %1\n"                 \
> +                               "       bnez      %2, 1b\n"                     \
> +                               : "=&r" (__ret), "+A" (*__ptr), "=&r" (__temp)  \
> +                               : "r" (__new)                                   \
> +                               : "memory");                                    \
> +                       break;                                                  \
> +               default:                                                        \
> +                       break;                                                  \
> +               }                                                               \
> +               __ret;                                                          \
> +       })
> +#else
> +#error "need a or zalrsc"
> +#endif
>
>  #define axchg(ptr, x)                                                          \
>         ({                                                                      \
> diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c
> index acab776..41e8fab 100644
> --- a/lib/sbi/riscv_locks.c
> +++ b/lib/sbi/riscv_locks.c
> @@ -53,7 +53,16 @@ void spin_lock(spinlock_t *lock)
>
>         __asm__ __volatile__(
>                 /* Atomically increment the next ticket. */
> +#ifdef __riscv_atomic
>                 "       amoadd.w.aqrl   %0, %4, %3\n"
> +#elif __riscv_zalrsc
> +               "3:     lr.w.aqrl       %0, %3\n"
> +               "       addw    %1, %0, %4\n"
> +               "       sc.w.aqrl       %1, %1, %3\n"
> +               "       bnez    %1, 3b\n"
> +#else
> +#error "need a or zalrsc"
> +#endif
>
>                 /* Did we get the lock? */
>                 "       srli    %1, %0, %6\n"
> --
> 2.47.1
>
>
> --
> opensbi mailing list
> opensbi@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/opensbi

-- 
opensbi mailing list
opensbi@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/opensbi

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2025-03-28 12:42 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-01-10  0:24 [PATCH] Add RISCV_ISA_ZALRSC_ONLY to rewrite amo instructions via lr and sc Chao-ying Fu
2025-01-10  1:14 ` Bo Gan
2025-01-10  2:33   ` Chao-ying Fu
2025-02-12 12:26 ` Anup Patel
2025-02-25 23:00   ` [PATCH v2] Emit lr and sc instructions based on -march flags Chao-ying Fu
2025-02-26 13:22     ` Xiang W
2025-02-26  1:47   ` [PATCH v3] " Chao-ying Fu
2025-03-28 12:42     ` Anup Patel

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.