* [PATCH v4 01/13] riscv: Move cpufeature.h macros into their own header
2024-07-31 7:23 [PATCH v4 00/13] Zacas/Zabha support and qspinlocks Alexandre Ghiti
@ 2024-07-31 7:23 ` Alexandre Ghiti
2024-07-31 9:10 ` Andrew Jones
2024-07-31 7:23 ` [PATCH v4 02/13] riscv: Do not fail to build on byte/halfword operations with Zawrs Alexandre Ghiti
` (12 subsequent siblings)
13 siblings, 1 reply; 47+ messages in thread
From: Alexandre Ghiti @ 2024-07-31 7:23 UTC (permalink / raw)
To: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
Cc: Alexandre Ghiti
asm/cmpxchg.h will soon need the riscv_has_extension_unlikely() macros
and will then have to include asm/cpufeature.h, which introduces a lot
of circular header dependencies.
So move the riscv_has_extension_XXX() macros into their own header,
which avoids such circular dependencies by only including a restricted
number of headers.
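With that, users such as asm/cmpxchg.h only need something along these
lines (as done later in this series):
  #include <asm/cpufeature-macros.h>
  ...
  if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&
      riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {
          ...
  }
instead of pulling in the whole asm/cpufeature.h.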
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
arch/riscv/include/asm/cpufeature-macros.h | 66 ++++++++++++++++++++++
arch/riscv/include/asm/cpufeature.h | 56 +-----------------
2 files changed, 67 insertions(+), 55 deletions(-)
create mode 100644 arch/riscv/include/asm/cpufeature-macros.h
diff --git a/arch/riscv/include/asm/cpufeature-macros.h b/arch/riscv/include/asm/cpufeature-macros.h
new file mode 100644
index 000000000000..c5f0bf75e026
--- /dev/null
+++ b/arch/riscv/include/asm/cpufeature-macros.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2022-2024 Rivos, Inc
+ */
+
+#ifndef _ASM_CPUFEATURE_MACROS_H
+#define _ASM_CPUFEATURE_MACROS_H
+
+#include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
+
+#define STANDARD_EXT 0
+
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit);
+#define riscv_isa_extension_available(isa_bitmap, ext) \
+ __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
+
+static __always_inline bool __riscv_has_extension_likely(const unsigned long vendor,
+ const unsigned long ext)
+{
+ asm goto(ALTERNATIVE("j %l[l_no]", "nop", %[vendor], %[ext], 1)
+ :
+ : [vendor] "i" (vendor), [ext] "i" (ext)
+ :
+ : l_no);
+
+ return true;
+l_no:
+ return false;
+}
+
+static __always_inline bool __riscv_has_extension_unlikely(const unsigned long vendor,
+ const unsigned long ext)
+{
+ asm goto(ALTERNATIVE("nop", "j %l[l_yes]", %[vendor], %[ext], 1)
+ :
+ : [vendor] "i" (vendor), [ext] "i" (ext)
+ :
+ : l_yes);
+
+ return false;
+l_yes:
+ return true;
+}
+
+static __always_inline bool riscv_has_extension_unlikely(const unsigned long ext)
+{
+ compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
+
+ if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE))
+ return __riscv_has_extension_unlikely(STANDARD_EXT, ext);
+
+ return __riscv_isa_extension_available(NULL, ext);
+}
+
+static __always_inline bool riscv_has_extension_likely(const unsigned long ext)
+{
+ compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
+
+ if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE))
+ return __riscv_has_extension_likely(STANDARD_EXT, ext);
+
+ return __riscv_isa_extension_available(NULL, ext);
+}
+
+#endif
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index 45f9c1171a48..c991672bb401 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -11,6 +11,7 @@
#include <asm/hwcap.h>
#include <asm/alternative-macros.h>
#include <asm/errno.h>
+#include <asm/cpufeature-macros.h>
/*
* These are probed via a device_initcall(), via either the SBI or directly
@@ -103,61 +104,6 @@ extern const size_t riscv_isa_ext_count;
extern bool riscv_isa_fallback;
unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
-
-#define STANDARD_EXT 0
-
-bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit);
-#define riscv_isa_extension_available(isa_bitmap, ext) \
- __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
-
-static __always_inline bool __riscv_has_extension_likely(const unsigned long vendor,
- const unsigned long ext)
-{
- asm goto(ALTERNATIVE("j %l[l_no]", "nop", %[vendor], %[ext], 1)
- :
- : [vendor] "i" (vendor), [ext] "i" (ext)
- :
- : l_no);
-
- return true;
-l_no:
- return false;
-}
-
-static __always_inline bool __riscv_has_extension_unlikely(const unsigned long vendor,
- const unsigned long ext)
-{
- asm goto(ALTERNATIVE("nop", "j %l[l_yes]", %[vendor], %[ext], 1)
- :
- : [vendor] "i" (vendor), [ext] "i" (ext)
- :
- : l_yes);
-
- return false;
-l_yes:
- return true;
-}
-
-static __always_inline bool riscv_has_extension_unlikely(const unsigned long ext)
-{
- compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
-
- if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE))
- return __riscv_has_extension_unlikely(STANDARD_EXT, ext);
-
- return __riscv_isa_extension_available(NULL, ext);
-}
-
-static __always_inline bool riscv_has_extension_likely(const unsigned long ext)
-{
- compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
-
- if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE))
- return __riscv_has_extension_likely(STANDARD_EXT, ext);
-
- return __riscv_isa_extension_available(NULL, ext);
-}
-
static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext)
{
compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
--
2.39.2
* Re: [PATCH v4 01/13] riscv: Move cpufeature.h macros into their own header
2024-07-31 7:23 ` [PATCH v4 01/13] riscv: Move cpufeature.h macros into their own header Alexandre Ghiti
@ 2024-07-31 9:10 ` Andrew Jones
0 siblings, 0 replies; 47+ messages in thread
From: Andrew Jones @ 2024-07-31 9:10 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
On Wed, Jul 31, 2024 at 09:23:53AM GMT, Alexandre Ghiti wrote:
> asm/cmpxchg.h will soon need the riscv_has_extension_unlikely() macros
> and will then have to include asm/cpufeature.h, which introduces a lot
> of circular header dependencies.
The includes of asm/cpufeature.h don't look well maintained. I don't think
it needs asm/errno.h and it should have linux/threads.h,
linux/percpu-defs.h, and linux/kconfig.h.
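i.e. something like this (untested sketch):
  #include <linux/kconfig.h>
  #include <linux/percpu-defs.h>
  #include <linux/threads.h>
on top of the asm/ headers it actually uses, and without asm/errno.h.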
>
> So move the riscv_has_extension_XXX() macros into their own header,
> which avoids such circular dependencies by only including a restricted
> number of headers.
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
> arch/riscv/include/asm/cpufeature-macros.h | 66 ++++++++++++++++++++++
> arch/riscv/include/asm/cpufeature.h | 56 +-----------------
> 2 files changed, 67 insertions(+), 55 deletions(-)
> create mode 100644 arch/riscv/include/asm/cpufeature-macros.h
>
> diff --git a/arch/riscv/include/asm/cpufeature-macros.h b/arch/riscv/include/asm/cpufeature-macros.h
> new file mode 100644
> index 000000000000..c5f0bf75e026
> --- /dev/null
> +++ b/arch/riscv/include/asm/cpufeature-macros.h
> @@ -0,0 +1,66 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright 2022-2024 Rivos, Inc
> + */
> +
> +#ifndef _ASM_CPUFEATURE_MACROS_H
> +#define _ASM_CPUFEATURE_MACROS_H
> +
> +#include <asm/hwcap.h>
> +#include <asm/alternative-macros.h>
> +
> +#define STANDARD_EXT 0
> +
> +bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit);
> +#define riscv_isa_extension_available(isa_bitmap, ext) \
> + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
> +
> +static __always_inline bool __riscv_has_extension_likely(const unsigned long vendor,
> + const unsigned long ext)
> +{
> + asm goto(ALTERNATIVE("j %l[l_no]", "nop", %[vendor], %[ext], 1)
> + :
> + : [vendor] "i" (vendor), [ext] "i" (ext)
> + :
> + : l_no);
> +
> + return true;
> +l_no:
> + return false;
> +}
> +
> +static __always_inline bool __riscv_has_extension_unlikely(const unsigned long vendor,
> + const unsigned long ext)
> +{
> + asm goto(ALTERNATIVE("nop", "j %l[l_yes]", %[vendor], %[ext], 1)
> + :
> + : [vendor] "i" (vendor), [ext] "i" (ext)
> + :
> + : l_yes);
> +
> + return false;
> +l_yes:
> + return true;
> +}
> +
> +static __always_inline bool riscv_has_extension_unlikely(const unsigned long ext)
> +{
> + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
> +
> + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE))
> + return __riscv_has_extension_unlikely(STANDARD_EXT, ext);
> +
> + return __riscv_isa_extension_available(NULL, ext);
> +}
> +
> +static __always_inline bool riscv_has_extension_likely(const unsigned long ext)
> +{
> + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
> +
> + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE))
> + return __riscv_has_extension_likely(STANDARD_EXT, ext);
> +
> + return __riscv_isa_extension_available(NULL, ext);
> +}
> +
> +#endif
nit: Add the /* _ASM_CPUFEATURE_MACROS_H */ here.
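i.e.
  #endif /* _ASM_CPUFEATURE_MACROS_H */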
> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> index 45f9c1171a48..c991672bb401 100644
> --- a/arch/riscv/include/asm/cpufeature.h
> +++ b/arch/riscv/include/asm/cpufeature.h
> @@ -11,6 +11,7 @@
> #include <asm/hwcap.h>
> #include <asm/alternative-macros.h>
I think asm/alternative-macros.h can be dropped now.
> #include <asm/errno.h>
> +#include <asm/cpufeature-macros.h>
>
> /*
> * These are probed via a device_initcall(), via either the SBI or directly
> @@ -103,61 +104,6 @@ extern const size_t riscv_isa_ext_count;
> extern bool riscv_isa_fallback;
>
> unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
> -
> -#define STANDARD_EXT 0
> -
> -bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit);
> -#define riscv_isa_extension_available(isa_bitmap, ext) \
> - __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
> -
> -static __always_inline bool __riscv_has_extension_likely(const unsigned long vendor,
> - const unsigned long ext)
> -{
> - asm goto(ALTERNATIVE("j %l[l_no]", "nop", %[vendor], %[ext], 1)
> - :
> - : [vendor] "i" (vendor), [ext] "i" (ext)
> - :
> - : l_no);
> -
> - return true;
> -l_no:
> - return false;
> -}
> -
> -static __always_inline bool __riscv_has_extension_unlikely(const unsigned long vendor,
> - const unsigned long ext)
> -{
> - asm goto(ALTERNATIVE("nop", "j %l[l_yes]", %[vendor], %[ext], 1)
> - :
> - : [vendor] "i" (vendor), [ext] "i" (ext)
> - :
> - : l_yes);
> -
> - return false;
> -l_yes:
> - return true;
> -}
> -
> -static __always_inline bool riscv_has_extension_unlikely(const unsigned long ext)
> -{
> - compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
> -
> - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE))
> - return __riscv_has_extension_unlikely(STANDARD_EXT, ext);
> -
> - return __riscv_isa_extension_available(NULL, ext);
> -}
> -
> -static __always_inline bool riscv_has_extension_likely(const unsigned long ext)
> -{
> - compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
> -
> - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE))
> - return __riscv_has_extension_likely(STANDARD_EXT, ext);
> -
> - return __riscv_isa_extension_available(NULL, ext);
> -}
> -
> static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext)
> {
> compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX");
> --
> 2.39.2
>
>
Otherwise,
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Thanks,
drew
* [PATCH v4 02/13] riscv: Do not fail to build on byte/halfword operations with Zawrs
2024-07-31 7:23 [PATCH v4 00/13] Zacas/Zabha support and qspinlocks Alexandre Ghiti
2024-07-31 7:23 ` [PATCH v4 01/13] riscv: Move cpufeature.h macros into their own header Alexandre Ghiti
@ 2024-07-31 7:23 ` Alexandre Ghiti
2024-07-31 14:10 ` Andrew Jones
2024-07-31 16:27 ` Waiman Long
2024-07-31 7:23 ` [PATCH v4 03/13] riscv: Implement cmpxchg32/64() using Zacas Alexandre Ghiti
` (11 subsequent siblings)
13 siblings, 2 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2024-07-31 7:23 UTC (permalink / raw)
To: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
Cc: Alexandre Ghiti
riscv does not have lr instructions on byte and halfword but the
qspinlock implementation actually uses such atomics provided by the
Zabha extension, so those sizes are legitimate.
Then instead of failing to build, just fall back to the !Zawrs path.
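For example, the qspinlock slowpath waits on a single byte of the lock,
roughly (see kernel/locking/qspinlock.c):
	if (val & _Q_LOCKED_MASK)
		smp_cond_load_acquire(&lock->locked, !VAL);
which ends up in __cmpwait() with a size of 1.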
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
arch/riscv/include/asm/cmpxchg.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index ebbce134917c..9ba497ea18a5 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -268,7 +268,8 @@ static __always_inline void __cmpwait(volatile void *ptr,
break;
#endif
default:
- BUILD_BUG();
+ /* RISC-V doesn't have lr instructions on byte and half-word. */
+ goto no_zawrs;
}
return;
--
2.39.2
* Re: [PATCH v4 02/13] riscv: Do not fail to build on byte/halfword operations with Zawrs
2024-07-31 7:23 ` [PATCH v4 02/13] riscv: Do not fail to build on byte/halfword operations with Zawrs Alexandre Ghiti
@ 2024-07-31 14:10 ` Andrew Jones
2024-07-31 15:52 ` Alexandre Ghiti
2024-07-31 16:27 ` Waiman Long
1 sibling, 1 reply; 47+ messages in thread
From: Andrew Jones @ 2024-07-31 14:10 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
On Wed, Jul 31, 2024 at 09:23:54AM GMT, Alexandre Ghiti wrote:
> riscv does not have lr instructions on byte and halfword but the
> qspinlock implementation actually uses such atomics provided by the
> Zabha extension, so those sizes are legitimate.
We currently always come to __cmpwait() through smp_cond_load_relaxed()
and queued_spin_lock_slowpath() adds another invocation. However, isn't
the reason we're hitting the BUILD_BUG() because the switch fails to find
a case for 16, not because it fails to find cases for 1 or 2? The new
invocation passes a pointer to a struct mcs_spinlock, which looks like
it has size 16. We need to ensure that when ptr points to a pointer that
we pass the size of uintptr_t.
>
> Then instead of failing to build, just fall back to the !Zawrs path.
No matter what sizes we're failing on, if we do this then
queued_spin_lock_slowpath() won't be able to take advantage of Zawrs.
Thanks,
drew
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
> arch/riscv/include/asm/cmpxchg.h | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index ebbce134917c..9ba497ea18a5 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -268,7 +268,8 @@ static __always_inline void __cmpwait(volatile void *ptr,
> break;
> #endif
> default:
> - BUILD_BUG();
> + /* RISC-V doesn't have lr instructions on byte and half-word. */
> + goto no_zawrs;
> }
>
> return;
> --
> 2.39.2
>
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
* Re: [PATCH v4 02/13] riscv: Do not fail to build on byte/halfword operations with Zawrs
2024-07-31 14:10 ` Andrew Jones
@ 2024-07-31 15:52 ` Alexandre Ghiti
2024-07-31 16:14 ` Andrew Jones
0 siblings, 1 reply; 47+ messages in thread
From: Alexandre Ghiti @ 2024-07-31 15:52 UTC (permalink / raw)
To: Andrew Jones
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
Hi Drew,
On Wed, Jul 31, 2024 at 4:10 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Wed, Jul 31, 2024 at 09:23:54AM GMT, Alexandre Ghiti wrote:
> > riscv does not have lr instructions on byte and halfword but the
> > qspinlock implementation actually uses such atomics provided by the
> > Zabha extension, so those sizes are legitimate.
>
> We currently always come to __cmpwait() through smp_cond_load_relaxed()
> and queued_spin_lock_slowpath() adds another invocation.
atomic_cond_read_relaxed() and smp_cond_load_acquire() also call
smp_cond_load_relaxed()
And here https://elixir.bootlin.com/linux/v6.11-rc1/source/kernel/locking/qspinlock.c#L380,
the size passed is 1.
> However, isn't
> the reason we're hitting the BUILD_BUG() because the switch fails to find
> a case for 16, not because it fails to find cases for 1 or 2? The new
> invocation passes a pointer to a struct mcs_spinlock, which looks like
> it has size 16. We need to ensure that when ptr points to a pointer that
> we pass the size of uintptr_t.
I guess you're referring to this call here
https://elixir.bootlin.com/linux/v6.11-rc1/source/kernel/locking/qspinlock.c#L551,
but it's a pointer to a pointer, which will then pass a size of 8.
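For reference, the code there looks roughly like:
	if (!next)
		next = smp_cond_load_relaxed(&node->next, (VAL));
so the size passed is sizeof(struct mcs_spinlock *), i.e. 8.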
And the build error that I get is the following:
In function '__cmpwait',
inlined from 'queued_spin_lock_slowpath' at
../kernel/locking/qspinlock.c:380:3:
./../include/linux/compiler_types.h:510:45: error: call to
'__compiletime_assert_2' declared with attribute error: BUILD_BUG
failed
510 | _compiletime_assert(condition, msg,
__compiletime_assert_, __COUNTER__)
| ^
./../include/linux/compiler_types.h:491:25: note: in definition of
macro '__compiletime_assert'
491 | prefix ## suffix();
\
| ^~~~~~
./../include/linux/compiler_types.h:510:9: note: in expansion of macro
'_compiletime_assert'
510 | _compiletime_assert(condition, msg,
__compiletime_assert_, __COUNTER__)
| ^~~~~~~~~~~~~~~~~~~
../include/linux/build_bug.h:39:37: note: in expansion of macro
'compiletime_assert'
39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
| ^~~~~~~~~~~~~~~~~~
../include/linux/build_bug.h:59:21: note: in expansion of macro
'BUILD_BUG_ON_MSG'
59 | #define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
| ^~~~~~~~~~~~~~~~
../arch/riscv/include/asm/cmpxchg.h:376:17: note: in expansion of
macro 'BUILD_BUG'
376 | BUILD_BUG();
which points to the first smp_cond_load_relaxed() I mentioned above.
Thanks,
Alex
>
> >
> > Then instead of failing to build, just fall back to the !Zawrs path.
>
> No matter what sizes we're failing on, if we do this then
> queued_spin_lock_slowpath() won't be able to take advantage of Zawrs.
>
> Thanks,
> drew
>
> >
> > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > ---
> > arch/riscv/include/asm/cmpxchg.h | 3 ++-
> > 1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> > index ebbce134917c..9ba497ea18a5 100644
> > --- a/arch/riscv/include/asm/cmpxchg.h
> > +++ b/arch/riscv/include/asm/cmpxchg.h
> > @@ -268,7 +268,8 @@ static __always_inline void __cmpwait(volatile void *ptr,
> > break;
> > #endif
> > default:
> > - BUILD_BUG();
> > + /* RISC-V doesn't have lr instructions on byte and half-word. */
> > + goto no_zawrs;
> > }
> >
> > return;
> > --
> > 2.39.2
> >
> >
> > _______________________________________________
> > linux-riscv mailing list
> > linux-riscv@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-riscv
* Re: [PATCH v4 02/13] riscv: Do not fail to build on byte/halfword operations with Zawrs
2024-07-31 15:52 ` Alexandre Ghiti
@ 2024-07-31 16:14 ` Andrew Jones
2024-08-01 6:30 ` Alexandre Ghiti
0 siblings, 1 reply; 47+ messages in thread
From: Andrew Jones @ 2024-07-31 16:14 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
On Wed, Jul 31, 2024 at 05:52:46PM GMT, Alexandre Ghiti wrote:
> Hi Drew,
>
> On Wed, Jul 31, 2024 at 4:10 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> >
> > On Wed, Jul 31, 2024 at 09:23:54AM GMT, Alexandre Ghiti wrote:
> > > riscv does not have lr instructions on byte and halfword but the
> > > qspinlock implementation actually uses such atomics provided by the
> > > Zabha extension, so those sizes are legitimate.
> >
> > We currently always come to __cmpwait() through smp_cond_load_relaxed()
> > and queued_spin_lock_slowpath() adds another invocation.
>
> atomic_cond_read_relaxed() and smp_cond_load_acquire() also call
> smp_cond_load_relaxed()
>
> And here https://elixir.bootlin.com/linux/v6.11-rc1/source/kernel/locking/qspinlock.c#L380,
> the size passed is 1.
Oh, I see.
>
> > However, isn't
> > the reason we're hitting the BUILD_BUG() because the switch fails to find
> > a case for 16, not because it fails to find cases for 1 or 2? The new
> > invocation passes a pointer to a struct mcs_spinlock, which looks like
> > it has size 16. We need to ensure that when ptr points to a pointer that
> > we pass the size of uintptr_t.
>
> I guess you're referring to this call here
> https://elixir.bootlin.com/linux/v6.11-rc1/source/kernel/locking/qspinlock.c#L551,
> but it's a pointer to a pointer, which will then pass a size of 8.
Ah, missed that '&'...
>
> And the build error that I get is the following:
>
> In function '__cmpwait',
> inlined from 'queued_spin_lock_slowpath' at
> ../kernel/locking/qspinlock.c:380:3:
> ./../include/linux/compiler_types.h:510:45: error: call to
> '__compiletime_assert_2' declared with attribute error: BUILD_BUG
> failed
> 510 | _compiletime_assert(condition, msg,
> __compiletime_assert_, __COUNTER__)
> | ^
> ./../include/linux/compiler_types.h:491:25: note: in definition of
> macro '__compiletime_assert'
> 491 | prefix ## suffix();
> \
> | ^~~~~~
> ./../include/linux/compiler_types.h:510:9: note: in expansion of macro
> '_compiletime_assert'
> 510 | _compiletime_assert(condition, msg,
> __compiletime_assert_, __COUNTER__)
> | ^~~~~~~~~~~~~~~~~~~
> ../include/linux/build_bug.h:39:37: note: in expansion of macro
> 'compiletime_assert'
> 39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
> | ^~~~~~~~~~~~~~~~~~
> ../include/linux/build_bug.h:59:21: note: in expansion of macro
> 'BUILD_BUG_ON_MSG'
> 59 | #define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
> | ^~~~~~~~~~~~~~~~
> ../arch/riscv/include/asm/cmpxchg.h:376:17: note: in expansion of
> macro 'BUILD_BUG'
> 376 | BUILD_BUG();
>
> which points to the first smp_cond_load_relaxed() I mentioned above.
>
OK, you've got me straightened out now, but can we only add the fallback
for sizes 1 and 2 and leave the default as a BUILD_BUG()?
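i.e. something like (rough sketch):
	switch (size) {
	case 1:
	case 2:
		/* RISC-V doesn't have lr instructions on byte and half-word. */
		goto no_zawrs;
	case 4:
		...
	case 8:
		...
	default:
		BUILD_BUG();
	}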
Thanks,
drew
* Re: [PATCH v4 02/13] riscv: Do not fail to build on byte/halfword operations with Zawrs
2024-07-31 16:14 ` Andrew Jones
@ 2024-08-01 6:30 ` Alexandre Ghiti
0 siblings, 0 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2024-08-01 6:30 UTC (permalink / raw)
To: Andrew Jones
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
On Wed, Jul 31, 2024 at 6:14 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Wed, Jul 31, 2024 at 05:52:46PM GMT, Alexandre Ghiti wrote:
> > Hi Drew,
> >
> > On Wed, Jul 31, 2024 at 4:10 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> > >
> > > On Wed, Jul 31, 2024 at 09:23:54AM GMT, Alexandre Ghiti wrote:
> > > > riscv does not have lr instructions on byte and halfword but the
> > > > qspinlock implementation actually uses such atomics provided by the
> > > > Zabha extension, so those sizes are legitimate.
> > >
> > > We currently always come to __cmpwait() through smp_cond_load_relaxed()
> > > and queued_spin_lock_slowpath() adds another invocation.
> >
> > atomic_cond_read_relaxed() and smp_cond_load_acquire() also call
> > smp_cond_load_relaxed()
> >
> > And here https://elixir.bootlin.com/linux/v6.11-rc1/source/kernel/locking/qspinlock.c#L380,
> > the size passed is 1.
>
> Oh, I see.
>
> >
> > > However, isn't
> > > the reason we're hitting the BUILD_BUG() because the switch fails to find
> > > a case for 16, not because it fails to find cases for 1 or 2? The new
> > > invocation passes a pointer to a struct mcs_spinlock, which looks like
> > > it has size 16. We need to ensure that when ptr points to a pointer that
> > > we pass the size of uintptr_t.
> >
> > I guess you're referring to this call here
> > https://elixir.bootlin.com/linux/v6.11-rc1/source/kernel/locking/qspinlock.c#L551,
> > but it's a pointer to a pointer, which will then pass a size of 8.
>
> Ah, missed that '&'...
>
> >
> > And the build error that I get is the following:
> >
> > In function '__cmpwait',
> > inlined from 'queued_spin_lock_slowpath' at
> > ../kernel/locking/qspinlock.c:380:3:
> > ./../include/linux/compiler_types.h:510:45: error: call to
> > '__compiletime_assert_2' declared with attribute error: BUILD_BUG
> > failed
> > 510 | _compiletime_assert(condition, msg,
> > __compiletime_assert_, __COUNTER__)
> > | ^
> > ./../include/linux/compiler_types.h:491:25: note: in definition of
> > macro '__compiletime_assert'
> > 491 | prefix ## suffix();
> > \
> > | ^~~~~~
> > ./../include/linux/compiler_types.h:510:9: note: in expansion of macro
> > '_compiletime_assert'
> > 510 | _compiletime_assert(condition, msg,
> > __compiletime_assert_, __COUNTER__)
> > | ^~~~~~~~~~~~~~~~~~~
> > ../include/linux/build_bug.h:39:37: note: in expansion of macro
> > 'compiletime_assert'
> > 39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
> > | ^~~~~~~~~~~~~~~~~~
> > ../include/linux/build_bug.h:59:21: note: in expansion of macro
> > 'BUILD_BUG_ON_MSG'
> > 59 | #define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
> > | ^~~~~~~~~~~~~~~~
> > ../arch/riscv/include/asm/cmpxchg.h:376:17: note: in expansion of
> > macro 'BUILD_BUG'
> > 376 | BUILD_BUG();
> >
> > which points to the first smp_cond_load_relaxed() I mentioned above.
> >
>
> OK, you've got me straightened out now, but can we only add the fallback
> for sizes 1 and 2 and leave the default as a BUILD_BUG()?
Yes, sure, I'll do that.
Thanks,
Alex
>
> Thanks,
> drew
* Re: [PATCH v4 02/13] riscv: Do not fail to build on byte/halfword operations with Zawrs
2024-07-31 7:23 ` [PATCH v4 02/13] riscv: Do not fail to build on byte/halfword operations with Zawrs Alexandre Ghiti
2024-07-31 14:10 ` Andrew Jones
@ 2024-07-31 16:27 ` Waiman Long
2024-07-31 21:51 ` Guo Ren
1 sibling, 1 reply; 47+ messages in thread
From: Waiman Long @ 2024-07-31 16:27 UTC (permalink / raw)
To: Alexandre Ghiti, Jonathan Corbet, Paul Walmsley, Palmer Dabbelt,
To: Alexandre Ghiti, Jonathan Corbet, Paul Walmsley, Palmer Dabbelt,
Albert Ou, Conor Dooley, Rob Herring, Krzysztof Kozlowski,
Andrea Parri, Nathan Chancellor, Peter Zijlstra, Ingo Molnar,
Will Deacon, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
On 7/31/24 03:23, Alexandre Ghiti wrote:
> riscv does not have lr instructions on byte and halfword but the
> qspinlock implementation actually uses such atomics provided by the
> Zabha extension, so those sizes are legitimate.
Note that the native qspinlock code only needs a halfword atomic cmpxchg
operation. However, if you also plan to use paravirtual qspinlock, you
need to have a byte-level atomic cmpxchg().
Cheers,
Longman
>
> Then instead of failing to build, just fall back to the !Zawrs path.
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
> arch/riscv/include/asm/cmpxchg.h | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index ebbce134917c..9ba497ea18a5 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -268,7 +268,8 @@ static __always_inline void __cmpwait(volatile void *ptr,
> break;
> #endif
> default:
> - BUILD_BUG();
> + /* RISC-V doesn't have lr instructions on byte and half-word. */
> + goto no_zawrs;
> }
>
> return;
* Re: [PATCH v4 02/13] riscv: Do not fail to build on byte/halfword operations with Zawrs
2024-07-31 16:27 ` Waiman Long
@ 2024-07-31 21:51 ` Guo Ren
0 siblings, 0 replies; 47+ messages in thread
From: Guo Ren @ 2024-07-31 21:51 UTC (permalink / raw)
To: Waiman Long
Cc: Alexandre Ghiti, Jonathan Corbet, Paul Walmsley, Palmer Dabbelt,
Albert Ou, Conor Dooley, Rob Herring, Krzysztof Kozlowski,
Andrea Parri, Nathan Chancellor, Peter Zijlstra, Ingo Molnar,
Will Deacon, Boqun Feng, Arnd Bergmann, Leonardo Bras, linux-doc,
devicetree, linux-kernel, linux-riscv, linux-arch
On Thu, Aug 1, 2024 at 1:27 AM Waiman Long <longman@redhat.com> wrote:
>
> On 7/31/24 03:23, Alexandre Ghiti wrote:
> > riscv does not have lr instructions on byte and halfword but the
> > qspinlock implementation actually uses such atomics provided by the
> > Zabha extension, so those sizes are legitimate.
>
> Note that the native qspinlock code only needs a halfword atomic cmpxchg
> operation. However, if you also plan to use paravirtual qspinlock, you
> need to have a byte-level atomic cmpxchg().
Thx for reminding me; I will update paravirt qspinlock after these
patches merge.
The Zabha & Zacas extensions provide byte and half-word atomic cmpxchg.
>
> Cheers,
> Longman
>
> >
> > Then instead of failing to build, just fall back to the !Zawrs path.
> >
> > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > ---
> > arch/riscv/include/asm/cmpxchg.h | 3 ++-
> > 1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> > index ebbce134917c..9ba497ea18a5 100644
> > --- a/arch/riscv/include/asm/cmpxchg.h
> > +++ b/arch/riscv/include/asm/cmpxchg.h
> > @@ -268,7 +268,8 @@ static __always_inline void __cmpwait(volatile void *ptr,
> > break;
> > #endif
> > default:
> > - BUILD_BUG();
> > + /* RISC-V doesn't have lr instructions on byte and half-word. */
> > + goto no_zawrs;
> > }
> >
> > return;
>
--
Best Regards
Guo Ren
* [PATCH v4 03/13] riscv: Implement cmpxchg32/64() using Zacas
2024-07-31 7:23 [PATCH v4 00/13] Zacas/Zabha support and qspinlocks Alexandre Ghiti
2024-07-31 7:23 ` [PATCH v4 01/13] riscv: Move cpufeature.h macros into their own header Alexandre Ghiti
2024-07-31 7:23 ` [PATCH v4 02/13] riscv: Do not fail to build on byte/halfword operations with Zawrs Alexandre Ghiti
@ 2024-07-31 7:23 ` Alexandre Ghiti
2024-07-31 9:21 ` Andrew Jones
2024-07-31 7:23 ` [PATCH v4 04/13] dt-bindings: riscv: Add Zabha ISA extension description Alexandre Ghiti
` (10 subsequent siblings)
13 siblings, 1 reply; 47+ messages in thread
From: Alexandre Ghiti @ 2024-07-31 7:23 UTC (permalink / raw)
To: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
Cc: Alexandre Ghiti
This adds runtime support for Zacas in cmpxchg operations.
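With Zacas, a fully-ordered 32-bit cmpxchg() then results in something
like:
	amocas.w.rl	a5, a4, (s1)
	fence		rw, rw
instead of an lr.w/sc.w retry loop.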
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
arch/riscv/Kconfig | 16 +++++++++++
arch/riscv/Makefile | 3 ++
arch/riscv/include/asm/cmpxchg.h | 48 +++++++++++++++++++++-----------
3 files changed, 50 insertions(+), 17 deletions(-)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 0f3cd7c3a436..d955c64d50c2 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -613,6 +613,22 @@ config RISCV_ISA_ZAWRS
use of these instructions in the kernel when the Zawrs extension is
detected at boot.
+config TOOLCHAIN_HAS_ZACAS
+ bool
+ default y
+ depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas)
+ depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas)
+ depends on AS_HAS_OPTION_ARCH
+
+config RISCV_ISA_ZACAS
+ bool "Zacas extension support for atomic CAS"
+ depends on TOOLCHAIN_HAS_ZACAS
+ depends on RISCV_ALTERNATIVE
+ default y
+ help
+ Enable the use of the Zacas ISA-extension to implement kernel atomic
+ cmpxchg operations when it is detected at boot.
+
If you don't know what to do here, say Y.
config TOOLCHAIN_HAS_ZBB
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 6fe682139d2e..f1788131d5fe 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -82,6 +82,9 @@ else
riscv-march-$(CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI) := $(riscv-march-y)_zicsr_zifencei
endif
+# Check if the toolchain supports Zacas
+riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
+
# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 9ba497ea18a5..c626fe0d08ae 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -12,6 +12,7 @@
#include <asm/fence.h>
#include <asm/hwcap.h>
#include <asm/insn-def.h>
+#include <asm/cpufeature-macros.h>
#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \
({ \
@@ -137,24 +138,37 @@
r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
})
-#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \
+#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
({ \
- register unsigned int __rc; \
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
+ riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
+ r = o; \
\
- __asm__ __volatile__ ( \
- prepend \
- "0: lr" lr_sfx " %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc" sc_sfx " %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- append \
- "1:\n" \
- : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
- : "rJ" (co o), "rJ" (n) \
- : "memory"); \
+ __asm__ __volatile__ ( \
+ prepend \
+ " amocas" sc_cas_sfx " %0, %z2, %1\n" \
+ append \
+ : "+&r" (r), "+A" (*(p)) \
+ : "rJ" (n) \
+ : "memory"); \
+ } else { \
+ register unsigned int __rc; \
+ \
+ __asm__ __volatile__ ( \
+ prepend \
+ "0: lr" lr_sfx " %0, %2\n" \
+ " bne %0, %z3, 1f\n" \
+ " sc" sc_cas_sfx " %1, %z4, %2\n" \
+ " bnez %1, 0b\n" \
+ append \
+ "1:\n" \
+ : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
+ : "rJ" (co o), "rJ" (n) \
+ : "memory"); \
+ } \
})
-#define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \
+#define _arch_cmpxchg(ptr, old, new, sc_cas_sfx, prepend, append) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(__ptr)) __old = (old); \
@@ -164,15 +178,15 @@
switch (sizeof(*__ptr)) { \
case 1: \
case 2: \
- __arch_cmpxchg_masked(sc_sfx, prepend, append, \
+ __arch_cmpxchg_masked(sc_cas_sfx, prepend, append, \
__ret, __ptr, __old, __new); \
break; \
case 4: \
- __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \
+ __arch_cmpxchg(".w", ".w" sc_cas_sfx, prepend, append, \
__ret, __ptr, (long), __old, __new); \
break; \
case 8: \
- __arch_cmpxchg(".d", ".d" sc_sfx, prepend, append, \
+ __arch_cmpxchg(".d", ".d" sc_cas_sfx, prepend, append, \
__ret, __ptr, /**/, __old, __new); \
break; \
default: \
--
2.39.2
* Re: [PATCH v4 03/13] riscv: Implement cmpxchg32/64() using Zacas
2024-07-31 7:23 ` [PATCH v4 03/13] riscv: Implement cmpxchg32/64() using Zacas Alexandre Ghiti
@ 2024-07-31 9:21 ` Andrew Jones
0 siblings, 0 replies; 47+ messages in thread
From: Andrew Jones @ 2024-07-31 9:21 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
On Wed, Jul 31, 2024 at 09:23:55AM GMT, Alexandre Ghiti wrote:
> This adds runtime support for Zacas in cmpxchg operations.
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
> arch/riscv/Kconfig | 16 +++++++++++
> arch/riscv/Makefile | 3 ++
> arch/riscv/include/asm/cmpxchg.h | 48 +++++++++++++++++++++-----------
> 3 files changed, 50 insertions(+), 17 deletions(-)
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
* [PATCH v4 04/13] dt-bindings: riscv: Add Zabha ISA extension description
2024-07-31 7:23 [PATCH v4 00/13] Zacas/Zabha support and qspinlocks Alexandre Ghiti
` (2 preceding siblings ...)
2024-07-31 7:23 ` [PATCH v4 03/13] riscv: Implement cmpxchg32/64() using Zacas Alexandre Ghiti
@ 2024-07-31 7:23 ` Alexandre Ghiti
2024-08-01 14:53 ` Conor Dooley
2024-07-31 7:23 ` [PATCH v4 05/13] riscv: Implement cmpxchg8/16() using Zabha Alexandre Ghiti
` (9 subsequent siblings)
13 siblings, 1 reply; 47+ messages in thread
From: Alexandre Ghiti @ 2024-07-31 7:23 UTC (permalink / raw)
To: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
Cc: Alexandre Ghiti
Add description for the Zabha ISA extension which was ratified in April
2024.
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Guo Ren <guoren@kernel.org>
---
Documentation/devicetree/bindings/riscv/extensions.yaml | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml
index a06dbc6b4928..a63578b95c4a 100644
--- a/Documentation/devicetree/bindings/riscv/extensions.yaml
+++ b/Documentation/devicetree/bindings/riscv/extensions.yaml
@@ -171,6 +171,12 @@ properties:
memory types as ratified in the 20191213 version of the privileged
ISA specification.
+ - const: zabha
+ description: |
+ The Zabha extension for Byte and Halfword Atomic Memory Operations
+ as ratified at commit 49f49c842ff9 ("Update to Rafified state") of
+ riscv-zabha.
+
- const: zacas
description: |
The Zacas extension for Atomic Compare-and-Swap (CAS) instructions
--
2.39.2
* Re: [PATCH v4 04/13] dt-bindings: riscv: Add Zabha ISA extension description
2024-07-31 7:23 ` [PATCH v4 04/13] dt-bindings: riscv: Add Zabha ISA extension description Alexandre Ghiti
@ 2024-08-01 14:53 ` Conor Dooley
0 siblings, 0 replies; 47+ messages in thread
From: Conor Dooley @ 2024-08-01 14:53 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Rob Herring, Krzysztof Kozlowski, Andrea Parri, Nathan Chancellor,
Peter Zijlstra, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng,
Arnd Bergmann, Leonardo Bras, Guo Ren, linux-doc, devicetree,
linux-kernel, linux-riscv, linux-arch
On Wed, Jul 31, 2024 at 09:23:56AM +0200, Alexandre Ghiti wrote:
> Add description for the Zabha ISA extension which was ratified in April
> 2024.
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> Reviewed-by: Guo Ren <guoren@kernel.org>
> ---
> Documentation/devicetree/bindings/riscv/extensions.yaml | 6 ++++++
> 1 file changed, 6 insertions(+)
>
> diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml
> index a06dbc6b4928..a63578b95c4a 100644
> --- a/Documentation/devicetree/bindings/riscv/extensions.yaml
> +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml
> @@ -171,6 +171,12 @@ properties:
> memory types as ratified in the 20191213 version of the privileged
> ISA specification.
>
> + - const: zabha
> + description: |
> + The Zabha extension for Byte and Halfword Atomic Memory Operations
> + as ratified at commit 49f49c842ff9 ("Update to Rafified state") of
> + riscv-zabha.
Acked-by: Conor Dooley <conor.dooley@microchip.com>
* [PATCH v4 05/13] riscv: Implement cmpxchg8/16() using Zabha
2024-07-31 7:23 [PATCH v4 00/13] Zacas/Zabha support and qspinlocks Alexandre Ghiti
` (3 preceding siblings ...)
2024-07-31 7:23 ` [PATCH v4 04/13] dt-bindings: riscv: Add Zabha ISA extension description Alexandre Ghiti
@ 2024-07-31 7:23 ` Alexandre Ghiti
2024-07-31 9:27 ` Andrew Jones
2024-07-31 7:23 ` [PATCH v4 06/13] riscv: Improve zacas fully-ordered cmpxchg() Alexandre Ghiti
` (8 subsequent siblings)
13 siblings, 1 reply; 47+ messages in thread
From: Alexandre Ghiti @ 2024-07-31 7:23 UTC (permalink / raw)
To: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
Cc: Alexandre Ghiti
This adds runtime support for Zabha in cmpxchg8/16() operations.
Note that in the absence of Zacas support in the toolchain, CAS
instructions from Zabha won't be used.
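For example, a relaxed byte cmpxchg() can then be emitted as a single
instruction, something like:
	amocas.b	a5, a4, (s1)
instead of the masked lr.w/sc.w loop.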
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
arch/riscv/Kconfig | 18 ++++++++
arch/riscv/Makefile | 3 ++
arch/riscv/include/asm/cmpxchg.h | 78 ++++++++++++++++++++------------
arch/riscv/include/asm/hwcap.h | 1 +
arch/riscv/kernel/cpufeature.c | 1 +
5 files changed, 72 insertions(+), 29 deletions(-)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index d955c64d50c2..212ec2aab389 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -613,6 +613,24 @@ config RISCV_ISA_ZAWRS
use of these instructions in the kernel when the Zawrs extension is
detected at boot.
+config TOOLCHAIN_HAS_ZABHA
+ bool
+ default y
+ depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zabha)
+ depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zabha)
+ depends on AS_HAS_OPTION_ARCH
+
+config RISCV_ISA_ZABHA
+ bool "Zabha extension support for atomic byte/halfword operations"
+ depends on TOOLCHAIN_HAS_ZABHA
+ depends on RISCV_ALTERNATIVE
+ default y
+ help
+ Enable the use of the Zabha ISA-extension to implement kernel
+ byte/halfword atomic memory operations when it is detected at boot.
+
+ If you don't know what to do here, say Y.
+
config TOOLCHAIN_HAS_ZACAS
bool
default y
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index f1788131d5fe..f6dc5ba7c526 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -85,6 +85,9 @@ endif
# Check if the toolchain supports Zacas
riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
+# Check if the toolchain supports Zabha
+riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZABHA) := $(riscv-march-y)_zabha
+
# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index c626fe0d08ae..ebcd4a30ae60 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -108,34 +108,49 @@
* indicated by comparing RETURN with OLD.
*/
-#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \
-({ \
- u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
- ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
- ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
- << __s; \
- ulong __newx = (ulong)(n) << __s; \
- ulong __oldx = (ulong)(o) << __s; \
- ulong __retx; \
- ulong __rc; \
- \
- __asm__ __volatile__ ( \
- prepend \
- "0: lr.w %0, %2\n" \
- " and %1, %0, %z5\n" \
- " bne %1, %z3, 1f\n" \
- " and %1, %0, %z6\n" \
- " or %1, %1, %z4\n" \
- " sc.w" sc_sfx " %1, %1, %2\n" \
- " bnez %1, 0b\n" \
- append \
- "1:\n" \
- : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
- : "rJ" ((long)__oldx), "rJ" (__newx), \
- "rJ" (__mask), "rJ" (~__mask) \
- : "memory"); \
- \
- r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \
+({ \
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
+ IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
+ riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) && \
+ riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
+ r = o; \
+ \
+ __asm__ __volatile__ ( \
+ prepend \
+ " amocas" cas_sfx " %0, %z2, %1\n" \
+ append \
+ : "+&r" (r), "+A" (*(p)) \
+ : "rJ" (n) \
+ : "memory"); \
+ } else { \
+ u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
+ ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
+ ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
+ << __s; \
+ ulong __newx = (ulong)(n) << __s; \
+ ulong __oldx = (ulong)(o) << __s; \
+ ulong __retx; \
+ ulong __rc; \
+ \
+ __asm__ __volatile__ ( \
+ prepend \
+ "0: lr.w %0, %2\n" \
+ " and %1, %0, %z5\n" \
+ " bne %1, %z3, 1f\n" \
+ " and %1, %0, %z6\n" \
+ " or %1, %1, %z4\n" \
+ " sc.w" sc_sfx " %1, %1, %2\n" \
+ " bnez %1, 0b\n" \
+ append \
+ "1:\n" \
+ : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
+ : "rJ" ((long)__oldx), "rJ" (__newx), \
+ "rJ" (__mask), "rJ" (~__mask) \
+ : "memory"); \
+ \
+ r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+ } \
})
#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
@@ -177,8 +192,13 @@
\
switch (sizeof(*__ptr)) { \
case 1: \
+ __arch_cmpxchg_masked(sc_cas_sfx, ".b" sc_cas_sfx, \
+ prepend, append, \
+ __ret, __ptr, __old, __new); \
+ break; \
case 2: \
- __arch_cmpxchg_masked(sc_cas_sfx, prepend, append, \
+ __arch_cmpxchg_masked(sc_cas_sfx, ".h" sc_cas_sfx, \
+ prepend, append, \
__ret, __ptr, __old, __new); \
break; \
case 4: \
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 5a0bd27fd11a..f5d53251c947 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -92,6 +92,7 @@
#define RISCV_ISA_EXT_ZCF 83
#define RISCV_ISA_EXT_ZCMOP 84
#define RISCV_ISA_EXT_ZAWRS 85
+#define RISCV_ISA_EXT_ZABHA 86
#define RISCV_ISA_EXT_XLINUXENVCFG 127
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 8f20607adb40..2a608056c89c 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -322,6 +322,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
__RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM),
__RISCV_ISA_EXT_DATA(zimop, RISCV_ISA_EXT_ZIMOP),
+ __RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA),
__RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS),
__RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS),
__RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA),
--
2.39.2
* Re: [PATCH v4 05/13] riscv: Implement cmpxchg8/16() using Zabha
2024-07-31 7:23 ` [PATCH v4 05/13] riscv: Implement cmpxchg8/16() using Zabha Alexandre Ghiti
@ 2024-07-31 9:27 ` Andrew Jones
0 siblings, 0 replies; 47+ messages in thread
From: Andrew Jones @ 2024-07-31 9:27 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
On Wed, Jul 31, 2024 at 09:23:57AM GMT, Alexandre Ghiti wrote:
> This adds runtime support for Zabha in cmpxchg8/16() operations.
>
> Note that in the absence of Zacas support in the toolchain, CAS
> instructions from Zabha won't be used.
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
> arch/riscv/Kconfig | 18 ++++++++
> arch/riscv/Makefile | 3 ++
> arch/riscv/include/asm/cmpxchg.h | 78 ++++++++++++++++++++------------
> arch/riscv/include/asm/hwcap.h | 1 +
> arch/riscv/kernel/cpufeature.c | 1 +
> 5 files changed, 72 insertions(+), 29 deletions(-)
>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
* [PATCH v4 06/13] riscv: Improve zacas fully-ordered cmpxchg()
2024-07-31 7:23 [PATCH v4 00/13] Zacas/Zabha support and qspinlocks Alexandre Ghiti
` (4 preceding siblings ...)
2024-07-31 7:23 ` [PATCH v4 05/13] riscv: Implement cmpxchg8/16() using Zabha Alexandre Ghiti
@ 2024-07-31 7:23 ` Alexandre Ghiti
2024-07-31 9:59 ` Andrew Jones
2024-07-31 7:23 ` [PATCH v4 07/13] riscv: Implement arch_cmpxchg128() using Zacas Alexandre Ghiti
` (7 subsequent siblings)
13 siblings, 1 reply; 47+ messages in thread
From: Alexandre Ghiti @ 2024-07-31 7:23 UTC (permalink / raw)
To: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
Cc: Alexandre Ghiti, Andrea Parri
The current fully-ordered cmpxchgXX() implementation results in:
amocas.X.rl a5,a4,(s1)
fence rw,rw
This provides enough sync but we can actually use the following better
mapping instead:
amocas.X.aqrl a5,a4,(s1)
Suggested-by: Andrea Parri <andrea@rivosinc.com>
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
arch/riscv/include/asm/cmpxchg.h | 72 +++++++++++++++++++-------------
1 file changed, 44 insertions(+), 28 deletions(-)
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index ebcd4a30ae60..391730367213 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -107,8 +107,10 @@
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*/
-
-#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \
+#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ r, p, o, n) \
({ \
if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
@@ -117,9 +119,9 @@
r = o; \
\
__asm__ __volatile__ ( \
- prepend \
+ cas_prepend \
" amocas" cas_sfx " %0, %z2, %1\n" \
- append \
+ cas_append \
: "+&r" (r), "+A" (*(p)) \
: "rJ" (n) \
: "memory"); \
@@ -134,7 +136,7 @@
ulong __rc; \
\
__asm__ __volatile__ ( \
- prepend \
+ sc_prepend \
"0: lr.w %0, %2\n" \
" and %1, %0, %z5\n" \
" bne %1, %z3, 1f\n" \
@@ -142,7 +144,7 @@
" or %1, %1, %z4\n" \
" sc.w" sc_sfx " %1, %1, %2\n" \
" bnez %1, 0b\n" \
- append \
+ sc_append \
"1:\n" \
: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
: "rJ" ((long)__oldx), "rJ" (__newx), \
@@ -153,16 +155,19 @@
} \
})
-#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
+#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ r, p, co, o, n) \
({ \
if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
r = o; \
\
__asm__ __volatile__ ( \
- prepend \
- " amocas" sc_cas_sfx " %0, %z2, %1\n" \
- append \
+ cas_prepend \
+ " amocas" cas_sfx " %0, %z2, %1\n" \
+ cas_append \
: "+&r" (r), "+A" (*(p)) \
: "rJ" (n) \
: "memory"); \
@@ -170,12 +175,12 @@
register unsigned int __rc; \
\
__asm__ __volatile__ ( \
- prepend \
+ sc_prepend \
"0: lr" lr_sfx " %0, %2\n" \
" bne %0, %z3, 1f\n" \
- " sc" sc_cas_sfx " %1, %z4, %2\n" \
+ " sc" sc_sfx " %1, %z4, %2\n" \
" bnez %1, 0b\n" \
- append \
+ sc_append \
"1:\n" \
: "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
: "rJ" (co o), "rJ" (n) \
@@ -183,7 +188,9 @@
} \
})
-#define _arch_cmpxchg(ptr, old, new, sc_cas_sfx, prepend, append) \
+#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(__ptr)) __old = (old); \
@@ -192,22 +199,28 @@
\
switch (sizeof(*__ptr)) { \
case 1: \
- __arch_cmpxchg_masked(sc_cas_sfx, ".b" sc_cas_sfx, \
- prepend, append, \
- __ret, __ptr, __old, __new); \
+ __arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ __ret, __ptr, __old, __new); \
break; \
case 2: \
- __arch_cmpxchg_masked(sc_cas_sfx, ".h" sc_cas_sfx, \
- prepend, append, \
- __ret, __ptr, __old, __new); \
+ __arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ __ret, __ptr, __old, __new); \
break; \
case 4: \
- __arch_cmpxchg(".w", ".w" sc_cas_sfx, prepend, append, \
- __ret, __ptr, (long), __old, __new); \
+ __arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ __ret, __ptr, (long), __old, __new); \
break; \
case 8: \
- __arch_cmpxchg(".d", ".d" sc_cas_sfx, prepend, append, \
- __ret, __ptr, /**/, __old, __new); \
+ __arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx, \
+ sc_prepend, sc_append, \
+ cas_prepend, cas_append, \
+ __ret, __ptr, /**/, __old, __new); \
break; \
default: \
BUILD_BUG(); \
@@ -216,16 +229,19 @@
})
#define arch_cmpxchg_relaxed(ptr, o, n) \
- _arch_cmpxchg((ptr), (o), (n), "", "", "")
+ _arch_cmpxchg((ptr), (o), (n), "", "", "", "", "", "")
#define arch_cmpxchg_acquire(ptr, o, n) \
- _arch_cmpxchg((ptr), (o), (n), "", "", RISCV_ACQUIRE_BARRIER)
+ _arch_cmpxchg((ptr), (o), (n), "", "", \
+ "", RISCV_ACQUIRE_BARRIER, "", RISCV_ACQUIRE_BARRIER)
#define arch_cmpxchg_release(ptr, o, n) \
- _arch_cmpxchg((ptr), (o), (n), "", RISCV_RELEASE_BARRIER, "")
+ _arch_cmpxchg((ptr), (o), (n), "", "", \
+ RISCV_RELEASE_BARRIER, "", RISCV_RELEASE_BARRIER, "")
#define arch_cmpxchg(ptr, o, n) \
- _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n")
+ _arch_cmpxchg((ptr), (o), (n), ".rl", ".aqrl", \
+ "", RISCV_FULL_BARRIER, "", "")
#define arch_cmpxchg_local(ptr, o, n) \
arch_cmpxchg_relaxed((ptr), (o), (n))
--
2.39.2
* Re: [PATCH v4 06/13] riscv: Improve zacas fully-ordered cmpxchg()
2024-07-31 7:23 ` [PATCH v4 06/13] riscv: Improve zacas fully-ordered cmpxchg() Alexandre Ghiti
@ 2024-07-31 9:59 ` Andrew Jones
2024-07-31 10:33 ` Andrea Parri
2024-08-01 6:15 ` Alexandre Ghiti
0 siblings, 2 replies; 47+ messages in thread
From: Andrew Jones @ 2024-07-31 9:59 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch,
Andrea Parri
On Wed, Jul 31, 2024 at 09:23:58AM GMT, Alexandre Ghiti wrote:
> The current fully-ordered cmpxchgXX() implementation results in:
>
> amocas.X.rl a5,a4,(s1)
> fence rw,rw
>
> This provides enough sync but we can actually use the following better
> mapping instead:
>
> amocas.X.aqrl a5,a4,(s1)
We won't get release semantics if the exchange fails. Does that matter?
>
> Suggested-by: Andrea Parri <andrea@rivosinc.com>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
> arch/riscv/include/asm/cmpxchg.h | 72 +++++++++++++++++++-------------
> 1 file changed, 44 insertions(+), 28 deletions(-)
>
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index ebcd4a30ae60..391730367213 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -107,8 +107,10 @@
> * store NEW in MEM. Return the initial value in MEM. Success is
> * indicated by comparing RETURN with OLD.
> */
> -
> -#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \
> +#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, \
> + sc_prepend, sc_append, \
> + cas_prepend, cas_append, \
> + r, p, o, n) \
> ({ \
> if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
> IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
> @@ -117,9 +119,9 @@
> r = o; \
> \
> __asm__ __volatile__ ( \
> - prepend \
> + cas_prepend \
> " amocas" cas_sfx " %0, %z2, %1\n" \
> - append \
> + cas_append \
> : "+&r" (r), "+A" (*(p)) \
> : "rJ" (n) \
> : "memory"); \
> @@ -134,7 +136,7 @@
> ulong __rc; \
> \
> __asm__ __volatile__ ( \
> - prepend \
> + sc_prepend \
> "0: lr.w %0, %2\n" \
> " and %1, %0, %z5\n" \
> " bne %1, %z3, 1f\n" \
> @@ -142,7 +144,7 @@
> " or %1, %1, %z4\n" \
> " sc.w" sc_sfx " %1, %1, %2\n" \
> " bnez %1, 0b\n" \
> - append \
> + sc_append \
> "1:\n" \
> : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
> : "rJ" ((long)__oldx), "rJ" (__newx), \
> @@ -153,16 +155,19 @@
> } \
> })
>
> -#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
> +#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx, \
> + sc_prepend, sc_append, \
> + cas_prepend, cas_append, \
> + r, p, co, o, n) \
> ({ \
> if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
> riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
> r = o; \
> \
> __asm__ __volatile__ ( \
> - prepend \
> - " amocas" sc_cas_sfx " %0, %z2, %1\n" \
> - append \
> + cas_prepend \
> + " amocas" cas_sfx " %0, %z2, %1\n" \
> + cas_append \
> : "+&r" (r), "+A" (*(p)) \
> : "rJ" (n) \
> : "memory"); \
> @@ -170,12 +175,12 @@
> register unsigned int __rc; \
> \
> __asm__ __volatile__ ( \
> - prepend \
> + sc_prepend \
> "0: lr" lr_sfx " %0, %2\n" \
> " bne %0, %z3, 1f\n" \
> - " sc" sc_cas_sfx " %1, %z4, %2\n" \
> + " sc" sc_sfx " %1, %z4, %2\n" \
nit: If patch3 hadn't renamed sc_sfx to sc_cas_sfx then we wouldn't
need to rename it again now.
> " bnez %1, 0b\n" \
> - append \
> + sc_append \
> "1:\n" \
> : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
> : "rJ" (co o), "rJ" (n) \
> @@ -183,7 +188,9 @@
> } \
> })
>
> -#define _arch_cmpxchg(ptr, old, new, sc_cas_sfx, prepend, append) \
> +#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx, \
> + sc_prepend, sc_append, \
> + cas_prepend, cas_append) \
> ({ \
> __typeof__(ptr) __ptr = (ptr); \
> __typeof__(*(__ptr)) __old = (old); \
> @@ -192,22 +199,28 @@
> \
> switch (sizeof(*__ptr)) { \
> case 1: \
> - __arch_cmpxchg_masked(sc_cas_sfx, ".b" sc_cas_sfx, \
> - prepend, append, \
> - __ret, __ptr, __old, __new); \
> + __arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx, \
> + sc_prepend, sc_append, \
> + cas_prepend, cas_append, \
> + __ret, __ptr, __old, __new); \
> break; \
> case 2: \
> - __arch_cmpxchg_masked(sc_cas_sfx, ".h" sc_cas_sfx, \
> - prepend, append, \
> - __ret, __ptr, __old, __new); \
> + __arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx, \
> + sc_prepend, sc_append, \
> + cas_prepend, cas_append, \
> + __ret, __ptr, __old, __new); \
> break; \
> case 4: \
> - __arch_cmpxchg(".w", ".w" sc_cas_sfx, prepend, append, \
> - __ret, __ptr, (long), __old, __new); \
> + __arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx, \
> + sc_prepend, sc_append, \
> + cas_prepend, cas_append, \
> + __ret, __ptr, (long), __old, __new); \
> break; \
> case 8: \
> - __arch_cmpxchg(".d", ".d" sc_cas_sfx, prepend, append, \
> - __ret, __ptr, /**/, __old, __new); \
> + __arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx, \
> + sc_prepend, sc_append, \
> + cas_prepend, cas_append, \
> + __ret, __ptr, /**/, __old, __new); \
> break; \
> default: \
> BUILD_BUG(); \
> @@ -216,16 +229,19 @@
> })
>
> #define arch_cmpxchg_relaxed(ptr, o, n) \
> - _arch_cmpxchg((ptr), (o), (n), "", "", "")
> + _arch_cmpxchg((ptr), (o), (n), "", "", "", "", "", "")
>
> #define arch_cmpxchg_acquire(ptr, o, n) \
> - _arch_cmpxchg((ptr), (o), (n), "", "", RISCV_ACQUIRE_BARRIER)
> + _arch_cmpxchg((ptr), (o), (n), "", "", \
> + "", RISCV_ACQUIRE_BARRIER, "", RISCV_ACQUIRE_BARRIER)
>
> #define arch_cmpxchg_release(ptr, o, n) \
> - _arch_cmpxchg((ptr), (o), (n), "", RISCV_RELEASE_BARRIER, "")
> + _arch_cmpxchg((ptr), (o), (n), "", "", \
> + RISCV_RELEASE_BARRIER, "", RISCV_RELEASE_BARRIER, "")
>
> #define arch_cmpxchg(ptr, o, n) \
> - _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n")
> + _arch_cmpxchg((ptr), (o), (n), ".rl", ".aqrl", \
> + "", RISCV_FULL_BARRIER, "", "")
These aren't the easiest things to read, but I can't think of a way to
improve it other than maybe some macro annotations. E.g.
#define SC_SFX(x) x
#define CAS_SFX(x) x
#define SC_PREPEND(x) x
#define SC_APPEND(x) x
#define CAS_PREPEND(x) x
#define CAS_APPEND(x) x
#define arch_cmpxchg(ptr, o, n) \
_arch_cmpxchg(ptr, o, n, \
SC_SFX(".rl"), CAS_SFX(".aqrl"), \
SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER), \
CAS_PREPEND(""), CAS_APPEND(""))
>
> #define arch_cmpxchg_local(ptr, o, n) \
> arch_cmpxchg_relaxed((ptr), (o), (n))
> --
> 2.39.2
>
Thanks,
drew
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH v4 06/13] riscv: Improve zacas fully-ordered cmpxchg()
2024-07-31 9:59 ` Andrew Jones
@ 2024-07-31 10:33 ` Andrea Parri
2024-08-01 6:15 ` Alexandre Ghiti
1 sibling, 0 replies; 47+ messages in thread
From: Andrea Parri @ 2024-07-31 10:33 UTC (permalink / raw)
To: Andrew Jones
Cc: Alexandre Ghiti, Jonathan Corbet, Paul Walmsley, Palmer Dabbelt,
Albert Ou, Conor Dooley, Rob Herring, Krzysztof Kozlowski,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch,
Andrea Parri
> > amocas.X.aqrl a5,a4,(s1)
>
> We won't get release semantics if the exchange fails. Does that matter?
Conditional (atomic) RMW operations are unordered/relaxed on failure, so
we should be okay here; cf. e.g. Documentation/atomic_t.txt.
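For illustration, the canonical pattern this implies (a minimal sketch, not
code from this series; v is an atomic_t and compute_next() is a made-up
helper) only relies on ordering on the success path:

	int old = atomic_read(&v);
	int new;

	do {
		new = compute_next(old);	/* hypothetical helper */
		/*
		 * Fully ordered only if the cmpxchg succeeds; a failed
		 * attempt behaves like a relaxed load, so no release or
		 * acquire ordering may be assumed on that path.
		 */
	} while (!atomic_try_cmpxchg(&v, &old, new));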
Andrea
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH v4 06/13] riscv: Improve zacas fully-ordered cmpxchg()
2024-07-31 9:59 ` Andrew Jones
2024-07-31 10:33 ` Andrea Parri
@ 2024-08-01 6:15 ` Alexandre Ghiti
1 sibling, 0 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2024-08-01 6:15 UTC (permalink / raw)
To: Andrew Jones
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch,
Andrea Parri
Hi Drew,
On Wed, Jul 31, 2024 at 11:59 AM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Wed, Jul 31, 2024 at 09:23:58AM GMT, Alexandre Ghiti wrote:
> > The current fully-ordered cmpxchgXX() implementation results in:
> >
> > amocas.X.rl a5,a4,(s1)
> > fence rw,rw
> >
> > This provides enough sync but we can actually use the following better
> > mapping instead:
> >
> > amocas.X.aqrl a5,a4,(s1)
>
> We won't get release semantics if the exchange fails. Does that matter?
>
> >
> > Suggested-by: Andrea Parri <andrea@rivosinc.com>
> > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > ---
> > arch/riscv/include/asm/cmpxchg.h | 72 +++++++++++++++++++-------------
> > 1 file changed, 44 insertions(+), 28 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> > index ebcd4a30ae60..391730367213 100644
> > --- a/arch/riscv/include/asm/cmpxchg.h
> > +++ b/arch/riscv/include/asm/cmpxchg.h
> > @@ -107,8 +107,10 @@
> > * store NEW in MEM. Return the initial value in MEM. Success is
> > * indicated by comparing RETURN with OLD.
> > */
> > -
> > -#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \
> > +#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, \
> > + sc_prepend, sc_append, \
> > + cas_prepend, cas_append, \
> > + r, p, o, n) \
> > ({ \
> > if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
> > IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
> > @@ -117,9 +119,9 @@
> > r = o; \
> > \
> > __asm__ __volatile__ ( \
> > - prepend \
> > + cas_prepend \
> > " amocas" cas_sfx " %0, %z2, %1\n" \
> > - append \
> > + cas_append \
> > : "+&r" (r), "+A" (*(p)) \
> > : "rJ" (n) \
> > : "memory"); \
> > @@ -134,7 +136,7 @@
> > ulong __rc; \
> > \
> > __asm__ __volatile__ ( \
> > - prepend \
> > + sc_prepend \
> > "0: lr.w %0, %2\n" \
> > " and %1, %0, %z5\n" \
> > " bne %1, %z3, 1f\n" \
> > @@ -142,7 +144,7 @@
> > " or %1, %1, %z4\n" \
> > " sc.w" sc_sfx " %1, %1, %2\n" \
> > " bnez %1, 0b\n" \
> > - append \
> > + sc_append \
> > "1:\n" \
> > : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
> > : "rJ" ((long)__oldx), "rJ" (__newx), \
> > @@ -153,16 +155,19 @@
> > } \
> > })
> >
> > -#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
> > +#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx, \
> > + sc_prepend, sc_append, \
> > + cas_prepend, cas_append, \
> > + r, p, co, o, n) \
> > ({ \
> > if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
> > riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
> > r = o; \
> > \
> > __asm__ __volatile__ ( \
> > - prepend \
> > - " amocas" sc_cas_sfx " %0, %z2, %1\n" \
> > - append \
> > + cas_prepend \
> > + " amocas" cas_sfx " %0, %z2, %1\n" \
> > + cas_append \
> > : "+&r" (r), "+A" (*(p)) \
> > : "rJ" (n) \
> > : "memory"); \
> > @@ -170,12 +175,12 @@
> > register unsigned int __rc; \
> > \
> > __asm__ __volatile__ ( \
> > - prepend \
> > + sc_prepend \
> > "0: lr" lr_sfx " %0, %2\n" \
> > " bne %0, %z3, 1f\n" \
> > - " sc" sc_cas_sfx " %1, %z4, %2\n" \
> > + " sc" sc_sfx " %1, %z4, %2\n" \
>
> nit: If patch3 hadn't renamed sc_sfx to sc_cas_sfx then we wouldn't
> need to rename it again now.
You're right. If you don't mind, though, I'll leave it as is, as it makes
the previous patch more consistent.
>
> > " bnez %1, 0b\n" \
> > - append \
> > + sc_append \
> > "1:\n" \
> > : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
> > : "rJ" (co o), "rJ" (n) \
> > @@ -183,7 +188,9 @@
> > } \
> > })
> >
> > -#define _arch_cmpxchg(ptr, old, new, sc_cas_sfx, prepend, append) \
> > +#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx, \
> > + sc_prepend, sc_append, \
> > + cas_prepend, cas_append) \
> > ({ \
> > __typeof__(ptr) __ptr = (ptr); \
> > __typeof__(*(__ptr)) __old = (old); \
> > @@ -192,22 +199,28 @@
> > \
> > switch (sizeof(*__ptr)) { \
> > case 1: \
> > - __arch_cmpxchg_masked(sc_cas_sfx, ".b" sc_cas_sfx, \
> > - prepend, append, \
> > - __ret, __ptr, __old, __new); \
> > + __arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx, \
> > + sc_prepend, sc_append, \
> > + cas_prepend, cas_append, \
> > + __ret, __ptr, __old, __new); \
> > break; \
> > case 2: \
> > - __arch_cmpxchg_masked(sc_cas_sfx, ".h" sc_cas_sfx, \
> > - prepend, append, \
> > - __ret, __ptr, __old, __new); \
> > + __arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx, \
> > + sc_prepend, sc_append, \
> > + cas_prepend, cas_append, \
> > + __ret, __ptr, __old, __new); \
> > break; \
> > case 4: \
> > - __arch_cmpxchg(".w", ".w" sc_cas_sfx, prepend, append, \
> > - __ret, __ptr, (long), __old, __new); \
> > + __arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx, \
> > + sc_prepend, sc_append, \
> > + cas_prepend, cas_append, \
> > + __ret, __ptr, (long), __old, __new); \
> > break; \
> > case 8: \
> > - __arch_cmpxchg(".d", ".d" sc_cas_sfx, prepend, append, \
> > - __ret, __ptr, /**/, __old, __new); \
> > + __arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx, \
> > + sc_prepend, sc_append, \
> > + cas_prepend, cas_append, \
> > + __ret, __ptr, /**/, __old, __new); \
> > break; \
> > default: \
> > BUILD_BUG(); \
> > @@ -216,16 +229,19 @@
> > })
> >
> > #define arch_cmpxchg_relaxed(ptr, o, n) \
> > - _arch_cmpxchg((ptr), (o), (n), "", "", "")
> > + _arch_cmpxchg((ptr), (o), (n), "", "", "", "", "", "")
> >
> > #define arch_cmpxchg_acquire(ptr, o, n) \
> > - _arch_cmpxchg((ptr), (o), (n), "", "", RISCV_ACQUIRE_BARRIER)
> > + _arch_cmpxchg((ptr), (o), (n), "", "", \
> > + "", RISCV_ACQUIRE_BARRIER, "", RISCV_ACQUIRE_BARRIER)
> >
> > #define arch_cmpxchg_release(ptr, o, n) \
> > - _arch_cmpxchg((ptr), (o), (n), "", RISCV_RELEASE_BARRIER, "")
> > + _arch_cmpxchg((ptr), (o), (n), "", "", \
> > + RISCV_RELEASE_BARRIER, "", RISCV_RELEASE_BARRIER, "")
> >
> > #define arch_cmpxchg(ptr, o, n) \
> > - _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n")
> > + _arch_cmpxchg((ptr), (o), (n), ".rl", ".aqrl", \
> > + "", RISCV_FULL_BARRIER, "", "")
>
> These aren't the easiest things to read, but I can't think of a way to
> improve it other than maybe some macro annotations. E.g.
>
> #define SC_SFX(x) x
> #define CAS_SFX(x) x
> #define SC_PREPEND(x) x
> #define SC_APPEND(x) x
> #define CAS_PREPEND(x) x
> #define CAS_APPEND(x) x
>
> #define arch_cmpxchg(ptr, o, n) \
> _arch_cmpxchg(ptr, o, n, \
> SC_SFX(".rl"), CAS_SFX(".aqrl"), \
> SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER), \
> CAS_PREPEND(""), CAS_APPEND(""))
That's a very good idea; it's been hard to review, even for me :)
I could add comments too, but I like your solution, so unless I find
something better in the next 30min, I'll implement that.
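To make that concrete, the acquire/release wrappers would then read along
these lines (just a sketch of how your annotation macros could be applied,
not the final patch):

	#define arch_cmpxchg_acquire(ptr, o, n)					\
		_arch_cmpxchg((ptr), (o), (n),					\
			      SC_SFX(""), CAS_SFX(""),				\
			      SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER),	\
			      CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))

	#define arch_cmpxchg_release(ptr, o, n)					\
		_arch_cmpxchg((ptr), (o), (n),					\
			      SC_SFX(""), CAS_SFX(""),				\
			      SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""),	\
			      CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND(""))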
Thanks,
Alex
>
> >
> > #define arch_cmpxchg_local(ptr, o, n) \
> > arch_cmpxchg_relaxed((ptr), (o), (n))
> > --
> > 2.39.2
> >
>
> Thanks,
> drew
^ permalink raw reply [flat|nested] 47+ messages in thread
* [PATCH v4 07/13] riscv: Implement arch_cmpxchg128() using Zacas
2024-07-31 7:23 [PATCH v4 00/13] Zacas/Zabha support and qspinlocks Alexandre Ghiti
` (5 preceding siblings ...)
2024-07-31 7:23 ` [PATCH v4 06/13] riscv: Improve zacas fully-ordered cmpxchg() Alexandre Ghiti
@ 2024-07-31 7:23 ` Alexandre Ghiti
2024-07-31 15:41 ` Andrew Jones
2024-07-31 7:24 ` [PATCH v4 08/13] riscv: Implement xchg8/16() using Zabha Alexandre Ghiti
` (6 subsequent siblings)
13 siblings, 1 reply; 47+ messages in thread
From: Alexandre Ghiti @ 2024-07-31 7:23 UTC (permalink / raw)
To: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
Cc: Alexandre Ghiti
Now that Zacas is supported in the kernel, let's use the double-word
atomic version of amocas to improve the SLUB allocator.
Note that we have to select fixed registers; otherwise gcc fails to pick
even-numbered registers and then produces a reserved encoding which fails
to assemble.
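For reference, callers are expected to go through the generic 128-bit
helpers, roughly like this (illustrative sketch only, not part of this
patch; 'struct desc' is made up):

	struct desc {
		u128 val;
	} __aligned(16);

	static bool desc_try_update(struct desc *d, u128 old, u128 new)
	{
		if (!system_has_cmpxchg128())
			return false;

		/* cmpxchg128() resolves to arch_cmpxchg128(), i.e. amocas.q.aqrl */
		return cmpxchg128(&d->val, old, new) == old;
	}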
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
arch/riscv/Kconfig | 1 +
arch/riscv/include/asm/cmpxchg.h | 38 ++++++++++++++++++++++++++++++++
2 files changed, 39 insertions(+)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 212ec2aab389..ef55ab94027e 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -111,6 +111,7 @@ config RISCV
select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
select HARDIRQS_SW_RESEND
select HAS_IOPORT if MMU
+ select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 391730367213..ce9613516bbb 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -276,6 +276,44 @@
arch_cmpxchg_release((ptr), (o), (n)); \
})
+#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS)
+
+#define system_has_cmpxchg128() riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)
+
+union __u128_halves {
+ u128 full;
+ struct {
+ u64 low, high;
+ };
+};
+
+#define __arch_cmpxchg128(p, o, n, cas_sfx) \
+({ \
+ __typeof__(*(p)) __o = (o); \
+ union __u128_halves __hn = { .full = (n) }; \
+ union __u128_halves __ho = { .full = (__o) }; \
+ register unsigned long t1 asm ("t1") = __hn.low; \
+ register unsigned long t2 asm ("t2") = __hn.high; \
+ register unsigned long t3 asm ("t3") = __ho.low; \
+ register unsigned long t4 asm ("t4") = __ho.high; \
+ \
+ __asm__ __volatile__ ( \
+ " amocas.q" cas_sfx " %0, %z3, %2" \
+ : "+&r" (t3), "+&r" (t4), "+A" (*(p)) \
+ : "rJ" (t1), "rJ" (t2) \
+ : "memory"); \
+ \
+ ((u128)t4 << 64) | t3; \
+})
+
+#define arch_cmpxchg128(ptr, o, n) \
+ __arch_cmpxchg128((ptr), (o), (n), ".aqrl")
+
+#define arch_cmpxchg128_local(ptr, o, n) \
+ __arch_cmpxchg128((ptr), (o), (n), "")
+
+#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */
+
#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
* Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to
--
2.39.2
^ permalink raw reply related [flat|nested] 47+ messages in thread
* Re: [PATCH v4 07/13] riscv: Implement arch_cmpxchg128() using Zacas
2024-07-31 7:23 ` [PATCH v4 07/13] riscv: Implement arch_cmpxchg128() using Zacas Alexandre Ghiti
@ 2024-07-31 15:41 ` Andrew Jones
0 siblings, 0 replies; 47+ messages in thread
From: Andrew Jones @ 2024-07-31 15:41 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
On Wed, Jul 31, 2024 at 09:23:59AM GMT, Alexandre Ghiti wrote:
> Now that Zacas is supported in the kernel, let's use the double word
> atomic version of amocas to improve the SLUB allocator.
>
> Note that we have to select fixed registers, otherwise gcc fails to pick
> even registers and then produces a reserved encoding which fails to
> assemble.
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
> arch/riscv/Kconfig | 1 +
> arch/riscv/include/asm/cmpxchg.h | 38 ++++++++++++++++++++++++++++++++
> 2 files changed, 39 insertions(+)
>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
^ permalink raw reply [flat|nested] 47+ messages in thread
* [PATCH v4 08/13] riscv: Implement xchg8/16() using Zabha
2024-07-31 7:23 [PATCH v4 00/13] Zacas/Zabha support and qspinlocks Alexandre Ghiti
` (6 preceding siblings ...)
2024-07-31 7:23 ` [PATCH v4 07/13] riscv: Implement arch_cmpxchg128() using Zacas Alexandre Ghiti
@ 2024-07-31 7:24 ` Alexandre Ghiti
2024-07-31 12:20 ` Andrew Jones
2024-07-31 7:24 ` [PATCH v4 09/13] asm-generic: ticket-lock: Reuse arch_spinlock_t of qspinlock Alexandre Ghiti
` (5 subsequent siblings)
13 siblings, 1 reply; 47+ messages in thread
From: Alexandre Ghiti @ 2024-07-31 7:24 UTC (permalink / raw)
To: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
Cc: Alexandre Ghiti
This adds runtime support for Zabha in xchg8/16() operations.
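As with the earlier amocas change, the win is in the generated code: with
Zabha a byte xchg() becomes a single AMO rather than a masked LR/SC loop,
i.e. roughly (illustrative only; register names made up and the ordering
suffix elided):

	amoswap.b<sfx>	a5, a4, (s1)

instead of:

	0:	lr.w	a5, (s1)
		and	a6, a5, t0	/* clear the target byte       */
		or	a6, a6, t1	/* insert the shifted new byte */
		sc.w<sfx>	a6, a6, (s1)
		bnez	a6, 0b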
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
arch/riscv/include/asm/cmpxchg.h | 65 ++++++++++++++++++++------------
1 file changed, 41 insertions(+), 24 deletions(-)
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index ce9613516bbb..7484f063809e 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -14,29 +14,41 @@
#include <asm/insn-def.h>
#include <asm/cpufeature-macros.h>
-#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \
-({ \
- u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
- ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
- ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
- << __s; \
- ulong __newx = (ulong)(n) << __s; \
- ulong __retx; \
- ulong __rc; \
- \
- __asm__ __volatile__ ( \
- prepend \
- "0: lr.w %0, %2\n" \
- " and %1, %0, %z4\n" \
- " or %1, %1, %z3\n" \
- " sc.w" sc_sfx " %1, %1, %2\n" \
- " bnez %1, 0b\n" \
- append \
- : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
- : "rJ" (__newx), "rJ" (~__mask) \
- : "memory"); \
- \
- r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \
+ swap_append, r, p, n) \
+({ \
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
+ riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) { \
+ __asm__ __volatile__ ( \
+ prepend \
+ " amoswap" swap_sfx " %0, %z2, %1\n" \
+ swap_append \
+ : "=&r" (r), "+A" (*(p)) \
+ : "rJ" (n) \
+ : "memory"); \
+ } else { \
+ u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
+ ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
+ ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
+ << __s; \
+ ulong __newx = (ulong)(n) << __s; \
+ ulong __retx; \
+ ulong __rc; \
+ \
+ __asm__ __volatile__ ( \
+ prepend \
+ "0: lr.w %0, %2\n" \
+ " and %1, %0, %z4\n" \
+ " or %1, %1, %z3\n" \
+ " sc.w" sc_sfx " %1, %1, %2\n" \
+ " bnez %1, 0b\n" \
+ sc_append \
+ : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
+ : "rJ" (__newx), "rJ" (~__mask) \
+ : "memory"); \
+ \
+ r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+ } \
})
#define __arch_xchg(sfx, prepend, append, r, p, n) \
@@ -59,8 +71,13 @@
\
switch (sizeof(*__ptr)) { \
case 1: \
+ __arch_xchg_masked(sc_sfx, ".b" swap_sfx, \
+ prepend, sc_append, swap_append, \
+ __ret, __ptr, __new); \
+ break; \
case 2: \
- __arch_xchg_masked(sc_sfx, prepend, sc_append, \
+ __arch_xchg_masked(sc_sfx, ".h" swap_sfx, \
+ prepend, sc_append, swap_append, \
__ret, __ptr, __new); \
break; \
case 4: \
--
2.39.2
^ permalink raw reply related [flat|nested] 47+ messages in thread
* Re: [PATCH v4 08/13] riscv: Implement xchg8/16() using Zabha
2024-07-31 7:24 ` [PATCH v4 08/13] riscv: Implement xchg8/16() using Zabha Alexandre Ghiti
@ 2024-07-31 12:20 ` Andrew Jones
0 siblings, 0 replies; 47+ messages in thread
From: Andrew Jones @ 2024-07-31 12:20 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
On Wed, Jul 31, 2024 at 09:24:00AM GMT, Alexandre Ghiti wrote:
> This adds runtime support for Zabha in xchg8/16() operations.
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
> arch/riscv/include/asm/cmpxchg.h | 65 ++++++++++++++++++++------------
> 1 file changed, 41 insertions(+), 24 deletions(-)
>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
^ permalink raw reply [flat|nested] 47+ messages in thread
* [PATCH v4 09/13] asm-generic: ticket-lock: Reuse arch_spinlock_t of qspinlock
2024-07-31 7:23 [PATCH v4 00/13] Zacas/Zabha support and qspinlocks Alexandre Ghiti
` (7 preceding siblings ...)
2024-07-31 7:24 ` [PATCH v4 08/13] riscv: Implement xchg8/16() using Zabha Alexandre Ghiti
@ 2024-07-31 7:24 ` Alexandre Ghiti
2024-07-31 7:24 ` [PATCH v4 10/13] asm-generic: ticket-lock: Add separate ticket-lock.h Alexandre Ghiti
` (4 subsequent siblings)
13 siblings, 0 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2024-07-31 7:24 UTC (permalink / raw)
To: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
Cc: Guo Ren
From: Guo Ren <guoren@linux.alibaba.com>
The arch_spinlock_t of qspinlock already contains an atomic_t val, which
satisfies the ticket-lock requirement. Thus, unify arch_spinlock_t into
qspinlock_types.h. This is preparation for the combo spinlock introduced
later in this series.
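For reference, the layout everything now shares is the generic qspinlock
one, roughly (simplified little-endian sketch of
asm-generic/qspinlock_types.h):

	typedef struct qspinlock {
		union {
			atomic_t val;		/* now used by the ticket lock too */
			struct {
				u8	locked;
				u8	pending;
			};
			struct {
				u16	locked_pending;
				u16	tail;
			};
		};
	} arch_spinlock_t;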
Reviewed-by: Leonardo Bras <leobras@redhat.com>
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/linux-riscv/CAK8P3a2rnz9mQqhN6-e0CGUUv9rntRELFdxt_weiD7FxH7fkfQ@mail.gmail.com/
Signed-off-by: Guo Ren <guoren@kernel.org>
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
---
include/asm-generic/spinlock.h | 14 +++++++-------
include/asm-generic/spinlock_types.h | 12 ++----------
2 files changed, 9 insertions(+), 17 deletions(-)
diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
index 90803a826ba0..4773334ee638 100644
--- a/include/asm-generic/spinlock.h
+++ b/include/asm-generic/spinlock.h
@@ -32,7 +32,7 @@
static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
{
- u32 val = atomic_fetch_add(1<<16, lock);
+ u32 val = atomic_fetch_add(1<<16, &lock->val);
u16 ticket = val >> 16;
if (ticket == (u16)val)
@@ -46,31 +46,31 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
* have no outstanding writes due to the atomic_fetch_add() the extra
* orderings are free.
*/
- atomic_cond_read_acquire(lock, ticket == (u16)VAL);
+ atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL);
smp_mb();
}
static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock)
{
- u32 old = atomic_read(lock);
+ u32 old = atomic_read(&lock->val);
if ((old >> 16) != (old & 0xffff))
return false;
- return atomic_try_cmpxchg(lock, &old, old + (1<<16)); /* SC, for RCsc */
+ return atomic_try_cmpxchg(&lock->val, &old, old + (1<<16)); /* SC, for RCsc */
}
static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
{
u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
- u32 val = atomic_read(lock);
+ u32 val = atomic_read(&lock->val);
smp_store_release(ptr, (u16)val + 1);
}
static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
- u32 val = lock.counter;
+ u32 val = lock.val.counter;
return ((val >> 16) == (val & 0xffff));
}
@@ -84,7 +84,7 @@ static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock)
static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
- u32 val = atomic_read(lock);
+ u32 val = atomic_read(&lock->val);
return (s16)((val >> 16) - (val & 0xffff)) > 1;
}
diff --git a/include/asm-generic/spinlock_types.h b/include/asm-generic/spinlock_types.h
index 8962bb730945..f534aa5de394 100644
--- a/include/asm-generic/spinlock_types.h
+++ b/include/asm-generic/spinlock_types.h
@@ -3,15 +3,7 @@
#ifndef __ASM_GENERIC_SPINLOCK_TYPES_H
#define __ASM_GENERIC_SPINLOCK_TYPES_H
-#include <linux/types.h>
-typedef atomic_t arch_spinlock_t;
-
-/*
- * qrwlock_types depends on arch_spinlock_t, so we must typedef that before the
- * include.
- */
-#include <asm/qrwlock_types.h>
-
-#define __ARCH_SPIN_LOCK_UNLOCKED ATOMIC_INIT(0)
+#include <asm-generic/qspinlock_types.h>
+#include <asm-generic/qrwlock_types.h>
#endif /* __ASM_GENERIC_SPINLOCK_TYPES_H */
--
2.39.2
^ permalink raw reply related [flat|nested] 47+ messages in thread
* [PATCH v4 10/13] asm-generic: ticket-lock: Add separate ticket-lock.h
2024-07-31 7:23 [PATCH v4 00/13] Zacas/Zabha support and qspinlocks Alexandre Ghiti
` (8 preceding siblings ...)
2024-07-31 7:24 ` [PATCH v4 09/13] asm-generic: ticket-lock: Reuse arch_spinlock_t of qspinlock Alexandre Ghiti
@ 2024-07-31 7:24 ` Alexandre Ghiti
2024-07-31 7:24 ` [PATCH v4 11/13] riscv: Add ISA extension parsing for Ziccrse Alexandre Ghiti
` (3 subsequent siblings)
13 siblings, 0 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2024-07-31 7:24 UTC (permalink / raw)
To: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
Cc: Guo Ren
From: Guo Ren <guoren@linux.alibaba.com>
Add a separate ticket-lock.h so that an architecture can include multiple
spinlock versions and select one at compile time or runtime.
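A hypothetical architecture could then pick an implementation along these
lines (sketch only; riscv's actual selection is added later in this
series):

	/* arch/foo/include/asm/spinlock.h -- hypothetical example */
	#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
	#include <asm-generic/qspinlock.h>
	#else
	#include <asm-generic/ticket_spinlock.h>
	#endif
	#include <asm/qrwlock.h>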
Reviewed-by: Leonardo Bras <leobras@redhat.com>
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/linux-riscv/CAK8P3a2rnz9mQqhN6-e0CGUUv9rntRELFdxt_weiD7FxH7fkfQ@mail.gmail.com/
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
---
include/asm-generic/spinlock.h | 87 +---------------------
include/asm-generic/ticket_spinlock.h | 103 ++++++++++++++++++++++++++
2 files changed, 104 insertions(+), 86 deletions(-)
create mode 100644 include/asm-generic/ticket_spinlock.h
diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
index 4773334ee638..970590baf61b 100644
--- a/include/asm-generic/spinlock.h
+++ b/include/asm-generic/spinlock.h
@@ -1,94 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * 'Generic' ticket-lock implementation.
- *
- * It relies on atomic_fetch_add() having well defined forward progress
- * guarantees under contention. If your architecture cannot provide this, stick
- * to a test-and-set lock.
- *
- * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a
- * sub-word of the value. This is generally true for anything LL/SC although
- * you'd be hard pressed to find anything useful in architecture specifications
- * about this. If your architecture cannot do this you might be better off with
- * a test-and-set.
- *
- * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence
- * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with
- * a full fence after the spin to upgrade the otherwise-RCpc
- * atomic_cond_read_acquire().
- *
- * The implementation uses smp_cond_load_acquire() to spin, so if the
- * architecture has WFE like instructions to sleep instead of poll for word
- * modifications be sure to implement that (see ARM64 for example).
- *
- */
-
#ifndef __ASM_GENERIC_SPINLOCK_H
#define __ASM_GENERIC_SPINLOCK_H
-#include <linux/atomic.h>
-#include <asm-generic/spinlock_types.h>
-
-static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
-{
- u32 val = atomic_fetch_add(1<<16, &lock->val);
- u16 ticket = val >> 16;
-
- if (ticket == (u16)val)
- return;
-
- /*
- * atomic_cond_read_acquire() is RCpc, but rather than defining a
- * custom cond_read_rcsc() here we just emit a full fence. We only
- * need the prior reads before subsequent writes ordering from
- * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we
- * have no outstanding writes due to the atomic_fetch_add() the extra
- * orderings are free.
- */
- atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL);
- smp_mb();
-}
-
-static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock)
-{
- u32 old = atomic_read(&lock->val);
-
- if ((old >> 16) != (old & 0xffff))
- return false;
-
- return atomic_try_cmpxchg(&lock->val, &old, old + (1<<16)); /* SC, for RCsc */
-}
-
-static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
- u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
- u32 val = atomic_read(&lock->val);
-
- smp_store_release(ptr, (u16)val + 1);
-}
-
-static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
-{
- u32 val = lock.val.counter;
-
- return ((val >> 16) == (val & 0xffff));
-}
-
-static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock)
-{
- arch_spinlock_t val = READ_ONCE(*lock);
-
- return !arch_spin_value_unlocked(val);
-}
-
-static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)
-{
- u32 val = atomic_read(&lock->val);
-
- return (s16)((val >> 16) - (val & 0xffff)) > 1;
-}
-
+#include <asm-generic/ticket_spinlock.h>
#include <asm/qrwlock.h>
#endif /* __ASM_GENERIC_SPINLOCK_H */
diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
new file mode 100644
index 000000000000..cfcff22b37b3
--- /dev/null
+++ b/include/asm-generic/ticket_spinlock.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * 'Generic' ticket-lock implementation.
+ *
+ * It relies on atomic_fetch_add() having well defined forward progress
+ * guarantees under contention. If your architecture cannot provide this, stick
+ * to a test-and-set lock.
+ *
+ * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a
+ * sub-word of the value. This is generally true for anything LL/SC although
+ * you'd be hard pressed to find anything useful in architecture specifications
+ * about this. If your architecture cannot do this you might be better off with
+ * a test-and-set.
+ *
+ * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence
+ * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with
+ * a full fence after the spin to upgrade the otherwise-RCpc
+ * atomic_cond_read_acquire().
+ *
+ * The implementation uses smp_cond_load_acquire() to spin, so if the
+ * architecture has WFE like instructions to sleep instead of poll for word
+ * modifications be sure to implement that (see ARM64 for example).
+ *
+ */
+
+#ifndef __ASM_GENERIC_TICKET_SPINLOCK_H
+#define __ASM_GENERIC_TICKET_SPINLOCK_H
+
+#include <linux/atomic.h>
+#include <asm-generic/spinlock_types.h>
+
+static __always_inline void ticket_spin_lock(arch_spinlock_t *lock)
+{
+ u32 val = atomic_fetch_add(1<<16, &lock->val);
+ u16 ticket = val >> 16;
+
+ if (ticket == (u16)val)
+ return;
+
+ /*
+ * atomic_cond_read_acquire() is RCpc, but rather than defining a
+ * custom cond_read_rcsc() here we just emit a full fence. We only
+ * need the prior reads before subsequent writes ordering from
+ * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we
+ * have no outstanding writes due to the atomic_fetch_add() the extra
+ * orderings are free.
+ */
+ atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL);
+ smp_mb();
+}
+
+static __always_inline bool ticket_spin_trylock(arch_spinlock_t *lock)
+{
+ u32 old = atomic_read(&lock->val);
+
+ if ((old >> 16) != (old & 0xffff))
+ return false;
+
+ return atomic_try_cmpxchg(&lock->val, &old, old + (1<<16)); /* SC, for RCsc */
+}
+
+static __always_inline void ticket_spin_unlock(arch_spinlock_t *lock)
+{
+ u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+ u32 val = atomic_read(&lock->val);
+
+ smp_store_release(ptr, (u16)val + 1);
+}
+
+static __always_inline int ticket_spin_value_unlocked(arch_spinlock_t lock)
+{
+ u32 val = lock.val.counter;
+
+ return ((val >> 16) == (val & 0xffff));
+}
+
+static __always_inline int ticket_spin_is_locked(arch_spinlock_t *lock)
+{
+ arch_spinlock_t val = READ_ONCE(*lock);
+
+ return !ticket_spin_value_unlocked(val);
+}
+
+static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
+{
+ u32 val = atomic_read(&lock->val);
+
+ return (s16)((val >> 16) - (val & 0xffff)) > 1;
+}
+
+/*
+ * Remapping spinlock architecture specific functions to the corresponding
+ * ticket spinlock functions.
+ */
+#define arch_spin_is_locked(l) ticket_spin_is_locked(l)
+#define arch_spin_is_contended(l) ticket_spin_is_contended(l)
+#define arch_spin_value_unlocked(l) ticket_spin_value_unlocked(l)
+#define arch_spin_lock(l) ticket_spin_lock(l)
+#define arch_spin_trylock(l) ticket_spin_trylock(l)
+#define arch_spin_unlock(l) ticket_spin_unlock(l)
+
+#endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
--
2.39.2
^ permalink raw reply related [flat|nested] 47+ messages in thread
* [PATCH v4 11/13] riscv: Add ISA extension parsing for Ziccrse
2024-07-31 7:23 [PATCH v4 00/13] Zacas/Zabha support and qspinlocks Alexandre Ghiti
` (9 preceding siblings ...)
2024-07-31 7:24 ` [PATCH v4 10/13] asm-generic: ticket-lock: Add separate ticket-lock.h Alexandre Ghiti
@ 2024-07-31 7:24 ` Alexandre Ghiti
2024-07-31 7:24 ` [PATCH v4 12/13] dt-bindings: riscv: Add Ziccrse ISA extension description Alexandre Ghiti
` (2 subsequent siblings)
13 siblings, 0 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2024-07-31 7:24 UTC (permalink / raw)
To: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
Cc: Alexandre Ghiti
Add support for parsing the Ziccrse extension from the riscv,isa string.
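With this in place, a platform that implements the extension can advertise
it in its device tree, e.g. (illustrative snippet; the surrounding
properties and values are made up):

	cpu@0 {
		compatible = "riscv";
		riscv,isa-base = "rv64i";
		riscv,isa-extensions = "i", "m", "a", "zicsr", "zifencei", "ziccrse";
	};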
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
arch/riscv/include/asm/hwcap.h | 1 +
arch/riscv/kernel/cpufeature.c | 1 +
2 files changed, 2 insertions(+)
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index f5d53251c947..9e228b079a6d 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -93,6 +93,7 @@
#define RISCV_ISA_EXT_ZCMOP 84
#define RISCV_ISA_EXT_ZAWRS 85
#define RISCV_ISA_EXT_ZABHA 86
+#define RISCV_ISA_EXT_ZICCRSE 87
#define RISCV_ISA_EXT_XLINUXENVCFG 127
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 2a608056c89c..097821df8642 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -314,6 +314,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
riscv_ext_zicbom_validate),
__RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts,
riscv_ext_zicboz_validate),
+ __RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE),
__RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
__RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND),
__RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR),
--
2.39.2
^ permalink raw reply related [flat|nested] 47+ messages in thread
* [PATCH v4 12/13] dt-bindings: riscv: Add Ziccrse ISA extension description
2024-07-31 7:23 [PATCH v4 00/13] Zacas/Zabha support and qspinlocks Alexandre Ghiti
` (10 preceding siblings ...)
2024-07-31 7:24 ` [PATCH v4 11/13] riscv: Add ISA extension parsing for Ziccrse Alexandre Ghiti
@ 2024-07-31 7:24 ` Alexandre Ghiti
2024-08-01 14:44 ` Conor Dooley
2024-07-31 7:24 ` [PATCH v4 13/13] riscv: Add qspinlock support Alexandre Ghiti
2024-08-01 14:15 ` [PATCH v4 00/13] Zacas/Zabha support and qspinlocks Peter Zijlstra
13 siblings, 1 reply; 47+ messages in thread
From: Alexandre Ghiti @ 2024-07-31 7:24 UTC (permalink / raw)
To: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
Cc: Alexandre Ghiti
Add a description for the Ziccrse ISA extension, which was introduced in
the riscv profiles specification v0.9.2.
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Guo Ren <guoren@kernel.org>
---
Documentation/devicetree/bindings/riscv/extensions.yaml | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml
index a63578b95c4a..22824dd30175 100644
--- a/Documentation/devicetree/bindings/riscv/extensions.yaml
+++ b/Documentation/devicetree/bindings/riscv/extensions.yaml
@@ -289,6 +289,12 @@ properties:
in commit 64074bc ("Update version numbers for Zfh/Zfinx") of
riscv-isa-manual.
+ - const: ziccrse
+ description:
+ The standard Ziccrse extension which provides forward progress
+ guarantee on LR/SC sequences, as introduced in the riscv profiles
+ specification v0.9.2.
+
- const: zk
description:
The standard Zk Standard Scalar cryptography extension as ratified
--
2.39.2
^ permalink raw reply related [flat|nested] 47+ messages in thread
* Re: [PATCH v4 12/13] dt-bindings: riscv: Add Ziccrse ISA extension description
2024-07-31 7:24 ` [PATCH v4 12/13] dt-bindings: riscv: Add Ziccrse ISA extension description Alexandre Ghiti
@ 2024-08-01 14:44 ` Conor Dooley
2024-08-02 8:14 ` Alexandre Ghiti
0 siblings, 1 reply; 47+ messages in thread
From: Conor Dooley @ 2024-08-01 14:44 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Rob Herring, Krzysztof Kozlowski, Andrea Parri, Nathan Chancellor,
Peter Zijlstra, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng,
Arnd Bergmann, Leonardo Bras, Guo Ren, linux-doc, devicetree,
linux-kernel, linux-riscv, linux-arch
On Wed, Jul 31, 2024 at 09:24:04AM +0200, Alexandre Ghiti wrote:
> Add description for the Ziccrse ISA extension which was introduced in
> the riscv profiles specification v0.9.2.
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> Reviewed-by: Guo Ren <guoren@kernel.org>
> ---
> Documentation/devicetree/bindings/riscv/extensions.yaml | 6 ++++++
> 1 file changed, 6 insertions(+)
>
> diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml
> index a63578b95c4a..22824dd30175 100644
> --- a/Documentation/devicetree/bindings/riscv/extensions.yaml
> +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml
> @@ -289,6 +289,12 @@ properties:
> in commit 64074bc ("Update version numbers for Zfh/Zfinx") of
> riscv-isa-manual.
>
> + - const: ziccrse
> + description:
> + The standard Ziccrse extension which provides forward progress
> + guarantee on LR/SC sequences, as introduced in the riscv profiles
> + specification v0.9.2.
Do we have a commit hash for this? Also v0.9.2? The profiles spec is a
crock and the version depends on the specific profile - for example
there's new tags as of last week with 0.5 in them... The original profiles
are ratified, so if this definition is in there, please cite that
instead of a "random" version.
Cheers,
Conor.
> +
> - const: zk
> description:
> The standard Zk Standard Scalar cryptography extension as ratified
> --
> 2.39.2
>
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH v4 12/13] dt-bindings: riscv: Add Ziccrse ISA extension description
2024-08-01 14:44 ` Conor Dooley
@ 2024-08-02 8:14 ` Alexandre Ghiti
2024-08-02 14:46 ` Conor Dooley
0 siblings, 1 reply; 47+ messages in thread
From: Alexandre Ghiti @ 2024-08-02 8:14 UTC (permalink / raw)
To: Conor Dooley, Alexandre Ghiti
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Rob Herring, Krzysztof Kozlowski, Andrea Parri, Nathan Chancellor,
Peter Zijlstra, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng,
Arnd Bergmann, Leonardo Bras, Guo Ren, linux-doc, devicetree,
linux-kernel, linux-riscv, linux-arch
Hi Conor,
On 01/08/2024 16:44, Conor Dooley wrote:
> On Wed, Jul 31, 2024 at 09:24:04AM +0200, Alexandre Ghiti wrote:
>> Add description for the Ziccrse ISA extension which was introduced in
>> the riscv profiles specification v0.9.2.
>>
>> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
>> Reviewed-by: Guo Ren <guoren@kernel.org>
>> ---
>> Documentation/devicetree/bindings/riscv/extensions.yaml | 6 ++++++
>> 1 file changed, 6 insertions(+)
>>
>> diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml
>> index a63578b95c4a..22824dd30175 100644
>> --- a/Documentation/devicetree/bindings/riscv/extensions.yaml
>> +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml
>> @@ -289,6 +289,12 @@ properties:
>> in commit 64074bc ("Update version numbers for Zfh/Zfinx") of
>> riscv-isa-manual.
>>
>> + - const: ziccrse
>> + description:
>> + The standard Ziccrse extension which provides forward progress
>> + guarantee on LR/SC sequences, as introduced in the riscv profiles
>> + specification v0.9.2.
> Do we have a commit hash for this? Also v0.9.2? The profiles spec is a
> crock and the version depends on the specific profile - for example
> there's new tags as of last week with 0.5 in them... The original profiles
> are ratified, so if this definition is in there, please cite that
> instead of a "random" version.
That's not a "random" version, please refer to the existing tag v0.9.2
where this was first introduced
(https://github.com/riscv/riscv-profiles/releases/tag/v0.9.2).
But I'll change that to the profiles specification v1.0.
Thanks,
Alex
>
> Cheers,
> Conor.
>
>> +
>> - const: zk
>> description:
>> The standard Zk Standard Scalar cryptography extension as ratified
>> --
>> 2.39.2
>>
>>
>> _______________________________________________
>> linux-riscv mailing list
>> linux-riscv@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH v4 12/13] dt-bindings: riscv: Add Ziccrse ISA extension description
2024-08-02 8:14 ` Alexandre Ghiti
@ 2024-08-02 14:46 ` Conor Dooley
0 siblings, 0 replies; 47+ messages in thread
From: Conor Dooley @ 2024-08-02 14:46 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Alexandre Ghiti, Jonathan Corbet, Paul Walmsley, Palmer Dabbelt,
Albert Ou, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
On Fri, Aug 02, 2024 at 10:14:21AM +0200, Alexandre Ghiti wrote:
> Hi Cono
>
> On 01/08/2024 16:44, Conor Dooley wrote:
> > On Wed, Jul 31, 2024 at 09:24:04AM +0200, Alexandre Ghiti wrote:
> > > Add description for the Ziccrse ISA extension which was introduced in
> > > the riscv profiles specification v0.9.2.
> > >
> > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > Reviewed-by: Guo Ren <guoren@kernel.org>
> > > ---
> > > Documentation/devicetree/bindings/riscv/extensions.yaml | 6 ++++++
> > > 1 file changed, 6 insertions(+)
> > >
> > > diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml
> > > index a63578b95c4a..22824dd30175 100644
> > > --- a/Documentation/devicetree/bindings/riscv/extensions.yaml
> > > +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml
> > > @@ -289,6 +289,12 @@ properties:
> > > in commit 64074bc ("Update version numbers for Zfh/Zfinx") of
> > > riscv-isa-manual.
> > > + - const: ziccrse
> > > + description:
> > > + The standard Ziccrse extension which provides forward progress
> > > + guarantee on LR/SC sequences, as introduced in the riscv profiles
> > > + specification v0.9.2.
> > Do we have a commit hash for this? Also v0.9.2? The profiles spec is a
> > crock and the version depends on the specific profile - for example
> > there's new tags as of last week with 0.5 in them... The original profiles
> > are ratified, so if this definition is in there, please cite that
> > instead of a "random" version.
>
>
> That's not a "random" version, please refer to the existing tag v0.9.2 where
> this was first introduced
> (https://github.com/riscv/riscv-profiles/releases/tag/v0.9.2).
That might be your intent, but the versioning in that repo sucks. If
you'd said the v0.9.2 *tag* that would be clearer than what you wrote,
as there could easily be a v0.9.2 of a new profile - there's already a
v0.4 etc.
> But I'll change that to the profiles specification v1.0.
Still vague IMO, please provide a commit hash.
^ permalink raw reply [flat|nested] 47+ messages in thread
* [PATCH v4 13/13] riscv: Add qspinlock support
2024-07-31 7:23 [PATCH v4 00/13] Zacas/Zabha support and qspinlocks Alexandre Ghiti
` (11 preceding siblings ...)
2024-07-31 7:24 ` [PATCH v4 12/13] dt-bindings: riscv: Add Ziccrse ISA extension description Alexandre Ghiti
@ 2024-07-31 7:24 ` Alexandre Ghiti
2024-07-31 15:29 ` Andrew Jones
2024-08-01 14:15 ` [PATCH v4 00/13] Zacas/Zabha support and qspinlocks Peter Zijlstra
13 siblings, 1 reply; 47+ messages in thread
From: Alexandre Ghiti @ 2024-07-31 7:24 UTC (permalink / raw)
To: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
Cc: Alexandre Ghiti
In order to produce a generic kernel, a user can select
CONFIG_RISCV_COMBO_SPINLOCKS, which will fall back at runtime to the
ticket spinlock implementation if Zabha or Ziccrse are not present.
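A generic configuration would then look something like this (illustrative
.config fragment, limited to the spinlock-related symbols):

	CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y
	CONFIG_QUEUED_SPINLOCKS=y
	CONFIG_RISCV_COMBO_SPINLOCKS=y
	# CONFIG_RISCV_TICKET_SPINLOCKS is not set
	# CONFIG_RISCV_QUEUED_SPINLOCKS is not set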
Note that we can't use alternatives here because the discovery of
extensions is done too late, and we need to start with the qspinlock
implementation because the ticket spinlock implementation would pollute
the spinlock value; so let's use static keys instead.
This is largely based on Guo's work and Leonardo's reviews at [1].
Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
Signed-off-by: Guo Ren <guoren@kernel.org>
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
.../locking/queued-spinlocks/arch-support.txt | 2 +-
arch/riscv/Kconfig | 29 +++++++++++++
arch/riscv/include/asm/Kbuild | 4 +-
arch/riscv/include/asm/spinlock.h | 43 +++++++++++++++++++
arch/riscv/kernel/setup.c | 38 ++++++++++++++++
include/asm-generic/qspinlock.h | 2 +
include/asm-generic/ticket_spinlock.h | 2 +
7 files changed, 118 insertions(+), 2 deletions(-)
create mode 100644 arch/riscv/include/asm/spinlock.h
diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
index 22f2990392ff..cf26042480e2 100644
--- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
+++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
@@ -20,7 +20,7 @@
| openrisc: | ok |
| parisc: | TODO |
| powerpc: | ok |
- | riscv: | TODO |
+ | riscv: | ok |
| s390: | TODO |
| sh: | TODO |
| sparc: | ok |
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index ef55ab94027e..c9ff8081efc1 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -79,6 +79,7 @@ config RISCV
select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
select ARCH_WANTS_NO_INSTR
select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select ARCH_WEAK_RELEASE_ACQUIRE if ARCH_USE_QUEUED_SPINLOCKS
select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
select BUILDTIME_TABLE_SORT if MMU
select CLINT_TIMER if RISCV_M_MODE
@@ -488,6 +489,34 @@ config NODES_SHIFT
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.
+choice
+ prompt "RISC-V spinlock type"
+ default RISCV_COMBO_SPINLOCKS
+
+config RISCV_TICKET_SPINLOCKS
+ bool "Using ticket spinlock"
+
+config RISCV_QUEUED_SPINLOCKS
+ bool "Using queued spinlock"
+ depends on SMP && MMU && NONPORTABLE
+ select ARCH_USE_QUEUED_SPINLOCKS
+ help
+ The queued spinlock implementation requires the forward progress
+ guarantee of cmpxchg()/xchg() atomic operations: CAS with Zabha or
+ LR/SC with Ziccrse provide such guarantee.
+
+ Select this if and only if Zabha or Ziccrse is available on your
+ platform.
+
+config RISCV_COMBO_SPINLOCKS
+ bool "Using combo spinlock"
+ depends on SMP && MMU
+ select ARCH_USE_QUEUED_SPINLOCKS
+ help
+ Embed both queued spinlock and ticket lock so that the spinlock
+ implementation can be chosen at runtime.
+endchoice
+
config RISCV_ALTERNATIVE
bool
depends on !XIP_KERNEL
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 5c589770f2a8..1c2618c964f0 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -5,10 +5,12 @@ syscall-y += syscall_table_64.h
generic-y += early_ioremap.h
generic-y += flat.h
generic-y += kvm_para.h
+generic-y += mcs_spinlock.h
generic-y += parport.h
-generic-y += spinlock.h
generic-y += spinlock_types.h
+generic-y += ticket_spinlock.h
generic-y += qrwlock.h
generic-y += qrwlock_types.h
+generic-y += qspinlock.h
generic-y += user.h
generic-y += vmlinux.lds.h
diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
new file mode 100644
index 000000000000..503aef31db83
--- /dev/null
+++ b/arch/riscv/include/asm/spinlock.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_RISCV_SPINLOCK_H
+#define __ASM_RISCV_SPINLOCK_H
+
+#ifdef CONFIG_RISCV_COMBO_SPINLOCKS
+#define _Q_PENDING_LOOPS (1 << 9)
+
+#define __no_arch_spinlock_redefine
+#include <asm/ticket_spinlock.h>
+#include <asm/qspinlock.h>
+#include <asm/alternative.h>
+
+DECLARE_STATIC_KEY_TRUE(qspinlock_key);
+
+#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
+static __always_inline type arch_spin_##op(type_lock lock) \
+{ \
+ if (static_branch_unlikely(&qspinlock_key)) \
+ return queued_spin_##op(lock); \
+ return ticket_spin_##op(lock); \
+}
+
+SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
+
+#elif defined(CONFIG_RISCV_QUEUED_SPINLOCKS)
+
+#include <asm/qspinlock.h>
+
+#else
+
+#include <asm/ticket_spinlock.h>
+
+#endif
+
+#include <asm/qrwlock.h>
+
+#endif /* __ASM_RISCV_SPINLOCK_H */
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index a2cde65b69e9..b811fa331982 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -244,6 +244,43 @@ static void __init parse_dtb(void)
#endif
}
+#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
+DEFINE_STATIC_KEY_TRUE(qspinlock_key);
+EXPORT_SYMBOL(qspinlock_key);
+#endif
+
+static void __init riscv_spinlock_init(void)
+{
+ char *using_ext = NULL;
+
+ if (IS_ENABLED(CONFIG_RISCV_TICKET_SPINLOCKS)) {
+ pr_info("Ticket spinlock: enabled\n");
+ return;
+ }
+
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&
+ IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&
+ riscv_isa_extension_available(NULL, ZABHA) &&
+ riscv_isa_extension_available(NULL, ZACAS)) {
+ using_ext = "using Zabha";
+ } else if (riscv_isa_extension_available(NULL, ZICCRSE)) {
+ using_ext = "using Ziccrse";
+ }
+#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
+ else {
+ static_branch_disable(&qspinlock_key);
+ pr_info("Ticket spinlock: enabled\n");
+
+ return;
+ }
+#endif
+
+ if (!using_ext)
+ pr_err("Queued spinlock without Zabha or Ziccrse");
+ else
+ pr_info("Queued spinlock %s: enabled\n", using_ext);
+}
+
extern void __init init_rt_signal_env(void);
void __init setup_arch(char **cmdline_p)
@@ -297,6 +334,7 @@ void __init setup_arch(char **cmdline_p)
riscv_set_dma_cache_alignment();
riscv_user_isa_enable();
+ riscv_spinlock_init();
}
bool arch_cpu_is_hotpluggable(int cpu)
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 0655aa5b57b2..bf47cca2c375 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
}
#endif
+#ifndef __no_arch_spinlock_redefine
/*
* Remapping spinlock architecture specific functions to the corresponding
* queued spinlock functions.
@@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
#define arch_spin_lock(l) queued_spin_lock(l)
#define arch_spin_trylock(l) queued_spin_trylock(l)
#define arch_spin_unlock(l) queued_spin_unlock(l)
+#endif
#endif /* __ASM_GENERIC_QSPINLOCK_H */
diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
index cfcff22b37b3..325779970d8a 100644
--- a/include/asm-generic/ticket_spinlock.h
+++ b/include/asm-generic/ticket_spinlock.h
@@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
return (s16)((val >> 16) - (val & 0xffff)) > 1;
}
+#ifndef __no_arch_spinlock_redefine
/*
* Remapping spinlock architecture specific functions to the corresponding
* ticket spinlock functions.
@@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
#define arch_spin_lock(l) ticket_spin_lock(l)
#define arch_spin_trylock(l) ticket_spin_trylock(l)
#define arch_spin_unlock(l) ticket_spin_unlock(l)
+#endif
#endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
--
2.39.2
^ permalink raw reply related [flat|nested] 47+ messages in thread
* Re: [PATCH v4 13/13] riscv: Add qspinlock support
2024-07-31 7:24 ` [PATCH v4 13/13] riscv: Add qspinlock support Alexandre Ghiti
@ 2024-07-31 15:29 ` Andrew Jones
2024-08-01 6:53 ` Alexandre Ghiti
` (2 more replies)
0 siblings, 3 replies; 47+ messages in thread
From: Andrew Jones @ 2024-07-31 15:29 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
On Wed, Jul 31, 2024 at 09:24:05AM GMT, Alexandre Ghiti wrote:
> In order to produce a generic kernel, a user can select
> CONFIG_COMBO_SPINLOCKS which will fallback at runtime to the ticket
> spinlock implementation if Zabha or Ziccrse are not present.
>
> Note that we can't use alternatives here because the discovery of
> extensions is done too late and we need to start with the qspinlock
> implementation because the ticket spinlock implementation would pollute
> the spinlock value, so let's use static keys.
>
> This is largely based on Guo's work and Leonardo reviews at [1].
>
> Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> Signed-off-by: Guo Ren <guoren@kernel.org>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
> .../locking/queued-spinlocks/arch-support.txt | 2 +-
> arch/riscv/Kconfig | 29 +++++++++++++
> arch/riscv/include/asm/Kbuild | 4 +-
> arch/riscv/include/asm/spinlock.h | 43 +++++++++++++++++++
> arch/riscv/kernel/setup.c | 38 ++++++++++++++++
> include/asm-generic/qspinlock.h | 2 +
> include/asm-generic/ticket_spinlock.h | 2 +
> 7 files changed, 118 insertions(+), 2 deletions(-)
> create mode 100644 arch/riscv/include/asm/spinlock.h
>
> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> index 22f2990392ff..cf26042480e2 100644
> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> @@ -20,7 +20,7 @@
> | openrisc: | ok |
> | parisc: | TODO |
> | powerpc: | ok |
> - | riscv: | TODO |
> + | riscv: | ok |
> | s390: | TODO |
> | sh: | TODO |
> | sparc: | ok |
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index ef55ab94027e..c9ff8081efc1 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -79,6 +79,7 @@ config RISCV
> select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
> select ARCH_WANTS_NO_INSTR
> select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
> + select ARCH_WEAK_RELEASE_ACQUIRE if ARCH_USE_QUEUED_SPINLOCKS
Why do we need this? Also, when combo spinlocks are selected but we end up
using ticket spinlocks at runtime, we presumably would prefer not to have it.
Is there no way to avoid it?
> select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
> select BUILDTIME_TABLE_SORT if MMU
> select CLINT_TIMER if RISCV_M_MODE
> @@ -488,6 +489,34 @@ config NODES_SHIFT
> Specify the maximum number of NUMA Nodes available on the target
> system. Increases memory reserved to accommodate various tables.
>
> +choice
> + prompt "RISC-V spinlock type"
> + default RISCV_COMBO_SPINLOCKS
> +
> +config RISCV_TICKET_SPINLOCKS
> + bool "Using ticket spinlock"
> +
> +config RISCV_QUEUED_SPINLOCKS
> + bool "Using queued spinlock"
> + depends on SMP && MMU && NONPORTABLE
> + select ARCH_USE_QUEUED_SPINLOCKS
> + help
> + The queued spinlock implementation requires the forward progress
> + guarantee of cmpxchg()/xchg() atomic operations: CAS with Zabha or
> + LR/SC with Ziccrse provide such guarantee.
> +
> + Select this if and only if Zabha or Ziccrse is available on your
> + platform.
Maybe add some text recommending combo spinlocks here? As it stands, it sounds
like enabling queued spinlocks is a bad idea for anybody who doesn't know
what platforms will run the kernel they're building, which includes all distros.
> +
> +config RISCV_COMBO_SPINLOCKS
> + bool "Using combo spinlock"
> + depends on SMP && MMU
> + select ARCH_USE_QUEUED_SPINLOCKS
> + help
> + Embed both queued spinlock and ticket lock so that the spinlock
> + implementation can be chosen at runtime.
nit: Add a blank line here
> +endchoice
> +
> config RISCV_ALTERNATIVE
> bool
> depends on !XIP_KERNEL
> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> index 5c589770f2a8..1c2618c964f0 100644
> --- a/arch/riscv/include/asm/Kbuild
> +++ b/arch/riscv/include/asm/Kbuild
> @@ -5,10 +5,12 @@ syscall-y += syscall_table_64.h
> generic-y += early_ioremap.h
> generic-y += flat.h
> generic-y += kvm_para.h
> +generic-y += mcs_spinlock.h
> generic-y += parport.h
> -generic-y += spinlock.h
> generic-y += spinlock_types.h
> +generic-y += ticket_spinlock.h
> generic-y += qrwlock.h
> generic-y += qrwlock_types.h
> +generic-y += qspinlock.h
> generic-y += user.h
> generic-y += vmlinux.lds.h
> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> new file mode 100644
> index 000000000000..503aef31db83
> --- /dev/null
> +++ b/arch/riscv/include/asm/spinlock.h
> @@ -0,0 +1,43 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef __ASM_RISCV_SPINLOCK_H
> +#define __ASM_RISCV_SPINLOCK_H
> +
> +#ifdef CONFIG_RISCV_COMBO_SPINLOCKS
> +#define _Q_PENDING_LOOPS (1 << 9)
> +
> +#define __no_arch_spinlock_redefine
> +#include <asm/ticket_spinlock.h>
> +#include <asm/qspinlock.h>
> +#include <asm/alternative.h>
We need asm/jump_label.h instead of asm/alternative.h, but...
> +
> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> +
> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> +static __always_inline type arch_spin_##op(type_lock lock) \
> +{ \
> + if (static_branch_unlikely(&qspinlock_key)) \
> + return queued_spin_##op(lock); \
> + return ticket_spin_##op(lock); \
> +}
...do you know what impact this inlined static key check has on the
kernel size?
Actually, why not use ALTERNATIVE with any nonzero cpufeature value?
Then add code to riscv_cpufeature_patch_check() to return true when
qspinlocks should be enabled (based on the value of some global set
during riscv_spinlock_init()).
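Something along these lines (a very rough, untested sketch -- the extension
id and the hookup into the patch-check function are made up here just to
illustrate the idea):

static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
{
	asm goto(ALTERNATIVE("nop", "j %l[qspinlock]", 0,
			     RISCV_ISA_EXT_XQSPINLOCK, 1) /* made-up id */
		 : : : : qspinlock);

	ticket_spin_lock(lock);
	return;
qspinlock:
	queued_spin_lock(lock);
}

and riscv_cpufeature_patch_check() (or whatever ends up making the patching
decision) would return true for that made-up id iff a global set by
riscv_spinlock_init() says qspinlocks are usable.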
> +
> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> +
> +#elif defined(CONFIG_RISCV_QUEUED_SPINLOCKS)
> +
> +#include <asm/qspinlock.h>
> +
> +#else
> +
> +#include <asm/ticket_spinlock.h>
> +
> +#endif
> +
> +#include <asm/qrwlock.h>
> +
> +#endif /* __ASM_RISCV_SPINLOCK_H */
> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> index a2cde65b69e9..b811fa331982 100644
> --- a/arch/riscv/kernel/setup.c
> +++ b/arch/riscv/kernel/setup.c
> @@ -244,6 +244,43 @@ static void __init parse_dtb(void)
> #endif
> }
>
> +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> +EXPORT_SYMBOL(qspinlock_key);
> +#endif
> +
> +static void __init riscv_spinlock_init(void)
> +{
> + char *using_ext = NULL;
> +
> + if (IS_ENABLED(CONFIG_RISCV_TICKET_SPINLOCKS)) {
> + pr_info("Ticket spinlock: enabled\n");
> + return;
> + }
> +
> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&
> + IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&
> + riscv_isa_extension_available(NULL, ZABHA) &&
> + riscv_isa_extension_available(NULL, ZACAS)) {
> + using_ext = "using Zabha";
> + } else if (riscv_isa_extension_available(NULL, ZICCRSE)) {
> + using_ext = "using Ziccrse";
> + }
> +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
> + else {
else if (IS_ENABLED(CONFIG_RISCV_COMBO_SPINLOCKS))
> + static_branch_disable(&qspinlock_key);
> + pr_info("Ticket spinlock: enabled\n");
> +
nit: remove this blank line
> + return;
> + }
> +#endif
> +
> + if (!using_ext)
> + pr_err("Queued spinlock without Zabha or Ziccrse");
> + else
> + pr_info("Queued spinlock %s: enabled\n", using_ext);
> +}
> +
> extern void __init init_rt_signal_env(void);
>
> void __init setup_arch(char **cmdline_p)
> @@ -297,6 +334,7 @@ void __init setup_arch(char **cmdline_p)
> riscv_set_dma_cache_alignment();
>
> riscv_user_isa_enable();
> + riscv_spinlock_init();
> }
>
> bool arch_cpu_is_hotpluggable(int cpu)
> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> index 0655aa5b57b2..bf47cca2c375 100644
> --- a/include/asm-generic/qspinlock.h
> +++ b/include/asm-generic/qspinlock.h
> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> }
> #endif
>
> +#ifndef __no_arch_spinlock_redefine
I'm not sure what's better/worse, but instead of inventing this
__no_arch_spinlock_redefine thing we could just name all the functions
something like __arch_spin* and then add defines for both to asm/spinlock.h,
i.e.
#define queued_spin_lock(l) __arch_spin_lock(l)
...
#define ticket_spin_lock(l) __arch_spin_lock(l)
...
Besides not having to touch asm-generic/qspinlock.h and
asm-generic/ticket_spinlock.h, it allows one to find the implementations
a bit more easily: following a tag to arch_spin_lock() takes them to
queued_spin_lock(), which then takes them to
arch/riscv/include/asm/spinlock.h, where they'll figure out how
__arch_spin_lock() was defined.
> /*
> * Remapping spinlock architecture specific functions to the corresponding
> * queued spinlock functions.
> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> #define arch_spin_lock(l) queued_spin_lock(l)
> #define arch_spin_trylock(l) queued_spin_trylock(l)
> #define arch_spin_unlock(l) queued_spin_unlock(l)
> +#endif
>
> #endif /* __ASM_GENERIC_QSPINLOCK_H */
> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> index cfcff22b37b3..325779970d8a 100644
> --- a/include/asm-generic/ticket_spinlock.h
> +++ b/include/asm-generic/ticket_spinlock.h
> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> return (s16)((val >> 16) - (val & 0xffff)) > 1;
> }
>
> +#ifndef __no_arch_spinlock_redefine
> /*
> * Remapping spinlock architecture specific functions to the corresponding
> * ticket spinlock functions.
> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> #define arch_spin_lock(l) ticket_spin_lock(l)
> #define arch_spin_trylock(l) ticket_spin_trylock(l)
> #define arch_spin_unlock(l) ticket_spin_unlock(l)
> +#endif
>
> #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> --
> 2.39.2
>
Thanks,
drew
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH v4 13/13] riscv: Add qspinlock support
2024-07-31 15:29 ` Andrew Jones
@ 2024-08-01 6:53 ` Alexandre Ghiti
2024-08-01 7:48 ` Andrew Jones
2024-08-15 13:27 ` Alexandre Ghiti
2024-08-01 8:43 ` Alexandre Ghiti
2024-08-01 9:48 ` Andrea Parri
2 siblings, 2 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2024-08-01 6:53 UTC (permalink / raw)
To: Andrew Jones
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
On Wed, Jul 31, 2024 at 5:29 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Wed, Jul 31, 2024 at 09:24:05AM GMT, Alexandre Ghiti wrote:
> > In order to produce a generic kernel, a user can select
> > CONFIG_COMBO_SPINLOCKS which will fallback at runtime to the ticket
> > spinlock implementation if Zabha or Ziccrse are not present.
> >
> > Note that we can't use alternatives here because the discovery of
> > extensions is done too late and we need to start with the qspinlock
> > implementation because the ticket spinlock implementation would pollute
> > the spinlock value, so let's use static keys.
> >
> > This is largely based on Guo's work and Leonardo reviews at [1].
> >
> > Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> > Signed-off-by: Guo Ren <guoren@kernel.org>
> > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > ---
> > .../locking/queued-spinlocks/arch-support.txt | 2 +-
> > arch/riscv/Kconfig | 29 +++++++++++++
> > arch/riscv/include/asm/Kbuild | 4 +-
> > arch/riscv/include/asm/spinlock.h | 43 +++++++++++++++++++
> > arch/riscv/kernel/setup.c | 38 ++++++++++++++++
> > include/asm-generic/qspinlock.h | 2 +
> > include/asm-generic/ticket_spinlock.h | 2 +
> > 7 files changed, 118 insertions(+), 2 deletions(-)
> > create mode 100644 arch/riscv/include/asm/spinlock.h
> >
> > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > index 22f2990392ff..cf26042480e2 100644
> > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > @@ -20,7 +20,7 @@
> > | openrisc: | ok |
> > | parisc: | TODO |
> > | powerpc: | ok |
> > - | riscv: | TODO |
> > + | riscv: | ok |
> > | s390: | TODO |
> > | sh: | TODO |
> > | sparc: | ok |
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index ef55ab94027e..c9ff8081efc1 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -79,6 +79,7 @@ config RISCV
> > select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
> > select ARCH_WANTS_NO_INSTR
> > select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
> > + select ARCH_WEAK_RELEASE_ACQUIRE if ARCH_USE_QUEUED_SPINLOCKS
>
> Why do we need this? Also, we presumably would prefer not to have it
> when we end up using ticket spinlocks when combo spinlocks is selected.
> Is there no way to avoid it?
I'll let Andrea answer this as he asked for it.
>
> > select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
> > select BUILDTIME_TABLE_SORT if MMU
> > select CLINT_TIMER if RISCV_M_MODE
> > @@ -488,6 +489,34 @@ config NODES_SHIFT
> > Specify the maximum number of NUMA Nodes available on the target
> > system. Increases memory reserved to accommodate various tables.
> >
> > +choice
> > + prompt "RISC-V spinlock type"
> > + default RISCV_COMBO_SPINLOCKS
> > +
> > +config RISCV_TICKET_SPINLOCKS
> > + bool "Using ticket spinlock"
> > +
> > +config RISCV_QUEUED_SPINLOCKS
> > + bool "Using queued spinlock"
> > + depends on SMP && MMU && NONPORTABLE
> > + select ARCH_USE_QUEUED_SPINLOCKS
> > + help
> > + The queued spinlock implementation requires the forward progress
> > + guarantee of cmpxchg()/xchg() atomic operations: CAS with Zabha or
> > + LR/SC with Ziccrse provide such guarantee.
> > +
> > + Select this if and only if Zabha or Ziccrse is available on your
> > + platform.
>
> Maybe some text recommending combo spinlocks here? As it stands it sounds
> like enabling queued spinlocks is a bad idea for anybody that doesn't know
> what platforms will run the kernel they're building, which is all distros.
That's NONPORTABLE, so people enabling this config are supposed to
know that, right?
>
> > +
> > +config RISCV_COMBO_SPINLOCKS
> > + bool "Using combo spinlock"
> > + depends on SMP && MMU
> > + select ARCH_USE_QUEUED_SPINLOCKS
> > + help
> > + Embed both queued spinlock and ticket lock so that the spinlock
> > + implementation can be chosen at runtime.
>
> nit: Add a blank line here
Done
>
> > +endchoice
> > +
> > config RISCV_ALTERNATIVE
> > bool
> > depends on !XIP_KERNEL
> > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > index 5c589770f2a8..1c2618c964f0 100644
> > --- a/arch/riscv/include/asm/Kbuild
> > +++ b/arch/riscv/include/asm/Kbuild
> > @@ -5,10 +5,12 @@ syscall-y += syscall_table_64.h
> > generic-y += early_ioremap.h
> > generic-y += flat.h
> > generic-y += kvm_para.h
> > +generic-y += mcs_spinlock.h
> > generic-y += parport.h
> > -generic-y += spinlock.h
> > generic-y += spinlock_types.h
> > +generic-y += ticket_spinlock.h
> > generic-y += qrwlock.h
> > generic-y += qrwlock_types.h
> > +generic-y += qspinlock.h
> > generic-y += user.h
> > generic-y += vmlinux.lds.h
> > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > new file mode 100644
> > index 000000000000..503aef31db83
> > --- /dev/null
> > +++ b/arch/riscv/include/asm/spinlock.h
> > @@ -0,0 +1,43 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +
> > +#ifndef __ASM_RISCV_SPINLOCK_H
> > +#define __ASM_RISCV_SPINLOCK_H
> > +
> > +#ifdef CONFIG_RISCV_COMBO_SPINLOCKS
> > +#define _Q_PENDING_LOOPS (1 << 9)
> > +
> > +#define __no_arch_spinlock_redefine
> > +#include <asm/ticket_spinlock.h>
> > +#include <asm/qspinlock.h>
> > +#include <asm/alternative.h>
>
> We need asm/jump_label.h instead of asm/alternative.h, but...
>
> > +
> > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > +
> > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> > +static __always_inline type arch_spin_##op(type_lock lock) \
> > +{ \
> > + if (static_branch_unlikely(&qspinlock_key)) \
> > + return queued_spin_##op(lock); \
> > + return ticket_spin_##op(lock); \
> > +}
>
> ...do you know what impact this inlined static key check has on the
> kernel size?
No, I'll check, thanks.
>
> Actually, why not use ALTERNATIVE with any nonzero cpufeature value.
> Then add code to riscv_cpufeature_patch_check() to return true when
> qspinlocks should be enabled (based on the value of some global set
> during riscv_spinlock_init)?
As discussed with Guo in the previous iteration, the patching of the
alternatives happens long after the first use of the spinlocks, and the
ticket spinlock implementation pollutes the spinlock value, so we'd have
to unconditionally start with the qspinlock implementation and afterwards
switch to the ticket implementation if it is not supported by the
platform. It works, but it's dirty and I really don't like this hack.
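To illustrate what I mean by "pollute" (a rough sketch from memory, so
double-check the exact bit layout):

/*
 * After a single ticket spinlock lock()/unlock() round, the 32-bit lock
 * word is left at 0x00010001 (next == owner == 1): that is "unlocked" as
 * far as the ticket code is concerned, but the qspinlock code would
 * decode the low byte as "locked" and the upper bits as a queued waiter,
 * so we can't silently switch implementations once a lock has been used.
 */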
We could, though:
- add an initial value to the alternatives (not sure it's feasible though)
- make the patching of alternatives happen sooner by parsing the ISA
string earlier, either from DT or ACPI (I have a working PoC for very
early parsing of ACPI).
I intend to do the latter, as I think we should be aware of the
extensions sooner in the boot process, so I'll switch this to
alternatives once that's done. WDYT, any other ideas?
>
> > +
> > +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> > +
> > +#elif defined(CONFIG_RISCV_QUEUED_SPINLOCKS)
> > +
> > +#include <asm/qspinlock.h>
> > +
> > +#else
> > +
> > +#include <asm/ticket_spinlock.h>
> > +
> > +#endif
> > +
> > +#include <asm/qrwlock.h>
> > +
> > +#endif /* __ASM_RISCV_SPINLOCK_H */
> > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > index a2cde65b69e9..b811fa331982 100644
> > --- a/arch/riscv/kernel/setup.c
> > +++ b/arch/riscv/kernel/setup.c
> > @@ -244,6 +244,43 @@ static void __init parse_dtb(void)
> > #endif
> > }
> >
> > +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
> > +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> > +EXPORT_SYMBOL(qspinlock_key);
> > +#endif
> > +
> > +static void __init riscv_spinlock_init(void)
> > +{
> > + char *using_ext = NULL;
> > +
> > + if (IS_ENABLED(CONFIG_RISCV_TICKET_SPINLOCKS)) {
> > + pr_info("Ticket spinlock: enabled\n");
> > + return;
> > + }
> > +
> > + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&
> > + IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&
> > + riscv_isa_extension_available(NULL, ZABHA) &&
> > + riscv_isa_extension_available(NULL, ZACAS)) {
> > + using_ext = "using Zabha";
> > + } else if (riscv_isa_extension_available(NULL, ZICCRSE)) {
> > + using_ext = "using Ziccrse";
> > + }
> > +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
> > + else {
>
> else if (IS_ENABLED(CONFIG_RISCV_COMBO_SPINLOCKS))
>
> > + static_branch_disable(&qspinlock_key);
> > + pr_info("Ticket spinlock: enabled\n");
> > +
>
> nit: remove this blank line
>
> > + return;
> > + }
> > +#endif
> > +
> > + if (!using_ext)
> > + pr_err("Queued spinlock without Zabha or Ziccrse");
> > + else
> > + pr_info("Queued spinlock %s: enabled\n", using_ext);
> > +}
> > +
> > extern void __init init_rt_signal_env(void);
> >
> > void __init setup_arch(char **cmdline_p)
> > @@ -297,6 +334,7 @@ void __init setup_arch(char **cmdline_p)
> > riscv_set_dma_cache_alignment();
> >
> > riscv_user_isa_enable();
> > + riscv_spinlock_init();
> > }
> >
> > bool arch_cpu_is_hotpluggable(int cpu)
> > diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > index 0655aa5b57b2..bf47cca2c375 100644
> > --- a/include/asm-generic/qspinlock.h
> > +++ b/include/asm-generic/qspinlock.h
> > @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > }
> > #endif
> >
> > +#ifndef __no_arch_spinlock_redefine
>
> I'm not sure what's better/worse, but instead of inventing this
> __no_arch_spinlock_redefine thing we could just name all the functions
> something like __arch_spin* and then add defines for both to asm/spinlock.h,
> i.e.
>
> #define queued_spin_lock(l) __arch_spin_lock(l)
> ...
>
> #define ticket_spin_lock(l) __arch_spin_lock(l)
> ...
>
> Besides not having to touch asm-generic/qspinlock.h and
> asm-generic/ticket_spinlock.h it allows one to find the implementations
> a bit easier as following a tag to arch_spin_lock() will take them to
> queued_spin_lock() which will then take them to
> arch/riscv/include/asm/spinlock.h and there they'll figure out how
> __arch_spin_lock() was defined.
>
> > /*
> > * Remapping spinlock architecture specific functions to the corresponding
> > * queued spinlock functions.
> > @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > #define arch_spin_lock(l) queued_spin_lock(l)
> > #define arch_spin_trylock(l) queued_spin_trylock(l)
> > #define arch_spin_unlock(l) queued_spin_unlock(l)
> > +#endif
> >
> > #endif /* __ASM_GENERIC_QSPINLOCK_H */
> > diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> > index cfcff22b37b3..325779970d8a 100644
> > --- a/include/asm-generic/ticket_spinlock.h
> > +++ b/include/asm-generic/ticket_spinlock.h
> > @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > return (s16)((val >> 16) - (val & 0xffff)) > 1;
> > }
> >
> > +#ifndef __no_arch_spinlock_redefine
> > /*
> > * Remapping spinlock architecture specific functions to the corresponding
> > * ticket spinlock functions.
> > @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > #define arch_spin_lock(l) ticket_spin_lock(l)
> > #define arch_spin_trylock(l) ticket_spin_trylock(l)
> > #define arch_spin_unlock(l) ticket_spin_unlock(l)
> > +#endif
> >
> > #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> > --
> > 2.39.2
> >
>
> Thanks,
> drew
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH v4 13/13] riscv: Add qspinlock support
2024-08-01 6:53 ` Alexandre Ghiti
@ 2024-08-01 7:48 ` Andrew Jones
2024-08-02 8:31 ` Alexandre Ghiti
2024-08-15 13:27 ` Alexandre Ghiti
1 sibling, 1 reply; 47+ messages in thread
From: Andrew Jones @ 2024-08-01 7:48 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
On Thu, Aug 01, 2024 at 08:53:06AM GMT, Alexandre Ghiti wrote:
> On Wed, Jul 31, 2024 at 5:29 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> >
> > On Wed, Jul 31, 2024 at 09:24:05AM GMT, Alexandre Ghiti wrote:
> > > In order to produce a generic kernel, a user can select
> > > CONFIG_COMBO_SPINLOCKS which will fallback at runtime to the ticket
> > > spinlock implementation if Zabha or Ziccrse are not present.
> > >
> > > Note that we can't use alternatives here because the discovery of
> > > extensions is done too late and we need to start with the qspinlock
> > > implementation because the ticket spinlock implementation would pollute
> > > the spinlock value, so let's use static keys.
> > >
> > > This is largely based on Guo's work and Leonardo reviews at [1].
> > >
> > > Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> > > Signed-off-by: Guo Ren <guoren@kernel.org>
> > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > ---
> > > .../locking/queued-spinlocks/arch-support.txt | 2 +-
> > > arch/riscv/Kconfig | 29 +++++++++++++
> > > arch/riscv/include/asm/Kbuild | 4 +-
> > > arch/riscv/include/asm/spinlock.h | 43 +++++++++++++++++++
> > > arch/riscv/kernel/setup.c | 38 ++++++++++++++++
> > > include/asm-generic/qspinlock.h | 2 +
> > > include/asm-generic/ticket_spinlock.h | 2 +
> > > 7 files changed, 118 insertions(+), 2 deletions(-)
> > > create mode 100644 arch/riscv/include/asm/spinlock.h
> > >
> > > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > index 22f2990392ff..cf26042480e2 100644
> > > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > @@ -20,7 +20,7 @@
> > > | openrisc: | ok |
> > > | parisc: | TODO |
> > > | powerpc: | ok |
> > > - | riscv: | TODO |
> > > + | riscv: | ok |
> > > | s390: | TODO |
> > > | sh: | TODO |
> > > | sparc: | ok |
> > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > index ef55ab94027e..c9ff8081efc1 100644
> > > --- a/arch/riscv/Kconfig
> > > +++ b/arch/riscv/Kconfig
> > > @@ -79,6 +79,7 @@ config RISCV
> > > select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
> > > select ARCH_WANTS_NO_INSTR
> > > select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
> > > + select ARCH_WEAK_RELEASE_ACQUIRE if ARCH_USE_QUEUED_SPINLOCKS
> >
> > Why do we need this? Also, we presumably would prefer not to have it
> > when we end up using ticket spinlocks when combo spinlocks is selected.
> > Is there no way to avoid it?
>
> I'll let Andrea answer this as he asked for it.
>
> >
> > > select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
> > > select BUILDTIME_TABLE_SORT if MMU
> > > select CLINT_TIMER if RISCV_M_MODE
> > > @@ -488,6 +489,34 @@ config NODES_SHIFT
> > > Specify the maximum number of NUMA Nodes available on the target
> > > system. Increases memory reserved to accommodate various tables.
> > >
> > > +choice
> > > + prompt "RISC-V spinlock type"
> > > + default RISCV_COMBO_SPINLOCKS
> > > +
> > > +config RISCV_TICKET_SPINLOCKS
> > > + bool "Using ticket spinlock"
> > > +
> > > +config RISCV_QUEUED_SPINLOCKS
> > > + bool "Using queued spinlock"
> > > + depends on SMP && MMU && NONPORTABLE
> > > + select ARCH_USE_QUEUED_SPINLOCKS
> > > + help
> > > + The queued spinlock implementation requires the forward progress
> > > + guarantee of cmpxchg()/xchg() atomic operations: CAS with Zabha or
> > > + LR/SC with Ziccrse provide such guarantee.
> > > +
> > > + Select this if and only if Zabha or Ziccrse is available on your
> > > + platform.
> >
> > Maybe some text recommending combo spinlocks here? As it stands it sounds
> > like enabling queued spinlocks is a bad idea for anybody that doesn't know
> > what platforms will run the kernel they're building, which is all distros.
>
> That's NONPORTABLE, so people enabling this config are supposed to
> know that right?
Yes, both the NONPORTABLE and the scary text will imply that qspinlocks
shouldn't be selected. I'm asking for text which points people configuring
kernels to COMBO. Something like
qspinlocks provides performance enhancements on platforms which support
Zabha or Ziccrse. RISCV_QUEUED_SPINLOCKS should not be selected for
platforms without one of those extensions. If unsure, select
RISCV_COMBO_SPINLOCKS, which will use qspinlocks when supported and
otherwise ticket spinlocks.
>
> >
> > > +
> > > +config RISCV_COMBO_SPINLOCKS
> > > + bool "Using combo spinlock"
> > > + depends on SMP && MMU
> > > + select ARCH_USE_QUEUED_SPINLOCKS
> > > + help
> > > + Embed both queued spinlock and ticket lock so that the spinlock
> > > + implementation can be chosen at runtime.
> >
> > nit: Add a blank line here
>
> Done
>
> >
> > > +endchoice
> > > +
> > > config RISCV_ALTERNATIVE
> > > bool
> > > depends on !XIP_KERNEL
> > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > > index 5c589770f2a8..1c2618c964f0 100644
> > > --- a/arch/riscv/include/asm/Kbuild
> > > +++ b/arch/riscv/include/asm/Kbuild
> > > @@ -5,10 +5,12 @@ syscall-y += syscall_table_64.h
> > > generic-y += early_ioremap.h
> > > generic-y += flat.h
> > > generic-y += kvm_para.h
> > > +generic-y += mcs_spinlock.h
> > > generic-y += parport.h
> > > -generic-y += spinlock.h
> > > generic-y += spinlock_types.h
> > > +generic-y += ticket_spinlock.h
> > > generic-y += qrwlock.h
> > > generic-y += qrwlock_types.h
> > > +generic-y += qspinlock.h
> > > generic-y += user.h
> > > generic-y += vmlinux.lds.h
> > > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > > new file mode 100644
> > > index 000000000000..503aef31db83
> > > --- /dev/null
> > > +++ b/arch/riscv/include/asm/spinlock.h
> > > @@ -0,0 +1,43 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +
> > > +#ifndef __ASM_RISCV_SPINLOCK_H
> > > +#define __ASM_RISCV_SPINLOCK_H
> > > +
> > > +#ifdef CONFIG_RISCV_COMBO_SPINLOCKS
> > > +#define _Q_PENDING_LOOPS (1 << 9)
> > > +
> > > +#define __no_arch_spinlock_redefine
> > > +#include <asm/ticket_spinlock.h>
> > > +#include <asm/qspinlock.h>
> > > +#include <asm/alternative.h>
> >
> > We need asm/jump_label.h instead of asm/alternative.h, but...
> >
> > > +
> > > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > > +
> > > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> > > +static __always_inline type arch_spin_##op(type_lock lock) \
> > > +{ \
> > > + if (static_branch_unlikely(&qspinlock_key)) \
> > > + return queued_spin_##op(lock); \
> > > + return ticket_spin_##op(lock); \
> > > +}
> >
> > ...do you know what impact this inlined static key check has on the
> > kernel size?
>
> No, I'll check, thanks.
>
> >
> > Actually, why not use ALTERNATIVE with any nonzero cpufeature value.
> > Then add code to riscv_cpufeature_patch_check() to return true when
> > qspinlocks should be enabled (based on the value of some global set
> > during riscv_spinlock_init)?
>
> As discussed with Guo in the previous iteration, the patching of the
> alternatives intervenes far after the first use of the spinlocks and
> the ticket spinlock implementation pollutes the spinlock value, so
> we'd have to unconditionally start with the qspinlock implementation
> and after switch to the ticket implementation if not supported by the
> platform. It works but it's dirty, I really don't like this hack.
>
> We could though:
> - add an initial value to the alternatives (not sure it's feasible though)
> - make the patching of alternatives happen sooner by parsing the isa
> string sooner, either in DT or ACPI (I have a working PoC for very
> early parsing of ACPI).
>
> I intend to do the latter as I think we should be aware of the
> extensions sooner in the boot process, so I'll change that to the
> alternatives when it's done. WDYT, any other idea?
Yes, we'll likely want early patching for other extensions as well,
so that's a good idea in general. Putting a TODO on this static key
to be changed to an ALTERNATIVE later when possible sounds reasonable
to me.
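Something like this next to the key declaration, for example (exact
wording up to you):

/*
 * TODO: convert this static key to an ALTERNATIVE() once extension
 * discovery happens early enough in boot for alternatives patching.
 */
DECLARE_STATIC_KEY_TRUE(qspinlock_key);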
Thanks,
drew
^ permalink raw reply [flat|nested] 47+ messages in thread* Re: [PATCH v4 13/13] riscv: Add qspinlock support
2024-08-01 7:48 ` Andrew Jones
@ 2024-08-02 8:31 ` Alexandre Ghiti
0 siblings, 0 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2024-08-02 8:31 UTC (permalink / raw)
To: Andrew Jones, Alexandre Ghiti
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
Hi Drew,
On 01/08/2024 09:48, Andrew Jones wrote:
> On Thu, Aug 01, 2024 at 08:53:06AM GMT, Alexandre Ghiti wrote:
>> On Wed, Jul 31, 2024 at 5:29 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>>> On Wed, Jul 31, 2024 at 09:24:05AM GMT, Alexandre Ghiti wrote:
>>>> In order to produce a generic kernel, a user can select
>>>> CONFIG_COMBO_SPINLOCKS which will fallback at runtime to the ticket
>>>> spinlock implementation if Zabha or Ziccrse are not present.
>>>>
>>>> Note that we can't use alternatives here because the discovery of
>>>> extensions is done too late and we need to start with the qspinlock
>>>> implementation because the ticket spinlock implementation would pollute
>>>> the spinlock value, so let's use static keys.
>>>>
>>>> This is largely based on Guo's work and Leonardo reviews at [1].
>>>>
>>>> Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
>>>> Signed-off-by: Guo Ren <guoren@kernel.org>
>>>> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
>>>> ---
>>>> .../locking/queued-spinlocks/arch-support.txt | 2 +-
>>>> arch/riscv/Kconfig | 29 +++++++++++++
>>>> arch/riscv/include/asm/Kbuild | 4 +-
>>>> arch/riscv/include/asm/spinlock.h | 43 +++++++++++++++++++
>>>> arch/riscv/kernel/setup.c | 38 ++++++++++++++++
>>>> include/asm-generic/qspinlock.h | 2 +
>>>> include/asm-generic/ticket_spinlock.h | 2 +
>>>> 7 files changed, 118 insertions(+), 2 deletions(-)
>>>> create mode 100644 arch/riscv/include/asm/spinlock.h
>>>>
>>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>> index 22f2990392ff..cf26042480e2 100644
>>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>> @@ -20,7 +20,7 @@
>>>> | openrisc: | ok |
>>>> | parisc: | TODO |
>>>> | powerpc: | ok |
>>>> - | riscv: | TODO |
>>>> + | riscv: | ok |
>>>> | s390: | TODO |
>>>> | sh: | TODO |
>>>> | sparc: | ok |
>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>>> index ef55ab94027e..c9ff8081efc1 100644
>>>> --- a/arch/riscv/Kconfig
>>>> +++ b/arch/riscv/Kconfig
>>>> @@ -79,6 +79,7 @@ config RISCV
>>>> select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
>>>> select ARCH_WANTS_NO_INSTR
>>>> select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
>>>> + select ARCH_WEAK_RELEASE_ACQUIRE if ARCH_USE_QUEUED_SPINLOCKS
>>> Why do we need this? Also, we presumably would prefer not to have it
>>> when we end up using ticket spinlocks when combo spinlocks is selected.
>>> Is there no way to avoid it?
>> I'll let Andrea answer this as he asked for it.
>>
>>>> select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
>>>> select BUILDTIME_TABLE_SORT if MMU
>>>> select CLINT_TIMER if RISCV_M_MODE
>>>> @@ -488,6 +489,34 @@ config NODES_SHIFT
>>>> Specify the maximum number of NUMA Nodes available on the target
>>>> system. Increases memory reserved to accommodate various tables.
>>>>
>>>> +choice
>>>> + prompt "RISC-V spinlock type"
>>>> + default RISCV_COMBO_SPINLOCKS
>>>> +
>>>> +config RISCV_TICKET_SPINLOCKS
>>>> + bool "Using ticket spinlock"
>>>> +
>>>> +config RISCV_QUEUED_SPINLOCKS
>>>> + bool "Using queued spinlock"
>>>> + depends on SMP && MMU && NONPORTABLE
>>>> + select ARCH_USE_QUEUED_SPINLOCKS
>>>> + help
>>>> + The queued spinlock implementation requires the forward progress
>>>> + guarantee of cmpxchg()/xchg() atomic operations: CAS with Zabha or
>>>> + LR/SC with Ziccrse provide such guarantee.
>>>> +
>>>> + Select this if and only if Zabha or Ziccrse is available on your
>>>> + platform.
>>> Maybe some text recommending combo spinlocks here? As it stands it sounds
>>> like enabling queued spinlocks is a bad idea for anybody that doesn't know
>>> what platforms will run the kernel they're building, which is all distros.
>> That's NONPORTABLE, so people enabling this config are supposed to
>> know that right?
> Yes, both the NONPORTABLE and the scary text will imply that qspinlocks
> shouldn't be selected. I'm asking for text which points people configuring
> kernels to COMBO. Something like
>
> qspinlocks provides performance enhancements on platforms which support
> Zabha or Ziccrse. RISCV_QUEUED_SPINLOCKS should not be selected for
> platforms without one of those extensions. If unsure, select
> RISCV_COMBO_SPINLOCKS, which will use qspinlocks when supported and
> otherwise ticket spinlocks.
Ok I'll add that.
>
>>>> +
>>>> +config RISCV_COMBO_SPINLOCKS
>>>> + bool "Using combo spinlock"
>>>> + depends on SMP && MMU
>>>> + select ARCH_USE_QUEUED_SPINLOCKS
>>>> + help
>>>> + Embed both queued spinlock and ticket lock so that the spinlock
>>>> + implementation can be chosen at runtime.
>>> nit: Add a blank line here
>> Done
>>
>>>> +endchoice
>>>> +
>>>> config RISCV_ALTERNATIVE
>>>> bool
>>>> depends on !XIP_KERNEL
>>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
>>>> index 5c589770f2a8..1c2618c964f0 100644
>>>> --- a/arch/riscv/include/asm/Kbuild
>>>> +++ b/arch/riscv/include/asm/Kbuild
>>>> @@ -5,10 +5,12 @@ syscall-y += syscall_table_64.h
>>>> generic-y += early_ioremap.h
>>>> generic-y += flat.h
>>>> generic-y += kvm_para.h
>>>> +generic-y += mcs_spinlock.h
>>>> generic-y += parport.h
>>>> -generic-y += spinlock.h
>>>> generic-y += spinlock_types.h
>>>> +generic-y += ticket_spinlock.h
>>>> generic-y += qrwlock.h
>>>> generic-y += qrwlock_types.h
>>>> +generic-y += qspinlock.h
>>>> generic-y += user.h
>>>> generic-y += vmlinux.lds.h
>>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
>>>> new file mode 100644
>>>> index 000000000000..503aef31db83
>>>> --- /dev/null
>>>> +++ b/arch/riscv/include/asm/spinlock.h
>>>> @@ -0,0 +1,43 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>> +
>>>> +#ifndef __ASM_RISCV_SPINLOCK_H
>>>> +#define __ASM_RISCV_SPINLOCK_H
>>>> +
>>>> +#ifdef CONFIG_RISCV_COMBO_SPINLOCKS
>>>> +#define _Q_PENDING_LOOPS (1 << 9)
>>>> +
>>>> +#define __no_arch_spinlock_redefine
>>>> +#include <asm/ticket_spinlock.h>
>>>> +#include <asm/qspinlock.h>
>>>> +#include <asm/alternative.h>
>>> We need asm/jump_label.h instead of asm/alternative.h, but...
>>>
>>>> +
>>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
>>>> +
>>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
>>>> +static __always_inline type arch_spin_##op(type_lock lock) \
>>>> +{ \
>>>> + if (static_branch_unlikely(&qspinlock_key)) \
>>>> + return queued_spin_##op(lock); \
>>>> + return ticket_spin_##op(lock); \
>>>> +}
>>> ...do you know what impact this inlined static key check has on the
>>> kernel size?
>> No, I'll check, thanks.
>>
>>> Actually, why not use ALTERNATIVE with any nonzero cpufeature value.
>>> Then add code to riscv_cpufeature_patch_check() to return true when
>>> qspinlocks should be enabled (based on the value of some global set
>>> during riscv_spinlock_init)?
>> As discussed with Guo in the previous iteration, the patching of the
>> alternatives intervenes far after the first use of the spinlocks and
>> the ticket spinlock implementation pollutes the spinlock value, so
>> we'd have to unconditionally start with the qspinlock implementation
>> and after switch to the ticket implementation if not supported by the
>> platform. It works but it's dirty, I really don't like this hack.
>>
>> We could though:
>> - add an initial value to the alternatives (not sure it's feasible though)
>> - make the patching of alternatives happen sooner by parsing the isa
>> string sooner, either in DT or ACPI (I have a working PoC for very
>> early parsing of ACPI).
>>
>> I intend to do the latter as I think we should be aware of the
>> extensions sooner in the boot process, so I'll change that to the
>> alternatives when it's done. WDYT, any other idea?
> Yes, we'll likely want early patching for other extensions as well,
> so that's a good idea in general. Putting a TODO on this static key
> to be changed to an ALTERNATIVE later when possible sounds reasonable
> to me.
Great, I'll add a TODO.
Thanks,
Alex
>
> Thanks,
> drew
>
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH v4 13/13] riscv: Add qspinlock support
2024-08-01 6:53 ` Alexandre Ghiti
2024-08-01 7:48 ` Andrew Jones
@ 2024-08-15 13:27 ` Alexandre Ghiti
2024-08-15 13:34 ` Andrew Jones
2024-08-17 5:08 ` Guo Ren
1 sibling, 2 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2024-08-15 13:27 UTC (permalink / raw)
To: Alexandre Ghiti, Andrew Jones
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
Hi Andrew,
On 01/08/2024 08:53, Alexandre Ghiti wrote:
> On Wed, Jul 31, 2024 at 5:29 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>> On Wed, Jul 31, 2024 at 09:24:05AM GMT, Alexandre Ghiti wrote:
>>> In order to produce a generic kernel, a user can select
>>> CONFIG_COMBO_SPINLOCKS which will fallback at runtime to the ticket
>>> spinlock implementation if Zabha or Ziccrse are not present.
>>>
>>> Note that we can't use alternatives here because the discovery of
>>> extensions is done too late and we need to start with the qspinlock
>>> implementation because the ticket spinlock implementation would pollute
>>> the spinlock value, so let's use static keys.
>>>
>>> This is largely based on Guo's work and Leonardo reviews at [1].
>>>
>>> Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
>>> Signed-off-by: Guo Ren <guoren@kernel.org>
>>> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
>>> ---
>>> .../locking/queued-spinlocks/arch-support.txt | 2 +-
>>> arch/riscv/Kconfig | 29 +++++++++++++
>>> arch/riscv/include/asm/Kbuild | 4 +-
>>> arch/riscv/include/asm/spinlock.h | 43 +++++++++++++++++++
>>> arch/riscv/kernel/setup.c | 38 ++++++++++++++++
>>> include/asm-generic/qspinlock.h | 2 +
>>> include/asm-generic/ticket_spinlock.h | 2 +
>>> 7 files changed, 118 insertions(+), 2 deletions(-)
>>> create mode 100644 arch/riscv/include/asm/spinlock.h
>>>
>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>> index 22f2990392ff..cf26042480e2 100644
>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>> @@ -20,7 +20,7 @@
>>> | openrisc: | ok |
>>> | parisc: | TODO |
>>> | powerpc: | ok |
>>> - | riscv: | TODO |
>>> + | riscv: | ok |
>>> | s390: | TODO |
>>> | sh: | TODO |
>>> | sparc: | ok |
>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>> index ef55ab94027e..c9ff8081efc1 100644
>>> --- a/arch/riscv/Kconfig
>>> +++ b/arch/riscv/Kconfig
>>> @@ -79,6 +79,7 @@ config RISCV
>>> select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
>>> select ARCH_WANTS_NO_INSTR
>>> select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
>>> + select ARCH_WEAK_RELEASE_ACQUIRE if ARCH_USE_QUEUED_SPINLOCKS
>> Why do we need this? Also, we presumably would prefer not to have it
>> when we end up using ticket spinlocks when combo spinlocks is selected.
>> Is there no way to avoid it?
> I'll let Andrea answer this as he asked for it.
>
>>> select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
>>> select BUILDTIME_TABLE_SORT if MMU
>>> select CLINT_TIMER if RISCV_M_MODE
>>> @@ -488,6 +489,34 @@ config NODES_SHIFT
>>> Specify the maximum number of NUMA Nodes available on the target
>>> system. Increases memory reserved to accommodate various tables.
>>>
>>> +choice
>>> + prompt "RISC-V spinlock type"
>>> + default RISCV_COMBO_SPINLOCKS
>>> +
>>> +config RISCV_TICKET_SPINLOCKS
>>> + bool "Using ticket spinlock"
>>> +
>>> +config RISCV_QUEUED_SPINLOCKS
>>> + bool "Using queued spinlock"
>>> + depends on SMP && MMU && NONPORTABLE
>>> + select ARCH_USE_QUEUED_SPINLOCKS
>>> + help
>>> + The queued spinlock implementation requires the forward progress
>>> + guarantee of cmpxchg()/xchg() atomic operations: CAS with Zabha or
>>> + LR/SC with Ziccrse provide such guarantee.
>>> +
>>> + Select this if and only if Zabha or Ziccrse is available on your
>>> + platform.
>> Maybe some text recommending combo spinlocks here? As it stands it sounds
>> like enabling queued spinlocks is a bad idea for anybody that doesn't know
>> what platforms will run the kernel they're building, which is all distros.
> That's NONPORTABLE, so people enabling this config are supposed to
> know that right?
>
>>> +
>>> +config RISCV_COMBO_SPINLOCKS
>>> + bool "Using combo spinlock"
>>> + depends on SMP && MMU
>>> + select ARCH_USE_QUEUED_SPINLOCKS
>>> + help
>>> + Embed both queued spinlock and ticket lock so that the spinlock
>>> + implementation can be chosen at runtime.
>> nit: Add a blank line here
> Done
>
>>> +endchoice
>>> +
>>> config RISCV_ALTERNATIVE
>>> bool
>>> depends on !XIP_KERNEL
>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
>>> index 5c589770f2a8..1c2618c964f0 100644
>>> --- a/arch/riscv/include/asm/Kbuild
>>> +++ b/arch/riscv/include/asm/Kbuild
>>> @@ -5,10 +5,12 @@ syscall-y += syscall_table_64.h
>>> generic-y += early_ioremap.h
>>> generic-y += flat.h
>>> generic-y += kvm_para.h
>>> +generic-y += mcs_spinlock.h
>>> generic-y += parport.h
>>> -generic-y += spinlock.h
>>> generic-y += spinlock_types.h
>>> +generic-y += ticket_spinlock.h
>>> generic-y += qrwlock.h
>>> generic-y += qrwlock_types.h
>>> +generic-y += qspinlock.h
>>> generic-y += user.h
>>> generic-y += vmlinux.lds.h
>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
>>> new file mode 100644
>>> index 000000000000..503aef31db83
>>> --- /dev/null
>>> +++ b/arch/riscv/include/asm/spinlock.h
>>> @@ -0,0 +1,43 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>> +
>>> +#ifndef __ASM_RISCV_SPINLOCK_H
>>> +#define __ASM_RISCV_SPINLOCK_H
>>> +
>>> +#ifdef CONFIG_RISCV_COMBO_SPINLOCKS
>>> +#define _Q_PENDING_LOOPS (1 << 9)
>>> +
>>> +#define __no_arch_spinlock_redefine
>>> +#include <asm/ticket_spinlock.h>
>>> +#include <asm/qspinlock.h>
>>> +#include <asm/alternative.h>
>> We need asm/jump_label.h instead of asm/alternative.h, but...
>>
>>> +
>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
>>> +
>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
>>> +static __always_inline type arch_spin_##op(type_lock lock) \
>>> +{ \
>>> + if (static_branch_unlikely(&qspinlock_key)) \
>>> + return queued_spin_##op(lock); \
>>> + return ticket_spin_##op(lock); \
>>> +}
>> ...do you know what impact this inlined static key check has on the
>> kernel size?
> No, I'll check, thanks.
So I have just checked the size of the jump table section:
* defconfig:
- ticket: 26928 bytes
- combo: 28320 bytes
So that's a ~5% increase.
* ubuntu config
- ticket: 107840 bytes
- combo: 174752 bytes
And that's a ~62% increase.
And these are the ELF sizes for ticket vs combo spinlocks:
* ticket: 776915592 bytes
* combo: 786958968 bytes
So that's an increase of ~1.3% on the ELF.
And the .text section size:
* ticket: 12290960 bytes
* combo: 12366644 bytes
And that's a ~0.6% increase!
Finally, I'd say the impact is very limited :)
Thanks,
Alex
>
>> Actually, why not use ALTERNATIVE with any nonzero cpufeature value.
>> Then add code to riscv_cpufeature_patch_check() to return true when
>> qspinlocks should be enabled (based on the value of some global set
>> during riscv_spinlock_init)?
> As discussed with Guo in the previous iteration, the patching of the
> alternatives intervenes far after the first use of the spinlocks and
> the ticket spinlock implementation pollutes the spinlock value, so
> we'd have to unconditionally start with the qspinlock implementation
> and after switch to the ticket implementation if not supported by the
> platform. It works but it's dirty, I really don't like this hack.
>
> We could though:
> - add an initial value to the alternatives (not sure it's feasible though)
> - make the patching of alternatives happen sooner by parsing the isa
> string sooner, either in DT or ACPI (I have a working PoC for very
> early parsing of ACPI).
>
> I intend to do the latter as I think we should be aware of the
> extensions sooner in the boot process, so I'll change that to the
> alternatives when it's done. WDYT, any other idea?
>
>
>>> +
>>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
>>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
>>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
>>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
>>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
>>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
>>> +
>>> +#elif defined(CONFIG_RISCV_QUEUED_SPINLOCKS)
>>> +
>>> +#include <asm/qspinlock.h>
>>> +
>>> +#else
>>> +
>>> +#include <asm/ticket_spinlock.h>
>>> +
>>> +#endif
>>> +
>>> +#include <asm/qrwlock.h>
>>> +
>>> +#endif /* __ASM_RISCV_SPINLOCK_H */
>>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
>>> index a2cde65b69e9..b811fa331982 100644
>>> --- a/arch/riscv/kernel/setup.c
>>> +++ b/arch/riscv/kernel/setup.c
>>> @@ -244,6 +244,43 @@ static void __init parse_dtb(void)
>>> #endif
>>> }
>>>
>>> +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
>>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
>>> +EXPORT_SYMBOL(qspinlock_key);
>>> +#endif
>>> +
>>> +static void __init riscv_spinlock_init(void)
>>> +{
>>> + char *using_ext = NULL;
>>> +
>>> + if (IS_ENABLED(CONFIG_RISCV_TICKET_SPINLOCKS)) {
>>> + pr_info("Ticket spinlock: enabled\n");
>>> + return;
>>> + }
>>> +
>>> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&
>>> + IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&
>>> + riscv_isa_extension_available(NULL, ZABHA) &&
>>> + riscv_isa_extension_available(NULL, ZACAS)) {
>>> + using_ext = "using Zabha";
>>> + } else if (riscv_isa_extension_available(NULL, ZICCRSE)) {
>>> + using_ext = "using Ziccrse";
>>> + }
>>> +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
>>> + else {
>> else if (IS_ENABLED(CONFIG_RISCV_COMBO_SPINLOCKS))
>>
>>> + static_branch_disable(&qspinlock_key);
>>> + pr_info("Ticket spinlock: enabled\n");
>>> +
>> nit: remove this blank line
>>
>>> + return;
>>> + }
>>> +#endif
>>> +
>>> + if (!using_ext)
>>> + pr_err("Queued spinlock without Zabha or Ziccrse");
>>> + else
>>> + pr_info("Queued spinlock %s: enabled\n", using_ext);
>>> +}
>>> +
>>> extern void __init init_rt_signal_env(void);
>>>
>>> void __init setup_arch(char **cmdline_p)
>>> @@ -297,6 +334,7 @@ void __init setup_arch(char **cmdline_p)
>>> riscv_set_dma_cache_alignment();
>>>
>>> riscv_user_isa_enable();
>>> + riscv_spinlock_init();
>>> }
>>>
>>> bool arch_cpu_is_hotpluggable(int cpu)
>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
>>> index 0655aa5b57b2..bf47cca2c375 100644
>>> --- a/include/asm-generic/qspinlock.h
>>> +++ b/include/asm-generic/qspinlock.h
>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>> }
>>> #endif
>>>
>>> +#ifndef __no_arch_spinlock_redefine
>> I'm not sure what's better/worse, but instead of inventing this
>> __no_arch_spinlock_redefine thing we could just name all the functions
>> something like __arch_spin* and then add defines for both to asm/spinlock.h,
>> i.e.
>>
>> #define queued_spin_lock(l) __arch_spin_lock(l)
>> ...
>>
>> #define ticket_spin_lock(l) __arch_spin_lock(l)
>> ...
>>
>> Besides not having to touch asm-generic/qspinlock.h and
>> asm-generic/ticket_spinlock.h it allows one to find the implementations
>> a bit easier as following a tag to arch_spin_lock() will take them to
>> queued_spin_lock() which will then take them to
>> arch/riscv/include/asm/spinlock.h and there they'll figure out how
>> __arch_spin_lock() was defined.
>>
>>> /*
>>> * Remapping spinlock architecture specific functions to the corresponding
>>> * queued spinlock functions.
>>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>> #define arch_spin_lock(l) queued_spin_lock(l)
>>> #define arch_spin_trylock(l) queued_spin_trylock(l)
>>> #define arch_spin_unlock(l) queued_spin_unlock(l)
>>> +#endif
>>>
>>> #endif /* __ASM_GENERIC_QSPINLOCK_H */
>>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
>>> index cfcff22b37b3..325779970d8a 100644
>>> --- a/include/asm-generic/ticket_spinlock.h
>>> +++ b/include/asm-generic/ticket_spinlock.h
>>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>> return (s16)((val >> 16) - (val & 0xffff)) > 1;
>>> }
>>>
>>> +#ifndef __no_arch_spinlock_redefine
>>> /*
>>> * Remapping spinlock architecture specific functions to the corresponding
>>> * ticket spinlock functions.
>>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>> #define arch_spin_lock(l) ticket_spin_lock(l)
>>> #define arch_spin_trylock(l) ticket_spin_trylock(l)
>>> #define arch_spin_unlock(l) ticket_spin_unlock(l)
>>> +#endif
>>>
>>> #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
>>> --
>>> 2.39.2
>>>
>> Thanks,
>> drew
^ permalink raw reply [flat|nested] 47+ messages in thread
* Re: [PATCH v4 13/13] riscv: Add qspinlock support
2024-08-15 13:27 ` Alexandre Ghiti
@ 2024-08-15 13:34 ` Andrew Jones
2024-08-17 5:08 ` Guo Ren
1 sibling, 0 replies; 47+ messages in thread
From: Andrew Jones @ 2024-08-15 13:34 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Alexandre Ghiti, Jonathan Corbet, Paul Walmsley, Palmer Dabbelt,
Albert Ou, Conor Dooley, Rob Herring, Krzysztof Kozlowski,
Andrea Parri, Nathan Chancellor, Peter Zijlstra, Ingo Molnar,
Will Deacon, Waiman Long, Boqun Feng, Arnd Bergmann,
Leonardo Bras, Guo Ren, linux-doc, devicetree, linux-kernel,
linux-riscv, linux-arch
On Thu, Aug 15, 2024 at 03:27:31PM GMT, Alexandre Ghiti wrote:
> Hi Andrew,
>
> On 01/08/2024 08:53, Alexandre Ghiti wrote:
> > On Wed, Jul 31, 2024 at 5:29 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> > > On Wed, Jul 31, 2024 at 09:24:05AM GMT, Alexandre Ghiti wrote:
> > > > In order to produce a generic kernel, a user can select
> > > > CONFIG_COMBO_SPINLOCKS which will fallback at runtime to the ticket
> > > > spinlock implementation if Zabha or Ziccrse are not present.
> > > >
> > > > Note that we can't use alternatives here because the discovery of
> > > > extensions is done too late and we need to start with the qspinlock
> > > > implementation because the ticket spinlock implementation would pollute
> > > > the spinlock value, so let's use static keys.
> > > >
> > > > This is largely based on Guo's work and Leonardo reviews at [1].
> > > >
> > > > Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> > > > Signed-off-by: Guo Ren <guoren@kernel.org>
> > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > > ---
> > > > .../locking/queued-spinlocks/arch-support.txt | 2 +-
> > > > arch/riscv/Kconfig | 29 +++++++++++++
> > > > arch/riscv/include/asm/Kbuild | 4 +-
> > > > arch/riscv/include/asm/spinlock.h | 43 +++++++++++++++++++
> > > > arch/riscv/kernel/setup.c | 38 ++++++++++++++++
> > > > include/asm-generic/qspinlock.h | 2 +
> > > > include/asm-generic/ticket_spinlock.h | 2 +
> > > > 7 files changed, 118 insertions(+), 2 deletions(-)
> > > > create mode 100644 arch/riscv/include/asm/spinlock.h
> > > >
> > > > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > index 22f2990392ff..cf26042480e2 100644
> > > > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > @@ -20,7 +20,7 @@
> > > > | openrisc: | ok |
> > > > | parisc: | TODO |
> > > > | powerpc: | ok |
> > > > - | riscv: | TODO |
> > > > + | riscv: | ok |
> > > > | s390: | TODO |
> > > > | sh: | TODO |
> > > > | sparc: | ok |
> > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > > index ef55ab94027e..c9ff8081efc1 100644
> > > > --- a/arch/riscv/Kconfig
> > > > +++ b/arch/riscv/Kconfig
> > > > @@ -79,6 +79,7 @@ config RISCV
> > > > select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
> > > > select ARCH_WANTS_NO_INSTR
> > > > select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
> > > > + select ARCH_WEAK_RELEASE_ACQUIRE if ARCH_USE_QUEUED_SPINLOCKS
> > > Why do we need this? Also, we presumably would prefer not to have it
> > > when we end up using ticket spinlocks when combo spinlocks is selected.
> > > Is there no way to avoid it?
> > I'll let Andrea answer this as he asked for it.
> >
> > > > select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
> > > > select BUILDTIME_TABLE_SORT if MMU
> > > > select CLINT_TIMER if RISCV_M_MODE
> > > > @@ -488,6 +489,34 @@ config NODES_SHIFT
> > > > Specify the maximum number of NUMA Nodes available on the target
> > > > system. Increases memory reserved to accommodate various tables.
> > > >
> > > > +choice
> > > > + prompt "RISC-V spinlock type"
> > > > + default RISCV_COMBO_SPINLOCKS
> > > > +
> > > > +config RISCV_TICKET_SPINLOCKS
> > > > + bool "Using ticket spinlock"
> > > > +
> > > > +config RISCV_QUEUED_SPINLOCKS
> > > > + bool "Using queued spinlock"
> > > > + depends on SMP && MMU && NONPORTABLE
> > > > + select ARCH_USE_QUEUED_SPINLOCKS
> > > > + help
> > > > + The queued spinlock implementation requires the forward progress
> > > > + guarantee of cmpxchg()/xchg() atomic operations: CAS with Zabha or
> > > > + LR/SC with Ziccrse provide such guarantee.
> > > > +
> > > > + Select this if and only if Zabha or Ziccrse is available on your
> > > > + platform.
> > > Maybe some text recommending combo spinlocks here? As it stands it sounds
> > > like enabling queued spinlocks is a bad idea for anybody that doesn't know
> > > what platforms will run the kernel they're building, which is all distros.
> > That's NONPORTABLE, so people enabling this config are supposed to
> > know that right?
> >
> > > > +
> > > > +config RISCV_COMBO_SPINLOCKS
> > > > + bool "Using combo spinlock"
> > > > + depends on SMP && MMU
> > > > + select ARCH_USE_QUEUED_SPINLOCKS
> > > > + help
> > > > + Embed both queued spinlock and ticket lock so that the spinlock
> > > > + implementation can be chosen at runtime.
> > > nit: Add a blank line here
> > Done
> >
> > > > +endchoice
> > > > +
> > > > config RISCV_ALTERNATIVE
> > > > bool
> > > > depends on !XIP_KERNEL
> > > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > > > index 5c589770f2a8..1c2618c964f0 100644
> > > > --- a/arch/riscv/include/asm/Kbuild
> > > > +++ b/arch/riscv/include/asm/Kbuild
> > > > @@ -5,10 +5,12 @@ syscall-y += syscall_table_64.h
> > > > generic-y += early_ioremap.h
> > > > generic-y += flat.h
> > > > generic-y += kvm_para.h
> > > > +generic-y += mcs_spinlock.h
> > > > generic-y += parport.h
> > > > -generic-y += spinlock.h
> > > > generic-y += spinlock_types.h
> > > > +generic-y += ticket_spinlock.h
> > > > generic-y += qrwlock.h
> > > > generic-y += qrwlock_types.h
> > > > +generic-y += qspinlock.h
> > > > generic-y += user.h
> > > > generic-y += vmlinux.lds.h
> > > > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > > > new file mode 100644
> > > > index 000000000000..503aef31db83
> > > > --- /dev/null
> > > > +++ b/arch/riscv/include/asm/spinlock.h
> > > > @@ -0,0 +1,43 @@
> > > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > > +
> > > > +#ifndef __ASM_RISCV_SPINLOCK_H
> > > > +#define __ASM_RISCV_SPINLOCK_H
> > > > +
> > > > +#ifdef CONFIG_RISCV_COMBO_SPINLOCKS
> > > > +#define _Q_PENDING_LOOPS (1 << 9)
> > > > +
> > > > +#define __no_arch_spinlock_redefine
> > > > +#include <asm/ticket_spinlock.h>
> > > > +#include <asm/qspinlock.h>
> > > > +#include <asm/alternative.h>
> > > We need asm/jump_label.h instead of asm/alternative.h, but...
> > >
> > > > +
> > > > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > > > +
> > > > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> > > > +static __always_inline type arch_spin_##op(type_lock lock) \
> > > > +{ \
> > > > + if (static_branch_unlikely(&qspinlock_key)) \
> > > > + return queued_spin_##op(lock); \
> > > > + return ticket_spin_##op(lock); \
> > > > +}
> > > ...do you know what impact this inlined static key check has on the
> > > kernel size?
> > No, I'll check, thanks.
>
>
> So I have just checked the size of the jump table section:
>
> * defconfig:
>
> - ticket: 26928 bytes
> - combo: 28320 bytes
>
> So that's a ~5% increase.
>
> * ubuntu config
>
> - ticket: 107840 bytes
> - combo: 174752 bytes
>
> And that's a ~62% increase.
>
> This is the ELF size difference between ticket and combo spinlocks:
>
> * ticket: 776915592 bytes
> * combo: 786958968 bytes
>
> So that's an increase of ~1.3% on the ELF.
>
> And the .text section size:
>
> * ticket: 12290960 bytes
> * combo: 12366644 bytes
>
> And that's a ~0.6% increase!
>
> Finally, I'd say the impact is very limited :)
Thanks for checking!
drew
* Re: [PATCH v4 13/13] riscv: Add qspinlock support
2024-08-15 13:27 ` Alexandre Ghiti
2024-08-15 13:34 ` Andrew Jones
@ 2024-08-17 5:08 ` Guo Ren
2024-08-21 12:18 ` Andrew Jones
2024-08-27 8:03 ` Alexandre Ghiti
1 sibling, 2 replies; 47+ messages in thread
From: Guo Ren @ 2024-08-17 5:08 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Alexandre Ghiti, Andrew Jones, Jonathan Corbet, Paul Walmsley,
Palmer Dabbelt, Albert Ou, Conor Dooley, Rob Herring,
Krzysztof Kozlowski, Andrea Parri, Nathan Chancellor,
Peter Zijlstra, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng,
Arnd Bergmann, Leonardo Bras, linux-doc, devicetree, linux-kernel,
linux-riscv, linux-arch
On Thu, Aug 15, 2024 at 9:27 PM Alexandre Ghiti <alex@ghiti.fr> wrote:
>
> Hi Andrew,
>
> On 01/08/2024 08:53, Alexandre Ghiti wrote:
> > On Wed, Jul 31, 2024 at 5:29 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> >> On Wed, Jul 31, 2024 at 09:24:05AM GMT, Alexandre Ghiti wrote:
> >>> In order to produce a generic kernel, a user can select
> >>> CONFIG_COMBO_SPINLOCKS which will fallback at runtime to the ticket
> >>> spinlock implementation if Zabha or Ziccrse are not present.
> >>>
> >>> Note that we can't use alternatives here because the discovery of
> >>> extensions is done too late and we need to start with the qspinlock
> >>> implementation because the ticket spinlock implementation would pollute
> >>> the spinlock value, so let's use static keys.
> >>>
> >>> This is largely based on Guo's work and Leonardo reviews at [1].
> >>>
> >>> Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> >>> Signed-off-by: Guo Ren <guoren@kernel.org>
> >>> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> >>> ---
> >>> .../locking/queued-spinlocks/arch-support.txt | 2 +-
> >>> arch/riscv/Kconfig | 29 +++++++++++++
> >>> arch/riscv/include/asm/Kbuild | 4 +-
> >>> arch/riscv/include/asm/spinlock.h | 43 +++++++++++++++++++
> >>> arch/riscv/kernel/setup.c | 38 ++++++++++++++++
> >>> include/asm-generic/qspinlock.h | 2 +
> >>> include/asm-generic/ticket_spinlock.h | 2 +
> >>> 7 files changed, 118 insertions(+), 2 deletions(-)
> >>> create mode 100644 arch/riscv/include/asm/spinlock.h
> >>>
> >>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>> index 22f2990392ff..cf26042480e2 100644
> >>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>> @@ -20,7 +20,7 @@
> >>> | openrisc: | ok |
> >>> | parisc: | TODO |
> >>> | powerpc: | ok |
> >>> - | riscv: | TODO |
> >>> + | riscv: | ok |
> >>> | s390: | TODO |
> >>> | sh: | TODO |
> >>> | sparc: | ok |
> >>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> >>> index ef55ab94027e..c9ff8081efc1 100644
> >>> --- a/arch/riscv/Kconfig
> >>> +++ b/arch/riscv/Kconfig
> >>> @@ -79,6 +79,7 @@ config RISCV
> >>> select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
> >>> select ARCH_WANTS_NO_INSTR
> >>> select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
> >>> + select ARCH_WEAK_RELEASE_ACQUIRE if ARCH_USE_QUEUED_SPINLOCKS
> >> Why do we need this? Also, we presumably would prefer not to have it
> >> when we end up using ticket spinlocks when combo spinlocks is selected.
> >> Is there no way to avoid it?
> > I'll let Andrea answer this as he asked for it.
> >
> >>> select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
> >>> select BUILDTIME_TABLE_SORT if MMU
> >>> select CLINT_TIMER if RISCV_M_MODE
> >>> @@ -488,6 +489,34 @@ config NODES_SHIFT
> >>> Specify the maximum number of NUMA Nodes available on the target
> >>> system. Increases memory reserved to accommodate various tables.
> >>>
> >>> +choice
> >>> + prompt "RISC-V spinlock type"
> >>> + default RISCV_COMBO_SPINLOCKS
> >>> +
> >>> +config RISCV_TICKET_SPINLOCKS
> >>> + bool "Using ticket spinlock"
> >>> +
> >>> +config RISCV_QUEUED_SPINLOCKS
> >>> + bool "Using queued spinlock"
> >>> + depends on SMP && MMU && NONPORTABLE
> >>> + select ARCH_USE_QUEUED_SPINLOCKS
> >>> + help
> >>> + The queued spinlock implementation requires the forward progress
> >>> + guarantee of cmpxchg()/xchg() atomic operations: CAS with Zabha or
> >>> + LR/SC with Ziccrse provide such guarantee.
> >>> +
> >>> + Select this if and only if Zabha or Ziccrse is available on your
> >>> + platform.
> >> Maybe some text recommending combo spinlocks here? As it stands it sounds
> >> like enabling queued spinlocks is a bad idea for anybody that doesn't know
> >> what platforms will run the kernel they're building, which is all distros.
> > That's NONPORTABLE, so people enabling this config are supposed to
> > know that right?
> >
> >>> +
> >>> +config RISCV_COMBO_SPINLOCKS
> >>> + bool "Using combo spinlock"
> >>> + depends on SMP && MMU
> >>> + select ARCH_USE_QUEUED_SPINLOCKS
> >>> + help
> >>> + Embed both queued spinlock and ticket lock so that the spinlock
> >>> + implementation can be chosen at runtime.
> >> nit: Add a blank line here
> > Done
> >
> >>> +endchoice
> >>> +
> >>> config RISCV_ALTERNATIVE
> >>> bool
> >>> depends on !XIP_KERNEL
> >>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> >>> index 5c589770f2a8..1c2618c964f0 100644
> >>> --- a/arch/riscv/include/asm/Kbuild
> >>> +++ b/arch/riscv/include/asm/Kbuild
> >>> @@ -5,10 +5,12 @@ syscall-y += syscall_table_64.h
> >>> generic-y += early_ioremap.h
> >>> generic-y += flat.h
> >>> generic-y += kvm_para.h
> >>> +generic-y += mcs_spinlock.h
> >>> generic-y += parport.h
> >>> -generic-y += spinlock.h
> >>> generic-y += spinlock_types.h
> >>> +generic-y += ticket_spinlock.h
> >>> generic-y += qrwlock.h
> >>> generic-y += qrwlock_types.h
> >>> +generic-y += qspinlock.h
> >>> generic-y += user.h
> >>> generic-y += vmlinux.lds.h
> >>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> >>> new file mode 100644
> >>> index 000000000000..503aef31db83
> >>> --- /dev/null
> >>> +++ b/arch/riscv/include/asm/spinlock.h
> >>> @@ -0,0 +1,43 @@
> >>> +/* SPDX-License-Identifier: GPL-2.0 */
> >>> +
> >>> +#ifndef __ASM_RISCV_SPINLOCK_H
> >>> +#define __ASM_RISCV_SPINLOCK_H
> >>> +
> >>> +#ifdef CONFIG_RISCV_COMBO_SPINLOCKS
> >>> +#define _Q_PENDING_LOOPS (1 << 9)
> >>> +
> >>> +#define __no_arch_spinlock_redefine
> >>> +#include <asm/ticket_spinlock.h>
> >>> +#include <asm/qspinlock.h>
> >>> +#include <asm/alternative.h>
> >> We need asm/jump_label.h instead of asm/alternative.h, but...
> >>
> >>> +
> >>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> >>> +
> >>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> >>> +static __always_inline type arch_spin_##op(type_lock lock) \
> >>> +{ \
> >>> + if (static_branch_unlikely(&qspinlock_key)) \
> >>> + return queued_spin_##op(lock); \
> >>> + return ticket_spin_##op(lock); \
> >>> +}
> >> ...do you know what impact this inlined static key check has on the
> >> kernel size?
> > No, I'll check, thanks.
>
>
> So I have just checked the size of the jump table section:
>
> * defconfig:
>
> - ticket: 26928 bytes
> - combo: 28320 bytes
>
> So that's a ~5% increase.
>
> * ubuntu config
>
> - ticket: 107840 bytes
> - combo: 174752 bytes
>
> And that's a ~62% increase.
The jump table section increase goes from ~5% with defconfig to ~62% with
the Ubuntu config. I think some configuration option triggers the jump
table's debug code. If it's not a debugging option, that's a bug in the
Ubuntu config.
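For reference, my rough understanding from include/linux/jump_label.h (worth
double-checking): with HAVE_ARCH_JUMP_LABEL_RELATIVE, which riscv selects,
every inlined static_branch_unlikely() call site emits one __jump_table
entry, so part of the growth may simply come from the number of inlined
arch_spin_*() call sites:

struct jump_entry {
	s32 code;	/* location of the nop/branch, relative */
	s32 target;	/* branch target, relative */
	long key;	/* reference to the static key (+ flag bits) */
};			/* i.e. 16 bytes per inlined call site on rv64 */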
>
> This is the ELF size difference between ticket and combo spinlocks:
>
> * ticket: 776915592 bytes
> * combo: 786958968 bytes
>
> So that's an increase of ~1.3% on the ELF.
>
> And the .text section size:
>
> * ticket: 12290960 bytes
> * combo: 12366644 bytes
>
> And that's a ~0.6% increase!
>
> Finally, I'd say the impact is very limited :)
>
> Thanks,
>
> Alex
>
>
> >
> >> Actually, why not use ALTERNATIVE with any nonzero cpufeature value.
> >> Then add code to riscv_cpufeature_patch_check() to return true when
> >> qspinlocks should be enabled (based on the value of some global set
> >> during riscv_spinlock_init)?
> > As discussed with Guo in the previous iteration, the patching of the
> > alternatives intervenes far after the first use of the spinlocks and
> > the ticket spinlock implementation pollutes the spinlock value, so
> > we'd have to unconditionally start with the qspinlock implementation
> > and after switch to the ticket implementation if not supported by the
> > platform. It works but it's dirty, I really don't like this hack.
> >
> > We could though:
> > - add an initial value to the alternatives (not sure it's feasible though)
> > - make the patching of alternatives happen sooner by parsing the isa
> > string sooner, either in DT or ACPI (I have a working PoC for very
> > early parsing of ACPI).
> >
> > I intend to do the latter as I think we should be aware of the
> > extensions sooner in the boot process, so I'll change that to the
> > alternatives when it's done. WDYT, any other idea?
> >
> >
> >>> +
> >>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> >>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> >>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> >>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> >>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> >>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> >>> +
> >>> +#elif defined(CONFIG_RISCV_QUEUED_SPINLOCKS)
> >>> +
> >>> +#include <asm/qspinlock.h>
> >>> +
> >>> +#else
> >>> +
> >>> +#include <asm/ticket_spinlock.h>
> >>> +
> >>> +#endif
> >>> +
> >>> +#include <asm/qrwlock.h>
> >>> +
> >>> +#endif /* __ASM_RISCV_SPINLOCK_H */
> >>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> >>> index a2cde65b69e9..b811fa331982 100644
> >>> --- a/arch/riscv/kernel/setup.c
> >>> +++ b/arch/riscv/kernel/setup.c
> >>> @@ -244,6 +244,43 @@ static void __init parse_dtb(void)
> >>> #endif
> >>> }
> >>>
> >>> +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
> >>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> >>> +EXPORT_SYMBOL(qspinlock_key);
> >>> +#endif
> >>> +
> >>> +static void __init riscv_spinlock_init(void)
> >>> +{
> >>> + char *using_ext = NULL;
> >>> +
> >>> + if (IS_ENABLED(CONFIG_RISCV_TICKET_SPINLOCKS)) {
> >>> + pr_info("Ticket spinlock: enabled\n");
> >>> + return;
> >>> + }
> >>> +
> >>> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&
> >>> + IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&
> >>> + riscv_isa_extension_available(NULL, ZABHA) &&
> >>> + riscv_isa_extension_available(NULL, ZACAS)) {
> >>> + using_ext = "using Zabha";
> >>> + } else if (riscv_isa_extension_available(NULL, ZICCRSE)) {
> >>> + using_ext = "using Ziccrse";
> >>> + }
> >>> +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
> >>> + else {
> >> else if (IS_ENABLED(CONFIG_RISCV_COMBO_SPINLOCKS))
> >>
> >>> + static_branch_disable(&qspinlock_key);
> >>> + pr_info("Ticket spinlock: enabled\n");
> >>> +
> >> nit: remove this blank line
> >>
> >>> + return;
> >>> + }
> >>> +#endif
> >>> +
> >>> + if (!using_ext)
> >>> + pr_err("Queued spinlock without Zabha or Ziccrse");
> >>> + else
> >>> + pr_info("Queued spinlock %s: enabled\n", using_ext);
> >>> +}
> >>> +
> >>> extern void __init init_rt_signal_env(void);
> >>>
> >>> void __init setup_arch(char **cmdline_p)
> >>> @@ -297,6 +334,7 @@ void __init setup_arch(char **cmdline_p)
> >>> riscv_set_dma_cache_alignment();
> >>>
> >>> riscv_user_isa_enable();
> >>> + riscv_spinlock_init();
> >>> }
> >>>
> >>> bool arch_cpu_is_hotpluggable(int cpu)
> >>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> >>> index 0655aa5b57b2..bf47cca2c375 100644
> >>> --- a/include/asm-generic/qspinlock.h
> >>> +++ b/include/asm-generic/qspinlock.h
> >>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>> }
> >>> #endif
> >>>
> >>> +#ifndef __no_arch_spinlock_redefine
> >> I'm not sure what's better/worse, but instead of inventing this
> >> __no_arch_spinlock_redefine thing we could just name all the functions
> >> something like __arch_spin* and then add defines for both to asm/spinlock.h,
> >> i.e.
> >>
> >> #define queued_spin_lock(l) __arch_spin_lock(l)
> >> ...
> >>
> >> #define ticket_spin_lock(l) __arch_spin_lock(l)
> >> ...
> >>
> >> Besides not having to touch asm-generic/qspinlock.h and
> >> asm-generic/ticket_spinlock.h it allows one to find the implementations
> >> a bit easier as following a tag to arch_spin_lock() will take them to
> >> queued_spin_lock() which will then take them to
> >> arch/riscv/include/asm/spinlock.h and there they'll figure out how
> >> __arch_spin_lock() was defined.
> >>
> >>> /*
> >>> * Remapping spinlock architecture specific functions to the corresponding
> >>> * queued spinlock functions.
> >>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>> #define arch_spin_lock(l) queued_spin_lock(l)
> >>> #define arch_spin_trylock(l) queued_spin_trylock(l)
> >>> #define arch_spin_unlock(l) queued_spin_unlock(l)
> >>> +#endif
> >>>
> >>> #endif /* __ASM_GENERIC_QSPINLOCK_H */
> >>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> >>> index cfcff22b37b3..325779970d8a 100644
> >>> --- a/include/asm-generic/ticket_spinlock.h
> >>> +++ b/include/asm-generic/ticket_spinlock.h
> >>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>> return (s16)((val >> 16) - (val & 0xffff)) > 1;
> >>> }
> >>>
> >>> +#ifndef __no_arch_spinlock_redefine
> >>> /*
> >>> * Remapping spinlock architecture specific functions to the corresponding
> >>> * ticket spinlock functions.
> >>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>> #define arch_spin_lock(l) ticket_spin_lock(l)
> >>> #define arch_spin_trylock(l) ticket_spin_trylock(l)
> >>> #define arch_spin_unlock(l) ticket_spin_unlock(l)
> >>> +#endif
> >>>
> >>> #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> >>> --
> >>> 2.39.2
> >>>
> >> Thanks,
> >> drew
--
Best Regards
Guo Ren
* Re: [PATCH v4 13/13] riscv: Add qspinlock support
2024-08-17 5:08 ` Guo Ren
@ 2024-08-21 12:18 ` Andrew Jones
2024-08-27 8:02 ` Alexandre Ghiti
2024-08-27 8:03 ` Alexandre Ghiti
1 sibling, 1 reply; 47+ messages in thread
From: Andrew Jones @ 2024-08-21 12:18 UTC (permalink / raw)
To: Guo Ren
Cc: Alexandre Ghiti, Alexandre Ghiti, Jonathan Corbet, Paul Walmsley,
Palmer Dabbelt, Albert Ou, Conor Dooley, Rob Herring,
Krzysztof Kozlowski, Andrea Parri, Nathan Chancellor,
Peter Zijlstra, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng,
Arnd Bergmann, Leonardo Bras, linux-doc, devicetree, linux-kernel,
linux-riscv, linux-arch
On Sat, Aug 17, 2024 at 01:08:34PM GMT, Guo Ren wrote:
...
> > So I have just checked the size of the jump table section:
> >
> > * defconfig:
> >
> > - ticket: 26928 bytes
> > - combo: 28320 bytes
> >
> > So that's a ~5% increase.
> >
> > * ubuntu config
> >
> > - ticket: 107840 bytes
> > - combo: 174752 bytes
> >
> > And that's a ~62% increase.
> The jump table section increase goes from ~5% with defconfig to ~62% with
> the Ubuntu config. I think some configuration option triggers the jump
> table's debug code. If it's not a debugging option, that's a bug in the
> Ubuntu config.
And I just remembered we wanted to check with CONFIG_CC_OPTIMIZE_FOR_SIZE
Thanks,
drew
>
> >
> > This is the ELF size difference between ticket and combo spinlocks:
> >
> > * ticket: 776915592 bytes
> > * combo: 786958968 bytes
> >
> > So that's an increase of ~1.3% on the ELF.
> >
> > And the .text section size:
> >
> > * ticket: 12290960 bytes
> > * combo: 12366644 bytes
> >
> > And that's a ~0.6% increase!
> >
> > Finally, I'd say the impact is very limited :)
> >
> > Thanks,
> >
> > Alex
> >
> >
* Re: [PATCH v4 13/13] riscv: Add qspinlock support
2024-08-21 12:18 ` Andrew Jones
@ 2024-08-27 8:02 ` Alexandre Ghiti
0 siblings, 0 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2024-08-27 8:02 UTC (permalink / raw)
To: Andrew Jones
Cc: Guo Ren, Alexandre Ghiti, Jonathan Corbet, Paul Walmsley,
Palmer Dabbelt, Albert Ou, Conor Dooley, Rob Herring,
Krzysztof Kozlowski, Andrea Parri, Nathan Chancellor,
Peter Zijlstra, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng,
Arnd Bergmann, Leonardo Bras, linux-doc, devicetree, linux-kernel,
linux-riscv, linux-arch
Hi drew,
On Wed, Aug 21, 2024 at 2:18 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Sat, Aug 17, 2024 at 01:08:34PM GMT, Guo Ren wrote:
> ...
> > > So I have just checked the size of the jump table section:
> > >
> > > * defconfig:
> > >
> > > - ticket: 26928 bytes
> > > - combo: 28320 bytes
> > >
> > > So that's a ~5% increase.
> > >
> > > * ubuntu config
> > >
> > > - ticket: 107840 bytes
> > > - combo: 174752 bytes
> > >
> > > And that's a ~62% increase.
> > The jump table section increase goes from ~5% with defconfig to ~62% with
> > the Ubuntu config. I think some configuration option triggers the jump
> > table's debug code. If it's not a debugging option, that's a bug in the
> > Ubuntu config.
>
> And I just remembered we wanted to check with CONFIG_CC_OPTIMIZE_FOR_SIZE
Here we go!
The size of the jump table section:
* defconfig:
- ticket: 12176 bytes
- combo: 13168 bytes
So that's a ~8% increase.
* ubuntu config
- ticket: 73920 bytes
- combo: 84656 bytes
And that's a ~14% increase.
This is the ELF size difference between ticket and combo spinlocks:
* ticket: 695906872 bytes
* combo: 697558496 bytes
So that's an increase of ~0.23% on the ELF.
And the .text section size:
* ticket: 10322820 bytes
* combo: 10332136 bytes
And that's a ~0.09% increase!
So that's even better :)
Thanks for asking!
Alex
>
> Thanks,
> drew
>
> >
> > >
> > > This is the ELF size difference between ticket and combo spinlocks:
> > >
> > > * ticket: 776915592 bytes
> > > * combo: 786958968 bytes
> > >
> > > So that's an increase of ~1.3% on the ELF.
> > >
> > > And the .text section size:
> > >
> > > * ticket: 12290960 bytes
> > > * combo: 12366644 bytes
> > >
> > > And that's a ~0.6% increase!
> > >
> > > Finally, I'd say the impact is very limited :)
> > >
> > > Thanks,
> > >
> > > Alex
> > >
> > >
* Re: [PATCH v4 13/13] riscv: Add qspinlock support
2024-08-17 5:08 ` Guo Ren
2024-08-21 12:18 ` Andrew Jones
@ 2024-08-27 8:03 ` Alexandre Ghiti
1 sibling, 0 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2024-08-27 8:03 UTC (permalink / raw)
To: Guo Ren, Emil Renner Berthing
Cc: Alexandre Ghiti, Andrew Jones, Jonathan Corbet, Paul Walmsley,
Palmer Dabbelt, Albert Ou, Conor Dooley, Rob Herring,
Krzysztof Kozlowski, Andrea Parri, Nathan Chancellor,
Peter Zijlstra, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng,
Arnd Bergmann, Leonardo Bras, linux-doc, devicetree, linux-kernel,
linux-riscv, linux-arch
Hi Guo,
On Sat, Aug 17, 2024 at 7:08 AM Guo Ren <guoren@kernel.org> wrote:
>
> On Thu, Aug 15, 2024 at 9:27 PM Alexandre Ghiti <alex@ghiti.fr> wrote:
> >
> > Hi Andrew,
> >
> > On 01/08/2024 08:53, Alexandre Ghiti wrote:
> > > On Wed, Jul 31, 2024 at 5:29 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> > >> On Wed, Jul 31, 2024 at 09:24:05AM GMT, Alexandre Ghiti wrote:
> > >>> In order to produce a generic kernel, a user can select
> > >>> CONFIG_COMBO_SPINLOCKS which will fallback at runtime to the ticket
> > >>> spinlock implementation if Zabha or Ziccrse are not present.
> > >>>
> > >>> Note that we can't use alternatives here because the discovery of
> > >>> extensions is done too late and we need to start with the qspinlock
> > >>> implementation because the ticket spinlock implementation would pollute
> > >>> the spinlock value, so let's use static keys.
> > >>>
> > >>> This is largely based on Guo's work and Leonardo reviews at [1].
> > >>>
> > >>> Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> > >>> Signed-off-by: Guo Ren <guoren@kernel.org>
> > >>> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > >>> ---
> > >>> .../locking/queued-spinlocks/arch-support.txt | 2 +-
> > >>> arch/riscv/Kconfig | 29 +++++++++++++
> > >>> arch/riscv/include/asm/Kbuild | 4 +-
> > >>> arch/riscv/include/asm/spinlock.h | 43 +++++++++++++++++++
> > >>> arch/riscv/kernel/setup.c | 38 ++++++++++++++++
> > >>> include/asm-generic/qspinlock.h | 2 +
> > >>> include/asm-generic/ticket_spinlock.h | 2 +
> > >>> 7 files changed, 118 insertions(+), 2 deletions(-)
> > >>> create mode 100644 arch/riscv/include/asm/spinlock.h
> > >>>
> > >>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > >>> index 22f2990392ff..cf26042480e2 100644
> > >>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > >>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > >>> @@ -20,7 +20,7 @@
> > >>> | openrisc: | ok |
> > >>> | parisc: | TODO |
> > >>> | powerpc: | ok |
> > >>> - | riscv: | TODO |
> > >>> + | riscv: | ok |
> > >>> | s390: | TODO |
> > >>> | sh: | TODO |
> > >>> | sparc: | ok |
> > >>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > >>> index ef55ab94027e..c9ff8081efc1 100644
> > >>> --- a/arch/riscv/Kconfig
> > >>> +++ b/arch/riscv/Kconfig
> > >>> @@ -79,6 +79,7 @@ config RISCV
> > >>> select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
> > >>> select ARCH_WANTS_NO_INSTR
> > >>> select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
> > >>> + select ARCH_WEAK_RELEASE_ACQUIRE if ARCH_USE_QUEUED_SPINLOCKS
> > >> Why do we need this? Also, we presumably would prefer not to have it
> > >> when we end up using ticket spinlocks when combo spinlocks is selected.
> > >> Is there no way to avoid it?
> > > I'll let Andrea answer this as he asked for it.
> > >
> > >>> select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
> > >>> select BUILDTIME_TABLE_SORT if MMU
> > >>> select CLINT_TIMER if RISCV_M_MODE
> > >>> @@ -488,6 +489,34 @@ config NODES_SHIFT
> > >>> Specify the maximum number of NUMA Nodes available on the target
> > >>> system. Increases memory reserved to accommodate various tables.
> > >>>
> > >>> +choice
> > >>> + prompt "RISC-V spinlock type"
> > >>> + default RISCV_COMBO_SPINLOCKS
> > >>> +
> > >>> +config RISCV_TICKET_SPINLOCKS
> > >>> + bool "Using ticket spinlock"
> > >>> +
> > >>> +config RISCV_QUEUED_SPINLOCKS
> > >>> + bool "Using queued spinlock"
> > >>> + depends on SMP && MMU && NONPORTABLE
> > >>> + select ARCH_USE_QUEUED_SPINLOCKS
> > >>> + help
> > >>> + The queued spinlock implementation requires the forward progress
> > >>> + guarantee of cmpxchg()/xchg() atomic operations: CAS with Zabha or
> > >>> + LR/SC with Ziccrse provide such guarantee.
> > >>> +
> > >>> + Select this if and only if Zabha or Ziccrse is available on your
> > >>> + platform.
> > >> Maybe some text recommending combo spinlocks here? As it stands it sounds
> > >> like enabling queued spinlocks is a bad idea for anybody that doesn't know
> > >> what platforms will run the kernel they're building, which is all distros.
> > > That's NONPORTABLE, so people enabling this config are supposed to
> > > know that right?
> > >
> > >>> +
> > >>> +config RISCV_COMBO_SPINLOCKS
> > >>> + bool "Using combo spinlock"
> > >>> + depends on SMP && MMU
> > >>> + select ARCH_USE_QUEUED_SPINLOCKS
> > >>> + help
> > >>> + Embed both queued spinlock and ticket lock so that the spinlock
> > >>> + implementation can be chosen at runtime.
> > >> nit: Add a blank line here
> > > Done
> > >
> > >>> +endchoice
> > >>> +
> > >>> config RISCV_ALTERNATIVE
> > >>> bool
> > >>> depends on !XIP_KERNEL
> > >>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > >>> index 5c589770f2a8..1c2618c964f0 100644
> > >>> --- a/arch/riscv/include/asm/Kbuild
> > >>> +++ b/arch/riscv/include/asm/Kbuild
> > >>> @@ -5,10 +5,12 @@ syscall-y += syscall_table_64.h
> > >>> generic-y += early_ioremap.h
> > >>> generic-y += flat.h
> > >>> generic-y += kvm_para.h
> > >>> +generic-y += mcs_spinlock.h
> > >>> generic-y += parport.h
> > >>> -generic-y += spinlock.h
> > >>> generic-y += spinlock_types.h
> > >>> +generic-y += ticket_spinlock.h
> > >>> generic-y += qrwlock.h
> > >>> generic-y += qrwlock_types.h
> > >>> +generic-y += qspinlock.h
> > >>> generic-y += user.h
> > >>> generic-y += vmlinux.lds.h
> > >>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > >>> new file mode 100644
> > >>> index 000000000000..503aef31db83
> > >>> --- /dev/null
> > >>> +++ b/arch/riscv/include/asm/spinlock.h
> > >>> @@ -0,0 +1,43 @@
> > >>> +/* SPDX-License-Identifier: GPL-2.0 */
> > >>> +
> > >>> +#ifndef __ASM_RISCV_SPINLOCK_H
> > >>> +#define __ASM_RISCV_SPINLOCK_H
> > >>> +
> > >>> +#ifdef CONFIG_RISCV_COMBO_SPINLOCKS
> > >>> +#define _Q_PENDING_LOOPS (1 << 9)
> > >>> +
> > >>> +#define __no_arch_spinlock_redefine
> > >>> +#include <asm/ticket_spinlock.h>
> > >>> +#include <asm/qspinlock.h>
> > >>> +#include <asm/alternative.h>
> > >> We need asm/jump_label.h instead of asm/alternative.h, but...
> > >>
> > >>> +
> > >>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > >>> +
> > >>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> > >>> +static __always_inline type arch_spin_##op(type_lock lock) \
> > >>> +{ \
> > >>> + if (static_branch_unlikely(&qspinlock_key)) \
> > >>> + return queued_spin_##op(lock); \
> > >>> + return ticket_spin_##op(lock); \
> > >>> +}
> > >> ...do you know what impact this inlined static key check has on the
> > >> kernel size?
> > > No, I'll check, thanks.
> >
> >
> > So I have just checked the size of the jump table section:
> >
> > * defconfig:
> >
> > - ticket: 26928 bytes
> > - combo: 28320 bytes
> >
> > So that's a ~5% increase.
> >
> > * ubuntu config
> >
> > - ticket: 107840 bytes
> > - combo: 174752 bytes
> >
> > And that's a ~62% increase.
> The jump table section increase goes from ~5% with defconfig to ~62% with
> the Ubuntu config. I think some configuration option triggers the jump
> table's debug code. If it's not a debugging option, that's a bug in the
> Ubuntu config.
Let's cc @Emil Renner Berthing to make him aware of this "issue" and
maybe he even has an idea :)
>
> >
> > This is the ELF size difference between ticket and combo spinlocks:
> >
> > * ticket: 776915592 bytes
> > * combo: 786958968 bytes
> >
> > So that's an increase of ~1.3% on the ELF.
> >
> > And the .text section size:
> >
> > * ticket: 12290960 bytes
> > * combo: 12366644 bytes
> >
> > And that's a ~0.6% increase!
> >
> > Finally, I'd say the impact is very limited :)
> >
> > Thanks,
> >
> > Alex
> >
> >
> > >
> > >> Actually, why not use ALTERNATIVE with any nonzero cpufeature value.
> > >> Then add code to riscv_cpufeature_patch_check() to return true when
> > >> qspinlocks should be enabled (based on the value of some global set
> > >> during riscv_spinlock_init)?
> > > As discussed with Guo in the previous iteration, the patching of the
> > > alternatives intervenes far after the first use of the spinlocks and
> > > the ticket spinlock implementation pollutes the spinlock value, so
> > > we'd have to unconditionally start with the qspinlock implementation
> > > and after switch to the ticket implementation if not supported by the
> > > platform. It works but it's dirty, I really don't like this hack.
> > >
> > > We could though:
> > > - add an initial value to the alternatives (not sure it's feasible though)
> > > - make the patching of alternatives happen sooner by parsing the isa
> > > string sooner, either in DT or ACPI (I have a working PoC for very
> > > early parsing of ACPI).
> > >
> > > I intend to do the latter as I think we should be aware of the
> > > extensions sooner in the boot process, so I'll change that to the
> > > alternatives when it's done. WDYT, any other idea?
> > >
> > >
> > >>> +
> > >>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> > >>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> > >>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> > >>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> > >>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> > >>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> > >>> +
> > >>> +#elif defined(CONFIG_RISCV_QUEUED_SPINLOCKS)
> > >>> +
> > >>> +#include <asm/qspinlock.h>
> > >>> +
> > >>> +#else
> > >>> +
> > >>> +#include <asm/ticket_spinlock.h>
> > >>> +
> > >>> +#endif
> > >>> +
> > >>> +#include <asm/qrwlock.h>
> > >>> +
> > >>> +#endif /* __ASM_RISCV_SPINLOCK_H */
> > >>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > >>> index a2cde65b69e9..b811fa331982 100644
> > >>> --- a/arch/riscv/kernel/setup.c
> > >>> +++ b/arch/riscv/kernel/setup.c
> > >>> @@ -244,6 +244,43 @@ static void __init parse_dtb(void)
> > >>> #endif
> > >>> }
> > >>>
> > >>> +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
> > >>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> > >>> +EXPORT_SYMBOL(qspinlock_key);
> > >>> +#endif
> > >>> +
> > >>> +static void __init riscv_spinlock_init(void)
> > >>> +{
> > >>> + char *using_ext = NULL;
> > >>> +
> > >>> + if (IS_ENABLED(CONFIG_RISCV_TICKET_SPINLOCKS)) {
> > >>> + pr_info("Ticket spinlock: enabled\n");
> > >>> + return;
> > >>> + }
> > >>> +
> > >>> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&
> > >>> + IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&
> > >>> + riscv_isa_extension_available(NULL, ZABHA) &&
> > >>> + riscv_isa_extension_available(NULL, ZACAS)) {
> > >>> + using_ext = "using Zabha";
> > >>> + } else if (riscv_isa_extension_available(NULL, ZICCRSE)) {
> > >>> + using_ext = "using Ziccrse";
> > >>> + }
> > >>> +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
> > >>> + else {
> > >> else if (IS_ENABLED(CONFIG_RISCV_COMBO_SPINLOCKS))
> > >>
> > >>> + static_branch_disable(&qspinlock_key);
> > >>> + pr_info("Ticket spinlock: enabled\n");
> > >>> +
> > >> nit: remove this blank line
> > >>
> > >>> + return;
> > >>> + }
> > >>> +#endif
> > >>> +
> > >>> + if (!using_ext)
> > >>> + pr_err("Queued spinlock without Zabha or Ziccrse");
> > >>> + else
> > >>> + pr_info("Queued spinlock %s: enabled\n", using_ext);
> > >>> +}
> > >>> +
> > >>> extern void __init init_rt_signal_env(void);
> > >>>
> > >>> void __init setup_arch(char **cmdline_p)
> > >>> @@ -297,6 +334,7 @@ void __init setup_arch(char **cmdline_p)
> > >>> riscv_set_dma_cache_alignment();
> > >>>
> > >>> riscv_user_isa_enable();
> > >>> + riscv_spinlock_init();
> > >>> }
> > >>>
> > >>> bool arch_cpu_is_hotpluggable(int cpu)
> > >>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > >>> index 0655aa5b57b2..bf47cca2c375 100644
> > >>> --- a/include/asm-generic/qspinlock.h
> > >>> +++ b/include/asm-generic/qspinlock.h
> > >>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > >>> }
> > >>> #endif
> > >>>
> > >>> +#ifndef __no_arch_spinlock_redefine
> > >> I'm not sure what's better/worse, but instead of inventing this
> > >> __no_arch_spinlock_redefine thing we could just name all the functions
> > >> something like __arch_spin* and then add defines for both to asm/spinlock.h,
> > >> i.e.
> > >>
> > >> #define queued_spin_lock(l) __arch_spin_lock(l)
> > >> ...
> > >>
> > >> #define ticket_spin_lock(l) __arch_spin_lock(l)
> > >> ...
> > >>
> > >> Besides not having to touch asm-generic/qspinlock.h and
> > >> asm-generic/ticket_spinlock.h it allows one to find the implementations
> > >> a bit easier as following a tag to arch_spin_lock() will take them to
> > >> queued_spin_lock() which will then take them to
> > >> arch/riscv/include/asm/spinlock.h and there they'll figure out how
> > >> __arch_spin_lock() was defined.
> > >>
> > >>> /*
> > >>> * Remapping spinlock architecture specific functions to the corresponding
> > >>> * queued spinlock functions.
> > >>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > >>> #define arch_spin_lock(l) queued_spin_lock(l)
> > >>> #define arch_spin_trylock(l) queued_spin_trylock(l)
> > >>> #define arch_spin_unlock(l) queued_spin_unlock(l)
> > >>> +#endif
> > >>>
> > >>> #endif /* __ASM_GENERIC_QSPINLOCK_H */
> > >>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> > >>> index cfcff22b37b3..325779970d8a 100644
> > >>> --- a/include/asm-generic/ticket_spinlock.h
> > >>> +++ b/include/asm-generic/ticket_spinlock.h
> > >>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > >>> return (s16)((val >> 16) - (val & 0xffff)) > 1;
> > >>> }
> > >>>
> > >>> +#ifndef __no_arch_spinlock_redefine
> > >>> /*
> > >>> * Remapping spinlock architecture specific functions to the corresponding
> > >>> * ticket spinlock functions.
> > >>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > >>> #define arch_spin_lock(l) ticket_spin_lock(l)
> > >>> #define arch_spin_trylock(l) ticket_spin_trylock(l)
> > >>> #define arch_spin_unlock(l) ticket_spin_unlock(l)
> > >>> +#endif
> > >>>
> > >>> #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> > >>> --
> > >>> 2.39.2
> > >>>
> > >> Thanks,
> > >> drew
>
>
>
> --
> Best Regards
> Guo Ren
* Re: [PATCH v4 13/13] riscv: Add qspinlock support
2024-07-31 15:29 ` Andrew Jones
2024-08-01 6:53 ` Alexandre Ghiti
@ 2024-08-01 8:43 ` Alexandre Ghiti
2024-08-01 10:15 ` Andrew Jones
2024-08-01 9:48 ` Andrea Parri
2 siblings, 1 reply; 47+ messages in thread
From: Alexandre Ghiti @ 2024-08-01 8:43 UTC (permalink / raw)
To: Andrew Jones
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
(I missed a bunch of comments; they are addressed here)
On Wed, Jul 31, 2024 at 5:29 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Wed, Jul 31, 2024 at 09:24:05AM GMT, Alexandre Ghiti wrote:
> > In order to produce a generic kernel, a user can select
> > CONFIG_COMBO_SPINLOCKS which will fallback at runtime to the ticket
> > spinlock implementation if Zabha or Ziccrse are not present.
> >
> > Note that we can't use alternatives here because the discovery of
> > extensions is done too late and we need to start with the qspinlock
> > implementation because the ticket spinlock implementation would pollute
> > the spinlock value, so let's use static keys.
> >
> > This is largely based on Guo's work and Leonardo reviews at [1].
> >
> > Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> > Signed-off-by: Guo Ren <guoren@kernel.org>
> > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > ---
> > .../locking/queued-spinlocks/arch-support.txt | 2 +-
> > arch/riscv/Kconfig | 29 +++++++++++++
> > arch/riscv/include/asm/Kbuild | 4 +-
> > arch/riscv/include/asm/spinlock.h | 43 +++++++++++++++++++
> > arch/riscv/kernel/setup.c | 38 ++++++++++++++++
> > include/asm-generic/qspinlock.h | 2 +
> > include/asm-generic/ticket_spinlock.h | 2 +
> > 7 files changed, 118 insertions(+), 2 deletions(-)
> > create mode 100644 arch/riscv/include/asm/spinlock.h
> >
> > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > index 22f2990392ff..cf26042480e2 100644
> > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > @@ -20,7 +20,7 @@
> > | openrisc: | ok |
> > | parisc: | TODO |
> > | powerpc: | ok |
> > - | riscv: | TODO |
> > + | riscv: | ok |
> > | s390: | TODO |
> > | sh: | TODO |
> > | sparc: | ok |
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index ef55ab94027e..c9ff8081efc1 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -79,6 +79,7 @@ config RISCV
> > select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
> > select ARCH_WANTS_NO_INSTR
> > select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
> > + select ARCH_WEAK_RELEASE_ACQUIRE if ARCH_USE_QUEUED_SPINLOCKS
>
> Why do we need this? Also, we presumably would prefer not to have it
> when we end up using ticket spinlocks when combo spinlocks is selected.
> Is there no way to avoid it?
>
> > select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
> > select BUILDTIME_TABLE_SORT if MMU
> > select CLINT_TIMER if RISCV_M_MODE
> > @@ -488,6 +489,34 @@ config NODES_SHIFT
> > Specify the maximum number of NUMA Nodes available on the target
> > system. Increases memory reserved to accommodate various tables.
> >
> > +choice
> > + prompt "RISC-V spinlock type"
> > + default RISCV_COMBO_SPINLOCKS
> > +
> > +config RISCV_TICKET_SPINLOCKS
> > + bool "Using ticket spinlock"
> > +
> > +config RISCV_QUEUED_SPINLOCKS
> > + bool "Using queued spinlock"
> > + depends on SMP && MMU && NONPORTABLE
> > + select ARCH_USE_QUEUED_SPINLOCKS
> > + help
> > + The queued spinlock implementation requires the forward progress
> > + guarantee of cmpxchg()/xchg() atomic operations: CAS with Zabha or
> > + LR/SC with Ziccrse provide such guarantee.
> > +
> > + Select this if and only if Zabha or Ziccrse is available on your
> > + platform.
>
> Maybe some text recommending combo spinlocks here? As it stands it sounds
> like enabling queued spinlocks is a bad idea for anybody that doesn't know
> what platforms will run the kernel they're building, which is all distros.
>
> > +
> > +config RISCV_COMBO_SPINLOCKS
> > + bool "Using combo spinlock"
> > + depends on SMP && MMU
> > + select ARCH_USE_QUEUED_SPINLOCKS
> > + help
> > + Embed both queued spinlock and ticket lock so that the spinlock
> > + implementation can be chosen at runtime.
>
> nit: Add a blank line here
>
> > +endchoice
> > +
> > config RISCV_ALTERNATIVE
> > bool
> > depends on !XIP_KERNEL
> > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > index 5c589770f2a8..1c2618c964f0 100644
> > --- a/arch/riscv/include/asm/Kbuild
> > +++ b/arch/riscv/include/asm/Kbuild
> > @@ -5,10 +5,12 @@ syscall-y += syscall_table_64.h
> > generic-y += early_ioremap.h
> > generic-y += flat.h
> > generic-y += kvm_para.h
> > +generic-y += mcs_spinlock.h
> > generic-y += parport.h
> > -generic-y += spinlock.h
> > generic-y += spinlock_types.h
> > +generic-y += ticket_spinlock.h
> > generic-y += qrwlock.h
> > generic-y += qrwlock_types.h
> > +generic-y += qspinlock.h
> > generic-y += user.h
> > generic-y += vmlinux.lds.h
> > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > new file mode 100644
> > index 000000000000..503aef31db83
> > --- /dev/null
> > +++ b/arch/riscv/include/asm/spinlock.h
> > @@ -0,0 +1,43 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +
> > +#ifndef __ASM_RISCV_SPINLOCK_H
> > +#define __ASM_RISCV_SPINLOCK_H
> > +
> > +#ifdef CONFIG_RISCV_COMBO_SPINLOCKS
> > +#define _Q_PENDING_LOOPS (1 << 9)
> > +
> > +#define __no_arch_spinlock_redefine
> > +#include <asm/ticket_spinlock.h>
> > +#include <asm/qspinlock.h>
> > +#include <asm/alternative.h>
>
> We need asm/jump_label.h instead of asm/alternative.h, but...
Done
>
> > +
> > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > +
> > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> > +static __always_inline type arch_spin_##op(type_lock lock) \
> > +{ \
> > + if (static_branch_unlikely(&qspinlock_key)) \
> > + return queued_spin_##op(lock); \
> > + return ticket_spin_##op(lock); \
> > +}
>
> ...do you know what impact this inlined static key check has on the
> kernel size?
>
> Actually, why not use ALTERNATIVE with any nonzero cpufeature value.
> Then add code to riscv_cpufeature_patch_check() to return true when
> qspinlocks should be enabled (based on the value of some global set
> during riscv_spinlock_init)?
>
> > +
> > +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> > +
> > +#elif defined(CONFIG_RISCV_QUEUED_SPINLOCKS)
> > +
> > +#include <asm/qspinlock.h>
> > +
> > +#else
> > +
> > +#include <asm/ticket_spinlock.h>
> > +
> > +#endif
> > +
> > +#include <asm/qrwlock.h>
> > +
> > +#endif /* __ASM_RISCV_SPINLOCK_H */
> > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > index a2cde65b69e9..b811fa331982 100644
> > --- a/arch/riscv/kernel/setup.c
> > +++ b/arch/riscv/kernel/setup.c
> > @@ -244,6 +244,43 @@ static void __init parse_dtb(void)
> > #endif
> > }
> >
> > +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
> > +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> > +EXPORT_SYMBOL(qspinlock_key);
> > +#endif
> > +
> > +static void __init riscv_spinlock_init(void)
> > +{
> > + char *using_ext = NULL;
> > +
> > + if (IS_ENABLED(CONFIG_RISCV_TICKET_SPINLOCKS)) {
> > + pr_info("Ticket spinlock: enabled\n");
> > + return;
> > + }
> > +
> > + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&
> > + IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&
> > + riscv_isa_extension_available(NULL, ZABHA) &&
> > + riscv_isa_extension_available(NULL, ZACAS)) {
> > + using_ext = "using Zabha";
> > + } else if (riscv_isa_extension_available(NULL, ZICCRSE)) {
> > + using_ext = "using Ziccrse";
> > + }
> > +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
> > + else {
>
> else if (IS_ENABLED(CONFIG_RISCV_COMBO_SPINLOCKS))
It fails to compile because the static key is only declared when
CONFIG_RISCV_COMBO_SPINLOCKS is set.
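To illustrate with a reduced sketch (not the actual code):

#include <linux/init.h>
#include <linux/jump_label.h>

#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
DEFINE_STATIC_KEY_TRUE(qspinlock_key);
#endif

static void __init riscv_spinlock_init(void)
{
	if (IS_ENABLED(CONFIG_RISCV_COMBO_SPINLOCKS)) {
		/*
		 * IS_ENABLED() only folds this into if (0): the compiler
		 * still parses the reference to qspinlock_key, which is
		 * not declared when the option is off, so the build
		 * fails. Only the preprocessor (#if/#ifdef) can make the
		 * reference disappear.
		 */
		static_branch_disable(&qspinlock_key);
	}
}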
>
> > + static_branch_disable(&qspinlock_key);
> > + pr_info("Ticket spinlock: enabled\n");
> > +
>
> nit: remove this blank line
Done
>
> > + return;
> > + }
> > +#endif
> > +
> > + if (!using_ext)
> > + pr_err("Queued spinlock without Zabha or Ziccrse");
> > + else
> > + pr_info("Queued spinlock %s: enabled\n", using_ext);
> > +}
> > +
> > extern void __init init_rt_signal_env(void);
> >
> > void __init setup_arch(char **cmdline_p)
> > @@ -297,6 +334,7 @@ void __init setup_arch(char **cmdline_p)
> > riscv_set_dma_cache_alignment();
> >
> > riscv_user_isa_enable();
> > + riscv_spinlock_init();
> > }
> >
> > bool arch_cpu_is_hotpluggable(int cpu)
> > diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > index 0655aa5b57b2..bf47cca2c375 100644
> > --- a/include/asm-generic/qspinlock.h
> > +++ b/include/asm-generic/qspinlock.h
> > @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > }
> > #endif
> >
> > +#ifndef __no_arch_spinlock_redefine
>
> I'm not sure what's better/worse, but instead of inventing this
> __no_arch_spinlock_redefine thing we could just name all the functions
> something like __arch_spin* and then add defines for both to asm/spinlock.h,
> i.e.
>
> #define queued_spin_lock(l) __arch_spin_lock(l)
> ...
>
> #define ticket_spin_lock(l) __arch_spin_lock(l)
> ...
__arch_spin_lock() would use queued_spin_lock() so that would make an
"infinite recursive definition" right? And that would override the
"real" queued_spin_lock() implementation too.
But maybe I missed something!
>
> Besides not having to touch asm-generic/qspinlock.h and
> asm-generic/ticket_spinlock.h it allows one to find the implementations
> a bit easier as following a tag to arch_spin_lock() will take them to
> queued_spin_lock() which will then take them to
> arch/riscv/include/asm/spinlock.h and there they'll figure out how
> __arch_spin_lock() was defined.
>
> > /*
> > * Remapping spinlock architecture specific functions to the corresponding
> > * queued spinlock functions.
> > @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > #define arch_spin_lock(l) queued_spin_lock(l)
> > #define arch_spin_trylock(l) queued_spin_trylock(l)
> > #define arch_spin_unlock(l) queued_spin_unlock(l)
> > +#endif
> >
> > #endif /* __ASM_GENERIC_QSPINLOCK_H */
> > diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> > index cfcff22b37b3..325779970d8a 100644
> > --- a/include/asm-generic/ticket_spinlock.h
> > +++ b/include/asm-generic/ticket_spinlock.h
> > @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > return (s16)((val >> 16) - (val & 0xffff)) > 1;
> > }
> >
> > +#ifndef __no_arch_spinlock_redefine
> > /*
> > * Remapping spinlock architecture specific functions to the corresponding
> > * ticket spinlock functions.
> > @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > #define arch_spin_lock(l) ticket_spin_lock(l)
> > #define arch_spin_trylock(l) ticket_spin_trylock(l)
> > #define arch_spin_unlock(l) ticket_spin_unlock(l)
> > +#endif
> >
> > #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> > --
> > 2.39.2
> >
>
> Thanks,
> drew
* Re: [PATCH v4 13/13] riscv: Add qspinlock support
2024-08-01 8:43 ` Alexandre Ghiti
@ 2024-08-01 10:15 ` Andrew Jones
2024-08-02 8:58 ` Alexandre Ghiti
0 siblings, 1 reply; 47+ messages in thread
From: Andrew Jones @ 2024-08-01 10:15 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
On Thu, Aug 01, 2024 at 10:43:03AM GMT, Alexandre Ghiti wrote:
...
> > > diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > > index 0655aa5b57b2..bf47cca2c375 100644
> > > --- a/include/asm-generic/qspinlock.h
> > > +++ b/include/asm-generic/qspinlock.h
> > > @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > }
> > > #endif
> > >
> > > +#ifndef __no_arch_spinlock_redefine
> >
> > I'm not sure what's better/worse, but instead of inventing this
> > __no_arch_spinlock_redefine thing we could just name all the functions
> > something like __arch_spin* and then add defines for both to asm/spinlock.h,
> > i.e.
> >
> > #define queued_spin_lock(l) __arch_spin_lock(l)
> > ...
> >
> > #define ticket_spin_lock(l) __arch_spin_lock(l)
> > ...
>
> __arch_spin_lock() would use queued_spin_lock() so that would make an
> "infinite recursive definition" right? And that would override the
> "real" queued_spin_lock() implementation too.
>
> But maybe I missed something!
>
It depends on where the definition is done. It should work if the
preprocessor expands the implementation of __arch_spin_* before
evaluating the #define of queued_spin_*. IOW, we just need to put
the defines after the static inline constructions.
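Roughly something like this (just an ordering sketch, untested):

/* arch/riscv/include/asm/spinlock.h */
#include <linux/jump_label.h>
#include <asm/ticket_spinlock.h>
#include <asm/qspinlock.h>

DECLARE_STATIC_KEY_TRUE(qspinlock_key);

static __always_inline void __arch_spin_lock(arch_spinlock_t *lock)
{
	/*
	 * queued_spin_lock()/ticket_spin_lock() are not macros yet at
	 * this point, so these calls bind to the real asm-generic
	 * inlines instead of recursing into __arch_spin_lock().
	 */
	if (static_branch_unlikely(&qspinlock_key))
		queued_spin_lock(lock);
	else
		ticket_spin_lock(lock);
}

/* Only uses after this point get remapped. */
#define queued_spin_lock(l)	__arch_spin_lock(l)
#define ticket_spin_lock(l)	__arch_spin_lock(l)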
Thanks,
drew
* Re: [PATCH v4 13/13] riscv: Add qspinlock support
2024-08-01 10:15 ` Andrew Jones
@ 2024-08-02 8:58 ` Alexandre Ghiti
0 siblings, 0 replies; 47+ messages in thread
From: Alexandre Ghiti @ 2024-08-02 8:58 UTC (permalink / raw)
To: Andrew Jones, Alexandre Ghiti
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
On 01/08/2024 12:15, Andrew Jones wrote:
> On Thu, Aug 01, 2024 at 10:43:03AM GMT, Alexandre Ghiti wrote:
> ...
>>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
>>>> index 0655aa5b57b2..bf47cca2c375 100644
>>>> --- a/include/asm-generic/qspinlock.h
>>>> +++ b/include/asm-generic/qspinlock.h
>>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>>> }
>>>> #endif
>>>>
>>>> +#ifndef __no_arch_spinlock_redefine
>>> I'm not sure what's better/worse, but instead of inventing this
>>> __no_arch_spinlock_redefine thing we could just name all the functions
>>> something like __arch_spin* and then add defines for both to asm/spinlock.h,
>>> i.e.
>>>
>>> #define queued_spin_lock(l) __arch_spin_lock(l)
>>> ...
>>>
>>> #define ticket_spin_lock(l) __arch_spin_lock(l)
>>> ...
>> __arch_spin_lock() would use queued_spin_lock() so that would make an
>> "infinite recursive definition" right? And that would override the
>> "real" queued_spin_lock() implementation too.
>>
>> But maybe I missed something!
>>
> It depends on where the definition is done. It should work if the
> preprocessor expands the implementation of __arch_spin_* before
> evaluating the #define of queued_spin_*. IOW, we just need to put
> the defines after the static inline constructions.
So I have just given it a try: both qspinlock.h and ticket_spinlock.h end up
defining arch_spin_XXXX(), which triggers a bunch of redefinition warnings.
I'll drop this suggestion as I find it harder to understand, and because
silencing those warnings would require introducing a new preprocessor guard
(or something else?) anyway. And the solution with __no_arch_spinlock_redefine
is really straightforward and lightweight.
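For reference, the clash I hit is roughly this (simplified):

/* include/asm-generic/qspinlock.h */
#define arch_spin_lock(l)	queued_spin_lock(l)

/* include/asm-generic/ticket_spinlock.h, pulled in by the same asm/spinlock.h */
#define arch_spin_lock(l)	ticket_spin_lock(l)	/* "macro redefined" warning */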
Thanks,
Alex
>
> Thanks,
> drew
>
* Re: [PATCH v4 13/13] riscv: Add qspinlock support
2024-07-31 15:29 ` Andrew Jones
2024-08-01 6:53 ` Alexandre Ghiti
2024-08-01 8:43 ` Alexandre Ghiti
@ 2024-08-01 9:48 ` Andrea Parri
2 siblings, 0 replies; 47+ messages in thread
From: Andrea Parri @ 2024-08-01 9:48 UTC (permalink / raw)
To: Andrew Jones
Cc: Alexandre Ghiti, Jonathan Corbet, Paul Walmsley, Palmer Dabbelt,
Albert Ou, Conor Dooley, Rob Herring, Krzysztof Kozlowski,
Nathan Chancellor, Peter Zijlstra, Ingo Molnar, Will Deacon,
Waiman Long, Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren,
linux-doc, devicetree, linux-kernel, linux-riscv, linux-arch
> > + select ARCH_WEAK_RELEASE_ACQUIRE if ARCH_USE_QUEUED_SPINLOCKS
>
> Why do we need this? Also, we presumably would prefer not to have it
> when we end up using ticket spinlocks when combo spinlocks is selected.
> Is there no way to avoid it?
Probably not what you had in mind but we should be able to drop the full
barriers in the ticket-lock implementation, deferring/confining them to
RCU code; this way no separate treatment would be needed for either lock:
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index c9ff8081efc1a..d37afe3bb20cb 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -79,7 +79,7 @@ config RISCV
select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
select ARCH_WANTS_NO_INSTR
select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
- select ARCH_WEAK_RELEASE_ACQUIRE if ARCH_USE_QUEUED_SPINLOCKS
+ select ARCH_WEAK_RELEASE_ACQUIRE
select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
select BUILDTIME_TABLE_SORT if MMU
select CLINT_TIMER if RISCV_M_MODE
diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
index 325779970d8a0..47522640e5e39 100644
--- a/include/asm-generic/ticket_spinlock.h
+++ b/include/asm-generic/ticket_spinlock.h
@@ -13,10 +13,8 @@
* about this. If your architecture cannot do this you might be better off with
* a test-and-set.
*
- * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence
- * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with
- * a full fence after the spin to upgrade the otherwise-RCpc
- * atomic_cond_read_acquire().
+ * It further assumes atomic_*_release() + atomic_*_acquire() is RCtso, where
+ * regular code only expects atomic_t to be RCpc.
*
* The implementation uses smp_cond_load_acquire() to spin, so if the
* architecture has WFE like instructions to sleep instead of poll for word
@@ -32,22 +30,13 @@
static __always_inline void ticket_spin_lock(arch_spinlock_t *lock)
{
- u32 val = atomic_fetch_add(1<<16, &lock->val);
+ u32 val = atomic_fetch_add_acquire(1<<16, &lock->val);
u16 ticket = val >> 16;
if (ticket == (u16)val)
return;
- /*
- * atomic_cond_read_acquire() is RCpc, but rather than defining a
- * custom cond_read_rcsc() here we just emit a full fence. We only
- * need the prior reads before subsequent writes ordering from
- * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we
- * have no outstanding writes due to the atomic_fetch_add() the extra
- * orderings are free.
- */
atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL);
- smp_mb();
}
static __always_inline bool ticket_spin_trylock(arch_spinlock_t *lock)
@@ -57,7 +46,7 @@ static __always_inline bool ticket_spin_trylock(arch_spinlock_t *lock)
if ((old >> 16) != (old & 0xffff))
return false;
- return atomic_try_cmpxchg(&lock->val, &old, old + (1<<16)); /* SC, for RCsc */
+ return atomic_try_cmpxchg_acquire(&lock->val, &old, old + (1<<16));
}
static __always_inline void ticket_spin_unlock(arch_spinlock_t *lock)
https://lore.kernel.org/lkml/ZlnyKclZOQdrJTtU@andrea/ provides additional
context.
But that's presumably enough. ;-) How do the above changes look in your tests?
Other suggestions?
Andrea
* Re: [PATCH v4 00/13] Zacas/Zabha support and qspinlocks
2024-07-31 7:23 [PATCH v4 00/13] Zacas/Zabha support and qspinlocks Alexandre Ghiti
` (12 preceding siblings ...)
2024-07-31 7:24 ` [PATCH v4 13/13] riscv: Add qspinlock support Alexandre Ghiti
@ 2024-08-01 14:15 ` Peter Zijlstra
13 siblings, 0 replies; 47+ messages in thread
From: Peter Zijlstra @ 2024-08-01 14:15 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Jonathan Corbet, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Conor Dooley, Rob Herring, Krzysztof Kozlowski, Andrea Parri,
Nathan Chancellor, Ingo Molnar, Will Deacon, Waiman Long,
Boqun Feng, Arnd Bergmann, Leonardo Bras, Guo Ren, linux-doc,
devicetree, linux-kernel, linux-riscv, linux-arch
On Wed, Jul 31, 2024 at 09:23:52AM +0200, Alexandre Ghiti wrote:
> Guo Ren (2):
> asm-generic: ticket-lock: Reuse arch_spinlock_t of qspinlock
> asm-generic: ticket-lock: Add separate ticket-lock.h
> include/asm-generic/qspinlock.h | 2 +
> include/asm-generic/spinlock.h | 87 +-----
> include/asm-generic/spinlock_types.h | 12 +-
> include/asm-generic/ticket_spinlock.h | 105 +++++++
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>