* [PATCH v4 05/10] riscv: Apply Zalasr to smp_load_acquire/smp_store_release
@ 2025-10-20 4:24 Xu Lu
2025-10-20 4:24 ` [PATCH v4 06/10] riscv: Apply acquire/release semantics to arch_xchg/arch_cmpxchg operations Xu Lu
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: Xu Lu @ 2025-10-20 4:24 UTC (permalink / raw)
To: corbet, paul.walmsley, palmer, aou, alex, robh, krzk+dt, conor+dt,
will, peterz, boqun.feng, mark.rutland, anup, atish.patra,
pbonzini, shuah, parri.andrea, ajones, brs, guoren
Cc: linux-doc, linux-riscv, linux-kernel, devicetree, kvm, kvm-riscv,
linux-kselftest, apw, joe, lukas.bulwahn, Xu Lu
Replace fence instructions with Zalasr load-acquire/store-release
instructions in the smp_load_acquire() and smp_store_release() operations.
|----------------------------------------|
|    |        __smp_store_release        |
|    |-----------------------------------|
|    |     zalasr      |     !zalasr     |
| rl |-----------------------------------|
|    | s{b|h|w|d}.rl   | fence rw, w     |
|    |                 | s{b|h|w|d}      |
|----------------------------------------|
|    |        __smp_load_acquire         |
|    |-----------------------------------|
|    |     zalasr      |     !zalasr     |
| aq |-----------------------------------|
|    | l{b|h|w|d}.aq   | l{b|h|w|d}      |
|    |                 | fence r, rw     |
|----------------------------------------|
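For context, a minimal message-passing sketch (not taken from this series;
'data', 'flag', producer() and consumer() are made-up names) showing the
pattern these macros order, with comments restating the table above:
static int data;
static int flag;

static void producer(void)
{
        data = 42;                      /* plain store                  */
        smp_store_release(&flag, 1);    /* sw.rl, or "fence rw, w" + sw */
}

static int consumer(void)
{
        if (smp_load_acquire(&flag))    /* lw.aq, or lw + "fence r, rw" */
                return data;            /* guaranteed to observe 42     */
        return -1;
}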
Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
---
arch/riscv/include/asm/barrier.h | 91 ++++++++++++++++++++++++++++----
1 file changed, 80 insertions(+), 11 deletions(-)
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index b8c5726d86acb..9eaf94a028096 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -51,19 +51,88 @@
*/
#define smp_mb__after_spinlock() RISCV_FENCE(iorw, iorw)
-#define __smp_store_release(p, v) \
-do { \
- compiletime_assert_atomic_type(*p); \
- RISCV_FENCE(rw, w); \
- WRITE_ONCE(*p, v); \
+extern void __bad_size_call_parameter(void);
+
+#define __smp_store_release(p, v) \
+do { \
+ typeof(p) __p = (p); \
+ union { typeof(*p) __val; char __c[1]; } __u = \
+ { .__val = (__force typeof(*p)) (v) }; \
+ compiletime_assert_atomic_type(*p); \
+ switch (sizeof(*p)) { \
+ case 1: \
+ asm volatile(ALTERNATIVE("fence rw, w;\t\nsb %0, 0(%1)\t\n", \
+ SB_RL(%0, %1) "\t\nnop\t\n", \
+ 0, RISCV_ISA_EXT_ZALASR, 1) \
+ : : "r" (*(__u8 *)__u.__c), "r" (__p) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm volatile(ALTERNATIVE("fence rw, w;\t\nsh %0, 0(%1)\t\n", \
+ SH_RL(%0, %1) "\t\nnop\t\n", \
+ 0, RISCV_ISA_EXT_ZALASR, 1) \
+ : : "r" (*(__u16 *)__u.__c), "r" (__p) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm volatile(ALTERNATIVE("fence rw, w;\t\nsw %0, 0(%1)\t\n", \
+ SW_RL(%0, %1) "\t\nnop\t\n", \
+ 0, RISCV_ISA_EXT_ZALASR, 1) \
+ : : "r" (*(__u32 *)__u.__c), "r" (__p) \
+ : "memory"); \
+ break; \
+ case 8: \
+ asm volatile(ALTERNATIVE("fence rw, w;\t\nsd %0, 0(%1)\t\n", \
+ SD_RL(%0, %1) "\t\nnop\t\n", \
+ 0, RISCV_ISA_EXT_ZALASR, 1) \
+ : : "r" (*(__u64 *)__u.__c), "r" (__p) \
+ : "memory"); \
+ break; \
+ default: \
+ __bad_size_call_parameter(); \
+ break; \
+ } \
} while (0)
-#define __smp_load_acquire(p) \
-({ \
- typeof(*p) ___p1 = READ_ONCE(*p); \
- compiletime_assert_atomic_type(*p); \
- RISCV_FENCE(r, rw); \
- ___p1; \
+#define __smp_load_acquire(p) \
+({ \
+ union { typeof(*p) __val; char __c[1]; } __u; \
+ typeof(p) __p = (p); \
+ compiletime_assert_atomic_type(*p); \
+ switch (sizeof(*p)) { \
+ case 1: \
+ asm volatile(ALTERNATIVE("lb %0, 0(%1)\t\nfence r, rw\t\n", \
+ LB_AQ(%0, %1) "\t\nnop\t\n", \
+ 0, RISCV_ISA_EXT_ZALASR, 1) \
+ : "=r" (*(__u8 *)__u.__c) : "r" (__p) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm volatile(ALTERNATIVE("lh %0, 0(%1)\t\nfence r, rw\t\n", \
+ LH_AQ(%0, %1) "\t\nnop\t\n", \
+ 0, RISCV_ISA_EXT_ZALASR, 1) \
+ : "=r" (*(__u16 *)__u.__c) : "r" (__p) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm volatile(ALTERNATIVE("lw %0, 0(%1)\t\nfence r, rw\t\n", \
+ LW_AQ(%0, %1) "\t\nnop\t\n", \
+ 0, RISCV_ISA_EXT_ZALASR, 1) \
+ : "=r" (*(__u32 *)__u.__c) : "r" (__p) \
+ : "memory"); \
+ break; \
+ case 8: \
+ asm volatile(ALTERNATIVE("ld %0, 0(%1)\t\nfence r, rw\t\n", \
+ LD_AQ(%0, %1) "\t\nnop\t\n", \
+ 0, RISCV_ISA_EXT_ZALASR, 1) \
+ : "=r" (*(__u64 *)__u.__c) : "r" (__p) \
+ : "memory"); \
+ break; \
+ default: \
+ __bad_size_call_parameter(); \
+ break; \
+ } \
+ __u.__val; \
})
#ifdef CONFIG_RISCV_ISA_ZAWRS
--
2.20.1
* [PATCH v4 06/10] riscv: Apply acquire/release semantics to arch_xchg/arch_cmpxchg operations
2025-10-20 4:24 [PATCH v4 05/10] riscv: Apply Zalasr to smp_load_acquire/smp_store_release Xu Lu
@ 2025-10-20 4:24 ` Xu Lu
2025-10-20 4:24 ` [PATCH v4 07/10] riscv: Apply acquire/release semantics to arch_atomic operations Xu Lu
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Xu Lu @ 2025-10-20 4:24 UTC (permalink / raw)
To: corbet, paul.walmsley, palmer, aou, alex, robh, krzk+dt, conor+dt,
will, peterz, boqun.feng, mark.rutland, anup, atish.patra,
pbonzini, shuah, parri.andrea, ajones, brs, guoren
Cc: linux-doc, linux-riscv, linux-kernel, devicetree, kvm, kvm-riscv,
linux-kselftest, apw, joe, lukas.bulwahn, Xu Lu
The existing arch_xchg/arch_cmpxchg operations are implemented by
inserting fence instructions before or after the atomic instructions.
This commit replaces those fences with real acquire/release semantics
on the atomic instructions themselves.
|----------------------------------------------------------------|
|    |      arch_xchg_release     |     arch_cmpxchg_release     |
|    |-----------------------------------------------------------|
|    |   zabha    |    !zabha     | zabha+zacas | !(zabha+zacas) |
| rl |-----------------------------------------------------------|
|    |            | (fence rw, w) |             | (fence rw, w)  |
|    | amoswap.rl | lr.w          | amocas.rl   | lr.w           |
|    |            | sc.w.rl       |             | sc.w.rl        |
|----------------------------------------------------------------|
|    |     arch_xchg_acquire      |     arch_cmpxchg_acquire     |
|    |-----------------------------------------------------------|
|    |   zabha    |    !zabha     | zabha+zacas | !(zabha+zacas) |
| aq |-----------------------------------------------------------|
|    |            | lr.w.aq       |             | lr.w.aq        |
|    | amoswap.aq | sc.w          | amocas.aq   | sc.w           |
|    |            | (fence r, rw) |             | (fence r, rw)  |
|----------------------------------------------------------------|
The (fence rw, w) and (fence r, rw) entries above mean that such fences
are only inserted when Zalasr is not implemented.
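To make the table concrete, here is a contrived test-and-set lock (sketch
only, not from this series; toy_lock/toy_trylock/toy_unlock are invented
names) showing where the acquire and release forms end up:
static atomic_t toy_lock = ATOMIC_INIT(0);

static bool toy_trylock(void)
{
        /* 32-bit cmpxchg acquire: amocas.w.aq with Zacas, otherwise an
         * lr.w.aq/sc.w loop (fences only as shown in the table above).
         */
        return atomic_cmpxchg_acquire(&toy_lock, 0, 1) == 0;
}

static void toy_unlock(void)
{
        /* 32-bit xchg release: a single amoswap.w.rl */
        atomic_xchg_release(&toy_lock, 0);
}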
Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
---
arch/riscv/include/asm/cmpxchg.h | 144 +++++++++++++++----------------
1 file changed, 71 insertions(+), 73 deletions(-)
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 0b749e7102162..50fd46af448a9 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -15,15 +15,31 @@
#include <asm/cpufeature-macros.h>
#include <asm/processor.h>
-#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \
- swap_append, r, p, n) \
+/*
+ * These macros are here to improve the readability of the arch_xchg_XXX()
+ * and arch_cmpxchg_XXX() macros.
+ */
+#define LR_SFX(x) x
+#define SC_SFX(x) x
+#define CAS_SFX(x) x
+#define SC_PREPEND(x) x
+#define SC_APPEND(x) x
+
+#ifdef CONFIG_SMP
+#define SC_ACQUIRE_BARRIER RISCV_FENCE_ASM(r, rw)
+#define SC_RELEASE_BARRIER RISCV_FENCE_ASM(rw, w)
+#else
+#define SC_ACQUIRE_BARRIER __nops(1)
+#define SC_RELEASE_BARRIER __nops(1)
+#endif
+
+#define __arch_xchg_masked(lr_sfx, sc_sfx, swap_sfx, sc_prepend, sc_append, \
+ r, p, n) \
({ \
if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) { \
__asm__ __volatile__ ( \
- prepend \
" amoswap" swap_sfx " %0, %z2, %1\n" \
- swap_append \
: "=&r" (r), "+A" (*(p)) \
: "rJ" (n) \
: "memory"); \
@@ -37,14 +53,16 @@
ulong __rc; \
\
__asm__ __volatile__ ( \
- prepend \
PREFETCHW_ASM(%5) \
+ ALTERNATIVE(__nops(1), sc_prepend, \
+ 0, RISCV_ISA_EXT_ZALASR, 1) \
"0: lr.w %0, %2\n" \
" and %1, %0, %z4\n" \
" or %1, %1, %z3\n" \
" sc.w" sc_sfx " %1, %1, %2\n" \
" bnez %1, 0b\n" \
- sc_append \
+ ALTERNATIVE(__nops(1), sc_append, \
+ 0, RISCV_ISA_EXT_ZALASR, 1) \
: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
: "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \
: "memory"); \
@@ -53,19 +71,17 @@
} \
})
-#define __arch_xchg(sfx, prepend, append, r, p, n) \
+#define __arch_xchg(sfx, r, p, n) \
({ \
__asm__ __volatile__ ( \
- prepend \
" amoswap" sfx " %0, %2, %1\n" \
- append \
: "=r" (r), "+A" (*(p)) \
: "r" (n) \
: "memory"); \
})
-#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, \
- sc_append, swap_append) \
+#define _arch_xchg(ptr, new, lr_sfx, sc_sfx, swap_sfx, \
+ sc_prepend, sc_append) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(__ptr)) __new = (new); \
@@ -73,22 +89,20 @@
\
switch (sizeof(*__ptr)) { \
case 1: \
- __arch_xchg_masked(sc_sfx, ".b" swap_sfx, \
- prepend, sc_append, swap_append, \
+ __arch_xchg_masked(lr_sfx, sc_sfx, ".b" swap_sfx, \
+ sc_prepend, sc_append, \
__ret, __ptr, __new); \
break; \
case 2: \
- __arch_xchg_masked(sc_sfx, ".h" swap_sfx, \
- prepend, sc_append, swap_append, \
+ __arch_xchg_masked(lr_sfx, sc_sfx, ".h" swap_sfx, \
+ sc_prepend, sc_append, \
__ret, __ptr, __new); \
break; \
case 4: \
- __arch_xchg(".w" swap_sfx, prepend, swap_append, \
- __ret, __ptr, __new); \
+ __arch_xchg(".w" swap_sfx, __ret, __ptr, __new); \
break; \
case 8: \
- __arch_xchg(".d" swap_sfx, prepend, swap_append, \
- __ret, __ptr, __new); \
+ __arch_xchg(".d" swap_sfx, __ret, __ptr, __new); \
break; \
default: \
BUILD_BUG(); \
@@ -97,17 +111,23 @@
})
#define arch_xchg_relaxed(ptr, x) \
- _arch_xchg(ptr, x, "", "", "", "", "")
+ _arch_xchg(ptr, x, LR_SFX(""), SC_SFX(""), CAS_SFX(""), \
+ SC_PREPEND(__nops(1)), SC_APPEND(__nops(1)))
#define arch_xchg_acquire(ptr, x) \
- _arch_xchg(ptr, x, "", "", "", \
- RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)
+ _arch_xchg(ptr, x, LR_SFX(".aq"), SC_SFX(""), CAS_SFX(".aq"), \
+ SC_PREPEND(__nops(1)), \
+ SC_APPEND(SC_ACQUIRE_BARRIER))
#define arch_xchg_release(ptr, x) \
- _arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")
+ _arch_xchg(ptr, x, LR_SFX(""), SC_SFX(".rl"), CAS_SFX(".rl"), \
+ SC_PREPEND(SC_RELEASE_BARRIER), \
+ SC_APPEND(__nops(1)))
#define arch_xchg(ptr, x) \
- _arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")
+ _arch_xchg(ptr, x, LR_SFX(""), SC_SFX(".aqrl"), \
+ CAS_SFX(".aqrl"), SC_PREPEND(__nops(1)), \
+ SC_APPEND(__nops(1)))
#define xchg32(ptr, x) \
({ \
@@ -126,9 +146,7 @@
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*/
-#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, \
- sc_prepend, sc_append, \
- cas_prepend, cas_append, \
+#define __arch_cmpxchg_masked(lr_sfx, sc_sfx, cas_sfx, sc_prepend, sc_append, \
r, p, o, n) \
({ \
if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
@@ -138,9 +156,7 @@
r = o; \
\
__asm__ __volatile__ ( \
- cas_prepend \
" amocas" cas_sfx " %0, %z2, %1\n" \
- cas_append \
: "+&r" (r), "+A" (*(p)) \
: "rJ" (n) \
: "memory"); \
@@ -155,15 +171,17 @@
ulong __rc; \
\
__asm__ __volatile__ ( \
- sc_prepend \
- "0: lr.w %0, %2\n" \
+ ALTERNATIVE(__nops(1), sc_prepend, \
+ 0, RISCV_ISA_EXT_ZALASR, 1) \
+ "0: lr.w" lr_sfx " %0, %2\n" \
" and %1, %0, %z5\n" \
" bne %1, %z3, 1f\n" \
" and %1, %0, %z6\n" \
" or %1, %1, %z4\n" \
" sc.w" sc_sfx " %1, %1, %2\n" \
" bnez %1, 0b\n" \
- sc_append \
+ ALTERNATIVE(__nops(1), sc_append, \
+ 0, RISCV_ISA_EXT_ZALASR, 1) \
"1:\n" \
: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
: "rJ" ((long)__oldx), "rJ" (__newx), \
@@ -174,9 +192,7 @@
} \
})
-#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx, \
- sc_prepend, sc_append, \
- cas_prepend, cas_append, \
+#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx, sc_prepend, sc_append, \
r, p, co, o, n) \
({ \
if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
@@ -184,9 +200,7 @@
r = o; \
\
__asm__ __volatile__ ( \
- cas_prepend \
" amocas" cas_sfx " %0, %z2, %1\n" \
- cas_append \
: "+&r" (r), "+A" (*(p)) \
: "rJ" (n) \
: "memory"); \
@@ -194,12 +208,14 @@
register unsigned int __rc; \
\
__asm__ __volatile__ ( \
- sc_prepend \
+ ALTERNATIVE(__nops(1), sc_prepend, \
+ 0, RISCV_ISA_EXT_ZALASR, 1) \
"0: lr" lr_sfx " %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc" sc_sfx " %1, %z4, %2\n" \
" bnez %1, 0b\n" \
- sc_append \
+ ALTERNATIVE(__nops(1), sc_append, \
+ 0, RISCV_ISA_EXT_ZALASR, 1) \
"1:\n" \
: "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
: "rJ" (co o), "rJ" (n) \
@@ -207,9 +223,8 @@
} \
})
-#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx, \
- sc_prepend, sc_append, \
- cas_prepend, cas_append) \
+#define _arch_cmpxchg(ptr, old, new, lr_sfx, sc_sfx, cas_sfx, \
+ sc_prepend, sc_append) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(__ptr)) __old = (old); \
@@ -218,27 +233,23 @@
\
switch (sizeof(*__ptr)) { \
case 1: \
- __arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx, \
+ __arch_cmpxchg_masked(lr_sfx, sc_sfx, ".b" cas_sfx, \
sc_prepend, sc_append, \
- cas_prepend, cas_append, \
__ret, __ptr, __old, __new); \
break; \
case 2: \
- __arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx, \
+ __arch_cmpxchg_masked(lr_sfx, sc_sfx, ".h" cas_sfx, \
sc_prepend, sc_append, \
- cas_prepend, cas_append, \
__ret, __ptr, __old, __new); \
break; \
case 4: \
- __arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx, \
+ __arch_cmpxchg(".w" lr_sfx, ".w" sc_sfx, ".w" cas_sfx, \
sc_prepend, sc_append, \
- cas_prepend, cas_append, \
__ret, __ptr, (long)(int)(long), __old, __new); \
break; \
case 8: \
- __arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx, \
+ __arch_cmpxchg(".d" lr_sfx, ".d" sc_sfx, ".d" cas_sfx, \
sc_prepend, sc_append, \
- cas_prepend, cas_append, \
__ret, __ptr, /**/, __old, __new); \
break; \
default: \
@@ -247,40 +258,27 @@
(__typeof__(*(__ptr)))__ret; \
})
-/*
- * These macros are here to improve the readability of the arch_cmpxchg_XXX()
- * macros.
- */
-#define SC_SFX(x) x
-#define CAS_SFX(x) x
-#define SC_PREPEND(x) x
-#define SC_APPEND(x) x
-#define CAS_PREPEND(x) x
-#define CAS_APPEND(x) x
-
#define arch_cmpxchg_relaxed(ptr, o, n) \
_arch_cmpxchg((ptr), (o), (n), \
- SC_SFX(""), CAS_SFX(""), \
- SC_PREPEND(""), SC_APPEND(""), \
- CAS_PREPEND(""), CAS_APPEND(""))
+ LR_SFX(""), SC_SFX(""), CAS_SFX(""), \
+ SC_PREPEND(__nops(1)), SC_APPEND(__nops(1)))
#define arch_cmpxchg_acquire(ptr, o, n) \
_arch_cmpxchg((ptr), (o), (n), \
- SC_SFX(""), CAS_SFX(""), \
- SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER), \
- CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))
+ LR_SFX(".aq"), SC_SFX(""), CAS_SFX(".aq"), \
+ SC_PREPEND(__nops(1)), \
+ SC_APPEND(SC_ACQUIRE_BARRIER))
#define arch_cmpxchg_release(ptr, o, n) \
_arch_cmpxchg((ptr), (o), (n), \
- SC_SFX(""), CAS_SFX(""), \
- SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""), \
- CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND(""))
+ LR_SFX(""), SC_SFX(".rl"), CAS_SFX(".rl"), \
+ SC_PREPEND(SC_RELEASE_BARRIER), \
+ SC_APPEND(__nops(1)))
#define arch_cmpxchg(ptr, o, n) \
_arch_cmpxchg((ptr), (o), (n), \
- SC_SFX(".rl"), CAS_SFX(".aqrl"), \
- SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER), \
- CAS_PREPEND(""), CAS_APPEND(""))
+ LR_SFX(""), SC_SFX(".aqrl"), CAS_SFX(".aqrl"), \
+ SC_PREPEND(__nops(1)), SC_APPEND(__nops(1)))
#define arch_cmpxchg_local(ptr, o, n) \
arch_cmpxchg_relaxed((ptr), (o), (n))
--
2.20.1
* [PATCH v4 07/10] riscv: Apply acquire/release semantics to arch_atomic operations
2025-10-20 4:24 [PATCH v4 05/10] riscv: Apply Zalasr to smp_load_acquire/smp_store_release Xu Lu
2025-10-20 4:24 ` [PATCH v4 06/10] riscv: Apply acquire/release semantics to arch_xchg/arch_cmpxchg operations Xu Lu
@ 2025-10-20 4:24 ` Xu Lu
2025-10-20 4:24 ` [PATCH v4 08/10] riscv: Remove arch specific __atomic_acquire/release_fence Xu Lu
2025-10-20 4:24 ` [PATCH v4 09/10] RISC-V: KVM: Allow Zalasr extensions for Guest/VM Xu Lu
3 siblings, 0 replies; 5+ messages in thread
From: Xu Lu @ 2025-10-20 4:24 UTC (permalink / raw)
To: corbet, paul.walmsley, palmer, aou, alex, robh, krzk+dt, conor+dt,
will, peterz, boqun.feng, mark.rutland, anup, atish.patra,
pbonzini, shuah, parri.andrea, ajones, brs, guoren
Cc: linux-doc, linux-riscv, linux-kernel, devicetree, kvm, kvm-riscv,
linux-kselftest, apw, joe, lukas.bulwahn, Xu Lu
The existing arch_atomic operations are implemented by inserting fence
instructions before or after the atomic instructions. This commit replaces
those fences with real acquire/release semantics on the atomic
instructions themselves.
|-----------------------------------------------|
|    | arch_atomic(64)_{add|sub}_return_release |
| rl |------------------------------------------|
|    |                amoadd.rl                 |
|-----------------------------------------------|
|    | arch_atomic(64)_{add|sub}_return_acquire |
| aq |------------------------------------------|
|    |                amoadd.aq                 |
|-----------------------------------------------|
|---------------------------------------------------------|
|    | arch_atomic(64)_fetch_{add|sub|and|or|xor}_release  |
| rl |----------------------------------------------------|
|    |               amo{add|and|or|xor}.rl                |
|---------------------------------------------------------|
|    | arch_atomic(64)_fetch_{add|sub|and|or|xor}_acquire  |
| aq |----------------------------------------------------|
|    |               amo{add|and|or|xor}.aq                |
|---------------------------------------------------------|
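As a concrete (invented) illustration of which AMO forms the new variants
expand to on a 32-bit counter; toy_counter and the helpers below are
made-up names:
static atomic_t toy_counter = ATOMIC_INIT(0);

static int toy_add_release(int i)
{
        /* amoadd.w.rl: orders earlier memory accesses before the add */
        return atomic_fetch_add_release(i, &toy_counter);
}

static int toy_add_acquire(int i)
{
        /* amoadd.w.aq: orders later memory accesses after the add */
        return atomic_fetch_add_acquire(i, &toy_counter);
}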
Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
---
arch/riscv/include/asm/atomic.h | 64 +++++++++++++++++++++++++++++++++
1 file changed, 64 insertions(+)
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 5b96c2f61adb5..86291de07de62 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -98,6 +98,30 @@ c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i, \
return ret; \
} \
static __always_inline \
+c_type arch_atomic##prefix##_fetch_##op##_acquire(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ register c_type ret; \
+ __asm__ __volatile__ ( \
+ " amo" #asm_op "." #asm_type ".aq %1, %2, %0" \
+ : "+A" (v->counter), "=r" (ret) \
+ : "r" (I) \
+ : "memory"); \
+ return ret; \
+} \
+static __always_inline \
+c_type arch_atomic##prefix##_fetch_##op##_release(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ register c_type ret; \
+ __asm__ __volatile__ ( \
+ " amo" #asm_op "." #asm_type ".rl %1, %2, %0" \
+ : "+A" (v->counter), "=r" (ret) \
+ : "r" (I) \
+ : "memory"); \
+ return ret; \
+} \
+static __always_inline \
c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
{ \
register c_type ret; \
@@ -117,6 +141,18 @@ c_type arch_atomic##prefix##_##op##_return_relaxed(c_type i, \
return arch_atomic##prefix##_fetch_##op##_relaxed(i, v) c_op I; \
} \
static __always_inline \
+c_type arch_atomic##prefix##_##op##_return_acquire(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ return arch_atomic##prefix##_fetch_##op##_acquire(i, v) c_op I; \
+} \
+static __always_inline \
+c_type arch_atomic##prefix##_##op##_return_release(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ return arch_atomic##prefix##_fetch_##op##_release(i, v) c_op I; \
+} \
+static __always_inline \
c_type arch_atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v) \
{ \
return arch_atomic##prefix##_fetch_##op(i, v) c_op I; \
@@ -139,22 +175,38 @@ ATOMIC_OPS(sub, add, +, -i)
#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+#define arch_atomic_add_return_acquire arch_atomic_add_return_acquire
+#define arch_atomic_sub_return_acquire arch_atomic_sub_return_acquire
+#define arch_atomic_add_return_release arch_atomic_add_return_release
+#define arch_atomic_sub_return_release arch_atomic_sub_return_release
#define arch_atomic_add_return arch_atomic_add_return
#define arch_atomic_sub_return arch_atomic_sub_return
#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
+#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add_acquire
+#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub_acquire
+#define arch_atomic_fetch_add_release arch_atomic_fetch_add_release
+#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub_release
#define arch_atomic_fetch_add arch_atomic_fetch_add
#define arch_atomic_fetch_sub arch_atomic_fetch_sub
#ifndef CONFIG_GENERIC_ATOMIC64
#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
+#define arch_atomic64_add_return_acquire arch_atomic64_add_return_acquire
+#define arch_atomic64_sub_return_acquire arch_atomic64_sub_return_acquire
+#define arch_atomic64_add_return_release arch_atomic64_add_return_release
+#define arch_atomic64_sub_return_release arch_atomic64_sub_return_release
#define arch_atomic64_add_return arch_atomic64_add_return
#define arch_atomic64_sub_return arch_atomic64_sub_return
#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
+#define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add_acquire
+#define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub_acquire
+#define arch_atomic64_fetch_add_release arch_atomic64_fetch_add_release
+#define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub_release
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
#endif
@@ -177,6 +229,12 @@ ATOMIC_OPS(xor, xor, i)
#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
+#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and_acquire
+#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or_acquire
+#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor_acquire
+#define arch_atomic_fetch_and_release arch_atomic_fetch_and_release
+#define arch_atomic_fetch_or_release arch_atomic_fetch_or_release
+#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor_release
#define arch_atomic_fetch_and arch_atomic_fetch_and
#define arch_atomic_fetch_or arch_atomic_fetch_or
#define arch_atomic_fetch_xor arch_atomic_fetch_xor
@@ -185,6 +243,12 @@ ATOMIC_OPS(xor, xor, i)
#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
+#define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and_acquire
+#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or_acquire
+#define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor_acquire
+#define arch_atomic64_fetch_and_release arch_atomic64_fetch_and_release
+#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or_release
+#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor_release
#define arch_atomic64_fetch_and arch_atomic64_fetch_and
#define arch_atomic64_fetch_or arch_atomic64_fetch_or
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
--
2.20.1
* [PATCH v4 08/10] riscv: Remove arch specific __atomic_acquire/release_fence
2025-10-20 4:24 [PATCH v4 05/10] riscv: Apply Zalasr to smp_load_acquire/smp_store_release Xu Lu
2025-10-20 4:24 ` [PATCH v4 06/10] riscv: Apply acquire/release semantics to arch_xchg/arch_cmpxchg operations Xu Lu
2025-10-20 4:24 ` [PATCH v4 07/10] riscv: Apply acquire/release semantics to arch_atomic operations Xu Lu
@ 2025-10-20 4:24 ` Xu Lu
2025-10-20 4:24 ` [PATCH v4 09/10] RISC-V: KVM: Allow Zalasr extensions for Guest/VM Xu Lu
3 siblings, 0 replies; 5+ messages in thread
From: Xu Lu @ 2025-10-20 4:24 UTC (permalink / raw)
To: corbet, paul.walmsley, palmer, aou, alex, robh, krzk+dt, conor+dt,
will, peterz, boqun.feng, mark.rutland, anup, atish.patra,
pbonzini, shuah, parri.andrea, ajones, brs, guoren
Cc: linux-doc, linux-riscv, linux-kernel, devicetree, kvm, kvm-riscv,
linux-kselftest, apw, joe, lukas.bulwahn, Xu Lu
Remove the arch-specific __atomic_acquire/release_fence() operations:
they use fence instructions to simulate acquire/release ordering and do
not combine well with real acquire/release instructions.
The default generic __atomic_acquire/release_fence() now provide the
required ordering via a full 'fence rw, rw'. They are rarely reached,
since real acquire/release instructions are used most of the time.
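For reference, the generic fallbacks that take over are roughly the
following (quoted from memory from include/linux/atomic.h, so the exact
context may differ); on RISC-V, smp_mb__before_atomic() and
smp_mb__after_atomic() resolve to the full 'fence rw, rw' mentioned above:
/* include/linux/atomic.h (approximate): only used when the architecture
 * does not define its own acquire/release fences.
 */
#ifndef __atomic_acquire_fence
#define __atomic_acquire_fence          smp_mb__after_atomic
#endif

#ifndef __atomic_release_fence
#define __atomic_release_fence          smp_mb__before_atomic
#endif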
Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
---
arch/riscv/include/asm/atomic.h | 6 ------
arch/riscv/include/asm/fence.h | 4 ----
2 files changed, 10 deletions(-)
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 86291de07de62..6ed50a283bf8b 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -18,12 +18,6 @@
#include <asm/cmpxchg.h>
-#define __atomic_acquire_fence() \
- __asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory")
-
-#define __atomic_release_fence() \
- __asm__ __volatile__(RISCV_RELEASE_BARRIER "" ::: "memory");
-
static __always_inline int arch_atomic_read(const atomic_t *v)
{
return READ_ONCE(v->counter);
diff --git a/arch/riscv/include/asm/fence.h b/arch/riscv/include/asm/fence.h
index 182db7930edc2..9ce83e4793948 100644
--- a/arch/riscv/include/asm/fence.h
+++ b/arch/riscv/include/asm/fence.h
@@ -7,12 +7,8 @@
({ __asm__ __volatile__ (RISCV_FENCE_ASM(p, s) : : : "memory"); })
#ifdef CONFIG_SMP
-#define RISCV_ACQUIRE_BARRIER RISCV_FENCE_ASM(r, rw)
-#define RISCV_RELEASE_BARRIER RISCV_FENCE_ASM(rw, w)
#define RISCV_FULL_BARRIER RISCV_FENCE_ASM(rw, rw)
#else
-#define RISCV_ACQUIRE_BARRIER
-#define RISCV_RELEASE_BARRIER
#define RISCV_FULL_BARRIER
#endif
--
2.20.1
* [PATCH v4 09/10] RISC-V: KVM: Allow Zalasr extensions for Guest/VM
2025-10-20 4:24 [PATCH v4 05/10] riscv: Apply Zalasr to smp_load_acquire/smp_store_release Xu Lu
` (2 preceding siblings ...)
2025-10-20 4:24 ` [PATCH v4 08/10] riscv: Remove arch specific __atomic_acquire/release_fence Xu Lu
@ 2025-10-20 4:24 ` Xu Lu
3 siblings, 0 replies; 5+ messages in thread
From: Xu Lu @ 2025-10-20 4:24 UTC (permalink / raw)
To: corbet, paul.walmsley, palmer, aou, alex, robh, krzk+dt, conor+dt,
will, peterz, boqun.feng, mark.rutland, anup, atish.patra,
pbonzini, shuah, parri.andrea, ajones, brs, guoren
Cc: linux-doc, linux-riscv, linux-kernel, devicetree, kvm, kvm-riscv,
linux-kselftest, apw, joe, lukas.bulwahn, Xu Lu
Extend the KVM ISA extension ONE_REG interface to allow KVM user space
to detect and enable the Zalasr extension for the Guest/VM.
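For reference, a rough user-space sketch of how a VMM could probe and
enable the extension through this interface. The ioctl pattern is standard
ONE_REG usage; the register id must be composed from the ISA-extension
encoding in <asm/kvm.h> and is deliberately left as a parameter here:
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch only: 'vcpu_fd' is an already-created vCPU, 'reg_id' is the
 * ONE_REG id for KVM_RISCV_ISA_EXT_ZALASR built per the uapi header.
 */
static int enable_zalasr(int vcpu_fd, uint64_t reg_id)
{
        uint64_t val = 0;
        struct kvm_one_reg reg = {
                .id   = reg_id,
                .addr = (uint64_t)(uintptr_t)&val,
        };

        if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg))
                return -1;      /* extension unknown to this kernel */

        val = 1;                /* request the extension for the guest */
        return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}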
Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
---
arch/riscv/include/uapi/asm/kvm.h | 1 +
arch/riscv/kvm/vcpu_onereg.c | 2 ++
2 files changed, 3 insertions(+)
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index ef27d4289da11..4fbc32ef888fa 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -185,6 +185,7 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_ZICCRSE,
KVM_RISCV_ISA_EXT_ZAAMO,
KVM_RISCV_ISA_EXT_ZALRSC,
+ KVM_RISCV_ISA_EXT_ZALASR,
KVM_RISCV_ISA_EXT_MAX,
};
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index cce6a38ea54f2..6ae5f9859f25b 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -50,6 +50,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
KVM_ISA_EXT_ARR(ZAAMO),
KVM_ISA_EXT_ARR(ZABHA),
KVM_ISA_EXT_ARR(ZACAS),
+ KVM_ISA_EXT_ARR(ZALASR),
KVM_ISA_EXT_ARR(ZALRSC),
KVM_ISA_EXT_ARR(ZAWRS),
KVM_ISA_EXT_ARR(ZBA),
@@ -184,6 +185,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
case KVM_RISCV_ISA_EXT_ZAAMO:
case KVM_RISCV_ISA_EXT_ZABHA:
case KVM_RISCV_ISA_EXT_ZACAS:
+ case KVM_RISCV_ISA_EXT_ZALASR:
case KVM_RISCV_ISA_EXT_ZALRSC:
case KVM_RISCV_ISA_EXT_ZAWRS:
case KVM_RISCV_ISA_EXT_ZBA:
--
2.20.1