From: guoren@kernel.org
To: guoren@kernel.org, arnd@arndb.de, palmer@dabbelt.com,
mark.rutland@arm.com, will@kernel.org, peterz@infradead.org,
boqun.feng@gmail.com
Cc: linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-riscv@lists.infradead.org,
Guo Ren <guoren@linux.alibaba.com>
Subject: [PATCH V2 2/3] riscv: atomic: Optimize acquire and release for AMO operations
Date: Tue, 12 Apr 2022 11:49:56 +0800 [thread overview]
Message-ID: <20220412034957.1481088-3-guoren@kernel.org> (raw)
In-Reply-To: <20220412034957.1481088-1-guoren@kernel.org>
From: Guo Ren <guoren@linux.alibaba.com>
Current acquire & release implementations from atomic-arch-
fallback.h are using __atomic_acquire/release_fence(), it cause
another extra "fence r, rw/fence rw,w" instruction after/before
AMO instruction. RISC-V AMO instructions could combine acquire
and release in the instruction self, we could gain two benefits by
using this feature:
- Reduce an instruction
- Prevent strong "fence r/fence ,w" which are used to protect
"lr/sc".
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Mark Rutland <mark.rutland@arm.com>
---
arch/riscv/include/asm/atomic.h | 64 ++++++++++++++++++++++++++++++++
arch/riscv/include/asm/cmpxchg.h | 12 ++----
2 files changed, 68 insertions(+), 8 deletions(-)
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index ac9bdf4fc404..20ce8b83bc18 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -99,6 +99,30 @@ c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i, \
return ret; \
} \
static __always_inline \
+c_type arch_atomic##prefix##_fetch_##op##_acquire(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ register c_type ret; \
+ __asm__ __volatile__ ( \
+ " amo" #asm_op "." #asm_type ".aq %1, %2, %0" \
+ : "+A" (v->counter), "=r" (ret) \
+ : "r" (I) \
+ : "memory"); \
+ return ret; \
+} \
+static __always_inline \
+c_type arch_atomic##prefix##_fetch_##op##_release(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ register c_type ret; \
+ __asm__ __volatile__ ( \
+ " amo" #asm_op "." #asm_type ".rl %1, %2, %0" \
+ : "+A" (v->counter), "=r" (ret) \
+ : "r" (I) \
+ : "memory"); \
+ return ret; \
+} \
+static __always_inline \
c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
{ \
register c_type ret; \
@@ -118,6 +142,18 @@ c_type arch_atomic##prefix##_##op##_return_relaxed(c_type i, \
return arch_atomic##prefix##_fetch_##op##_relaxed(i, v) c_op I; \
} \
static __always_inline \
+c_type arch_atomic##prefix##_##op##_return_acquire(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ return arch_atomic##prefix##_fetch_##op##_acquire(i, v) c_op I; \
+} \
+static __always_inline \
+c_type arch_atomic##prefix##_##op##_return_release(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ return arch_atomic##prefix##_fetch_##op##_release(i, v) c_op I; \
+} \
+static __always_inline \
c_type arch_atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v) \
{ \
return arch_atomic##prefix##_fetch_##op(i, v) c_op I; \
@@ -140,22 +176,38 @@ ATOMIC_OPS(sub, add, +, -i)
#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+#define arch_atomic_add_return_acquire arch_atomic_add_return_acquire
+#define arch_atomic_sub_return_acquire arch_atomic_sub_return_acquire
+#define arch_atomic_add_return_release arch_atomic_add_return_release
+#define arch_atomic_sub_return_release arch_atomic_sub_return_release
#define arch_atomic_add_return arch_atomic_add_return
#define arch_atomic_sub_return arch_atomic_sub_return
#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
+#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add_acquire
+#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub_acquire
+#define arch_atomic_fetch_add_release arch_atomic_fetch_add_release
+#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub_release
#define arch_atomic_fetch_add arch_atomic_fetch_add
#define arch_atomic_fetch_sub arch_atomic_fetch_sub
#ifndef CONFIG_GENERIC_ATOMIC64
#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
+#define arch_atomic64_add_return_acquire arch_atomic64_add_return_acquire
+#define arch_atomic64_sub_return_acquire arch_atomic64_sub_return_acquire
+#define arch_atomic64_add_return_release arch_atomic64_add_return_release
+#define arch_atomic64_sub_return_release arch_atomic64_sub_return_release
#define arch_atomic64_add_return arch_atomic64_add_return
#define arch_atomic64_sub_return arch_atomic64_sub_return
#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
+#define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add_acquire
+#define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub_acquire
+#define arch_atomic64_fetch_add_release arch_atomic64_fetch_add_release
+#define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub_release
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
#endif
@@ -178,6 +230,12 @@ ATOMIC_OPS(xor, xor, i)
#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
+#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and_acquire
+#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or_acquire
+#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor_acquire
+#define arch_atomic_fetch_and_release arch_atomic_fetch_and_release
+#define arch_atomic_fetch_or_release arch_atomic_fetch_or_release
+#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor_release
#define arch_atomic_fetch_and arch_atomic_fetch_and
#define arch_atomic_fetch_or arch_atomic_fetch_or
#define arch_atomic_fetch_xor arch_atomic_fetch_xor
@@ -186,6 +244,12 @@ ATOMIC_OPS(xor, xor, i)
#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
+#define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and_acquire
+#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or_acquire
+#define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor_acquire
+#define arch_atomic64_fetch_and_release arch_atomic64_fetch_and_release
+#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or_release
+#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor_release
#define arch_atomic64_fetch_and arch_atomic64_fetch_and
#define arch_atomic64_fetch_or arch_atomic64_fetch_or
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 12debce235e5..1af8db92250b 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -52,16 +52,14 @@
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
- " amoswap.w %0, %2, %1\n" \
- RISCV_ACQUIRE_BARRIER \
+ " amoswap.w.aq %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
- " amoswap.d %0, %2, %1\n" \
- RISCV_ACQUIRE_BARRIER \
+ " amoswap.d.aq %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
@@ -87,16 +85,14 @@
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
- RISCV_RELEASE_BARRIER \
- " amoswap.w %0, %2, %1\n" \
+ " amoswap.w.rl %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
- RISCV_RELEASE_BARRIER \
- " amoswap.d %0, %2, %1\n" \
+ " amoswap.d.rl %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
--
2.25.1
next prev parent reply other threads:[~2022-04-12 3:50 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-12 3:49 [PATCH V2 0/3] riscv: atomic: Optimize AMO instructions usage guoren
2022-04-12 3:49 ` [PATCH V2 1/3] riscv: atomic: Cleanup unnecessary definition guoren
2022-04-12 3:49 ` guoren [this message]
2022-04-12 3:49 ` [PATCH V2 3/3] riscv: atomic: Optimize memory barrier semantics of LRSC-pairs guoren
2022-04-13 15:46 ` [PATCH V2 0/3] riscv: atomic: Optimize AMO instructions usage Boqun Feng
2022-04-16 16:49 ` Guo Ren
2022-04-17 2:26 ` Boqun Feng
2022-04-17 4:51 ` Guo Ren
2022-04-17 6:30 ` Boqun Feng
2022-04-17 6:45 ` Guo Ren
2022-04-19 17:12 ` Dan Lustig
2022-04-20 5:33 ` Guo Ren
2022-04-20 17:03 ` Dan Lustig
2022-04-21 9:39 ` Guo Ren
2022-04-21 22:56 ` Boqun Feng
2022-04-22 1:56 ` Guo Ren
2022-04-22 3:11 ` Boqun Feng
2022-04-24 7:52 ` Guo Ren
2022-04-18 23:41 ` Andrea Parri
2022-04-19 17:13 ` Dan Lustig
2022-04-24 8:33 ` Guo Ren
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220412034957.1481088-3-guoren@kernel.org \
--to=guoren@kernel.org \
--cc=arnd@arndb.de \
--cc=boqun.feng@gmail.com \
--cc=guoren@linux.alibaba.com \
--cc=linux-arch@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-riscv@lists.infradead.org \
--cc=mark.rutland@arm.com \
--cc=palmer@dabbelt.com \
--cc=peterz@infradead.org \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox