From: Puranjay Mohan <puranjay@kernel.org>
To: Will Deacon <will@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
Boqun Feng <boqun.feng@gmail.com>,
Mark Rutland <mark.rutland@arm.com>,
Paul Walmsley <paul.walmsley@sifive.com>,
Palmer Dabbelt <palmer@dabbelt.com>,
Albert Ou <aou@eecs.berkeley.edu>,
linux-kernel@vger.kernel.org, linux-riscv@lists.infradead.org
Cc: puranjay12@gmail.com
Subject: [PATCH] riscv/atomic.h: optimize ops with acquire/release ordering
Date: Sun, 5 May 2024 12:33:40 +0000 [thread overview]
Message-ID: <20240505123340.38495-1-puranjay@kernel.org> (raw)
Currently, atomic ops with acquire or release ordering are implemented
as atomic ops with relaxed ordering that are either followed by an
acquire fence or preceded by a release fence, respectively.
Section 8.1 of "The RISC-V Instruction Set Manual Volume I:
Unprivileged ISA", titled "Specifying Ordering of Atomic Instructions",
says:
| To provide more efficient support for release consistency [5], each
| atomic instruction has two bits, aq and rl, used to specify additional
| memory ordering constraints as viewed by other RISC-V harts.
and
| If only the aq bit is set, the atomic memory operation is treated as
| an acquire access.
| If only the rl bit is set, the atomic memory operation is treated as a
| release access.
So, rather than using two instructions (relaxed atomic op + fence), use
a single atomic op instruction with acquire/release ordering.
Example program:
atomic_t cnt = ATOMIC_INIT(0);
atomic_fetch_add_acquire(1, &cnt);
atomic_fetch_add_release(1, &cnt);
Before:
amoadd.w a4,a5,(a4) // Atomic add with relaxed ordering
fence r,rw // Fence to force Acquire ordering
fence rw,w // Fence to force Release ordering
amoadd.w a4,a5,(a4) // Atomic add with relaxed ordering
After:
amoadd.w.aq a4,a5,(a4) // Atomic add with Acquire ordering
amoadd.w.rl a4,a5,(a4) // Atomic add with Release ordering
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
---
arch/riscv/include/asm/atomic.h | 64 +++++++++++++++++++++++++++++++++
1 file changed, 64 insertions(+)
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 5b96c2f61adb..024e83936910 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -98,6 +98,30 @@ c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i, \
return ret; \
} \
static __always_inline \
+c_type arch_atomic##prefix##_fetch_##op##_acquire(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ register c_type ret; \
+ __asm__ __volatile__ ( \
+ " amo" #asm_op "." #asm_type ".aq %1, %2, %0" \
+ : "+A" (v->counter), "=r" (ret) \
+ : "r" (I) \
+ : "memory"); \
+ return ret; \
+} \
+static __always_inline \
+c_type arch_atomic##prefix##_fetch_##op##_release(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ register c_type ret; \
+ __asm__ __volatile__ ( \
+ " amo" #asm_op "." #asm_type ".rl %1, %2, %0" \
+ : "+A" (v->counter), "=r" (ret) \
+ : "r" (I) \
+ : "memory"); \
+ return ret; \
+} \
+static __always_inline \
c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
{ \
register c_type ret; \
@@ -117,6 +141,18 @@ c_type arch_atomic##prefix##_##op##_return_relaxed(c_type i, \
return arch_atomic##prefix##_fetch_##op##_relaxed(i, v) c_op I; \
} \
static __always_inline \
+c_type arch_atomic##prefix##_##op##_return_acquire(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ return arch_atomic##prefix##_fetch_##op##_acquire(i, v) c_op I; \
+} \
+static __always_inline \
+c_type arch_atomic##prefix##_##op##_return_release(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ return arch_atomic##prefix##_fetch_##op##_release(i, v) c_op I; \
+} \
+static __always_inline \
c_type arch_atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v) \
{ \
return arch_atomic##prefix##_fetch_##op(i, v) c_op I; \
@@ -138,23 +174,39 @@ ATOMIC_OPS(add, add, +, i)
ATOMIC_OPS(sub, add, +, -i)
#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_add_return_acquire arch_atomic_add_return_acquire
+#define arch_atomic_add_return_release arch_atomic_add_return_release
#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+#define arch_atomic_sub_return_acquire arch_atomic_sub_return_acquire
+#define arch_atomic_sub_return_release arch_atomic_sub_return_release
#define arch_atomic_add_return arch_atomic_add_return
#define arch_atomic_sub_return arch_atomic_sub_return
#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add_acquire
+#define arch_atomic_fetch_add_release arch_atomic_fetch_add_release
#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
+#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub_acquire
+#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub_release
#define arch_atomic_fetch_add arch_atomic_fetch_add
#define arch_atomic_fetch_sub arch_atomic_fetch_sub
#ifndef CONFIG_GENERIC_ATOMIC64
#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
+#define arch_atomic64_add_return_acquire arch_atomic64_add_return_acquire
+#define arch_atomic64_add_return_release arch_atomic64_add_return_release
#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
+#define arch_atomic64_sub_return_acquire arch_atomic64_sub_return_acquire
+#define arch_atomic64_sub_return_release arch_atomic64_sub_return_release
#define arch_atomic64_add_return arch_atomic64_add_return
#define arch_atomic64_sub_return arch_atomic64_sub_return
#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add_acquire
+#define arch_atomic64_fetch_add_release arch_atomic64_fetch_add_release
#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
+#define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub_acquire
+#define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub_release
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
#endif
@@ -175,16 +227,28 @@ ATOMIC_OPS( or, or, i)
ATOMIC_OPS(xor, xor, i)
#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and_acquire
+#define arch_atomic_fetch_and_release arch_atomic_fetch_and_release
#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or_acquire
+#define arch_atomic_fetch_or_release arch_atomic_fetch_or_release
#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
+#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor_acquire
+#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor_release
#define arch_atomic_fetch_and arch_atomic_fetch_and
#define arch_atomic_fetch_or arch_atomic_fetch_or
#define arch_atomic_fetch_xor arch_atomic_fetch_xor
#ifndef CONFIG_GENERIC_ATOMIC64
#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and_acquire
+#define arch_atomic64_fetch_and_release arch_atomic64_fetch_and_release
#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or_acquire
+#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or_release
#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
+#define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor_acquire
+#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor_release
#define arch_atomic64_fetch_and arch_atomic64_fetch_and
#define arch_atomic64_fetch_or arch_atomic64_fetch_or
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
--
2.40.1
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
WARNING: multiple messages have this Message-ID (diff)
From: Puranjay Mohan <puranjay@kernel.org>
To: Will Deacon <will@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
Boqun Feng <boqun.feng@gmail.com>,
Mark Rutland <mark.rutland@arm.com>,
Paul Walmsley <paul.walmsley@sifive.com>,
Palmer Dabbelt <palmer@dabbelt.com>,
Albert Ou <aou@eecs.berkeley.edu>,
linux-kernel@vger.kernel.org, linux-riscv@lists.infradead.org
Cc: puranjay12@gmail.com
Subject: [PATCH] riscv/atomic.h: optimize ops with acquire/release ordering
Date: Sun, 5 May 2024 12:33:40 +0000 [thread overview]
Message-ID: <20240505123340.38495-1-puranjay@kernel.org> (raw)
Currently, atomic ops with acquire or release ordering are implemented
as atomic ops with relaxed ordering that are either followed by an
acquire fence or preceded by a release fence, respectively.
Section 8.1 of "The RISC-V Instruction Set Manual Volume I:
Unprivileged ISA", titled "Specifying Ordering of Atomic Instructions",
says:
| To provide more efficient support for release consistency [5], each
| atomic instruction has two bits, aq and rl, used to specify additional
| memory ordering constraints as viewed by other RISC-V harts.
and
| If only the aq bit is set, the atomic memory operation is treated as
| an acquire access.
| If only the rl bit is set, the atomic memory operation is treated as a
| release access.
So, rather than using two instructions (relaxed atomic op + fence), use
a single atomic op instruction with acquire/release ordering.
Example program:
atomic_t cnt = ATOMIC_INIT(0);
atomic_fetch_add_acquire(1, &cnt);
atomic_fetch_add_release(1, &cnt);
Before:
amoadd.w a4,a5,(a4) // Atomic add with relaxed ordering
fence r,rw // Fence to force Acquire ordering
fence rw,w // Fence to force Release ordering
amoadd.w a4,a5,(a4) // Atomic add with relaxed ordering
After:
amoadd.w.aq a4,a5,(a4) // Atomic add with Acquire ordering
amoadd.w.rl a4,a5,(a4) // Atomic add with Release ordering
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
---
arch/riscv/include/asm/atomic.h | 64 +++++++++++++++++++++++++++++++++
1 file changed, 64 insertions(+)
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 5b96c2f61adb..024e83936910 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -98,6 +98,30 @@ c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i, \
return ret; \
} \
static __always_inline \
+c_type arch_atomic##prefix##_fetch_##op##_acquire(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ register c_type ret; \
+ __asm__ __volatile__ ( \
+ " amo" #asm_op "." #asm_type ".aq %1, %2, %0" \
+ : "+A" (v->counter), "=r" (ret) \
+ : "r" (I) \
+ : "memory"); \
+ return ret; \
+} \
+static __always_inline \
+c_type arch_atomic##prefix##_fetch_##op##_release(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ register c_type ret; \
+ __asm__ __volatile__ ( \
+ " amo" #asm_op "." #asm_type ".rl %1, %2, %0" \
+ : "+A" (v->counter), "=r" (ret) \
+ : "r" (I) \
+ : "memory"); \
+ return ret; \
+} \
+static __always_inline \
c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
{ \
register c_type ret; \
@@ -117,6 +141,18 @@ c_type arch_atomic##prefix##_##op##_return_relaxed(c_type i, \
return arch_atomic##prefix##_fetch_##op##_relaxed(i, v) c_op I; \
} \
static __always_inline \
+c_type arch_atomic##prefix##_##op##_return_acquire(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ return arch_atomic##prefix##_fetch_##op##_acquire(i, v) c_op I; \
+} \
+static __always_inline \
+c_type arch_atomic##prefix##_##op##_return_release(c_type i, \
+ atomic##prefix##_t *v) \
+{ \
+ return arch_atomic##prefix##_fetch_##op##_release(i, v) c_op I; \
+} \
+static __always_inline \
c_type arch_atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v) \
{ \
return arch_atomic##prefix##_fetch_##op(i, v) c_op I; \
@@ -138,23 +174,39 @@ ATOMIC_OPS(add, add, +, i)
ATOMIC_OPS(sub, add, +, -i)
#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_add_return_acquire arch_atomic_add_return_acquire
+#define arch_atomic_add_return_release arch_atomic_add_return_release
#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+#define arch_atomic_sub_return_acquire arch_atomic_sub_return_acquire
+#define arch_atomic_sub_return_release arch_atomic_sub_return_release
#define arch_atomic_add_return arch_atomic_add_return
#define arch_atomic_sub_return arch_atomic_sub_return
#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add_acquire
+#define arch_atomic_fetch_add_release arch_atomic_fetch_add_release
#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
+#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub_acquire
+#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub_release
#define arch_atomic_fetch_add arch_atomic_fetch_add
#define arch_atomic_fetch_sub arch_atomic_fetch_sub
#ifndef CONFIG_GENERIC_ATOMIC64
#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
+#define arch_atomic64_add_return_acquire arch_atomic64_add_return_acquire
+#define arch_atomic64_add_return_release arch_atomic64_add_return_release
#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
+#define arch_atomic64_sub_return_acquire arch_atomic64_sub_return_acquire
+#define arch_atomic64_sub_return_release arch_atomic64_sub_return_release
#define arch_atomic64_add_return arch_atomic64_add_return
#define arch_atomic64_sub_return arch_atomic64_sub_return
#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add_acquire
+#define arch_atomic64_fetch_add_release arch_atomic64_fetch_add_release
#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
+#define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub_acquire
+#define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub_release
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
#endif
@@ -175,16 +227,28 @@ ATOMIC_OPS( or, or, i)
ATOMIC_OPS(xor, xor, i)
#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and_acquire
+#define arch_atomic_fetch_and_release arch_atomic_fetch_and_release
#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or_acquire
+#define arch_atomic_fetch_or_release arch_atomic_fetch_or_release
#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
+#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor_acquire
+#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor_release
#define arch_atomic_fetch_and arch_atomic_fetch_and
#define arch_atomic_fetch_or arch_atomic_fetch_or
#define arch_atomic_fetch_xor arch_atomic_fetch_xor
#ifndef CONFIG_GENERIC_ATOMIC64
#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and_acquire
+#define arch_atomic64_fetch_and_release arch_atomic64_fetch_and_release
#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or_acquire
+#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or_release
#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
+#define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor_acquire
+#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor_release
#define arch_atomic64_fetch_and arch_atomic64_fetch_and
#define arch_atomic64_fetch_or arch_atomic64_fetch_or
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
--
2.40.1
next reply other threads:[~2024-05-05 12:34 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-05 12:33 Puranjay Mohan [this message]
2024-05-05 12:33 ` [PATCH] riscv/atomic.h: optimize ops with acquire/release ordering Puranjay Mohan
2024-05-05 22:45 ` Andrea Parri
2024-05-05 22:45 ` Andrea Parri
2024-05-07 14:07 ` Puranjay Mohan
2024-05-07 14:07 ` Puranjay Mohan
2024-05-07 22:27 ` Andrea Parri
2024-05-07 22:27 ` Andrea Parri
2024-05-08 13:58 ` Puranjay Mohan
2024-05-08 13:58 ` Puranjay Mohan
2024-05-08 15:04 ` Andrea Parri
2024-05-08 15:04 ` Andrea Parri
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240505123340.38495-1-puranjay@kernel.org \
--to=puranjay@kernel.org \
--cc=aou@eecs.berkeley.edu \
--cc=boqun.feng@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-riscv@lists.infradead.org \
--cc=mark.rutland@arm.com \
--cc=palmer@dabbelt.com \
--cc=paul.walmsley@sifive.com \
--cc=peterz@infradead.org \
--cc=puranjay12@gmail.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.