* [PATCH v3 1/4] locking/atomic/x86: Introduce arch_atomic64_try_cmpxchg to x86_32
@ 2024-04-10 6:29 Uros Bizjak
2024-04-10 6:29 ` [PATCH v3 2/4] locking/atomic/x86: Introduce arch_atomic64_read_nonatomic " Uros Bizjak
` (3 more replies)
0 siblings, 4 replies; 8+ messages in thread
From: Uros Bizjak @ 2024-04-10 6:29 UTC (permalink / raw)
To: x86, linux-kernel
Cc: Uros Bizjak, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
Dave Hansen, H. Peter Anvin, Peter Zijlstra
Introduce arch_atomic64_try_cmpxchg() for 32-bit targets to use an
optimized target-specific implementation instead of the generic one.
This implementation eliminates the dual-word compare after the
cmpxchg8b instruction and improves the generated asm code from:
2273: f0 0f c7 0f lock cmpxchg8b (%edi)
2277: 8b 74 24 2c mov 0x2c(%esp),%esi
227b: 89 d3 mov %edx,%ebx
227d: 89 c2 mov %eax,%edx
227f: 89 5c 24 10 mov %ebx,0x10(%esp)
2283: 8b 7c 24 30 mov 0x30(%esp),%edi
2287: 89 44 24 1c mov %eax,0x1c(%esp)
228b: 31 f2 xor %esi,%edx
228d: 89 d0 mov %edx,%eax
228f: 89 da mov %ebx,%edx
2291: 31 fa xor %edi,%edx
2293: 09 d0 or %edx,%eax
2295: 0f 85 a5 00 00 00 jne 2340 <...>
to:
2270: f0 0f c7 0f lock cmpxchg8b (%edi)
2274: 0f 85 a6 00 00 00 jne 2320 <...>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
arch/x86/include/asm/atomic64_32.h | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index d510405e4e1d..11e817dab44a 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -61,12 +61,18 @@ ATOMIC64_DECL(add_unless);
#undef __ATOMIC64_DECL
#undef ATOMIC64_EXPORT
-static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
+static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
- return arch_cmpxchg64(&v->counter, o, n);
+ return arch_cmpxchg64(&v->counter, old, new);
}
#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
+static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
+{
+ return arch_try_cmpxchg64(&v->counter, old, new);
+}
+#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
+
static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
{
s64 o;
--
2.44.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v3 2/4] locking/atomic/x86: Introduce arch_atomic64_read_nonatomic to x86_32
2024-04-10 6:29 [PATCH v3 1/4] locking/atomic/x86: Introduce arch_atomic64_try_cmpxchg to x86_32 Uros Bizjak
@ 2024-04-10 6:29 ` Uros Bizjak
2024-04-10 13:40 ` [tip: locking/core] locking/atomic/x86: Introduce arch_atomic64_read_nonatomic() " tip-bot2 for Uros Bizjak
2024-04-10 6:29 ` [PATCH v3 3/4] locking/atomic/x86: Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions Uros Bizjak
` (2 subsequent siblings)
3 siblings, 1 reply; 8+ messages in thread
From: Uros Bizjak @ 2024-04-10 6:29 UTC (permalink / raw)
To: x86, linux-kernel
Cc: Uros Bizjak, Mark Rutland, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, Dave Hansen, H. Peter Anvin, Peter Zijlstra
Introduce arch_atomic64_read_nonatomic for 32-bit targets to load
the value from atomic64_t location in a non-atomic way. This
function is intended to be used in cases where a subsequent atomic
operation will handle the torn value, and can be used to prime the
first iteration of unconditional try_cmpxchg() loops.
Suggested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
v2: New patch.
v3: Expand function comment.
---
arch/x86/include/asm/atomic64_32.h | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 11e817dab44a..bc76a88ae481 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -14,6 +14,32 @@ typedef struct {
#define ATOMIC64_INIT(val) { (val) }
+/*
+ * Read an atomic64_t non-atomically.
+ *
+ * This is intended to be used in cases where a subsequent atomic operation
+ * will handle the torn value, and can be used to prime the first iteration
+ * of unconditional try_cmpxchg() loops, e.g.:
+ *
+ * s64 val = arch_atomic64_read_nonatomic(v);
+ * do { } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i));
+ *
+ * This is NOT safe to use where the value is not always checked by a
+ * subsequent atomic operation, such as in conditional try_cmpxchg() loops
+ * that can break before the atomic operation, e.g.:
+ *
+ * s64 val = arch_atomic64_read_nonatomic(v);
+ * do {
+ * if (condition(val))
+ * break;
+ * } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i));
+ */
+static __always_inline s64 arch_atomic64_read_nonatomic(const atomic64_t *v)
+{
+ /* See comment in arch_atomic_read(). */
+ return __READ_ONCE(v->counter);
+}
+
#define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...)
#ifndef ATOMIC64_EXPORT
#define ATOMIC64_DECL_ONE __ATOMIC64_DECL
--
2.44.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v3 3/4] locking/atomic/x86: Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions
2024-04-10 6:29 [PATCH v3 1/4] locking/atomic/x86: Introduce arch_atomic64_try_cmpxchg to x86_32 Uros Bizjak
2024-04-10 6:29 ` [PATCH v3 2/4] locking/atomic/x86: Introduce arch_atomic64_read_nonatomic " Uros Bizjak
@ 2024-04-10 6:29 ` Uros Bizjak
2024-04-10 13:40 ` [tip: locking/core] " tip-bot2 for Uros Bizjak
2024-04-10 6:29 ` [PATCH v3 4/4] locking/atomic/x86: Define arch_atomic_sub() family using arch_atomic_add() functions Uros Bizjak
2024-04-10 13:40 ` [tip: locking/core] locking/atomic/x86: Introduce arch_atomic64_try_cmpxchg() to x86_32 tip-bot2 for Uros Bizjak
3 siblings, 1 reply; 8+ messages in thread
From: Uros Bizjak @ 2024-04-10 6:29 UTC (permalink / raw)
To: x86, linux-kernel
Cc: Uros Bizjak, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
Dave Hansen, H. Peter Anvin, Peter Zijlstra
Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions to
use arch_atomic64_try_cmpxchg. This implementation avoids one extra
trip through the cmpxchg loop.
The value preload before the cmpxchg loop does not need to be atomic.
Use arch_atomic64_read_nonatomic(v) to load the value from the
atomic64_t location in a non-atomic way.
The generated code improves from:
1917d5: 31 c9 xor %ecx,%ecx
1917d7: 31 db xor %ebx,%ebx
1917d9: 89 4c 24 3c mov %ecx,0x3c(%esp)
1917dd: 8b 74 24 24 mov 0x24(%esp),%esi
1917e1: 89 c8 mov %ecx,%eax
1917e3: 89 5c 24 34 mov %ebx,0x34(%esp)
1917e7: 8b 7c 24 28 mov 0x28(%esp),%edi
1917eb: 21 ce and %ecx,%esi
1917ed: 89 74 24 4c mov %esi,0x4c(%esp)
1917f1: 21 df and %ebx,%edi
1917f3: 89 de mov %ebx,%esi
1917f5: 89 7c 24 50 mov %edi,0x50(%esp)
1917f9: 8b 54 24 4c mov 0x4c(%esp),%edx
1917fd: 8b 7c 24 2c mov 0x2c(%esp),%edi
191801: 8b 4c 24 50 mov 0x50(%esp),%ecx
191805: 89 d3 mov %edx,%ebx
191807: 89 f2 mov %esi,%edx
191809: f0 0f c7 0f lock cmpxchg8b (%edi)
19180d: 89 c1 mov %eax,%ecx
19180f: 8b 74 24 34 mov 0x34(%esp),%esi
191813: 89 d3 mov %edx,%ebx
191815: 89 44 24 4c mov %eax,0x4c(%esp)
191819: 8b 44 24 3c mov 0x3c(%esp),%eax
19181d: 89 df mov %ebx,%edi
19181f: 89 54 24 44 mov %edx,0x44(%esp)
191823: 89 ca mov %ecx,%edx
191825: 31 de xor %ebx,%esi
191827: 31 c8 xor %ecx,%eax
191829: 09 f0 or %esi,%eax
19182b: 75 ac jne 1917d9 <...>
to:
1912ba: 8b 06 mov (%esi),%eax
1912bc: 8b 56 04 mov 0x4(%esi),%edx
1912bf: 89 44 24 3c mov %eax,0x3c(%esp)
1912c3: 89 c1 mov %eax,%ecx
1912c5: 23 4c 24 34 and 0x34(%esp),%ecx
1912c9: 89 d3 mov %edx,%ebx
1912cb: 23 5c 24 38 and 0x38(%esp),%ebx
1912cf: 89 54 24 40 mov %edx,0x40(%esp)
1912d3: 89 4c 24 2c mov %ecx,0x2c(%esp)
1912d7: 89 5c 24 30 mov %ebx,0x30(%esp)
1912db: 8b 5c 24 2c mov 0x2c(%esp),%ebx
1912df: 8b 4c 24 30 mov 0x30(%esp),%ecx
1912e3: f0 0f c7 0e lock cmpxchg8b (%esi)
1912e7: 0f 85 f3 02 00 00 jne 1915e0 <...>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
v2: Use arch_atomic64_read_nonatomic().
---
arch/x86/include/asm/atomic64_32.h | 43 +++++++++++++-----------------
1 file changed, 18 insertions(+), 25 deletions(-)
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index bc76a88ae481..8db2ec4d6cda 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -227,69 +227,62 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
}
static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_and arch_atomic64_fetch_and
static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
}
static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_or arch_atomic64_fetch_or
static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
}
static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val + i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
--
2.44.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v3 4/4] locking/atomic/x86: Define arch_atomic_sub() family using arch_atomic_add() functions
2024-04-10 6:29 [PATCH v3 1/4] locking/atomic/x86: Introduce arch_atomic64_try_cmpxchg to x86_32 Uros Bizjak
2024-04-10 6:29 ` [PATCH v3 2/4] locking/atomic/x86: Introduce arch_atomic64_read_nonatomic " Uros Bizjak
2024-04-10 6:29 ` [PATCH v3 3/4] locking/atomic/x86: Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions Uros Bizjak
@ 2024-04-10 6:29 ` Uros Bizjak
2024-04-10 13:40 ` [tip: locking/core] " tip-bot2 for Uros Bizjak
2024-04-10 13:40 ` [tip: locking/core] locking/atomic/x86: Introduce arch_atomic64_try_cmpxchg() to x86_32 tip-bot2 for Uros Bizjak
3 siblings, 1 reply; 8+ messages in thread
From: Uros Bizjak @ 2024-04-10 6:29 UTC (permalink / raw)
To: x86, linux-kernel
Cc: Uros Bizjak, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
Dave Hansen, H. Peter Anvin, Peter Zijlstra
There is no need to implement the arch_atomic_sub() family of inline
functions; the corresponding macros can be implemented directly using
the arch_atomic_add() inlines with a negated argument.
No functional changes intended.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
arch/x86/include/asm/atomic.h | 12 ++----------
arch/x86/include/asm/atomic64_64.h | 12 ++----------
2 files changed, 4 insertions(+), 20 deletions(-)
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 55a55ec04350..55b4d24356ea 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -86,11 +86,7 @@ static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
}
#define arch_atomic_add_return arch_atomic_add_return
-static __always_inline int arch_atomic_sub_return(int i, atomic_t *v)
-{
- return arch_atomic_add_return(-i, v);
-}
-#define arch_atomic_sub_return arch_atomic_sub_return
+#define arch_atomic_sub_return(i, v) arch_atomic_add_return(-(i), v)
static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
{
@@ -98,11 +94,7 @@ static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
}
#define arch_atomic_fetch_add arch_atomic_fetch_add
-static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v)
-{
- return xadd(&v->counter, -i);
-}
-#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+#define arch_atomic_fetch_sub(i, v) arch_atomic_fetch_add(-(i), v)
static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
{
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 3165c0feedf7..ae12acae5b06 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -80,11 +80,7 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
}
#define arch_atomic64_add_return arch_atomic64_add_return
-static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
-{
- return arch_atomic64_add_return(-i, v);
-}
-#define arch_atomic64_sub_return arch_atomic64_sub_return
+#define arch_atomic64_sub_return(i, v) arch_atomic64_add_return(-(i), v)
static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
@@ -92,11 +88,7 @@ static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
-static __always_inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v)
-{
- return xadd(&v->counter, -i);
-}
-#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
+#define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), v)
static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
--
2.44.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [tip: locking/core] locking/atomic/x86: Define arch_atomic_sub() family using arch_atomic_add() functions
2024-04-10 6:29 ` [PATCH v3 4/4] locking/atomic/x86: Define arch_atomic_sub() family using arch_atomic_add() functions Uros Bizjak
@ 2024-04-10 13:40 ` tip-bot2 for Uros Bizjak
0 siblings, 0 replies; 8+ messages in thread
From: tip-bot2 for Uros Bizjak @ 2024-04-10 13:40 UTC (permalink / raw)
To: linux-tip-commits
Cc: Uros Bizjak, Ingo Molnar, Linus Torvalds, x86, linux-kernel
The following commit has been merged into the locking/core branch of tip:
Commit-ID: 21689e4bfb9ae8f8b45279c53faecaa5a056ffa5
Gitweb: https://git.kernel.org/tip/21689e4bfb9ae8f8b45279c53faecaa5a056ffa5
Author: Uros Bizjak <ubizjak@gmail.com>
AuthorDate: Wed, 10 Apr 2024 08:29:36 +02:00
Committer: Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 10 Apr 2024 15:04:55 +02:00
locking/atomic/x86: Define arch_atomic_sub() family using arch_atomic_add() functions
There is no need to implement the arch_atomic_sub() family of inline
functions; the corresponding macros can be implemented directly using
the arch_atomic_add() inlines with a negated argument.
No functional changes intended.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20240410062957.322614-4-ubizjak@gmail.com
---
arch/x86/include/asm/atomic.h | 12 ++----------
arch/x86/include/asm/atomic64_64.h | 12 ++----------
2 files changed, 4 insertions(+), 20 deletions(-)
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 55a55ec..55b4d24 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -86,11 +86,7 @@ static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
}
#define arch_atomic_add_return arch_atomic_add_return
-static __always_inline int arch_atomic_sub_return(int i, atomic_t *v)
-{
- return arch_atomic_add_return(-i, v);
-}
-#define arch_atomic_sub_return arch_atomic_sub_return
+#define arch_atomic_sub_return(i, v) arch_atomic_add_return(-(i), v)
static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
{
@@ -98,11 +94,7 @@ static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
}
#define arch_atomic_fetch_add arch_atomic_fetch_add
-static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v)
-{
- return xadd(&v->counter, -i);
-}
-#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+#define arch_atomic_fetch_sub(i, v) arch_atomic_fetch_add(-(i), v)
static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
{
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 3165c0f..ae12aca 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -80,11 +80,7 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
}
#define arch_atomic64_add_return arch_atomic64_add_return
-static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
-{
- return arch_atomic64_add_return(-i, v);
-}
-#define arch_atomic64_sub_return arch_atomic64_sub_return
+#define arch_atomic64_sub_return(i, v) arch_atomic64_add_return(-(i), v)
static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
@@ -92,11 +88,7 @@ static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
-static __always_inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v)
-{
- return xadd(&v->counter, -i);
-}
-#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
+#define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), v)
static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [tip: locking/core] locking/atomic/x86: Introduce arch_atomic64_read_nonatomic() to x86_32
2024-04-10 6:29 ` [PATCH v3 2/4] locking/atomic/x86: Introduce arch_atomic64_read_nonatomic " Uros Bizjak
@ 2024-04-10 13:40 ` tip-bot2 for Uros Bizjak
0 siblings, 0 replies; 8+ messages in thread
From: tip-bot2 for Uros Bizjak @ 2024-04-10 13:40 UTC (permalink / raw)
To: linux-tip-commits
Cc: Mark Rutland, Uros Bizjak, Ingo Molnar, Linus Torvalds, x86,
linux-kernel
The following commit has been merged into the locking/core branch of tip:
Commit-ID: e73c4e34a0e9e3dfcb4e5ee4ccd3039a7b603218
Gitweb: https://git.kernel.org/tip/e73c4e34a0e9e3dfcb4e5ee4ccd3039a7b603218
Author: Uros Bizjak <ubizjak@gmail.com>
AuthorDate: Wed, 10 Apr 2024 08:29:34 +02:00
Committer: Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 10 Apr 2024 15:04:54 +02:00
locking/atomic/x86: Introduce arch_atomic64_read_nonatomic() to x86_32
Introduce arch_atomic64_read_nonatomic() for 32-bit targets to load
the value from atomic64_t location in a non-atomic way. This
function is intended to be used in cases where a subsequent atomic
operation will handle the torn value, and can be used to prime the
first iteration of unconditional try_cmpxchg() loops.
Suggested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20240410062957.322614-2-ubizjak@gmail.com
---
arch/x86/include/asm/atomic64_32.h | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index ec217aa..bbdf174 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -14,6 +14,32 @@ typedef struct {
#define ATOMIC64_INIT(val) { (val) }
+/*
+ * Read an atomic64_t non-atomically.
+ *
+ * This is intended to be used in cases where a subsequent atomic operation
+ * will handle the torn value, and can be used to prime the first iteration
+ * of unconditional try_cmpxchg() loops, e.g.:
+ *
+ * s64 val = arch_atomic64_read_nonatomic(v);
+ * do { } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i));
+ *
+ * This is NOT safe to use where the value is not always checked by a
+ * subsequent atomic operation, such as in conditional try_cmpxchg() loops
+ * that can break before the atomic operation, e.g.:
+ *
+ * s64 val = arch_atomic64_read_nonatomic(v);
+ * do {
+ * if (condition(val))
+ * break;
+ * } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i));
+ */
+static __always_inline s64 arch_atomic64_read_nonatomic(const atomic64_t *v)
+{
+ /* See comment in arch_atomic_read(). */
+ return __READ_ONCE(v->counter);
+}
+
#define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...)
#ifndef ATOMIC64_EXPORT
#define ATOMIC64_DECL_ONE __ATOMIC64_DECL
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [tip: locking/core] locking/atomic/x86: Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions
2024-04-10 6:29 ` [PATCH v3 3/4] locking/atomic/x86: Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions Uros Bizjak
@ 2024-04-10 13:40 ` tip-bot2 for Uros Bizjak
0 siblings, 0 replies; 8+ messages in thread
From: tip-bot2 for Uros Bizjak @ 2024-04-10 13:40 UTC (permalink / raw)
To: linux-tip-commits
Cc: Uros Bizjak, Ingo Molnar, Linus Torvalds, x86, linux-kernel
The following commit has been merged into the locking/core branch of tip:
Commit-ID: 95ece48165c136b96fae0f6144f55cbf8b24aeb9
Gitweb: https://git.kernel.org/tip/95ece48165c136b96fae0f6144f55cbf8b24aeb9
Author: Uros Bizjak <ubizjak@gmail.com>
AuthorDate: Wed, 10 Apr 2024 08:29:35 +02:00
Committer: Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 10 Apr 2024 15:04:55 +02:00
locking/atomic/x86: Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions
Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions to
use arch_atomic64_try_cmpxchg(). This implementation avoids one extra
trip through the CMPXCHG loop.
The value preload before the cmpxchg loop does not need to be atomic.
Use arch_atomic64_read_nonatomic(v) to load the value from the
atomic64_t location in a non-atomic way.
The generated code improves from:
1917d5: 31 c9 xor %ecx,%ecx
1917d7: 31 db xor %ebx,%ebx
1917d9: 89 4c 24 3c mov %ecx,0x3c(%esp)
1917dd: 8b 74 24 24 mov 0x24(%esp),%esi
1917e1: 89 c8 mov %ecx,%eax
1917e3: 89 5c 24 34 mov %ebx,0x34(%esp)
1917e7: 8b 7c 24 28 mov 0x28(%esp),%edi
1917eb: 21 ce and %ecx,%esi
1917ed: 89 74 24 4c mov %esi,0x4c(%esp)
1917f1: 21 df and %ebx,%edi
1917f3: 89 de mov %ebx,%esi
1917f5: 89 7c 24 50 mov %edi,0x50(%esp)
1917f9: 8b 54 24 4c mov 0x4c(%esp),%edx
1917fd: 8b 7c 24 2c mov 0x2c(%esp),%edi
191801: 8b 4c 24 50 mov 0x50(%esp),%ecx
191805: 89 d3 mov %edx,%ebx
191807: 89 f2 mov %esi,%edx
191809: f0 0f c7 0f lock cmpxchg8b (%edi)
19180d: 89 c1 mov %eax,%ecx
19180f: 8b 74 24 34 mov 0x34(%esp),%esi
191813: 89 d3 mov %edx,%ebx
191815: 89 44 24 4c mov %eax,0x4c(%esp)
191819: 8b 44 24 3c mov 0x3c(%esp),%eax
19181d: 89 df mov %ebx,%edi
19181f: 89 54 24 44 mov %edx,0x44(%esp)
191823: 89 ca mov %ecx,%edx
191825: 31 de xor %ebx,%esi
191827: 31 c8 xor %ecx,%eax
191829: 09 f0 or %esi,%eax
19182b: 75 ac jne 1917d9 <...>
to:
1912ba: 8b 06 mov (%esi),%eax
1912bc: 8b 56 04 mov 0x4(%esi),%edx
1912bf: 89 44 24 3c mov %eax,0x3c(%esp)
1912c3: 89 c1 mov %eax,%ecx
1912c5: 23 4c 24 34 and 0x34(%esp),%ecx
1912c9: 89 d3 mov %edx,%ebx
1912cb: 23 5c 24 38 and 0x38(%esp),%ebx
1912cf: 89 54 24 40 mov %edx,0x40(%esp)
1912d3: 89 4c 24 2c mov %ecx,0x2c(%esp)
1912d7: 89 5c 24 30 mov %ebx,0x30(%esp)
1912db: 8b 5c 24 2c mov 0x2c(%esp),%ebx
1912df: 8b 4c 24 30 mov 0x30(%esp),%ecx
1912e3: f0 0f c7 0e lock cmpxchg8b (%esi)
1912e7: 0f 85 f3 02 00 00 jne 1915e0 <...>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20240410062957.322614-3-ubizjak@gmail.com
---
arch/x86/include/asm/atomic64_32.h | 43 ++++++++++++-----------------
1 file changed, 18 insertions(+), 25 deletions(-)
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index bbdf174..40ff73b 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -227,69 +227,62 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
}
static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_and arch_atomic64_fetch_and
static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
}
static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_or arch_atomic64_fetch_or
static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
}
static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val + i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [tip: locking/core] locking/atomic/x86: Introduce arch_atomic64_try_cmpxchg() to x86_32
2024-04-10 6:29 [PATCH v3 1/4] locking/atomic/x86: Introduce arch_atomic64_try_cmpxchg to x86_32 Uros Bizjak
` (2 preceding siblings ...)
2024-04-10 6:29 ` [PATCH v3 4/4] locking/atomic/x86: Define arch_atomic_sub() family using arch_atomic_add() functions Uros Bizjak
@ 2024-04-10 13:40 ` tip-bot2 for Uros Bizjak
3 siblings, 0 replies; 8+ messages in thread
From: tip-bot2 for Uros Bizjak @ 2024-04-10 13:40 UTC (permalink / raw)
To: linux-tip-commits
Cc: Uros Bizjak, Ingo Molnar, Linus Torvalds, x86, linux-kernel
The following commit has been merged into the locking/core branch of tip:
Commit-ID: 276b893049e4cdc2f33c009706a75ec18a114485
Gitweb: https://git.kernel.org/tip/276b893049e4cdc2f33c009706a75ec18a114485
Author: Uros Bizjak <ubizjak@gmail.com>
AuthorDate: Wed, 10 Apr 2024 08:29:33 +02:00
Committer: Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 10 Apr 2024 15:04:54 +02:00
locking/atomic/x86: Introduce arch_atomic64_try_cmpxchg() to x86_32
Introduce arch_atomic64_try_cmpxchg() for 32-bit targets to use an
optimized target-specific implementation instead of the generic one.
This implementation eliminates the dual-word compare after the
cmpxchg8b instruction and improves the generated asm code from:
2273: f0 0f c7 0f lock cmpxchg8b (%edi)
2277: 8b 74 24 2c mov 0x2c(%esp),%esi
227b: 89 d3 mov %edx,%ebx
227d: 89 c2 mov %eax,%edx
227f: 89 5c 24 10 mov %ebx,0x10(%esp)
2283: 8b 7c 24 30 mov 0x30(%esp),%edi
2287: 89 44 24 1c mov %eax,0x1c(%esp)
228b: 31 f2 xor %esi,%edx
228d: 89 d0 mov %edx,%eax
228f: 89 da mov %ebx,%edx
2291: 31 fa xor %edi,%edx
2293: 09 d0 or %edx,%eax
2295: 0f 85 a5 00 00 00 jne 2340 <...>
to:
2270: f0 0f c7 0f lock cmpxchg8b (%edi)
2274: 0f 85 a6 00 00 00 jne 2320 <...>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20240410062957.322614-1-ubizjak@gmail.com
---
arch/x86/include/asm/atomic64_32.h | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 3486d91..ec217aa 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -61,12 +61,18 @@ ATOMIC64_DECL(add_unless);
#undef __ATOMIC64_DECL
#undef ATOMIC64_EXPORT
-static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
+static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
- return arch_cmpxchg64(&v->counter, o, n);
+ return arch_cmpxchg64(&v->counter, old, new);
}
#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
+static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
+{
+ return arch_try_cmpxchg64(&v->counter, old, new);
+}
+#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
+
static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
{
s64 o;
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2024-04-10 13:40 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-10 6:29 [PATCH v3 1/4] locking/atomic/x86: Introduce arch_atomic64_try_cmpxchg to x86_32 Uros Bizjak
2024-04-10 6:29 ` [PATCH v3 2/4] locking/atomic/x86: Introduce arch_atomic64_read_nonatomic " Uros Bizjak
2024-04-10 13:40 ` [tip: locking/core] locking/atomic/x86: Introduce arch_atomic64_read_nonatomic() " tip-bot2 for Uros Bizjak
2024-04-10 6:29 ` [PATCH v3 3/4] locking/atomic/x86: Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions Uros Bizjak
2024-04-10 13:40 ` [tip: locking/core] " tip-bot2 for Uros Bizjak
2024-04-10 6:29 ` [PATCH v3 4/4] locking/atomic/x86: Define arch_atomic_sub() family using arch_atomic_add() functions Uros Bizjak
2024-04-10 13:40 ` [tip: locking/core] " tip-bot2 for Uros Bizjak
2024-04-10 13:40 ` [tip: locking/core] locking/atomic/x86: Introduce arch_atomic64_try_cmpxchg() to x86_32 tip-bot2 for Uros Bizjak
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.