* [PATCH v2 1/2] locking/atomic: Add generic try_cmpxchg64 support
2022-05-13 15:30 [PATCH v2 0/2] locking/atomic/x86: Introduce arch_try_cmpxchg64 Uros Bizjak
@ 2022-05-13 15:30 ` Uros Bizjak
2022-05-13 15:30 ` [PATCH v2 2/2] locking/atomic/x86: Introduce arch_try_cmpxchg64 Uros Bizjak
1 sibling, 0 replies; 5+ messages in thread
From: Uros Bizjak @ 2022-05-13 15:30 UTC (permalink / raw)
To: x86, linux-kernel
Cc: Uros Bizjak, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
Dave Hansen, H. Peter Anvin, Will Deacon, Peter Zijlstra,
Boqun Feng, Mark Rutland, Paul E. McKenney, Marco Elver
Add generic support for try_cmpxchg64{,_acquire,_release,_relaxed}
and their falbacks involving cmpxchg64.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Will Deacon <will@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Marco Elver <elver@google.com>
---
include/linux/atomic/atomic-arch-fallback.h | 72 ++++++++++++++++++++-
include/linux/atomic/atomic-instrumented.h | 40 +++++++++++-
scripts/atomic/gen-atomic-fallback.sh | 31 +++++----
scripts/atomic/gen-atomic-instrumented.sh | 2 +-
4 files changed, 129 insertions(+), 16 deletions(-)
diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h
index 6db58d180866..77bc5522e61c 100644
--- a/include/linux/atomic/atomic-arch-fallback.h
+++ b/include/linux/atomic/atomic-arch-fallback.h
@@ -147,6 +147,76 @@
#endif /* arch_try_cmpxchg_relaxed */
+#ifndef arch_try_cmpxchg64_relaxed
+#ifdef arch_try_cmpxchg64
+#define arch_try_cmpxchg64_acquire arch_try_cmpxchg64
+#define arch_try_cmpxchg64_release arch_try_cmpxchg64
+#define arch_try_cmpxchg64_relaxed arch_try_cmpxchg64
+#endif /* arch_try_cmpxchg64 */
+
+#ifndef arch_try_cmpxchg64
+#define arch_try_cmpxchg64(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = arch_cmpxchg64((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg64 */
+
+#ifndef arch_try_cmpxchg64_acquire
+#define arch_try_cmpxchg64_acquire(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = arch_cmpxchg64_acquire((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg64_acquire */
+
+#ifndef arch_try_cmpxchg64_release
+#define arch_try_cmpxchg64_release(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = arch_cmpxchg64_release((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg64_release */
+
+#ifndef arch_try_cmpxchg64_relaxed
+#define arch_try_cmpxchg64_relaxed(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = arch_cmpxchg64_relaxed((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg64_relaxed */
+
+#else /* arch_try_cmpxchg64_relaxed */
+
+#ifndef arch_try_cmpxchg64_acquire
+#define arch_try_cmpxchg64_acquire(...) \
+ __atomic_op_acquire(arch_try_cmpxchg64, __VA_ARGS__)
+#endif
+
+#ifndef arch_try_cmpxchg64_release
+#define arch_try_cmpxchg64_release(...) \
+ __atomic_op_release(arch_try_cmpxchg64, __VA_ARGS__)
+#endif
+
+#ifndef arch_try_cmpxchg64
+#define arch_try_cmpxchg64(...) \
+ __atomic_op_fence(arch_try_cmpxchg64, __VA_ARGS__)
+#endif
+
+#endif /* arch_try_cmpxchg64_relaxed */
+
#ifndef arch_atomic_read_acquire
static __always_inline int
arch_atomic_read_acquire(const atomic_t *v)
@@ -2386,4 +2456,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v)
#endif
#endif /* _LINUX_ATOMIC_FALLBACK_H */
-// 8e2cc06bc0d2c0967d2f8424762bd48555ee40ae
+// b5e87bdd5ede61470c29f7a7e4de781af3770f09
diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h
index 5d69b143c28e..7a139ec030b0 100644
--- a/include/linux/atomic/atomic-instrumented.h
+++ b/include/linux/atomic/atomic-instrumented.h
@@ -2006,6 +2006,44 @@ atomic_long_dec_if_positive(atomic_long_t *v)
arch_try_cmpxchg_relaxed(__ai_ptr, __ai_oldp, __VA_ARGS__); \
})
+#define try_cmpxchg64(ptr, oldp, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ typeof(oldp) __ai_oldp = (oldp); \
+ kcsan_mb(); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+ arch_try_cmpxchg64(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+
+#define try_cmpxchg64_acquire(ptr, oldp, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ typeof(oldp) __ai_oldp = (oldp); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+ arch_try_cmpxchg64_acquire(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+
+#define try_cmpxchg64_release(ptr, oldp, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ typeof(oldp) __ai_oldp = (oldp); \
+ kcsan_release(); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+ arch_try_cmpxchg64_release(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+
+#define try_cmpxchg64_relaxed(ptr, oldp, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ typeof(oldp) __ai_oldp = (oldp); \
+ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
+ arch_try_cmpxchg64_relaxed(__ai_ptr, __ai_oldp, __VA_ARGS__); \
+})
+
#define cmpxchg_local(ptr, ...) \
({ \
typeof(ptr) __ai_ptr = (ptr); \
@@ -2045,4 +2083,4 @@ atomic_long_dec_if_positive(atomic_long_t *v)
})
#endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
-// 87c974b93032afd42143613434d1a7788fa598f9
+// 764f741eb77a7ad565dc8d99ce2837d5542e8aee
diff --git a/scripts/atomic/gen-atomic-fallback.sh b/scripts/atomic/gen-atomic-fallback.sh
index 8e2da71f1d5f..3a07695e3c89 100755
--- a/scripts/atomic/gen-atomic-fallback.sh
+++ b/scripts/atomic/gen-atomic-fallback.sh
@@ -164,41 +164,44 @@ gen_xchg_fallbacks()
gen_try_cmpxchg_fallback()
{
+ local cmpxchg="$1"; shift;
local order="$1"; shift;
cat <<EOF
-#ifndef arch_try_cmpxchg${order}
-#define arch_try_cmpxchg${order}(_ptr, _oldp, _new) \\
+#ifndef arch_try_${cmpxchg}${order}
+#define arch_try_${cmpxchg}${order}(_ptr, _oldp, _new) \\
({ \\
typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \\
- ___r = arch_cmpxchg${order}((_ptr), ___o, (_new)); \\
+ ___r = arch_${cmpxchg}${order}((_ptr), ___o, (_new)); \\
if (unlikely(___r != ___o)) \\
*___op = ___r; \\
likely(___r == ___o); \\
})
-#endif /* arch_try_cmpxchg${order} */
+#endif /* arch_try_${cmpxchg}${order} */
EOF
}
gen_try_cmpxchg_fallbacks()
{
- printf "#ifndef arch_try_cmpxchg_relaxed\n"
- printf "#ifdef arch_try_cmpxchg\n"
+ local cmpxchg="$1"; shift;
- gen_basic_fallbacks "arch_try_cmpxchg"
+ printf "#ifndef arch_try_${cmpxchg}_relaxed\n"
+ printf "#ifdef arch_try_${cmpxchg}\n"
- printf "#endif /* arch_try_cmpxchg */\n\n"
+ gen_basic_fallbacks "arch_try_${cmpxchg}"
+
+ printf "#endif /* arch_try_${cmpxchg} */\n\n"
for order in "" "_acquire" "_release" "_relaxed"; do
- gen_try_cmpxchg_fallback "${order}"
+ gen_try_cmpxchg_fallback "${cmpxchg}" "${order}"
done
- printf "#else /* arch_try_cmpxchg_relaxed */\n"
+ printf "#else /* arch_try_${cmpxchg}_relaxed */\n"
- gen_order_fallbacks "arch_try_cmpxchg"
+ gen_order_fallbacks "arch_try_${cmpxchg}"
- printf "#endif /* arch_try_cmpxchg_relaxed */\n\n"
+ printf "#endif /* arch_try_${cmpxchg}_relaxed */\n\n"
}
cat << EOF
@@ -218,7 +221,9 @@ for xchg in "arch_xchg" "arch_cmpxchg" "arch_cmpxchg64"; do
gen_xchg_fallbacks "${xchg}"
done
-gen_try_cmpxchg_fallbacks
+for cmpxchg in "cmpxchg" "cmpxchg64"; do
+ gen_try_cmpxchg_fallbacks "${cmpxchg}"
+done
grep '^[a-z]' "$1" | while read name meta args; do
gen_proto "${meta}" "${name}" "atomic" "int" ${args}
diff --git a/scripts/atomic/gen-atomic-instrumented.sh b/scripts/atomic/gen-atomic-instrumented.sh
index 68f902731d01..77c06526a574 100755
--- a/scripts/atomic/gen-atomic-instrumented.sh
+++ b/scripts/atomic/gen-atomic-instrumented.sh
@@ -166,7 +166,7 @@ grep '^[a-z]' "$1" | while read name meta args; do
done
-for xchg in "xchg" "cmpxchg" "cmpxchg64" "try_cmpxchg"; do
+for xchg in "xchg" "cmpxchg" "cmpxchg64" "try_cmpxchg" "try_cmpxchg64"; do
for order in "" "_acquire" "_release" "_relaxed"; do
gen_xchg "${xchg}" "${order}" ""
printf "\n"
--
2.35.1
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH v2 2/2] locking/atomic/x86: Introduce arch_try_cmpxchg64
2022-05-13 15:30 [PATCH v2 0/2] locking/atomic/x86: Introduce arch_try_cmpxchg64 Uros Bizjak
2022-05-13 15:30 ` [PATCH v2 1/2] locking/atomic: Add generic try_cmpxchg64 support Uros Bizjak
@ 2022-05-13 15:30 ` Uros Bizjak
2022-05-13 22:18 ` Peter Zijlstra
1 sibling, 1 reply; 5+ messages in thread
From: Uros Bizjak @ 2022-05-13 15:30 UTC (permalink / raw)
To: x86, linux-kernel
Cc: Uros Bizjak, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
Dave Hansen, H. Peter Anvin, Will Deacon, Peter Zijlstra,
Boqun Feng, Mark Rutland, Paul E. McKenney, Marco Elver
Introduce arch_try_cmpxchg64 for 64-bit and 32-bit targets to improve
code using cmpxchg64. On 64-bit targets, the generated assembly improves
from:
ab: 89 c8 mov %ecx,%eax
ad: 48 89 4c 24 60 mov %rcx,0x60(%rsp)
b2: 83 e0 fd and $0xfffffffd,%eax
b5: 89 54 24 64 mov %edx,0x64(%rsp)
b9: 88 44 24 60 mov %al,0x60(%rsp)
bd: 48 89 c8 mov %rcx,%rax
c0: c6 44 24 62 f2 movb $0xf2,0x62(%rsp)
c5: 48 8b 74 24 60 mov 0x60(%rsp),%rsi
ca: f0 49 0f b1 34 24 lock cmpxchg %rsi,(%r12)
d0: 48 39 c1 cmp %rax,%rcx
d3: 75 cf jne a4 <t+0xa4>
to:
b3: 89 c2 mov %eax,%edx
b5: 48 89 44 24 60 mov %rax,0x60(%rsp)
ba: 83 e2 fd and $0xfffffffd,%edx
bd: 89 4c 24 64 mov %ecx,0x64(%rsp)
c1: 88 54 24 60 mov %dl,0x60(%rsp)
c5: c6 44 24 62 f2 movb $0xf2,0x62(%rsp)
ca: 48 8b 54 24 60 mov 0x60(%rsp),%rdx
cf: f0 48 0f b1 13 lock cmpxchg %rdx,(%rbx)
d4: 75 d5 jne ab <t+0xab>
where a move and a compare after cmpxchg is saved. The improvements
for 32-bit targets are even more noticeable, because dual-word compare
after cmpxchg8b gets eliminated.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Will Deacon <will@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Marco Elver <elver@google.com>
---
arch/x86/include/asm/cmpxchg_32.h | 21 +++++++++++++++++++++
arch/x86/include/asm/cmpxchg_64.h | 6 ++++++
2 files changed, 27 insertions(+)
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 0a7fe0321613..16a604cf24d4 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -42,6 +42,9 @@ static inline void set_64bit(volatile u64 *ptr, u64 value)
#define arch_cmpxchg64_local(ptr, o, n) \
((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \
(unsigned long long)(n)))
+#define arch_try_cmpxchg64(ptr, po, n) \
+ ((__typeof__(*(ptr)))__try_cmpxchg64((ptr), (unsigned long long *)(po), \
+ (unsigned long long)(n)))
#endif
static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
@@ -70,6 +73,24 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
return prev;
}
+static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new)
+{
+ bool success;
+ u64 old = *pold;
+ asm volatile(LOCK_PREFIX "cmpxchg8b %[ptr]"
+ CC_SET(z)
+ : CC_OUT(z) (success),
+ [ptr] "+m" (*ptr),
+ "+A" (old)
+ : "b" ((u32)new),
+ "c" ((u32)(new >> 32))
+ : "memory");
+
+ if (unlikely(!success))
+ *pold = old;
+ return success;
+}
+
#ifndef CONFIG_X86_CMPXCHG64
/*
* Building a kernel capable running on 80386 and 80486. It may be necessary
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 072e5459fe2f..250187ac8248 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -19,6 +19,12 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
arch_cmpxchg_local((ptr), (o), (n)); \
})
+#define arch_try_cmpxchg64(ptr, po, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ arch_try_cmpxchg((ptr), (po), (n)); \
+})
+
#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16)
#endif /* _ASM_X86_CMPXCHG_64_H */
--
2.35.1
^ permalink raw reply related [flat|nested] 5+ messages in thread