From: Peter Zijlstra <peterz@infradead.org>
To: torvalds@linux-foundation.org
Cc: corbet@lwn.net, will@kernel.org, boqun.feng@gmail.com,
mark.rutland@arm.com, catalin.marinas@arm.com, dennis@kernel.org,
tj@kernel.org, cl@linux.com, hca@linux.ibm.com,
gor@linux.ibm.com, agordeev@linux.ibm.com,
borntraeger@linux.ibm.com, svens@linux.ibm.com,
tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
dave.hansen@linux.intel.com, x86@kernel.org, hpa@zytor.com,
joro@8bytes.org, suravee.suthikulpanit@amd.com,
robin.murphy@arm.com, dwmw2@infradead.org,
baolu.lu@linux.intel.com, Arnd Bergmann <arnd@arndb.de>,
Herbert Xu <herbert@gondor.apana.org.au>,
davem@davemloft.net, penberg@kernel.org, rientjes@google.com,
iamjoonsoo.kim@lge.com, Andrew Morton <akpm@linux-foundation.org>,
vbabka@suse.cz, roman.gushchin@linux.dev, 42.hyeyoo@gmail.com,
linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-mm@kvack.org, linux-s390@vger.kernel.org,
iommu@lists.linux.dev, linux-arch@vger.kernel.org,
linux-crypto@vger.kernel.org
Subject: Re: [PATCH v3 05/11] percpu: Wire up cmpxchg128
Date: Thu, 25 May 2023 14:49:55 +0200 [thread overview]
Message-ID: <20230525124955.GS83892@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20230515080554.248739380@infradead.org>
On Mon, May 15, 2023 at 09:57:04AM +0200, Peter Zijlstra wrote:
> In order to replace cmpxchg_double() with the newly minted
> cmpxchg128() family of functions, wire it up in this_cpu_cmpxchg().
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
> --- a/arch/x86/include/asm/percpu.h
> +++ b/arch/x86/include/asm/percpu.h
> @@ -210,6 +210,65 @@ do { \
> (typeof(_var))(unsigned long) pco_old__; \
> })
>
> +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_CMPXCHG64)
> +#define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval) \
> +({ \
> + union { \
> + u64 var; \
> + struct { \
> + u32 low, high; \
> + }; \
> + } old__, new__; \
> + \
> + old__.var = _oval; \
> + new__.var = _nval; \
> + \
> + asm qual ("cmpxchg8b " __percpu_arg([var]) \
> + : [var] "+m" (_var), \
> + "+a" (old__.low), \
> + "+d" (old__.high) \
> + : "b" (new__.low), \
> + "c" (new__.high) \
> + : "memory"); \
> + \
> + old__.var; \
> +})
> +
> +#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, , pcp, oval, nval)
> +#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, volatile, pcp, oval, nval)
> +#endif
> +
> +#ifdef CONFIG_X86_64
> +#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval);
> +#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval);
> +
> +#define percpu_cmpxchg128_op(size, qual, _var, _oval, _nval) \
> +({ \
> + union { \
> + u128 var; \
> + struct { \
> + u64 low, high; \
> + }; \
> + } old__, new__; \
> + \
> + old__.var = _oval; \
> + new__.var = _nval; \
> + \
> + asm qual ("cmpxchg16b " __percpu_arg([var]) \
> + : [var] "+m" (_var), \
> + "+a" (old__.low), \
> + "+d" (old__.high) \
> + : "b" (new__.low), \
> + "c" (new__.high) \
> + : "memory"); \
> + \
> + old__.var; \
> +})
> +
> +#define raw_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, , pcp, oval, nval)
> +#define this_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, volatile, pcp, oval, nval)
> +#endif
> +
> /*
> * this_cpu_read() makes gcc load the percpu variable every time it is
> * accessed while this_cpu_read_stable() allows the value to be cached.
Since this_cpu_cmpxchg*() is assumed to always be present (their usage
is not guarded with system_has_cmpxchg*() it needs a fallback for then
the instruction is not present.
The below has been tested with clearcpuid=cx16; adding an obvious defect
in the fallback implemention crashes the kernel, with the code as
presented it boots.
Build tested i386-defconfig.
(the things we do for museum pieces :/)
---
include/asm/percpu.h | 25 +++++++++++---------
lib/Makefile | 3 +-
lib/cmpxchg16b_emu.S | 43 ++++++++++++++++++++--------------
lib/cmpxchg8b_emu.S | 63 +++++++++++++++++++++++++++++++++++++++------------
4 files changed, 90 insertions(+), 44 deletions(-)
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -210,7 +210,7 @@ do { \
(typeof(_var))(unsigned long) pco_old__; \
})
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_CMPXCHG64)
+#ifdef CONFIG_X86_32
#define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval) \
({ \
union { \
@@ -223,13 +223,14 @@ do { \
old__.var = _oval; \
new__.var = _nval; \
\
- asm qual ("cmpxchg8b " __percpu_arg([var]) \
+ asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \
+ "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
: [var] "+m" (_var), \
"+a" (old__.low), \
"+d" (old__.high) \
: "b" (new__.low), \
"c" (new__.high) \
- : "memory"); \
+ : "memory", "esi"); \
\
old__.var; \
})
@@ -254,13 +255,14 @@ do { \
old__.var = _oval; \
new__.var = _nval; \
\
- asm qual ("cmpxchg16b " __percpu_arg([var]) \
+ asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \
+ "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
: [var] "+m" (_var), \
"+a" (old__.low), \
"+d" (old__.high) \
: "b" (new__.low), \
"c" (new__.high) \
- : "memory"); \
+ : "memory", "rsi"); \
\
old__.var; \
})
@@ -400,12 +402,13 @@ do { \
bool __ret; \
typeof(pcp1) __o1 = (o1), __n1 = (n1); \
typeof(pcp2) __o2 = (o2), __n2 = (n2); \
- alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \
- "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \
- X86_FEATURE_CX16, \
- ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \
- "+m" (pcp2), "+d" (__o2)), \
- "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \
+ asm volatile (ALTERNATIVE("leaq %P1, %%rsi; call this_cpu_cmpxchg16b_emu", \
+ "cmpxchg16b " __percpu_arg(1), X86_FEATURE_CX16) \
+ "setz %0" \
+ : "=a" (__ret), "+m" (pcp1) \
+ : "b" (__n1), "c" (__n2), \
+ "a" (__o1), "d" (__o2) \
+ : "memory", "rsi"); \
__ret; \
})
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -61,8 +61,9 @@ ifeq ($(CONFIG_X86_32),y)
lib-y += strstr_32.o
lib-y += string_32.o
lib-y += memmove_32.o
+ lib-y += cmpxchg8b_emu.o
ifneq ($(CONFIG_X86_CMPXCHG64),y)
- lib-y += cmpxchg8b_emu.o atomic64_386_32.o
+ lib-y += atomic64_386_32.o
endif
else
obj-y += iomap_copy_64.o
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -1,47 +1,54 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/linkage.h>
#include <asm/percpu.h>
+#include <asm/processor-flags.h>
.text
/*
+ * Emulate 'cmpxchg16b %gs:(%rsi)'
+ *
* Inputs:
* %rsi : memory location to compare
* %rax : low 64 bits of old value
* %rdx : high 64 bits of old value
* %rbx : low 64 bits of new value
* %rcx : high 64 bits of new value
- * %al : Operation successful
+ *
+ * Notably this is not LOCK prefixed and is not safe against NMIs
*/
SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
-#
-# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not
-# via the ZF. Caller will access %al to get result.
-#
-# Note that this is only useful for a cpuops operation. Meaning that we
-# do *not* have a fully atomic operation but just an operation that is
-# *atomic* on a single cpu (as provided by the this_cpu_xx class of
-# macros).
-#
pushfq
cli
- cmpq PER_CPU_VAR((%rsi)), %rax
- jne .Lnot_same
- cmpq PER_CPU_VAR(8(%rsi)), %rdx
- jne .Lnot_same
+ /* if (*ptr == old) */
+ cmpq PER_CPU_VAR(0(%rsi)), %rax
+ jne .Lnot_same
+ cmpq PER_CPU_VAR(8(%rsi)), %rdx
+ jne .Lnot_same
+
+ /* *ptr = new */
+ movq %rbx, PER_CPU_VAR(0(%rsi))
+ movq %rcx, PER_CPU_VAR(8(%rsi))
- movq %rbx, PER_CPU_VAR((%rsi))
- movq %rcx, PER_CPU_VAR(8(%rsi))
+ /* set ZF in EFLAGS to indicate success */
+ orl $X86_EFLAGS_ZF, (%rsp)
popfq
- mov $1, %al
RET
.Lnot_same:
+ /* *ptr != old */
+
+ /* old = *ptr */
+ movq PER_CPU_VAR(0(%rsi)), %rax
+ movq PER_CPU_VAR(8(%rsi)), %rdx
+
+ /* clear ZF in EFLAGS to indicate failure */
+ andl $(~X86_EFLAGS_ZF), (%rsp)
+
popfq
- xor %al,%al
RET
SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
--- a/arch/x86/lib/cmpxchg8b_emu.S
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -2,10 +2,16 @@
#include <linux/linkage.h>
#include <asm/export.h>
+#include <asm/percpu.h>
+#include <asm/processor-flags.h>
.text
+#ifndef CONFIG_X86_CMPXCHG64
+
/*
+ * Emulate 'cmpxchg8b (%esi)' on UP
+ *
* Inputs:
* %esi : memory location to compare
* %eax : low 32 bits of old value
@@ -15,32 +21,61 @@
*/
SYM_FUNC_START(cmpxchg8b_emu)
-#
-# Emulate 'cmpxchg8b (%esi)' on UP except we don't
-# set the whole ZF thing (caller will just compare
-# eax:edx with the expected value)
-#
pushfl
cli
- cmpl (%esi), %eax
- jne .Lnot_same
- cmpl 4(%esi), %edx
- jne .Lhalf_same
+ cmpl 0(%esi), %eax
+ jne .Lnot_same
+ cmpl 4(%esi), %edx
+ jne .Lnot_same
+
+ movl %ebx, 0(%esi)
+ movl %ecx, 4(%esi)
- movl %ebx, (%esi)
- movl %ecx, 4(%esi)
+ orl $X86_EFLAGS_ZF, (%esp)
popfl
RET
.Lnot_same:
- movl (%esi), %eax
-.Lhalf_same:
- movl 4(%esi), %edx
+ movl 0(%esi), %eax
+ movl 4(%esi), %edx
+
+ andl $(~X86_EFLAGS_ZF), (%esp)
popfl
RET
SYM_FUNC_END(cmpxchg8b_emu)
EXPORT_SYMBOL(cmpxchg8b_emu)
+
+#endif
+
+SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
+
+ pushfl
+ cli
+
+ cmpl PER_CPU_VAR(0(%esi)), %eax
+ jne .Lnot_same2
+ cmpl PER_CPU_VAR(4(%esi)), %edx
+ jne .Lnot_same2
+
+ movl %ebx, PER_CPU_VAR(0(%esi))
+ movl %ecx, PER_CPU_VAR(4(%esi))
+
+ orl $X86_EFLAGS_ZF, (%esp)
+
+ popfl
+ RET
+
+.Lnot_same2:
+ movl PER_CPU_VAR(0(%esi)), %eax
+ movl PER_CPU_VAR(4(%esi)), %edx
+
+ andl $(~X86_EFLAGS_ZF), (%esp)
+
+ popfl
+ RET
+
+SYM_FUNC_END(this_cpu_cmpxchg8b_emu)
next prev parent reply other threads:[~2023-05-25 12:51 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-05-15 7:56 [PATCH v3 00/11] Introduce cmpxchg128() -- aka. the demise of cmpxchg_double() Peter Zijlstra
2023-05-15 7:57 ` [PATCH v3 01/11] cyrpto/b128ops: Remove struct u128 Peter Zijlstra
2023-05-20 10:49 ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27 ` tip-bot2 for Peter Zijlstra
2023-05-15 7:57 ` [PATCH v3 02/11] types: Introduce [us]128 Peter Zijlstra
2023-05-20 10:49 ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27 ` tip-bot2 for Peter Zijlstra
2023-05-15 7:57 ` [PATCH v3 03/11] arch: Introduce arch_{,try_}_cmpxchg128{,_local}() Peter Zijlstra
2023-05-20 10:49 ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27 ` tip-bot2 for Peter Zijlstra
2023-05-15 7:57 ` [PATCH v3 04/11] instrumentation: Wire up cmpxchg128() Peter Zijlstra
2023-05-20 10:49 ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27 ` tip-bot2 for Peter Zijlstra
2023-05-15 7:57 ` [PATCH v3 05/11] percpu: Wire up cmpxchg128 Peter Zijlstra
2023-05-20 10:49 ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27 ` tip-bot2 for Peter Zijlstra
2023-05-25 12:49 ` Peter Zijlstra [this message]
2023-05-25 22:59 ` [PATCH v3 05/11] " Petr Tesařík
2023-05-15 7:57 ` [PATCH v3 06/11] x86,amd_iommu: Replace cmpxchg_double() Peter Zijlstra
2023-05-20 10:49 ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27 ` tip-bot2 for Peter Zijlstra
2023-05-15 7:57 ` [PATCH v3 07/11] x86,intel_iommu: " Peter Zijlstra
2023-05-20 10:49 ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27 ` tip-bot2 for Peter Zijlstra
2023-05-15 7:57 ` [PATCH v3 08/11] slub: " Peter Zijlstra
2023-05-20 10:49 ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27 ` tip-bot2 for Peter Zijlstra
2023-05-24 9:32 ` [PATCH v3 08/11] " Peter Zijlstra
2023-05-24 10:13 ` Vlastimil Babka
2023-05-25 10:29 ` Peter Zijlstra
2023-05-25 10:52 ` Arnd Bergmann
2023-05-25 13:10 ` Peter Zijlstra
2023-05-30 14:22 ` Peter Zijlstra
2023-05-30 19:32 ` Peter Zijlstra
2023-05-15 7:57 ` [PATCH v3 09/11] mm/slub: Fold slab_update_freelist() Peter Zijlstra
2023-05-24 11:58 ` Vlastimil Babka
2023-05-15 7:57 ` [PATCH v3 10/11] arch: Remove cmpxchg_double Peter Zijlstra
2023-05-15 8:52 ` Heiko Carstens
2023-05-20 10:49 ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27 ` tip-bot2 for Peter Zijlstra
2023-05-15 7:57 ` [PATCH v3 11/11] s390/cpum_sf: Convert to cmpxchg128() Peter Zijlstra
2023-05-20 10:49 ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27 ` tip-bot2 for Peter Zijlstra
2023-05-15 9:42 ` [PATCH v3 00/11] Introduce cmpxchg128() -- aka. the demise of cmpxchg_double() Arnd Bergmann
2023-05-24 9:39 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230525124955.GS83892@hirez.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=42.hyeyoo@gmail.com \
--cc=agordeev@linux.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=arnd@arndb.de \
--cc=baolu.lu@linux.intel.com \
--cc=boqun.feng@gmail.com \
--cc=borntraeger@linux.ibm.com \
--cc=bp@alien8.de \
--cc=catalin.marinas@arm.com \
--cc=cl@linux.com \
--cc=corbet@lwn.net \
--cc=dave.hansen@linux.intel.com \
--cc=davem@davemloft.net \
--cc=dennis@kernel.org \
--cc=dwmw2@infradead.org \
--cc=gor@linux.ibm.com \
--cc=hca@linux.ibm.com \
--cc=herbert@gondor.apana.org.au \
--cc=hpa@zytor.com \
--cc=iamjoonsoo.kim@lge.com \
--cc=iommu@lists.linux.dev \
--cc=joro@8bytes.org \
--cc=linux-arch@vger.kernel.org \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-s390@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=penberg@kernel.org \
--cc=rientjes@google.com \
--cc=robin.murphy@arm.com \
--cc=roman.gushchin@linux.dev \
--cc=suravee.suthikulpanit@amd.com \
--cc=svens@linux.ibm.com \
--cc=tglx@linutronix.de \
--cc=tj@kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=vbabka@suse.cz \
--cc=will@kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.