From: Uros Bizjak <ubizjak@gmail.com>
To: x86@kernel.org, linux-kernel@vger.kernel.org
Cc: Uros Bizjak <ubizjak@gmail.com>,
Peter Zijlstra <peterz@infradead.org>,
Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
Dave Hansen <dave.hansen@linux.intel.com>,
"H. Peter Anvin" <hpa@zytor.com>
Subject: [PATCH] x86/percpu: Define {raw,this}_cpu_try_cmpxchg{64,128}
Date: Wed, 6 Sep 2023 20:58:44 +0200 [thread overview]
Message-ID: <20230906185941.53527-1-ubizjak@gmail.com> (raw)
Define target-specific {raw,this}_cpu_try_cmpxchg64 and
{raw,this}_cpu_try_cmpxchg128 macros. These definitions override
the generic fallback definitions and enable target-specific
optimized implementations.
Several places in mm/slub.o improve from e.g.:
53bc: 48 8d 4f 40 lea 0x40(%rdi),%rcx
53c0: 48 89 fa mov %rdi,%rdx
53c3: 49 8b 5c 05 00 mov 0x0(%r13,%rax,1),%rbx
53c8: 4c 89 e8 mov %r13,%rax
53cb: 49 8d 30 lea (%r8),%rsi
53ce: e8 00 00 00 00 call 53d3 <...>
53cf: R_X86_64_PLT32 this_cpu_cmpxchg16b_emu-0x4
53d3: 48 31 d7 xor %rdx,%rdi
53d6: 4c 31 e8 xor %r13,%rax
53d9: 48 09 c7 or %rax,%rdi
53dc: 75 ae jne 538c <...>
to:
53bc: 48 8d 4a 40 lea 0x40(%rdx),%rcx
53c0: 49 8b 1c 07 mov (%r15,%rax,1),%rbx
53c4: 4c 89 f8 mov %r15,%rax
53c7: 48 8d 37 lea (%rdi),%rsi
53ca: e8 00 00 00 00 call 53cf <...>
53cb: R_X86_64_PLT32 this_cpu_cmpxchg16b_emu-0x4
53cf: 75 bb jne 538c <...>
reducing the size of mm/slub.o by 80 bytes:
text data bss dec hex filename
39758 5337 4208 49303 c097 slub-new.o
39838 5337 4208 49383 c0e7 slub-old.o
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
---
arch/x86/include/asm/percpu.h | 67 +++++++++++++++++++++++++++++++++++
1 file changed, 67 insertions(+)
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 34734d730463..4c3641927f39 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -237,12 +237,47 @@ do { \
#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, , pcp, oval, nval)
#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, volatile, pcp, oval, nval)
+
+#define percpu_try_cmpxchg64_op(size, qual, _var, _ovalp, _nval) \
+({ \
+ bool success; \
+ u64 *_oval = (u64 *)(_ovalp); \
+ union { \
+ u64 var; \
+ struct { \
+ u32 low, high; \
+ }; \
+ } old__, new__; \
+ \
+ old__.var = *_oval; \
+ new__.var = _nval; \
+ \
+ asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \
+ "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
+ CC_SET(z) \
+ : CC_OUT(z) (success), \
+ [var] "+m" (_var), \
+ "+a" (old__.low), \
+ "+d" (old__.high) \
+ : "b" (new__.low), \
+ "c" (new__.high) \
+ : "memory", "esi"); \
+ if (unlikely(!success)) \
+ *_oval = old__.var; \
+ likely(success); \
+})
+
+#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg64_op(8, , pcp, ovalp, nval)
+#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg64_op(8, volatile, pcp, ovalp, nval)
#endif
#ifdef CONFIG_X86_64
#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval);
#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval);
+#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval);
+#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval);
+
#define percpu_cmpxchg128_op(size, qual, _var, _oval, _nval) \
({ \
union { \
@@ -269,6 +304,38 @@ do { \
#define raw_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, , pcp, oval, nval)
#define this_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, volatile, pcp, oval, nval)
+
+#define percpu_try_cmpxchg128_op(size, qual, _var, _ovalp, _nval) \
+({ \
+ bool success; \
+ u128 *_oval = (u128 *)(_ovalp); \
+ union { \
+ u128 var; \
+ struct { \
+ u64 low, high; \
+ }; \
+ } old__, new__; \
+ \
+ old__.var = *_oval; \
+ new__.var = _nval; \
+ \
+ asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \
+ "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
+ CC_SET(z) \
+ : CC_OUT(z) (success), \
+ [var] "+m" (_var), \
+ "+a" (old__.low), \
+ "+d" (old__.high) \
+ : "b" (new__.low), \
+ "c" (new__.high) \
+ : "memory", "rsi"); \
+ if (unlikely(!success)) \
+ *_oval = old__.var; \
+ likely(success); \
+})
+
+#define raw_cpu_try_cmpxchg128(pcp, ovalp, nval) percpu_try_cmpxchg128_op(16, , pcp, ovalp, nval)
+#define this_cpu_try_cmpxchg128(pcp, ovalp, nval) percpu_try_cmpxchg128_op(16, volatile, pcp, ovalp, nval)
#endif
/*
--
2.41.0
next reply other threads:[~2023-09-06 19:00 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-09-06 18:58 Uros Bizjak [this message]
2023-09-15 11:25 ` [tip: x86/asm] x86/percpu: Define {raw,this}_cpu_try_cmpxchg{64,128} tip-bot2 for Uros Bizjak
2023-09-15 16:45 ` Linus Torvalds
2023-09-17 18:31 ` Uros Bizjak
2023-09-17 18:33 ` Uros Bizjak
2023-09-18 7:31 ` Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230906185941.53527-1-ubizjak@gmail.com \
--to=ubizjak@gmail.com \
--cc=bp@alien8.de \
--cc=dave.hansen@linux.intel.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox