From: Uros Bizjak <ubizjak@gmail.com>
To: x86@kernel.org, linux-kernel@vger.kernel.org
Cc: Uros Bizjak <ubizjak@gmail.com>,
Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@kernel.org>, Borislav Petkov <bp@alien8.de>,
Dave Hansen <dave.hansen@linux.intel.com>,
"H. Peter Anvin" <hpa@zytor.com>,
Peter Zijlstra <peterz@infradead.org>
Subject: [PATCH 2/6] locking/atomic/x86: Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions
Date: Tue, 9 Apr 2024 12:03:53 +0200 [thread overview]
Message-ID: <20240409100503.274629-3-ubizjak@gmail.com> (raw)
In-Reply-To: <20240409100503.274629-1-ubizjak@gmail.com>
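Rewrite the x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions, as
well as arch_atomic64_fetch_add(), to use arch_atomic64_try_cmpxchg().
The old loops started from a guessed value of 0, so the first cmpxchg
usually failed and served only to fetch the current value. Preloading
the value avoids this extra trip through the cmpxchg loop.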
The value preload before the cmpxchg loop does not need to be atomic,
but it should use READ_ONCE to prevent the compiler from merging,
refetching, or reordering the read.
The generated code improves from:
1917d5: 31 c9 xor %ecx,%ecx
1917d7: 31 db xor %ebx,%ebx
1917d9: 89 4c 24 3c mov %ecx,0x3c(%esp)
1917dd: 8b 74 24 24 mov 0x24(%esp),%esi
1917e1: 89 c8 mov %ecx,%eax
1917e3: 89 5c 24 34 mov %ebx,0x34(%esp)
1917e7: 8b 7c 24 28 mov 0x28(%esp),%edi
1917eb: 21 ce and %ecx,%esi
1917ed: 89 74 24 4c mov %esi,0x4c(%esp)
1917f1: 21 df and %ebx,%edi
1917f3: 89 de mov %ebx,%esi
1917f5: 89 7c 24 50 mov %edi,0x50(%esp)
1917f9: 8b 54 24 4c mov 0x4c(%esp),%edx
1917fd: 8b 7c 24 2c mov 0x2c(%esp),%edi
191801: 8b 4c 24 50 mov 0x50(%esp),%ecx
191805: 89 d3 mov %edx,%ebx
191807: 89 f2 mov %esi,%edx
191809: f0 0f c7 0f lock cmpxchg8b (%edi)
19180d: 89 c1 mov %eax,%ecx
19180f: 8b 74 24 34 mov 0x34(%esp),%esi
191813: 89 d3 mov %edx,%ebx
191815: 89 44 24 4c mov %eax,0x4c(%esp)
191819: 8b 44 24 3c mov 0x3c(%esp),%eax
19181d: 89 df mov %ebx,%edi
19181f: 89 54 24 44 mov %edx,0x44(%esp)
191823: 89 ca mov %ecx,%edx
191825: 31 de xor %ebx,%esi
191827: 31 c8 xor %ecx,%eax
191829: 09 f0 or %esi,%eax
19182b: 75 ac jne 1917d9 <...>
to:
1912ba: 8b 06 mov (%esi),%eax
1912bc: 8b 56 04 mov 0x4(%esi),%edx
1912bf: 89 44 24 3c mov %eax,0x3c(%esp)
1912c3: 89 c1 mov %eax,%ecx
1912c5: 23 4c 24 34 and 0x34(%esp),%ecx
1912c9: 89 d3 mov %edx,%ebx
1912cb: 23 5c 24 38 and 0x38(%esp),%ebx
1912cf: 89 54 24 40 mov %edx,0x40(%esp)
1912d3: 89 4c 24 2c mov %ecx,0x2c(%esp)
1912d7: 89 5c 24 30 mov %ebx,0x30(%esp)
1912db: 8b 5c 24 2c mov 0x2c(%esp),%ebx
1912df: 8b 4c 24 30 mov 0x30(%esp),%ecx
1912e3: f0 0f c7 0e lock cmpxchg8b (%esi)
1912e7: 0f 85 f3 02 00 00 jne 1915e0 <...>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
arch/x86/include/asm/atomic64_32.h | 44 ++++++++++++------------------
1 file changed, 18 insertions(+), 26 deletions(-)
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 11e817dab44a..84affd7a5d1c 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -201,69 +201,61 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = __READ_ONCE(v->counter);
- while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
}
static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = __READ_ONCE(v->counter);
- while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_and arch_atomic64_fetch_and
static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = __READ_ONCE(v->counter);
- while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
}
static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = __READ_ONCE(v->counter);
- while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_or arch_atomic64_fetch_or
static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = __READ_ONCE(v->counter);
- while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
}
static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = __READ_ONCE(v->counter);
- while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = __READ_ONCE(v->counter);
- while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c)
- c = old;
-
- return old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val + i));
+ return val;
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
--
2.44.0
next prev parent reply other threads:[~2024-04-09 10:05 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-09 10:03 [PATCH 0/6] locking/atomic/x86: Improve arch_atomic*() family of functions Uros Bizjak
2024-04-09 10:03 ` [PATCH 1/6] locking/atomic/x86: Introduce arch_atomic64_try_cmpxchg to x86_32 Uros Bizjak
2024-04-09 10:03 ` Uros Bizjak [this message]
2024-04-09 11:13 ` [PATCH 2/6] locking/atomic/x86: Rewrite x86_32 arch_atomic64_{,fetch}_{and,or,xor}() functions Mark Rutland
2024-04-09 12:03 ` Uros Bizjak
2024-04-09 12:50 ` Uros Bizjak
2024-04-09 16:34 ` Mark Rutland
2024-04-09 16:39 ` Uros Bizjak
2024-04-09 16:53 ` Uros Bizjak
2024-04-09 10:03 ` [PATCH 3/6] locking/atomic/x86: Use READ_ONCE before atomic{,64}_try_cmpxchg loops Uros Bizjak
2024-04-09 11:07 ` Mark Rutland
2024-04-09 11:59 ` Uros Bizjak
2024-04-09 10:03 ` [PATCH 4/6] locking/atomic/x86: Merge x86_32 and x86_64 arch_atomic64_fetch_{and,or,xor}() functions Uros Bizjak
2024-04-09 10:03 ` [PATCH 5/6] locking/atomic/x86: Define arch_atomic_sub() family using arch_atomic_add() functions Uros Bizjak
2024-04-09 10:03 ` [PATCH 6/6] locking/atomic/x86: Reorder a couple of arch_atomic64 functions Uros Bizjak
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240409100503.274629-3-ubizjak@gmail.com \
--to=ubizjak@gmail.com \
--cc=bp@alien8.de \
--cc=dave.hansen@linux.intel.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox