* [PATCH v2 1/3] locking/generic: Add generic support for sync_try_cmpxchg and its fallback
@ 2023-10-01 10:39 Uros Bizjak
2023-10-01 10:39 ` [PATCH v2 2/3] locking/x86: Introduce arch_sync_try_cmpxchg Uros Bizjak
2023-10-01 10:39 ` [PATCH v2 3/3] xen: Use sync_try_cmpxchg instead of sync_cmpxchg Uros Bizjak
0 siblings, 2 replies; 3+ messages in thread
From: Uros Bizjak @ 2023-10-01 10:39 UTC (permalink / raw)
To: linux-kernel, x86, xen-devel
Cc: Uros Bizjak, Will Deacon, Peter Zijlstra, Ingo Molnar, Boqun Feng,
Mark Rutland
Provide the generic sync_try_cmpxchg function from the
raw_ prefixed version, also adding explicit instrumentation.
The patch amends existing scripts to generate sync_try_cmpxchg
locking primitive and its raw_sync_try_cmpxchg fallback, while
leaving existing macros from try_cmpxchg family unchanged.
The target can define its own arch_sync_try_cmpxchg to override the
generic version of raw_sync_try_cmpxchg. This allows the target
to generate more optimal assembly than the generic version.
Additionally, the patch renames two scripts to better reflect
whet they really do.
Cc: Will Deacon <will@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
---
v2: Improve commit description.
---
include/linux/atomic/atomic-arch-fallback.h | 15 +++++++++-
include/linux/atomic/atomic-instrumented.h | 10 ++++++-
scripts/atomic/gen-atomic-fallback.sh | 33 +++++++++++----------
scripts/atomic/gen-atomic-instrumented.sh | 3 +-
4 files changed, 43 insertions(+), 18 deletions(-)
diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h
index b83ef19da13d..5e95faa959c4 100644
--- a/include/linux/atomic/atomic-arch-fallback.h
+++ b/include/linux/atomic/atomic-arch-fallback.h
@@ -428,6 +428,19 @@ extern void raw_cmpxchg128_relaxed_not_implemented(void);
#define raw_sync_cmpxchg arch_sync_cmpxchg
+#ifdef arch_sync_try_cmpxchg
+#define raw_sync_try_cmpxchg arch_sync_try_cmpxchg
+#else
+#define raw_sync_try_cmpxchg(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = raw_sync_cmpxchg((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif
+
/**
* raw_atomic_read() - atomic load with relaxed ordering
* @v: pointer to atomic_t
@@ -4649,4 +4662,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v)
}
#endif /* _LINUX_ATOMIC_FALLBACK_H */
-// 2fdd6702823fa842f9cea57a002e6e4476ae780c
+// eec048affea735b8464f58e6d96992101f8f85f1
diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h
index d401b406ef7c..54d7bbe0aeaa 100644
--- a/include/linux/atomic/atomic-instrumented.h
+++ b/include/linux/atomic/atomic-instrumented.h
@@ -4998,6 +4998,14 @@ atomic_long_dec_if_positive(atomic_long_t *v)
raw_try_cmpxchg128_local(__ai_ptr, __ai_oldp, __VA_ARGS__); \
})
+#define sync_try_cmpxchg(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ kcsan_mb(); \
+ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ raw_sync_try_cmpxchg(__ai_ptr, __VA_ARGS__); \
+})
+
#endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
-// 1568f875fef72097413caab8339120c065a39aa4
+// 2cc4bc990fef44d3836ec108f11b610f3f438184
diff --git a/scripts/atomic/gen-atomic-fallback.sh b/scripts/atomic/gen-atomic-fallback.sh
index a45154cefa48..f80d69cfeb1f 100755
--- a/scripts/atomic/gen-atomic-fallback.sh
+++ b/scripts/atomic/gen-atomic-fallback.sh
@@ -223,14 +223,15 @@ gen_xchg_fallbacks()
gen_try_cmpxchg_fallback()
{
+ local prefix="$1"; shift
local cmpxchg="$1"; shift;
- local order="$1"; shift;
+ local suffix="$1"; shift;
cat <<EOF
-#define raw_try_${cmpxchg}${order}(_ptr, _oldp, _new) \\
+#define raw_${prefix}try_${cmpxchg}${suffix}(_ptr, _oldp, _new) \\
({ \\
typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \\
- ___r = raw_${cmpxchg}${order}((_ptr), ___o, (_new)); \\
+ ___r = raw_${prefix}${cmpxchg}${suffix}((_ptr), ___o, (_new)); \\
if (unlikely(___r != ___o)) \\
*___op = ___r; \\
likely(___r == ___o); \\
@@ -259,11 +260,11 @@ gen_try_cmpxchg_order_fallback()
fi
printf "#else\n"
- gen_try_cmpxchg_fallback "${cmpxchg}" "${order}"
+ gen_try_cmpxchg_fallback "" "${cmpxchg}" "${order}"
printf "#endif\n\n"
}
-gen_try_cmpxchg_fallbacks()
+gen_try_cmpxchg_order_fallbacks()
{
local cmpxchg="$1"; shift;
@@ -272,15 +273,17 @@ gen_try_cmpxchg_fallbacks()
done
}
-gen_cmpxchg_local_fallbacks()
+gen_def_and_try_cmpxchg_fallback()
{
+ local prefix="$1"; shift
local cmpxchg="$1"; shift
+ local suffix="$1"; shift
- printf "#define raw_${cmpxchg} arch_${cmpxchg}\n\n"
- printf "#ifdef arch_try_${cmpxchg}\n"
- printf "#define raw_try_${cmpxchg} arch_try_${cmpxchg}\n"
+ printf "#define raw_${prefix}${cmpxchg}${suffix} arch_${prefix}${cmpxchg}${suffix}\n\n"
+ printf "#ifdef arch_${prefix}try_${cmpxchg}${suffix}\n"
+ printf "#define raw_${prefix}try_${cmpxchg}${suffix} arch_${prefix}try_${cmpxchg}${suffix}\n"
printf "#else\n"
- gen_try_cmpxchg_fallback "${cmpxchg}" ""
+ gen_try_cmpxchg_fallback "${prefix}" "${cmpxchg}" "${suffix}"
printf "#endif\n\n"
}
@@ -302,15 +305,15 @@ for xchg in "xchg" "cmpxchg" "cmpxchg64" "cmpxchg128"; do
done
for cmpxchg in "cmpxchg" "cmpxchg64" "cmpxchg128"; do
- gen_try_cmpxchg_fallbacks "${cmpxchg}"
+ gen_try_cmpxchg_order_fallbacks "${cmpxchg}"
done
-for cmpxchg in "cmpxchg_local" "cmpxchg64_local" "cmpxchg128_local"; do
- gen_cmpxchg_local_fallbacks "${cmpxchg}" ""
+for cmpxchg in "cmpxchg" "cmpxchg64" "cmpxchg128"; do
+ gen_def_and_try_cmpxchg_fallback "" "${cmpxchg}" "_local"
done
-for cmpxchg in "sync_cmpxchg"; do
- printf "#define raw_${cmpxchg} arch_${cmpxchg}\n\n"
+for cmpxchg in "cmpxchg"; do
+ gen_def_and_try_cmpxchg_fallback "sync_" "${cmpxchg}" ""
done
grep '^[a-z]' "$1" | while read name meta args; do
diff --git a/scripts/atomic/gen-atomic-instrumented.sh b/scripts/atomic/gen-atomic-instrumented.sh
index 8f8f8e3b20f9..592f3ec89b5f 100755
--- a/scripts/atomic/gen-atomic-instrumented.sh
+++ b/scripts/atomic/gen-atomic-instrumented.sh
@@ -169,7 +169,8 @@ for xchg in "xchg" "cmpxchg" "cmpxchg64" "cmpxchg128" "try_cmpxchg" "try_cmpxchg
done
done
-for xchg in "cmpxchg_local" "cmpxchg64_local" "cmpxchg128_local" "sync_cmpxchg" "try_cmpxchg_local" "try_cmpxchg64_local" "try_cmpxchg128_local"; do
+for xchg in "cmpxchg_local" "cmpxchg64_local" "cmpxchg128_local" "sync_cmpxchg" \
+ "try_cmpxchg_local" "try_cmpxchg64_local" "try_cmpxchg128_local" "sync_try_cmpxchg"; do
gen_xchg "${xchg}" ""
printf "\n"
done
--
2.41.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH v2 2/3] locking/x86: Introduce arch_sync_try_cmpxchg
2023-10-01 10:39 [PATCH v2 1/3] locking/generic: Add generic support for sync_try_cmpxchg and its fallback Uros Bizjak
@ 2023-10-01 10:39 ` Uros Bizjak
2023-10-01 10:39 ` [PATCH v2 3/3] xen: Use sync_try_cmpxchg instead of sync_cmpxchg Uros Bizjak
1 sibling, 0 replies; 3+ messages in thread
From: Uros Bizjak @ 2023-10-01 10:39 UTC (permalink / raw)
To: linux-kernel, x86, xen-devel
Cc: Uros Bizjak, Peter Zijlstra, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, Dave Hansen, H. Peter Anvin
Introduce arch_sync_try_cmpxchg macro to improve code using
sync_try_cmpxchg locking primitive. The new definitions use existing
__raw_try_cmpxchg macros, but use its own "lock; " prefix.
The new macros improve assembly of the cmpxchg loop in
evtchn_fifo_unmask() from drivers/xen/events/events_fifo.c from:
57a: 85 c0 test %eax,%eax
57c: 78 52 js 5d0 <...>
57e: 89 c1 mov %eax,%ecx
580: 25 ff ff ff af and $0xafffffff,%eax
585: c7 04 24 00 00 00 00 movl $0x0,(%rsp)
58c: 81 e1 ff ff ff ef and $0xefffffff,%ecx
592: 89 4c 24 04 mov %ecx,0x4(%rsp)
596: 89 44 24 08 mov %eax,0x8(%rsp)
59a: 8b 74 24 08 mov 0x8(%rsp),%esi
59e: 8b 44 24 04 mov 0x4(%rsp),%eax
5a2: f0 0f b1 32 lock cmpxchg %esi,(%rdx)
5a6: 89 04 24 mov %eax,(%rsp)
5a9: 8b 04 24 mov (%rsp),%eax
5ac: 39 c1 cmp %eax,%ecx
5ae: 74 07 je 5b7 <...>
5b0: a9 00 00 00 40 test $0x40000000,%eax
5b5: 75 c3 jne 57a <...>
<...>
to:
578: a9 00 00 00 40 test $0x40000000,%eax
57d: 74 2b je 5aa <...>
57f: 85 c0 test %eax,%eax
581: 78 40 js 5c3 <...>
583: 89 c1 mov %eax,%ecx
585: 25 ff ff ff af and $0xafffffff,%eax
58a: 81 e1 ff ff ff ef and $0xefffffff,%ecx
590: 89 4c 24 04 mov %ecx,0x4(%rsp)
594: 89 44 24 08 mov %eax,0x8(%rsp)
598: 8b 4c 24 08 mov 0x8(%rsp),%ecx
59c: 8b 44 24 04 mov 0x4(%rsp),%eax
5a0: f0 0f b1 0a lock cmpxchg %ecx,(%rdx)
5a4: 89 44 24 04 mov %eax,0x4(%rsp)
5a8: 75 30 jne 5da <...>
<...>
5da: 8b 44 24 04 mov 0x4(%rsp),%eax
5de: eb 98 jmp 578 <...>
The new code removes move instructions from 585: 5a6: and 5a9:
and the compare from 5ac:. Additionally, the compiler assumes that
cmpxchg success is more probable and optimizes code flow accordingly.
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
---
v2: Improve commit description.
---
arch/x86/include/asm/cmpxchg.h | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index d53636506134..5612648b0202 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -221,12 +221,18 @@ extern void __add_wrong_size(void)
#define __try_cmpxchg(ptr, pold, new, size) \
__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
+#define __sync_try_cmpxchg(ptr, pold, new, size) \
+ __raw_try_cmpxchg((ptr), (pold), (new), (size), "lock; ")
+
#define __try_cmpxchg_local(ptr, pold, new, size) \
__raw_try_cmpxchg((ptr), (pold), (new), (size), "")
#define arch_try_cmpxchg(ptr, pold, new) \
__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
+#define arch_sync_try_cmpxchg(ptr, pold, new) \
+ __sync_try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
+
#define arch_try_cmpxchg_local(ptr, pold, new) \
__try_cmpxchg_local((ptr), (pold), (new), sizeof(*(ptr)))
--
2.41.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH v2 3/3] xen: Use sync_try_cmpxchg instead of sync_cmpxchg
2023-10-01 10:39 [PATCH v2 1/3] locking/generic: Add generic support for sync_try_cmpxchg and its fallback Uros Bizjak
2023-10-01 10:39 ` [PATCH v2 2/3] locking/x86: Introduce arch_sync_try_cmpxchg Uros Bizjak
@ 2023-10-01 10:39 ` Uros Bizjak
1 sibling, 0 replies; 3+ messages in thread
From: Uros Bizjak @ 2023-10-01 10:39 UTC (permalink / raw)
To: linux-kernel, x86, xen-devel
Cc: Uros Bizjak, Peter Zijlstra, Ingo Molnar, Stefano Stabellini,
Oleksandr Tyshchenko, Juergen Gross
Use sync_try_cmpxchg instead of sync_cmpxchg (*ptr, old, new) == old
in clear_masked_cond(), clear_linked() and
gnttab_end_foreign_access_ref_v1(). x86 CMPXCHG instruction returns
success in ZF flag, so this change saves a compare after cmpxchg
(and related move instruction in front of cmpxchg), improving the
cmpxchg loop in gnttab_end_foreign_access_ref_v1 from:
174: eb 0e jmp 184 <...>
176: 89 d0 mov %edx,%eax
178: f0 66 0f b1 31 lock cmpxchg %si,(%rcx)
17d: 66 39 c2 cmp %ax,%dx
180: 74 11 je 193 <...>
182: 89 c2 mov %eax,%edx
184: 89 d6 mov %edx,%esi
186: 66 83 e6 18 and $0x18,%si
18a: 74 ea je 176 <...>
to:
614: 89 c1 mov %eax,%ecx
616: 66 83 e1 18 and $0x18,%cx
61a: 75 11 jne 62d <...>
61c: f0 66 0f b1 0a lock cmpxchg %cx,(%rdx)
621: 75 f1 jne 614 <...>
No functional change intended.
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Acked-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
---
v2: Improve commit description.
---
drivers/xen/events/events_fifo.c | 26 ++++++++++++--------------
drivers/xen/grant-table.c | 10 ++++------
2 files changed, 16 insertions(+), 20 deletions(-)
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
index ad9fe51d3fb3..655775db7caf 100644
--- a/drivers/xen/events/events_fifo.c
+++ b/drivers/xen/events/events_fifo.c
@@ -226,21 +226,20 @@ static bool evtchn_fifo_is_masked(evtchn_port_t port)
*/
static bool clear_masked_cond(volatile event_word_t *word)
{
- event_word_t new, old, w;
+ event_word_t new, old;
- w = *word;
+ old = *word;
do {
- if (!(w & (1 << EVTCHN_FIFO_MASKED)))
+ if (!(old & (1 << EVTCHN_FIFO_MASKED)))
return true;
- if (w & (1 << EVTCHN_FIFO_PENDING))
+ if (old & (1 << EVTCHN_FIFO_PENDING))
return false;
- old = w & ~(1 << EVTCHN_FIFO_BUSY);
+ old = old & ~(1 << EVTCHN_FIFO_BUSY);
new = old & ~(1 << EVTCHN_FIFO_MASKED);
- w = sync_cmpxchg(word, old, new);
- } while (w != old);
+ } while (!sync_try_cmpxchg(word, &old, new));
return true;
}
@@ -259,17 +258,16 @@ static void evtchn_fifo_unmask(evtchn_port_t port)
static uint32_t clear_linked(volatile event_word_t *word)
{
- event_word_t new, old, w;
+ event_word_t new, old;
- w = *word;
+ old = *word;
do {
- old = w;
- new = (w & ~((1 << EVTCHN_FIFO_LINKED)
- | EVTCHN_FIFO_LINK_MASK));
- } while ((w = sync_cmpxchg(word, old, new)) != old);
+ new = (old & ~((1 << EVTCHN_FIFO_LINKED)
+ | EVTCHN_FIFO_LINK_MASK));
+ } while (!sync_try_cmpxchg(word, &old, new));
- return w & EVTCHN_FIFO_LINK_MASK;
+ return old & EVTCHN_FIFO_LINK_MASK;
}
static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl,
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 35659bf70746..04a6b470b15d 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -427,16 +427,14 @@ EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref)
{
- u16 flags, nflags;
- u16 *pflags;
+ u16 *pflags = &gnttab_shared.v1[ref].flags;
+ u16 flags;
- pflags = &gnttab_shared.v1[ref].flags;
- nflags = *pflags;
+ flags = *pflags;
do {
- flags = nflags;
if (flags & (GTF_reading|GTF_writing))
return 0;
- } while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags);
+ } while (!sync_try_cmpxchg(pflags, &flags, 0));
return 1;
}
--
2.41.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2023-10-01 10:41 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-10-01 10:39 [PATCH v2 1/3] locking/generic: Add generic support for sync_try_cmpxchg and its fallback Uros Bizjak
2023-10-01 10:39 ` [PATCH v2 2/3] locking/x86: Introduce arch_sync_try_cmpxchg Uros Bizjak
2023-10-01 10:39 ` [PATCH v2 3/3] xen: Use sync_try_cmpxchg instead of sync_cmpxchg Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox