public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/3] locking/generic: Add generic support for sync_try_cmpxchg and its fallback
@ 2023-10-01 10:39 Uros Bizjak
  2023-10-01 10:39 ` [PATCH v2 2/3] locking/x86: Introduce arch_sync_try_cmpxchg Uros Bizjak
  2023-10-01 10:39 ` [PATCH v2 3/3] xen: Use sync_try_cmpxchg instead of sync_cmpxchg Uros Bizjak
  0 siblings, 2 replies; 3+ messages in thread
From: Uros Bizjak @ 2023-10-01 10:39 UTC (permalink / raw)
  To: linux-kernel, x86, xen-devel
  Cc: Uros Bizjak, Will Deacon, Peter Zijlstra, Ingo Molnar, Boqun Feng,
	Mark Rutland

Provide the generic sync_try_cmpxchg function from the
raw_ prefixed version, also adding explicit instrumentation.

The patch amends existing scripts to generate sync_try_cmpxchg
locking primitive and its raw_sync_try_cmpxchg fallback, while
leaving existing macros from try_cmpxchg family unchanged.

The target can define its own arch_sync_try_cmpxchg to override the
generic version of raw_sync_try_cmpxchg. This allows the target
to generate more optimal assembly than the generic version.

Additionally, the patch renames two scripts to better reflect
whet they really do.

Cc: Will Deacon <will@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
---
v2: Improve commit description.
---
 include/linux/atomic/atomic-arch-fallback.h | 15 +++++++++-
 include/linux/atomic/atomic-instrumented.h  | 10 ++++++-
 scripts/atomic/gen-atomic-fallback.sh       | 33 +++++++++++----------
 scripts/atomic/gen-atomic-instrumented.sh   |  3 +-
 4 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h
index b83ef19da13d..5e95faa959c4 100644
--- a/include/linux/atomic/atomic-arch-fallback.h
+++ b/include/linux/atomic/atomic-arch-fallback.h
@@ -428,6 +428,19 @@ extern void raw_cmpxchg128_relaxed_not_implemented(void);
 
 #define raw_sync_cmpxchg arch_sync_cmpxchg
 
+#ifdef arch_sync_try_cmpxchg
+#define raw_sync_try_cmpxchg arch_sync_try_cmpxchg
+#else
+#define raw_sync_try_cmpxchg(_ptr, _oldp, _new) \
+({ \
+	typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+	___r = raw_sync_cmpxchg((_ptr), ___o, (_new)); \
+	if (unlikely(___r != ___o)) \
+		*___op = ___r; \
+	likely(___r == ___o); \
+})
+#endif
+
 /**
  * raw_atomic_read() - atomic load with relaxed ordering
  * @v: pointer to atomic_t
@@ -4649,4 +4662,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v)
 }
 
 #endif /* _LINUX_ATOMIC_FALLBACK_H */
-// 2fdd6702823fa842f9cea57a002e6e4476ae780c
+// eec048affea735b8464f58e6d96992101f8f85f1
diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h
index d401b406ef7c..54d7bbe0aeaa 100644
--- a/include/linux/atomic/atomic-instrumented.h
+++ b/include/linux/atomic/atomic-instrumented.h
@@ -4998,6 +4998,14 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 	raw_try_cmpxchg128_local(__ai_ptr, __ai_oldp, __VA_ARGS__); \
 })
 
+#define sync_try_cmpxchg(ptr, ...) \
+({ \
+	typeof(ptr) __ai_ptr = (ptr); \
+	kcsan_mb(); \
+	instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \
+	raw_sync_try_cmpxchg(__ai_ptr, __VA_ARGS__); \
+})
+
 
 #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
-// 1568f875fef72097413caab8339120c065a39aa4
+// 2cc4bc990fef44d3836ec108f11b610f3f438184
diff --git a/scripts/atomic/gen-atomic-fallback.sh b/scripts/atomic/gen-atomic-fallback.sh
index a45154cefa48..f80d69cfeb1f 100755
--- a/scripts/atomic/gen-atomic-fallback.sh
+++ b/scripts/atomic/gen-atomic-fallback.sh
@@ -223,14 +223,15 @@ gen_xchg_fallbacks()
 
 gen_try_cmpxchg_fallback()
 {
+	local prefix="$1"; shift
 	local cmpxchg="$1"; shift;
-	local order="$1"; shift;
+	local suffix="$1"; shift;
 
 cat <<EOF
-#define raw_try_${cmpxchg}${order}(_ptr, _oldp, _new) \\
+#define raw_${prefix}try_${cmpxchg}${suffix}(_ptr, _oldp, _new) \\
 ({ \\
 	typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \\
-	___r = raw_${cmpxchg}${order}((_ptr), ___o, (_new)); \\
+	___r = raw_${prefix}${cmpxchg}${suffix}((_ptr), ___o, (_new)); \\
 	if (unlikely(___r != ___o)) \\
 		*___op = ___r; \\
 	likely(___r == ___o); \\
@@ -259,11 +260,11 @@ gen_try_cmpxchg_order_fallback()
 	fi
 
 	printf "#else\n"
-	gen_try_cmpxchg_fallback "${cmpxchg}" "${order}"
+	gen_try_cmpxchg_fallback "" "${cmpxchg}" "${order}"
 	printf "#endif\n\n"
 }
 
-gen_try_cmpxchg_fallbacks()
+gen_try_cmpxchg_order_fallbacks()
 {
 	local cmpxchg="$1"; shift;
 
@@ -272,15 +273,17 @@ gen_try_cmpxchg_fallbacks()
 	done
 }
 
-gen_cmpxchg_local_fallbacks()
+gen_def_and_try_cmpxchg_fallback()
 {
+	local prefix="$1"; shift
 	local cmpxchg="$1"; shift
+	local suffix="$1"; shift
 
-	printf "#define raw_${cmpxchg} arch_${cmpxchg}\n\n"
-	printf "#ifdef arch_try_${cmpxchg}\n"
-	printf "#define raw_try_${cmpxchg} arch_try_${cmpxchg}\n"
+	printf "#define raw_${prefix}${cmpxchg}${suffix} arch_${prefix}${cmpxchg}${suffix}\n\n"
+	printf "#ifdef arch_${prefix}try_${cmpxchg}${suffix}\n"
+	printf "#define raw_${prefix}try_${cmpxchg}${suffix} arch_${prefix}try_${cmpxchg}${suffix}\n"
 	printf "#else\n"
-	gen_try_cmpxchg_fallback "${cmpxchg}" ""
+	gen_try_cmpxchg_fallback "${prefix}" "${cmpxchg}" "${suffix}"
 	printf "#endif\n\n"
 }
 
@@ -302,15 +305,15 @@ for xchg in "xchg" "cmpxchg" "cmpxchg64" "cmpxchg128"; do
 done
 
 for cmpxchg in "cmpxchg" "cmpxchg64" "cmpxchg128"; do
-	gen_try_cmpxchg_fallbacks "${cmpxchg}"
+	gen_try_cmpxchg_order_fallbacks "${cmpxchg}"
 done
 
-for cmpxchg in "cmpxchg_local" "cmpxchg64_local" "cmpxchg128_local"; do
-	gen_cmpxchg_local_fallbacks "${cmpxchg}" ""
+for cmpxchg in "cmpxchg" "cmpxchg64" "cmpxchg128"; do
+	gen_def_and_try_cmpxchg_fallback "" "${cmpxchg}" "_local"
 done
 
-for cmpxchg in "sync_cmpxchg"; do
-	printf "#define raw_${cmpxchg} arch_${cmpxchg}\n\n"
+for cmpxchg in "cmpxchg"; do
+	gen_def_and_try_cmpxchg_fallback "sync_" "${cmpxchg}" ""
 done
 
 grep '^[a-z]' "$1" | while read name meta args; do
diff --git a/scripts/atomic/gen-atomic-instrumented.sh b/scripts/atomic/gen-atomic-instrumented.sh
index 8f8f8e3b20f9..592f3ec89b5f 100755
--- a/scripts/atomic/gen-atomic-instrumented.sh
+++ b/scripts/atomic/gen-atomic-instrumented.sh
@@ -169,7 +169,8 @@ for xchg in "xchg" "cmpxchg" "cmpxchg64" "cmpxchg128" "try_cmpxchg" "try_cmpxchg
 	done
 done
 
-for xchg in "cmpxchg_local" "cmpxchg64_local" "cmpxchg128_local" "sync_cmpxchg" "try_cmpxchg_local" "try_cmpxchg64_local" "try_cmpxchg128_local"; do
+for xchg in "cmpxchg_local" "cmpxchg64_local" "cmpxchg128_local" "sync_cmpxchg" \
+	    "try_cmpxchg_local" "try_cmpxchg64_local" "try_cmpxchg128_local" "sync_try_cmpxchg"; do
 	gen_xchg "${xchg}" ""
 	printf "\n"
 done
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH v2 2/3] locking/x86: Introduce arch_sync_try_cmpxchg
  2023-10-01 10:39 [PATCH v2 1/3] locking/generic: Add generic support for sync_try_cmpxchg and its fallback Uros Bizjak
@ 2023-10-01 10:39 ` Uros Bizjak
  2023-10-01 10:39 ` [PATCH v2 3/3] xen: Use sync_try_cmpxchg instead of sync_cmpxchg Uros Bizjak
  1 sibling, 0 replies; 3+ messages in thread
From: Uros Bizjak @ 2023-10-01 10:39 UTC (permalink / raw)
  To: linux-kernel, x86, xen-devel
  Cc: Uros Bizjak, Peter Zijlstra, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, H. Peter Anvin

Introduce arch_sync_try_cmpxchg macro to improve code using
sync_try_cmpxchg locking primitive. The new definitions use existing
__raw_try_cmpxchg macros, but use its own "lock; " prefix.

The new macros improve assembly of the cmpxchg loop in
evtchn_fifo_unmask() from drivers/xen/events/events_fifo.c from:

 57a:	85 c0                	test   %eax,%eax
 57c:	78 52                	js     5d0 <...>
 57e:	89 c1                	mov    %eax,%ecx
 580:	25 ff ff ff af       	and    $0xafffffff,%eax
 585:	c7 04 24 00 00 00 00 	movl   $0x0,(%rsp)
 58c:	81 e1 ff ff ff ef    	and    $0xefffffff,%ecx
 592:	89 4c 24 04          	mov    %ecx,0x4(%rsp)
 596:	89 44 24 08          	mov    %eax,0x8(%rsp)
 59a:	8b 74 24 08          	mov    0x8(%rsp),%esi
 59e:	8b 44 24 04          	mov    0x4(%rsp),%eax
 5a2:	f0 0f b1 32          	lock cmpxchg %esi,(%rdx)
 5a6:	89 04 24             	mov    %eax,(%rsp)
 5a9:	8b 04 24             	mov    (%rsp),%eax
 5ac:	39 c1                	cmp    %eax,%ecx
 5ae:	74 07                	je     5b7 <...>
 5b0:	a9 00 00 00 40       	test   $0x40000000,%eax
 5b5:	75 c3                	jne    57a <...>
 <...>

to:

 578:	a9 00 00 00 40       	test   $0x40000000,%eax
 57d:	74 2b                	je     5aa <...>
 57f:	85 c0                	test   %eax,%eax
 581:	78 40                	js     5c3 <...>
 583:	89 c1                	mov    %eax,%ecx
 585:	25 ff ff ff af       	and    $0xafffffff,%eax
 58a:	81 e1 ff ff ff ef    	and    $0xefffffff,%ecx
 590:	89 4c 24 04          	mov    %ecx,0x4(%rsp)
 594:	89 44 24 08          	mov    %eax,0x8(%rsp)
 598:	8b 4c 24 08          	mov    0x8(%rsp),%ecx
 59c:	8b 44 24 04          	mov    0x4(%rsp),%eax
 5a0:	f0 0f b1 0a          	lock cmpxchg %ecx,(%rdx)
 5a4:	89 44 24 04          	mov    %eax,0x4(%rsp)
 5a8:	75 30                	jne    5da <...>
 <...>
 5da:	8b 44 24 04          	mov    0x4(%rsp),%eax
 5de:	eb 98                	jmp    578 <...>

The new code removes move instructions from 585: 5a6: and 5a9:
and the compare from 5ac:. Additionally, the compiler assumes that
cmpxchg success is more probable and optimizes code flow accordingly.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
---
v2: Improve commit description.
---
 arch/x86/include/asm/cmpxchg.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index d53636506134..5612648b0202 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -221,12 +221,18 @@ extern void __add_wrong_size(void)
 #define __try_cmpxchg(ptr, pold, new, size)				\
 	__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
 
+#define __sync_try_cmpxchg(ptr, pold, new, size)			\
+	__raw_try_cmpxchg((ptr), (pold), (new), (size), "lock; ")
+
 #define __try_cmpxchg_local(ptr, pold, new, size)			\
 	__raw_try_cmpxchg((ptr), (pold), (new), (size), "")
 
 #define arch_try_cmpxchg(ptr, pold, new) 				\
 	__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
 
+#define arch_sync_try_cmpxchg(ptr, pold, new) 				\
+	__sync_try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
+
 #define arch_try_cmpxchg_local(ptr, pold, new)				\
 	__try_cmpxchg_local((ptr), (pold), (new), sizeof(*(ptr)))
 
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH v2 3/3] xen: Use sync_try_cmpxchg instead of sync_cmpxchg
  2023-10-01 10:39 [PATCH v2 1/3] locking/generic: Add generic support for sync_try_cmpxchg and its fallback Uros Bizjak
  2023-10-01 10:39 ` [PATCH v2 2/3] locking/x86: Introduce arch_sync_try_cmpxchg Uros Bizjak
@ 2023-10-01 10:39 ` Uros Bizjak
  1 sibling, 0 replies; 3+ messages in thread
From: Uros Bizjak @ 2023-10-01 10:39 UTC (permalink / raw)
  To: linux-kernel, x86, xen-devel
  Cc: Uros Bizjak, Peter Zijlstra, Ingo Molnar, Stefano Stabellini,
	Oleksandr Tyshchenko, Juergen Gross

Use sync_try_cmpxchg instead of sync_cmpxchg (*ptr, old, new) == old
in clear_masked_cond(), clear_linked() and
gnttab_end_foreign_access_ref_v1(). x86 CMPXCHG instruction returns
success in ZF flag, so this change saves a compare after cmpxchg
(and related move instruction in front of cmpxchg), improving the
cmpxchg loop in gnttab_end_foreign_access_ref_v1 from:

     174:	eb 0e                	jmp    184 <...>
     176:	89 d0                	mov    %edx,%eax
     178:	f0 66 0f b1 31       	lock cmpxchg %si,(%rcx)
     17d:	66 39 c2             	cmp    %ax,%dx
     180:	74 11                	je     193 <...>
     182:	89 c2                	mov    %eax,%edx
     184:	89 d6                	mov    %edx,%esi
     186:	66 83 e6 18          	and    $0x18,%si
     18a:	74 ea                	je     176 <...>

to:

     614:	89 c1                	mov    %eax,%ecx
     616:	66 83 e1 18          	and    $0x18,%cx
     61a:	75 11                	jne    62d <...>
     61c:	f0 66 0f b1 0a       	lock cmpxchg %cx,(%rdx)
     621:	75 f1                	jne    614 <...>

No functional change intended.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Acked-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
---
v2: Improve commit description.
---
 drivers/xen/events/events_fifo.c | 26 ++++++++++++--------------
 drivers/xen/grant-table.c        | 10 ++++------
 2 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
index ad9fe51d3fb3..655775db7caf 100644
--- a/drivers/xen/events/events_fifo.c
+++ b/drivers/xen/events/events_fifo.c
@@ -226,21 +226,20 @@ static bool evtchn_fifo_is_masked(evtchn_port_t port)
  */
 static bool clear_masked_cond(volatile event_word_t *word)
 {
-	event_word_t new, old, w;
+	event_word_t new, old;
 
-	w = *word;
+	old = *word;
 
 	do {
-		if (!(w & (1 << EVTCHN_FIFO_MASKED)))
+		if (!(old & (1 << EVTCHN_FIFO_MASKED)))
 			return true;
 
-		if (w & (1 << EVTCHN_FIFO_PENDING))
+		if (old & (1 << EVTCHN_FIFO_PENDING))
 			return false;
 
-		old = w & ~(1 << EVTCHN_FIFO_BUSY);
+		old = old & ~(1 << EVTCHN_FIFO_BUSY);
 		new = old & ~(1 << EVTCHN_FIFO_MASKED);
-		w = sync_cmpxchg(word, old, new);
-	} while (w != old);
+	} while (!sync_try_cmpxchg(word, &old, new));
 
 	return true;
 }
@@ -259,17 +258,16 @@ static void evtchn_fifo_unmask(evtchn_port_t port)
 
 static uint32_t clear_linked(volatile event_word_t *word)
 {
-	event_word_t new, old, w;
+	event_word_t new, old;
 
-	w = *word;
+	old = *word;
 
 	do {
-		old = w;
-		new = (w & ~((1 << EVTCHN_FIFO_LINKED)
-			     | EVTCHN_FIFO_LINK_MASK));
-	} while ((w = sync_cmpxchg(word, old, new)) != old);
+		new = (old & ~((1 << EVTCHN_FIFO_LINKED)
+			       | EVTCHN_FIFO_LINK_MASK));
+	} while (!sync_try_cmpxchg(word, &old, new));
 
-	return w & EVTCHN_FIFO_LINK_MASK;
+	return old & EVTCHN_FIFO_LINK_MASK;
 }
 
 static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl,
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 35659bf70746..04a6b470b15d 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -427,16 +427,14 @@ EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
 
 static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref)
 {
-	u16 flags, nflags;
-	u16 *pflags;
+	u16 *pflags = &gnttab_shared.v1[ref].flags;
+	u16 flags;
 
-	pflags = &gnttab_shared.v1[ref].flags;
-	nflags = *pflags;
+	flags = *pflags;
 	do {
-		flags = nflags;
 		if (flags & (GTF_reading|GTF_writing))
 			return 0;
-	} while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags);
+	} while (!sync_try_cmpxchg(pflags, &flags, 0));
 
 	return 1;
 }
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-10-01 10:41 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-10-01 10:39 [PATCH v2 1/3] locking/generic: Add generic support for sync_try_cmpxchg and its fallback Uros Bizjak
2023-10-01 10:39 ` [PATCH v2 2/3] locking/x86: Introduce arch_sync_try_cmpxchg Uros Bizjak
2023-10-01 10:39 ` [PATCH v2 3/3] xen: Use sync_try_cmpxchg instead of sync_cmpxchg Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox