* [PATCH v2 2/3] locking/x86: Introduce arch_sync_try_cmpxchg
2023-10-01 10:39 [PATCH v2 1/3] locking/generic: Add generic support for sync_try_cmpxchg and its fallback Uros Bizjak
@ 2023-10-01 10:39 ` Uros Bizjak
2023-10-01 10:39 ` [PATCH v2 3/3] xen: Use sync_try_cmpxchg instead of sync_cmpxchg Uros Bizjak
1 sibling, 0 replies; 3+ messages in thread
From: Uros Bizjak @ 2023-10-01 10:39 UTC (permalink / raw)
To: linux-kernel, x86, xen-devel
Cc: Uros Bizjak, Peter Zijlstra, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, Dave Hansen, H. Peter Anvin
Introduce the arch_sync_try_cmpxchg macro to improve code using the
sync_try_cmpxchg locking primitive. The new definitions use the existing
__raw_try_cmpxchg macros, but with their own "lock; " prefix.
The new macros improve assembly of the cmpxchg loop in
evtchn_fifo_unmask() from drivers/xen/events/events_fifo.c from:
57a: 85 c0 test %eax,%eax
57c: 78 52 js 5d0 <...>
57e: 89 c1 mov %eax,%ecx
580: 25 ff ff ff af and $0xafffffff,%eax
585: c7 04 24 00 00 00 00 movl $0x0,(%rsp)
58c: 81 e1 ff ff ff ef and $0xefffffff,%ecx
592: 89 4c 24 04 mov %ecx,0x4(%rsp)
596: 89 44 24 08 mov %eax,0x8(%rsp)
59a: 8b 74 24 08 mov 0x8(%rsp),%esi
59e: 8b 44 24 04 mov 0x4(%rsp),%eax
5a2: f0 0f b1 32 lock cmpxchg %esi,(%rdx)
5a6: 89 04 24 mov %eax,(%rsp)
5a9: 8b 04 24 mov (%rsp),%eax
5ac: 39 c1 cmp %eax,%ecx
5ae: 74 07 je 5b7 <...>
5b0: a9 00 00 00 40 test $0x40000000,%eax
5b5: 75 c3 jne 57a <...>
<...>
to:
578: a9 00 00 00 40 test $0x40000000,%eax
57d: 74 2b je 5aa <...>
57f: 85 c0 test %eax,%eax
581: 78 40 js 5c3 <...>
583: 89 c1 mov %eax,%ecx
585: 25 ff ff ff af and $0xafffffff,%eax
58a: 81 e1 ff ff ff ef and $0xefffffff,%ecx
590: 89 4c 24 04 mov %ecx,0x4(%rsp)
594: 89 44 24 08 mov %eax,0x8(%rsp)
598: 8b 4c 24 08 mov 0x8(%rsp),%ecx
59c: 8b 44 24 04 mov 0x4(%rsp),%eax
5a0: f0 0f b1 0a lock cmpxchg %ecx,(%rdx)
5a4: 89 44 24 04 mov %eax,0x4(%rsp)
5a8: 75 30 jne 5da <...>
<...>
5da: 8b 44 24 04 mov 0x4(%rsp),%eax
5de: eb 98 jmp 578 <...>
The new code removes the move instructions at 585:, 5a6: and 5a9:
and the compare at 5ac: (addresses refer to the original listing).
Additionally, the compiler assumes that cmpxchg success is more probable
and optimizes code flow accordingly.
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
---
v2: Improve commit description.
---
arch/x86/include/asm/cmpxchg.h | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index d53636506134..5612648b0202 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -221,12 +221,18 @@ extern void __add_wrong_size(void)
#define __try_cmpxchg(ptr, pold, new, size) \
__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
+#define __sync_try_cmpxchg(ptr, pold, new, size) \
+ __raw_try_cmpxchg((ptr), (pold), (new), (size), "lock; ")
+
#define __try_cmpxchg_local(ptr, pold, new, size) \
__raw_try_cmpxchg((ptr), (pold), (new), (size), "")
#define arch_try_cmpxchg(ptr, pold, new) \
__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
+#define arch_sync_try_cmpxchg(ptr, pold, new) \
+ __sync_try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
+
#define arch_try_cmpxchg_local(ptr, pold, new) \
__try_cmpxchg_local((ptr), (pold), (new), sizeof(*(ptr)))
--
2.41.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH v2 3/3] xen: Use sync_try_cmpxchg instead of sync_cmpxchg
2023-10-01 10:39 [PATCH v2 1/3] locking/generic: Add generic support for sync_try_cmpxchg and its fallback Uros Bizjak
2023-10-01 10:39 ` [PATCH v2 2/3] locking/x86: Introduce arch_sync_try_cmpxchg Uros Bizjak
@ 2023-10-01 10:39 ` Uros Bizjak
1 sibling, 0 replies; 3+ messages in thread
From: Uros Bizjak @ 2023-10-01 10:39 UTC (permalink / raw)
To: linux-kernel, x86, xen-devel
Cc: Uros Bizjak, Peter Zijlstra, Ingo Molnar, Stefano Stabellini,
Oleksandr Tyshchenko, Juergen Gross
Use sync_try_cmpxchg instead of sync_cmpxchg(ptr, old, new) == old
in clear_masked_cond(), clear_linked() and
gnttab_end_foreign_access_ref_v1(). The x86 CMPXCHG instruction returns
success in the ZF flag, so this change saves a compare after cmpxchg
(and the related move instruction in front of cmpxchg), improving the
cmpxchg loop in gnttab_end_foreign_access_ref_v1() from:
174: eb 0e jmp 184 <...>
176: 89 d0 mov %edx,%eax
178: f0 66 0f b1 31 lock cmpxchg %si,(%rcx)
17d: 66 39 c2 cmp %ax,%dx
180: 74 11 je 193 <...>
182: 89 c2 mov %eax,%edx
184: 89 d6 mov %edx,%esi
186: 66 83 e6 18 and $0x18,%si
18a: 74 ea je 176 <...>
to:
614: 89 c1 mov %eax,%ecx
616: 66 83 e1 18 and $0x18,%cx
61a: 75 11 jne 62d <...>
61c: f0 66 0f b1 0a lock cmpxchg %cx,(%rdx)
621: 75 f1 jne 614 <...>
No functional change intended.
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Acked-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
---
v2: Improve commit description.
---
drivers/xen/events/events_fifo.c | 26 ++++++++++++--------------
drivers/xen/grant-table.c | 10 ++++------
2 files changed, 16 insertions(+), 20 deletions(-)
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
index ad9fe51d3fb3..655775db7caf 100644
--- a/drivers/xen/events/events_fifo.c
+++ b/drivers/xen/events/events_fifo.c
@@ -226,21 +226,20 @@ static bool evtchn_fifo_is_masked(evtchn_port_t port)
*/
static bool clear_masked_cond(volatile event_word_t *word)
{
- event_word_t new, old, w;
+ event_word_t new, old;
- w = *word;
+ old = *word;
do {
- if (!(w & (1 << EVTCHN_FIFO_MASKED)))
+ if (!(old & (1 << EVTCHN_FIFO_MASKED)))
return true;
- if (w & (1 << EVTCHN_FIFO_PENDING))
+ if (old & (1 << EVTCHN_FIFO_PENDING))
return false;
- old = w & ~(1 << EVTCHN_FIFO_BUSY);
+ old = old & ~(1 << EVTCHN_FIFO_BUSY);
new = old & ~(1 << EVTCHN_FIFO_MASKED);
- w = sync_cmpxchg(word, old, new);
- } while (w != old);
+ } while (!sync_try_cmpxchg(word, &old, new));
return true;
}
@@ -259,17 +258,16 @@ static void evtchn_fifo_unmask(evtchn_port_t port)
static uint32_t clear_linked(volatile event_word_t *word)
{
- event_word_t new, old, w;
+ event_word_t new, old;
- w = *word;
+ old = *word;
do {
- old = w;
- new = (w & ~((1 << EVTCHN_FIFO_LINKED)
- | EVTCHN_FIFO_LINK_MASK));
- } while ((w = sync_cmpxchg(word, old, new)) != old);
+ new = (old & ~((1 << EVTCHN_FIFO_LINKED)
+ | EVTCHN_FIFO_LINK_MASK));
+ } while (!sync_try_cmpxchg(word, &old, new));
- return w & EVTCHN_FIFO_LINK_MASK;
+ return old & EVTCHN_FIFO_LINK_MASK;
}
static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl,
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 35659bf70746..04a6b470b15d 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -427,16 +427,14 @@ EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref)
{
- u16 flags, nflags;
- u16 *pflags;
+ u16 *pflags = &gnttab_shared.v1[ref].flags;
+ u16 flags;
- pflags = &gnttab_shared.v1[ref].flags;
- nflags = *pflags;
+ flags = *pflags;
do {
- flags = nflags;
if (flags & (GTF_reading|GTF_writing))
return 0;
- } while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags);
+ } while (!sync_try_cmpxchg(pflags, &flags, 0));
return 1;
}
--
2.41.0
^ permalink raw reply related [flat|nested] 3+ messages in thread