From: Yeoreum Yun <yeoreum.yun@arm.com>
To: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>,
broonie@kernel.org, maz@kernel.org, oliver.upton@linux.dev,
joey.gouly@arm.com, james.morse@arm.com, ardb@kernel.org,
scott@os.amperecomputing.com, suzuki.poulose@arm.com,
yuzenghui@huawei.com, mark.rutland@arm.com,
linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH RESEND v7 4/6] arm64: futex: refactor futex atomic operation
Date: Tue, 16 Sep 2025 13:50:57 +0100 [thread overview]
Message-ID: <aMldMSCz+zHBguRK@e129823.arm.com> (raw)
In-Reply-To: <aMk49UZ1EiUu3-Y2@willie-the-truck>
Hi,
[...]
> > +#define LSUI_CMPXCHG_HELPER(suffix, start_bit) \
> > +static __always_inline int \
> > +__lsui_cmpxchg_helper_##suffix(u64 __user *uaddr, u32 oldval, u32 newval) \
> > +{ \
> > + int ret = 0; \
> > + u64 oval, nval, tmp; \
> > + \
> > + asm volatile("//__lsui_cmpxchg_helper_" #suffix "\n" \
> > + __LSUI_PREAMBLE \
> > +" prfm pstl1strm, %2\n" \
> > +"1: ldtr %x1, %2\n" \
> > +" mov %x3, %x1\n" \
> > +" bfi %x1, %x5, #" #start_bit ", #32\n" \
> > +" bfi %x3, %x6, #" #start_bit ", #32\n" \
> > +" mov %x4, %x1\n" \
> > +"2: caslt %x1, %x3, %2\n" \
> > +" sub %x1, %x1, %x4\n" \
> > +" cbz %x1, 3f\n" \
> > +" mov %w0, %w7\n" \
> > +"3:\n" \
> > +" dmb ish\n" \
> > +"4:\n" \
> > + _ASM_EXTABLE_UACCESS_ERR(1b, 4b, %w0) \
> > + _ASM_EXTABLE_UACCESS_ERR(2b, 4b, %w0) \
> > + : "+r" (ret), "=&r" (oval), "+Q" (*uaddr), "=&r" (nval), "=&r" (tmp) \
> > + : "r" (oldval), "r" (newval), "Ir" (-EAGAIN) \
> > + : "memory"); \
>
> The vast majority of this can be written in C.
Here is the version written in C, based on patch 6:
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 1d6d9f856ac5..68af15ba545a 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -127,81 +127,77 @@ LSUI_FUTEX_ATOMIC_OP(andnot, ldtclr, al)
LSUI_FUTEX_ATOMIC_OP(set, swpt, al)
static __always_inline int
-__lsui_futex_atomic_and(int oparg, u32 __user *uaddr, int *oval)
+__lsui_cmpxchg_helper(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
{
- return __lsui_futex_atomic_andnot(~oparg, uaddr, oval);
-}
+ int ret = -EAGAIN;
+ u64 __user *uaddr_al;
+ u64 oval64, nval64, tmp;
+ static const u64 lo32_mask = GENMASK_U64(31, 0);
+
+ uaddr_al = (u64 __user *) ALIGN_DOWN((unsigned long)uaddr, sizeof(u64));
+ unsafe_get_user(oval64, uaddr_al, err_fault);
+
+ if ((u32 __user *)uaddr_al != uaddr) {
+ nval64 = ((oval64 & lo32_mask) | ((u64)newval << 32));
+ oval64 = ((oval64 & lo32_mask) | ((u64)oldval << 32));
+ } else {
+ nval64 = ((oval64 & ~lo32_mask) | newval);
+ oval64 = ((oval64 & ~lo32_mask) | oldval);
+ }
-static __always_inline int
-__lsui_futex_atomic_eor(int oparg, u32 __user *uaddr, int *oval)
-{
- unsigned int loops = FUTEX_MAX_LOOPS;
- int ret, oldval, tmp;
+ tmp = oval64;
- /*
- * there are no ldteor/stteor instructions...
- */
- asm volatile("// __lsui_futex_atomic_eor\n"
+ asm volatile("//__lsui_cmpxchg_helper\n"
__LSUI_PREAMBLE
-" prfm pstl1strm, %2\n"
-"1: ldtxr %w1, %2\n"
-" eor %w3, %w1, %w5\n"
-"2: stltxr %w0, %w3, %2\n"
-" cbz %w0, 3f\n"
-" sub %w4, %w4, %w0\n"
-" cbnz %w4, 1b\n"
-" mov %w0, %w6\n"
-"3:\n"
+"1: caslt %x1, %x3, %2\n"
+" sub %x1, %x1, %x4\n"
+" cbnz %x1, 2f\n"
+" mov %w0, %w5\n"
+"2:\n"
" dmb ish\n"
+"3:\n"
_ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w0)
- _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w0)
- : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp),
- "+r" (loops)
- : "r" (oparg), "Ir" (-EAGAIN)
+ : "+r" (ret), "=&r" (oval64), "+Q" (*uaddr_al)
+ : "r" (nval64), "r" (tmp), "Ir" (0)
: "memory");
if (!ret)
*oval = oldval;
+err_fault:
return ret;
}
static __always_inline int
-__lsui_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
+__lsui_futex_atomic_and(int oparg, u32 __user *uaddr, int *oval)
{
- int ret = 0;
- unsigned int loops = FUTEX_MAX_LOOPS;
- u32 val, tmp;
+ return __lsui_futex_atomic_andnot(~oparg, uaddr, oval);
+}
+
+static __always_inline int
+__lsui_futex_atomic_eor(int oparg, u32 __user *uaddr, int *oval)
+{
+{
+ int ret = -EAGAIN;
+ u32 oldval, newval;
/*
- * cas{al}t doesn't support word size...
+ * there are no ldteor/stteor instructions...
*/
- asm volatile("//__lsui_futex_cmpxchg\n"
- __LSUI_PREAMBLE
-" prfm pstl1strm, %2\n"
-"1: ldtxr %w1, %2\n"
-" eor %w3, %w1, %w5\n"
-" cbnz %w3, 4f\n"
-"2: stltxr %w3, %w6, %2\n"
-" cbz %w3, 3f\n"
-" sub %w4, %w4, %w3\n"
-" cbnz %w4, 1b\n"
-" mov %w0, %w7\n"
-"3:\n"
-" dmb ish\n"
-"4:\n"
- _ASM_EXTABLE_UACCESS_ERR(1b, 4b, %w0)
- _ASM_EXTABLE_UACCESS_ERR(2b, 4b, %w0)
- : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops)
- : "r" (oldval), "r" (newval), "Ir" (-EAGAIN)
- : "memory");
+ unsafe_get_user(oldval, uaddr, err_fault);
+ newval = oldval ^ oparg;
- if (!ret)
- *oval = oldval;
+ ret = __lsui_cmpxchg_helper(uaddr, oldval, newval, oval);
+err_fault:
return ret;
}
+static __always_inline int
+__lsui_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
+{
+ return __lsui_cmpxchg_helper(uaddr, oldval, newval, oval);
+}
+
#define __lsui_llsc_body(op, ...) \
({ \
alternative_has_cap_likely(ARM64_HAS_LSUI) ? \
(END)
I'm not sure whether this is what you had in mind,
but if you could share your thoughts, I would be grateful.
(Note:
When I tested the futex_atomic_eor op with 256 threads, there was not much
difference from the former assembly version.)
Thanks!
--
Sincerely,
Yeoreum Yun
next prev parent reply other threads:[~2025-09-16 12:51 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-16 15:19 [PATCH RESEND v7 0/6] support FEAT_LSUI and apply it on futex atomic ops Yeoreum Yun
2025-08-16 15:19 ` [PATCH RESEND v7 1/6] arm64: cpufeature: add FEAT_LSUI Yeoreum Yun
2025-09-12 16:12 ` Catalin Marinas
2025-08-16 15:19 ` [PATCH RESEND v7 2/6] KVM: arm64: expose FEAT_LSUI to guest Yeoreum Yun
2025-09-12 16:25 ` Catalin Marinas
2025-08-16 15:19 ` [PATCH RESEND v7 3/6] arm64: Kconfig: add LSUI Kconfig Yeoreum Yun
2025-09-12 16:24 ` Catalin Marinas
2025-09-15 10:42 ` Yeoreum Yun
2025-09-15 11:32 ` Will Deacon
2025-09-15 11:41 ` Yeoreum Yun
2025-08-16 15:19 ` [PATCH RESEND v7 4/6] arm64: futex: refactor futex atomic operation Yeoreum Yun
2025-09-11 15:38 ` Will Deacon
2025-09-11 16:04 ` Yeoreum Yun
2025-09-12 16:44 ` Catalin Marinas
2025-09-12 17:01 ` Catalin Marinas
2025-09-15 10:39 ` Yeoreum Yun
2025-09-12 16:53 ` Catalin Marinas
2025-09-15 10:32 ` Yeoreum Yun
2025-09-15 19:40 ` Catalin Marinas
2025-09-15 20:35 ` Will Deacon
2025-09-16 7:02 ` Catalin Marinas
2025-09-16 9:15 ` Yeoreum Yun
2025-09-16 9:24 ` Yeoreum Yun
2025-09-16 10:02 ` Yeoreum Yun
2025-09-16 10:16 ` Will Deacon
2025-09-16 12:50 ` Yeoreum Yun [this message]
2025-09-17 9:32 ` Yeoreum Yun
2025-09-16 12:47 ` Mark Rutland
2025-09-16 13:27 ` Yeoreum Yun
2025-09-16 13:45 ` Mark Rutland
2025-09-16 13:58 ` Yeoreum Yun
2025-09-16 14:07 ` Mark Rutland
2025-09-16 14:15 ` Yeoreum Yun
2025-09-15 22:34 ` Yeoreum Yun
2025-09-16 12:53 ` Catalin Marinas
2025-08-16 15:19 ` [PATCH v7 RESEND 5/6] arm64: futex: small optimisation for __llsc_futex_atomic_set() Yeoreum Yun
2025-09-11 15:28 ` Will Deacon
2025-09-11 16:19 ` Yeoreum Yun
2025-09-12 16:36 ` Catalin Marinas
2025-09-15 10:41 ` Yeoreum Yun
2025-08-16 15:19 ` [PATCH RESEND v7 6/6] arm64: futex: support futex with FEAT_LSUI Yeoreum Yun
2025-09-11 15:22 ` Will Deacon
2025-09-11 16:45 ` Yeoreum Yun
2025-09-12 17:16 ` Catalin Marinas
2025-09-15 9:15 ` Yeoreum Yun
2025-09-12 17:09 ` Catalin Marinas
2025-09-15 8:24 ` Yeoreum Yun
2025-09-01 10:06 ` [PATCH RESEND v7 0/6] support FEAT_LSUI and apply it on futex atomic ops Yeoreum Yun
2025-09-11 15:09 ` Will Deacon
2025-09-11 16:22 ` Catalin Marinas
2025-09-15 20:37 ` Will Deacon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aMldMSCz+zHBguRK@e129823.arm.com \
--to=yeoreum.yun@arm.com \
--cc=ardb@kernel.org \
--cc=broonie@kernel.org \
--cc=catalin.marinas@arm.com \
--cc=james.morse@arm.com \
--cc=joey.gouly@arm.com \
--cc=kvmarm@lists.linux.dev \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=maz@kernel.org \
--cc=oliver.upton@linux.dev \
--cc=scott@os.amperecomputing.com \
--cc=suzuki.poulose@arm.com \
--cc=will@kernel.org \
--cc=yuzenghui@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.