* [PATCH v3 1/6] x86/uaccess: Avoid barrier_nospec() in 64-bit copy_from_user()
2024-10-29 1:56 [PATCH v3 0/6] x86/uaccess: avoid barrier_nospec() Josh Poimboeuf
@ 2024-10-29 1:56 ` Josh Poimboeuf
2024-10-29 8:13 ` Kirill A . Shutemov
2024-10-30 2:03 ` Linus Torvalds
2024-10-29 1:56 ` [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user() Josh Poimboeuf
` (4 subsequent siblings)
5 siblings, 2 replies; 29+ messages in thread
From: Josh Poimboeuf @ 2024-10-29 1:56 UTC (permalink / raw)
To: x86
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Pawan Gupta, Waiman Long, Dave Hansen, Ingo Molnar,
Linus Torvalds, Michael Ellerman, linuxppc-dev, Andrew Cooper,
Mark Rutland, Kirill A . Shutemov
The barrier_nospec() in 64-bit copy_from_user() is slow. Instead use
pointer masking to force the user pointer to all 1's if the access_ok()
mispredicted true for an invalid address.
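Conceptually the masking amounts to the following C sketch (illustration
only; the real mask_user_address() used below is an asm helper that
compares against a runtime-patched user-space limit and builds the mask
with cmp/sbb):

/* all 1's if addr is above the user-space limit, 0 otherwise */
static inline unsigned long mask_user_addr_sketch(unsigned long addr,
						  unsigned long user_ptr_max)
{
	unsigned long mask = (addr > user_ptr_max) ? ~0UL : 0UL;

	return addr | mask;
}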
The kernel test robot reports a 2.6% improvement in the per_thread_ops
benchmark (see link below).
To avoid regressing powerpc and 32-bit x86, move their barrier_nospec()
calls to their respective raw_copy_from_user() implementations so
there's no functional change there.
Note that for safety on some AMD CPUs, this relies on recent commit
86e6b1547b3d ("x86: fix user address masking non-canonical speculation
issue").
Link: https://lore.kernel.org/202410281344.d02c72a2-oliver.sang@intel.com
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
---
arch/powerpc/include/asm/uaccess.h | 2 ++
arch/x86/include/asm/uaccess_32.h | 1 +
arch/x86/include/asm/uaccess_64.h | 1 +
include/linux/uaccess.h | 6 ------
4 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 4f5a46a77fa2..12abb8bf5eda 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -7,6 +7,7 @@
#include <asm/extable.h>
#include <asm/kup.h>
#include <asm/asm-compat.h>
+#include <asm/barrier.h>
#ifdef __powerpc64__
/* We use TASK_SIZE_USER64 as TASK_SIZE is not constant */
@@ -341,6 +342,7 @@ static inline unsigned long raw_copy_from_user(void *to,
{
unsigned long ret;
+ barrier_nospec();
allow_read_from_user(from, n);
ret = __copy_tofrom_user((__force void __user *)to, from, n);
prevent_read_from_user(from, n);
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 40379a1adbb8..8393ba104b2c 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -23,6 +23,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
static __always_inline unsigned long
raw_copy_from_user(void *to, const void __user *from, unsigned long n)
{
+ barrier_nospec();
return __copy_user_ll(to, (__force const void *)from, n);
}
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index b0a887209400..7ce84090f0ec 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -138,6 +138,7 @@ copy_user_generic(void *to, const void *from, unsigned long len)
static __always_inline __must_check unsigned long
raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
{
+ src = mask_user_address(src);
return copy_user_generic(dst, (__force void *)src, size);
}
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 39c7cf82b0c2..dda9725a9559 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -160,12 +160,6 @@ _inline_copy_from_user(void *to, const void __user *from, unsigned long n)
unsigned long res = n;
might_fault();
if (!should_fail_usercopy() && likely(access_ok(from, n))) {
- /*
- * Ensure that bad access_ok() speculation will not
- * lead to nasty side effects *after* the copy is
- * finished:
- */
- barrier_nospec();
instrument_copy_from_user_before(to, from, n);
res = raw_copy_from_user(to, from, n);
instrument_copy_from_user_after(to, from, n, res);
--
2.47.0
* Re: [PATCH v3 1/6] x86/uaccess: Avoid barrier_nospec() in 64-bit copy_from_user()
2024-10-29 1:56 ` [PATCH v3 1/6] x86/uaccess: Avoid barrier_nospec() in 64-bit copy_from_user() Josh Poimboeuf
@ 2024-10-29 8:13 ` Kirill A . Shutemov
2024-10-30 2:03 ` Linus Torvalds
1 sibling, 0 replies; 29+ messages in thread
From: Kirill A . Shutemov @ 2024-10-29 8:13 UTC (permalink / raw)
To: Josh Poimboeuf
Cc: x86, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Pawan Gupta, Waiman Long, Dave Hansen,
Ingo Molnar, Linus Torvalds, Michael Ellerman, linuxppc-dev,
Andrew Cooper, Mark Rutland
On Mon, Oct 28, 2024 at 06:56:14PM -0700, Josh Poimboeuf wrote:
> The barrier_nospec() in 64-bit copy_from_user() is slow. Instead use
> pointer masking to force the user pointer to all 1's if the access_ok()
> mispredicted true for an invalid address.
>
> The kernel test robot reports a 2.6% improvement in the per_thread_ops
> benchmark (see link below).
>
> To avoid regressing powerpc and 32-bit x86, move their barrier_nospec()
> calls to their respective raw_copy_from_user() implementations so
> there's no functional change there.
>
> Note that for safety on some AMD CPUs, this relies on recent commit
> 86e6b1547b3d ("x86: fix user address masking non-canonical speculation
> issue").
>
> Link: https://lore.kernel.org/202410281344.d02c72a2-oliver.sang@intel.com
> Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
--
Kiryl Shutsemau / Kirill A. Shutemov
* Re: [PATCH v3 1/6] x86/uaccess: Avoid barrier_nospec() in 64-bit copy_from_user()
2024-10-29 1:56 ` [PATCH v3 1/6] x86/uaccess: Avoid barrier_nospec() in 64-bit copy_from_user() Josh Poimboeuf
2024-10-29 8:13 ` Kirill A . Shutemov
@ 2024-10-30 2:03 ` Linus Torvalds
2024-10-30 4:59 ` Josh Poimboeuf
1 sibling, 1 reply; 29+ messages in thread
From: Linus Torvalds @ 2024-10-30 2:03 UTC (permalink / raw)
To: Josh Poimboeuf
Cc: x86, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Pawan Gupta, Waiman Long, Dave Hansen,
Ingo Molnar, Michael Ellerman, linuxppc-dev, Andrew Cooper,
Mark Rutland, Kirill A . Shutemov
[-- Attachment #1: Type: text/plain, Size: 1099 bytes --]
On Mon, 28 Oct 2024 at 15:56, Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> The barrier_nospec() in 64-bit copy_from_user() is slow. Instead use
> pointer masking to force the user pointer to all 1's if the access_ok()
> mispredicted true for an invalid address.
>
> The kernel test robot reports a 2.6% improvement in the per_thread_ops
> benchmark (see link below).
Hmm. So it strikes me that this still does the "access_ok()", but
that's pointless for the actual pointer masking case. One of the whole
points of the pointer masking is that we can just do this without
actually checking the address (or length) at all.
That's why the strncpy_from_user() has the pattern of
if (can_do_masked_user_access()) {
... don't worry about the size of the address space ..
and I think this code should do that too.
IOW, I think we can do even better than your patch with something
(UNTESTED!) like the attached.
That will also mean that any other architecture that starts doing the
user address masking trick will pick up on this automatically.
Hmm?
Linus
[-- Attachment #2: patch.diff --]
[-- Type: text/x-patch, Size: 1433 bytes --]
include/linux/uaccess.h | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 39c7cf82b0c2..43844510d5d0 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -38,6 +38,7 @@
#else
#define can_do_masked_user_access() 0
#define masked_user_access_begin(src) NULL
+ #define mask_user_address(src) (src)
#endif
/*
@@ -159,19 +160,27 @@ _inline_copy_from_user(void *to, const void __user *from, unsigned long n)
{
unsigned long res = n;
might_fault();
- if (!should_fail_usercopy() && likely(access_ok(from, n))) {
+ if (should_fail_usercopy())
+ goto fail;
+ if (can_do_masked_user_access())
+ from = mask_user_address(from);
+ else {
+ if (!access_ok(from, n))
+ goto fail;
/*
* Ensure that bad access_ok() speculation will not
* lead to nasty side effects *after* the copy is
* finished:
*/
barrier_nospec();
- instrument_copy_from_user_before(to, from, n);
- res = raw_copy_from_user(to, from, n);
- instrument_copy_from_user_after(to, from, n, res);
}
- if (unlikely(res))
- memset(to + (n - res), 0, res);
+ instrument_copy_from_user_before(to, from, n);
+ res = raw_copy_from_user(to, from, n);
+ instrument_copy_from_user_after(to, from, n, res);
+ if (likely(!res))
+ return 0;
+fail:
+ memset(to + (n - res), 0, res);
return res;
}
extern __must_check unsigned long
* Re: [PATCH v3 1/6] x86/uaccess: Avoid barrier_nospec() in 64-bit copy_from_user()
2024-10-30 2:03 ` Linus Torvalds
@ 2024-10-30 4:59 ` Josh Poimboeuf
0 siblings, 0 replies; 29+ messages in thread
From: Josh Poimboeuf @ 2024-10-30 4:59 UTC (permalink / raw)
To: Linus Torvalds
Cc: x86, linux-kernel, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Pawan Gupta, Waiman Long, Dave Hansen,
Ingo Molnar, Michael Ellerman, linuxppc-dev, Andrew Cooper,
Mark Rutland, Kirill A . Shutemov
On Tue, Oct 29, 2024 at 04:03:31PM -1000, Linus Torvalds wrote:
> Hmm. So it strikes me that this still does the "access_ok()", but
> that's pointless for the actual pointer masking case. One of the whole
> points of the pointer masking is that we can just do this without
> actually checking the address (or length) at all.
>
> That's why the strncpy_from_user() has the pattern of
>
> if (can_do_masked_user_access()) {
> ... don't worry about the size of the address space ..
>
> and I think this code should do that too.
>
> IOW, I think we can do even better than your patch with something
> (UNTESTED!) like the attached.
>
> That will also mean that any other architecture that starts doing the
> user address masking trick will pick up on this automatically.
>
> Hmm?
Yeah, it makes sense to hook into that existing
can_do_masked_user_access() thing. The patch looks good, and it boots
without blowing up. Thanks!
Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
--
Josh
* [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-10-29 1:56 [PATCH v3 0/6] x86/uaccess: avoid barrier_nospec() Josh Poimboeuf
2024-10-29 1:56 ` [PATCH v3 1/6] x86/uaccess: Avoid barrier_nospec() in 64-bit copy_from_user() Josh Poimboeuf
@ 2024-10-29 1:56 ` Josh Poimboeuf
2024-10-29 3:27 ` Josh Poimboeuf
2024-10-29 1:56 ` [PATCH v3 3/6] x86/uaccess: Avoid barrier_nospec() in 32-bit copy_from_user() Josh Poimboeuf
` (3 subsequent siblings)
5 siblings, 1 reply; 29+ messages in thread
From: Josh Poimboeuf @ 2024-10-29 1:56 UTC (permalink / raw)
To: x86
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Pawan Gupta, Waiman Long, Dave Hansen, Ingo Molnar,
Linus Torvalds, Michael Ellerman, linuxppc-dev, Andrew Cooper,
Mark Rutland, Kirill A . Shutemov
The barrier_nospec() in 64-bit __get_user() is slow. Instead use
pointer masking to force the user pointer to all 1's if a previous
access_ok() mispredicted true for an invalid address.
Note that for safety on some AMD CPUs, this relies on recent commit
86e6b1547b3d ("x86: fix user address masking non-canonical speculation
issue").
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
---
arch/x86/lib/getuser.S | 24 ++++++++++++++++++++----
1 file changed, 20 insertions(+), 4 deletions(-)
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 4357ec2a0bfc..998d5be6b794 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -112,8 +112,12 @@ EXPORT_SYMBOL(__get_user_8)
/* .. and the same for __get_user, just without the range checks */
SYM_FUNC_START(__get_user_nocheck_1)
- ASM_STAC
+#ifdef CONFIG_X86_64
+ check_range size=1
+#else
ASM_BARRIER_NOSPEC
+#endif
+ ASM_STAC
UACCESS movzbl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
@@ -122,8 +126,12 @@ SYM_FUNC_END(__get_user_nocheck_1)
EXPORT_SYMBOL(__get_user_nocheck_1)
SYM_FUNC_START(__get_user_nocheck_2)
- ASM_STAC
+#ifdef CONFIG_X86_64
+ check_range size=2
+#else
ASM_BARRIER_NOSPEC
+#endif
+ ASM_STAC
UACCESS movzwl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
@@ -132,8 +140,12 @@ SYM_FUNC_END(__get_user_nocheck_2)
EXPORT_SYMBOL(__get_user_nocheck_2)
SYM_FUNC_START(__get_user_nocheck_4)
- ASM_STAC
+#ifdef CONFIG_X86_64
+ check_range size=4
+#else
ASM_BARRIER_NOSPEC
+#endif
+ ASM_STAC
UACCESS movl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
@@ -142,8 +154,12 @@ SYM_FUNC_END(__get_user_nocheck_4)
EXPORT_SYMBOL(__get_user_nocheck_4)
SYM_FUNC_START(__get_user_nocheck_8)
- ASM_STAC
+#ifdef CONFIG_X86_64
+ check_range size=8
+#else
ASM_BARRIER_NOSPEC
+#endif
+ ASM_STAC
#ifdef CONFIG_X86_64
UACCESS movq (%_ASM_AX),%rdx
#else
--
2.47.0
* Re: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-10-29 1:56 ` [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user() Josh Poimboeuf
@ 2024-10-29 3:27 ` Josh Poimboeuf
2024-11-08 17:12 ` David Laight
0 siblings, 1 reply; 29+ messages in thread
From: Josh Poimboeuf @ 2024-10-29 3:27 UTC (permalink / raw)
To: x86
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Pawan Gupta, Waiman Long, Dave Hansen, Ingo Molnar,
Linus Torvalds, Michael Ellerman, linuxppc-dev, Andrew Cooper,
Mark Rutland, Kirill A . Shutemov
On Mon, Oct 28, 2024 at 06:56:15PM -0700, Josh Poimboeuf wrote:
> The barrier_nospec() in 64-bit __get_user() is slow. Instead use
> pointer masking to force the user pointer to all 1's if a previous
> access_ok() mispredicted true for an invalid address.
Linus pointed out that __get_user() may be used by some code to access
both kernel and user space and in fact I found one such usage in
vc_read_mem()....
So I self-NAK this patch for now.
Still, it would be great if patch 1 could get merged as that gives a
significant performance boost.
--
Josh
* RE: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-10-29 3:27 ` Josh Poimboeuf
@ 2024-11-08 17:12 ` David Laight
2024-11-15 23:06 ` 'Josh Poimboeuf'
0 siblings, 1 reply; 29+ messages in thread
From: David Laight @ 2024-11-08 17:12 UTC (permalink / raw)
To: 'Josh Poimboeuf', x86@kernel.org
Cc: linux-kernel@vger.kernel.org, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Pawan Gupta, Waiman Long, Dave Hansen,
Ingo Molnar, Linus Torvalds, Michael Ellerman,
linuxppc-dev@lists.ozlabs.org, Andrew Cooper, Mark Rutland,
Kirill A . Shutemov
From: Josh Poimboeuf
> Sent: 29 October 2024 03:28
>
> On Mon, Oct 28, 2024 at 06:56:15PM -0700, Josh Poimboeuf wrote:
> > The barrier_nospec() in 64-bit __get_user() is slow. Instead use
> > pointer masking to force the user pointer to all 1's if a previous
> > access_ok() mispredicted true for an invalid address.
>
> Linus pointed out that __get_user() may be used by some code to access
> both kernel and user space and in fact I found one such usage in
> vc_read_mem()....
>
> So I self-NAK this patch for now.
>
> Still, it would be great if patch 1 could get merged as that gives a
> significant performance boost.
I'm a bit late to the party and still a week behind :-(
But I've wondered if access_ok() ought to be implemented using an
'asm goto with output' - much like get_user().
Then the use would be:
masked_address = access_ok(maybe_bad_address, size, jump_label);
with later user accesses using the masked_address.
Once you've done that __get_user() doesn't need to contain address masking.
Given that clac/stac are so slow, should there be something that
combines stac with access_ok(), bracketed with a 'user_access_end'
or an actual fault?
I'm sure there is code (maybe reading iovec[] or in sys_poll())
that wants to do multiple get/put_user in a short loop rather than
calling copy_to/from_user().
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
* Re: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-11-08 17:12 ` David Laight
@ 2024-11-15 23:06 ` 'Josh Poimboeuf'
2024-11-16 1:27 ` Linus Torvalds
0 siblings, 1 reply; 29+ messages in thread
From: 'Josh Poimboeuf' @ 2024-11-15 23:06 UTC (permalink / raw)
To: David Laight
Cc: x86@kernel.org, linux-kernel@vger.kernel.org, Thomas Gleixner,
Borislav Petkov, Peter Zijlstra, Pawan Gupta, Waiman Long,
Dave Hansen, Ingo Molnar, Linus Torvalds, Michael Ellerman,
linuxppc-dev@lists.ozlabs.org, Andrew Cooper, Mark Rutland,
Kirill A . Shutemov
On Fri, Nov 08, 2024 at 05:12:53PM +0000, David Laight wrote:
> From: Josh Poimboeuf
> > On Mon, Oct 28, 2024 at 06:56:15PM -0700, Josh Poimboeuf wrote:
> > > The barrier_nospec() in 64-bit __get_user() is slow. Instead use
> > > pointer masking to force the user pointer to all 1's if a previous
> > > access_ok() mispredicted true for an invalid address.
> >
> > Linus pointed out that __get_user() may be used by some code to access
> > both kernel and user space and in fact I found one such usage in
> > vc_read_mem()....
.. which sucks because I got a "will-it-scale.per_process_ops 1.9%
improvement" report for this patch.
It's sad that __get_user() is now slower than get_user() on x86, it kind
of defeats the whole point!
I know at least the "coco" code is misusing __get_user(). Unless
somebody wants to audit all the other callers, we could do something
horrific:
.macro __get_user_nocheck_nospec
#ifdef CONFIG_X86_64
movq $0x0123456789abcdef, %rdx
1:
.pushsection runtime_ptr_USER_PTR_MAX, "a"
.long 1b - 8 - .
.popsection
cmp %rax, %rdx
jb 10f
sbb %rdx, %rdx
or %rdx, %rax
jmp 11f
10: /*
* Stop access_ok() branch misprediction -- both of them ;-)
*
* As a benefit this also punishes callers who intentionally call this
* with a kernel address. Once they're rooted out, __get_user() can
* just become an alias of get_user().
*
* TODO: Add WARN_ON()
*/
#endif
ASM_BARRIER_NOSPEC
11:
.endm
/* .. and the same for __get_user, just without the range checks */
SYM_FUNC_START(__get_user_nocheck_1)
__get_user_nocheck_nospec
ASM_STAC
UACCESS movzbl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
RET
SYM_FUNC_END(__get_user_nocheck_1)
EXPORT_SYMBOL(__get_user_nocheck_1)
Yes, I know adding another access_ok() is bad, but it would be a
definite speedup. And adding a WARN_ON() would root out any other bad
callers pretty quick.
> But I've wondered if access_ok() ought to be implemented using an
> 'asm goto with output' - much like get_user().
>
> Then the use would be:
> masked_address = access_ok(maybe_bad_address, size, jump_label);
> with later user accesses using the masked_address.
>
> Once you've done that __get_user() doesn't need to contain address masking.
Sure, we just need a volunteer to change all the access_ok() implementations
and callers tree-wide ;-)
> Given that clac/stac are so slow, should there be something that
> combines stac with access_ok(), bracketed with a 'user_access_end'
> or an actual fault?
>
> I'm sure there is code (maybe reading iovec[] or in sys_poll())
> that wants to do multiple get/put_user in a short loop rather than
> calling copy_to/from_user().
We already have this with user_access_begin() + unsafe_get_user().
There's also a version which masks the address: masked_user_access_begin().
We just need to start porting things over.
--
Josh
* Re: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-11-15 23:06 ` 'Josh Poimboeuf'
@ 2024-11-16 1:27 ` Linus Torvalds
2024-11-16 21:38 ` David Laight
2024-11-21 21:40 ` Josh Poimboeuf
0 siblings, 2 replies; 29+ messages in thread
From: Linus Torvalds @ 2024-11-16 1:27 UTC (permalink / raw)
To: Josh Poimboeuf
Cc: David Laight, x86@kernel.org, linux-kernel@vger.kernel.org,
Thomas Gleixner, Borislav Petkov, Peter Zijlstra, Pawan Gupta,
Waiman Long, Dave Hansen, Ingo Molnar, Michael Ellerman,
linuxppc-dev@lists.ozlabs.org, Andrew Cooper, Mark Rutland,
Kirill A . Shutemov
On Fri, 15 Nov 2024 at 15:06, Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> It's sad that __get_user() is now slower than get_user() on x86, it kind
> of defeats the whole point!
Well, honestly, we've been trying to get away from __get_user() and
__put_user() for a long long time.
With CLAC/STAC, it's been probably a decade or two since __get_user()
and friends were actually a worthwhile optimization, so let's just
strive to get rid of the ones that matter.
So I think the thing to do is
(a) find out which __get_user() it is that matters so much for that load
Do you have a profile somewhere?
(b) convert them to use "unsafe_get_user()", with that whole
if (can_do_masked_user_access())
from = masked_user_access_begin(from);
else if (!user_read_access_begin(from, sizeof(*from)))
return -EFAULT;
sequence before it.
And if it's just a single __get_user() (rather than a sequence of
them), just convert it to get_user().
Hmm?
Linus
* RE: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-11-16 1:27 ` Linus Torvalds
@ 2024-11-16 21:38 ` David Laight
2024-11-16 23:08 ` Linus Torvalds
2024-11-21 21:40 ` Josh Poimboeuf
1 sibling, 1 reply; 29+ messages in thread
From: David Laight @ 2024-11-16 21:38 UTC (permalink / raw)
To: 'Linus Torvalds', Josh Poimboeuf
Cc: x86@kernel.org, linux-kernel@vger.kernel.org, Thomas Gleixner,
Borislav Petkov, Peter Zijlstra, Pawan Gupta, Waiman Long,
Dave Hansen, Ingo Molnar, Michael Ellerman,
linuxppc-dev@lists.ozlabs.org, Andrew Cooper, Mark Rutland,
Kirill A . Shutemov
From: Linus Torvalds
> Sent: 16 November 2024 01:27
>
> On Fri, 15 Nov 2024 at 15:06, Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> >
> > It's sad that __get_user() is now slower than get_user() on x86, it kind
> > of defeats the whole point!
>
> Well, honestly, we've been trying to get away from __get_user() and
> __put_user() for a long long time.
>
> With CLAC/STAC, it's been probably a decade or two since __get_user()
> and friends were actually a worthwhile optimization, so let's just
> strive to get rid of the ones that matter.
Thinks....
If __get_user() is the same as get_user() then all the access_ok()
outside of get/put_user() and copy_to/from_user() can be removed
because they are pointless (anyone that brave?).
There is no point optimising the code to fast-path bad user pointers.
> We already have this with user_access_begin() + unsafe_get_user().
> There's also a version which masks the address: masked_user_access_begin().
That sounds as though it is begging for a simple-to-use:
masked_addr = user_access_begin(addr, size, error_label);
and
val = unsafe_get_user(masked_addr, error_label);
form?
Probably with objtool checking they are all in a valid sequence
with no function calls (etc).
If address masking isn't needed (by an architecture) the address can be left
unchanged.
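As a purely hypothetical sketch (neither this three-argument
user_access_begin() nor a value-returning unsafe_get_user() exists today,
and the names below are made up), a short loop over user data might then
look something like:

static long sum_user_words(const u32 __user *uptr, int n, u64 *total)
{
	const u32 __user *p;
	u64 sum = 0;
	int i;

	/* one range check / STAC for the whole loop, faults go to labels */
	p = user_access_begin(uptr, n * sizeof(u32), Enoaccess);
	for (i = 0; i < n; i++)
		sum += unsafe_get_user(p + i, Efault);
	user_access_end();
	*total = sum;
	return 0;

Efault:
	user_access_end();
Enoaccess:
	return -EFAULT;
}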
A quick grep shows access_ok() in 66 .c and 8 .h files outside the arch code.
And 69 .c files in arch code; most of the arch .h hits are uaccess.h and futex.h.
I suspect the audit wouldn't take that long.
Getting any patches accepted is another matter.
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
* Re: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-11-16 21:38 ` David Laight
@ 2024-11-16 23:08 ` Linus Torvalds
0 siblings, 0 replies; 29+ messages in thread
From: Linus Torvalds @ 2024-11-16 23:08 UTC (permalink / raw)
To: David Laight
Cc: Josh Poimboeuf, x86@kernel.org, linux-kernel@vger.kernel.org,
Thomas Gleixner, Borislav Petkov, Peter Zijlstra, Pawan Gupta,
Waiman Long, Dave Hansen, Ingo Molnar, Michael Ellerman,
linuxppc-dev@lists.ozlabs.org, Andrew Cooper, Mark Rutland,
Kirill A . Shutemov
On Sat, 16 Nov 2024 at 13:38, David Laight <David.Laight@aculab.com> wrote:
>
> If __get_user() is the same as get_user() [..]
No, the problem is that it's the same from a performance angle (and
now it's actually slower), but some hacky code paths depend on
__get_user() not checking the address.
They then use that to read from either user space _or_ kernel space.
Wrong? Yes. Architecture-specific? Yes. But it sadly happens.
Linus
* Re: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-11-16 1:27 ` Linus Torvalds
2024-11-16 21:38 ` David Laight
@ 2024-11-21 21:40 ` Josh Poimboeuf
2024-11-21 22:16 ` Linus Torvalds
1 sibling, 1 reply; 29+ messages in thread
From: Josh Poimboeuf @ 2024-11-21 21:40 UTC (permalink / raw)
To: Linus Torvalds
Cc: David Laight, x86@kernel.org, linux-kernel@vger.kernel.org,
Thomas Gleixner, Borislav Petkov, Peter Zijlstra, Pawan Gupta,
Waiman Long, Dave Hansen, Ingo Molnar, Michael Ellerman,
linuxppc-dev@lists.ozlabs.org, Andrew Cooper, Mark Rutland,
Kirill A . Shutemov
> On Fri, 15 Nov 2024 at 15:06, Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> So I think the thing to do is
>
> (a) find out which __get_user() it is that matters so much for that load
>
> Do you have a profile somewhere?
>
> (b) convert them to use "unsafe_get_user()", with that whole
>
> if (can_do_masked_user_access())
> from = masked_user_access_begin(from);
> else if (!user_read_access_begin(from, sizeof(*from)))
> return -EFAULT;
>
> sequence before it.
>
> And if it's just a single __get_user() (rather than a sequence of
> them), just convert it to get_user().
>
> Hmm?
The profile is showing futex_get_value_locked():
int futex_get_value_locked(u32 *dest, u32 __user *from)
{
int ret;
pagefault_disable();
ret = __get_user(*dest, from);
pagefault_enable();
return ret ? -EFAULT : 0;
}
That has several callers, so we can probably just use get_user() there?
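For reference, the plain get_user() conversion would be a minimal change
along these lines (sketch only; not what ended up being proposed later in
the thread):

int futex_get_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = get_user(*dest, from);	/* get_user() does its own range check */
	pagefault_enable();

	return ret ? -EFAULT : 0;
}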
Also, is there any harm in speeding up __get_user()? It still has ~80
callers and it's likely to be slowing down things we don't know about.
It's usually only the regressions which get noticed, and that LFENCE
went in almost 7 years ago, when there was much less automated
performance regression testing.
As a bonus, that patch will root out any "bad" users, which will
eventually allow us to simplify things and just make __get_user() an
alias of get_user().
In fact, if we aliased it for all arches, that could help in getting rid
of __get_user() altogether as there would no longer be any (real or
advertised) benefit to using it.
--
Josh
* Re: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-11-21 21:40 ` Josh Poimboeuf
@ 2024-11-21 22:16 ` Linus Torvalds
2024-11-22 0:12 ` Josh Poimboeuf
2024-11-22 9:38 ` David Laight
0 siblings, 2 replies; 29+ messages in thread
From: Linus Torvalds @ 2024-11-21 22:16 UTC (permalink / raw)
To: Josh Poimboeuf
Cc: David Laight, x86@kernel.org, linux-kernel@vger.kernel.org,
Thomas Gleixner, Borislav Petkov, Peter Zijlstra, Pawan Gupta,
Waiman Long, Dave Hansen, Ingo Molnar, Michael Ellerman,
linuxppc-dev@lists.ozlabs.org, Andrew Cooper, Mark Rutland,
Kirill A . Shutemov
[-- Attachment #1: Type: text/plain, Size: 2303 bytes --]
On Thu, 21 Nov 2024 at 13:40, Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> The profile is showing futex_get_value_locked():
Ahh.
> That has several callers, so we can probably just use get_user() there?
Yeah, that's the simplest thing. That thing isn't even some inline
function, so the real cost is the call.
That said, exactly because it's not inlined, and calls are expensive,
and this is apparently really critical, we can just do it with the
full "unsafe_get_user()" model.
It's not so complicated. The attached patch is untested, but I did
check that it generates almost perfect code:
mov %gs:0x0,%rax # current
incl 0x1a9c(%rax) # current->pagefault_disable++
movabs $0x123456789abcdef,%rcx # magic virtual address size
cmp %rsi,%rcx # address masking
sbb %rcx,%rcx
or %rsi,%rcx
stac # enable user space acccess
mov (%rcx),%ecx # get the value
clac # disable user space access
decl 0x1a9c(%rax) # current->pagefault_disable--
mov %ecx,(%rdi) # save the value
xor %eax,%eax # return 0
ret
(with the error case for the page fault all out-of-line).
So this should be _faster_ than the old __get_user(), because while
the address masking is not needed, it's cheaper than the function call
used to be and the error handling is better.
If you can test this and verify that it actually helps, I'll take it as
a patch. Consider it signed-off after testing.
> Also, is there any harm in speeding up __get_user()? It still has ~80
> callers and it's likely to be slowing down things we don't know about.
How would you speed it up? We definitely can't replace the fence with
addressing tricks. So we can't just replace it with "get_user()",
because of those horrid architecture-specific kernel uses.
Now, we could possibly say "just remove the fence in __get_user()
entirely", but that would involve moving it to access_ok().
And then it wouldn't actually speed anything up (except the horrid
architecture-specific kernel uses that then don't call access_ok() at
all - and we don't care about *those*).
Linus
[-- Attachment #2: patch.diff --]
[-- Type: text/x-patch, Size: 1128 bytes --]
kernel/futex/core.c | 25 ++++++++++++++++++++++---
1 file changed, 22 insertions(+), 3 deletions(-)
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 326bfe6549d7..9e1bd76652d8 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -464,13 +464,32 @@ int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 new
int futex_get_value_locked(u32 *dest, u32 __user *from)
{
- int ret;
+ u32 val;
pagefault_disable();
- ret = __get_user(*dest, from);
+ /*
+ * The user address has been verified by get_futex_key(),
+ * and futex_cmpxchg_value_locked() trusts that, but we do
+ * not have any other ways to do it well, so we do the
+ * full user access song and dance here.
+ */
+ if (can_do_masked_user_access())
+ from = masked_user_access_begin(from);
+ else if (!user_read_access_begin(from, sizeof(*from)))
+ goto enable_and_error;
+
+ unsafe_get_user(val, from, Efault);
+ user_access_end();
pagefault_enable();
- return ret ? -EFAULT : 0;
+ *dest = val;
+ return 0;
+
+Efault:
+ user_access_end();
+enable_and_error:
+ pagefault_enable();
+ return -EFAULT;
}
/**
* Re: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-11-21 22:16 ` Linus Torvalds
@ 2024-11-22 0:12 ` Josh Poimboeuf
2024-11-22 1:02 ` Linus Torvalds
2024-11-22 9:38 ` David Laight
1 sibling, 1 reply; 29+ messages in thread
From: Josh Poimboeuf @ 2024-11-22 0:12 UTC (permalink / raw)
To: Linus Torvalds
Cc: David Laight, x86@kernel.org, linux-kernel@vger.kernel.org,
Thomas Gleixner, Borislav Petkov, Peter Zijlstra, Pawan Gupta,
Waiman Long, Dave Hansen, Ingo Molnar, Michael Ellerman,
linuxppc-dev@lists.ozlabs.org, Andrew Cooper, Mark Rutland,
Kirill A . Shutemov
On Thu, Nov 21, 2024 at 02:16:12PM -0800, Linus Torvalds wrote:
> mov %gs:0x0,%rax # current
> incl 0x1a9c(%rax) # current->pagefault_disable++
> movabs $0x123456789abcdef,%rcx # magic virtual address size
> cmp %rsi,%rcx # address masking
> sbb %rcx,%rcx
> or %rsi,%rcx
> stac # enable user space acccess
> mov (%rcx),%ecx # get the value
> clac # disable user space access
> decl 0x1a9c(%rax) # current->pagefault_disable--
> mov %ecx,(%rdi) # save the value
> xor %eax,%eax # return 0
> ret
The asm looks good, but the C exploded a bit... why not just have an
inline get_user()?
> If you can test this and verify that it actually helps, I'll take it as
> a patch. Consider it signed-off after testing.
Let me see if I can recreate the original report (or get the automatic
testing to see the commit).
> > Also, is there any harm in speeding up __get_user()? It still has ~80
> > callers and it's likely to be slowing down things we don't know about.
>
> How would you speed it up? We definitely can't replace the fence with
> addressing tricks. So we can't just replace it with "get_user()",
> because of those horrid architecture-specific kernel uses.
I'm not sure if you saw the example code snippet I posted up-thread,
here it is below.
It adds a conditional branch to test if it's a user address. If so, it
does pointer masking. If not, it does LFENCE. So "bad" users get the
slow path, and we could even add a WARN_ON().
Yes, another conditional branch isn't ideal, but it's still much faster
than the current code, and it would root out any bad users with a
WARN_ON() so that eventually it can just become a get_user() alias.
.macro __get_user_nocheck_nospec
#ifdef CONFIG_X86_64
movq $0x0123456789abcdef, %rdx
1:
.pushsection runtime_ptr_USER_PTR_MAX, "a"
.long 1b - 8 - .
.popsection
cmp %rax, %rdx
jb 10f
sbb %rdx, %rdx
or %rdx, %rax
jmp 11f
10: /*
* Stop access_ok() branch misprediction -- both of them ;-)
*
* As a benefit this also punishes callers who intentionally call this
* with a kernel address. Once they're rooted out, __get_user() can
* just become an alias of get_user().
*
* TODO: Add WARN_ON()
*/
#endif
ASM_BARRIER_NOSPEC
11:
.endm
/* .. and the same for __get_user, just without the range checks */
SYM_FUNC_START(__get_user_nocheck_1)
__get_user_nocheck_nospec
ASM_STAC
UACCESS movzbl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
RET
SYM_FUNC_END(__get_user_nocheck_1)
EXPORT_SYMBOL(__get_user_nocheck_1)
--
Josh
* Re: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-11-22 0:12 ` Josh Poimboeuf
@ 2024-11-22 1:02 ` Linus Torvalds
2024-11-22 3:11 ` Josh Poimboeuf
0 siblings, 1 reply; 29+ messages in thread
From: Linus Torvalds @ 2024-11-22 1:02 UTC (permalink / raw)
To: Josh Poimboeuf
Cc: David Laight, x86@kernel.org, linux-kernel@vger.kernel.org,
Thomas Gleixner, Borislav Petkov, Peter Zijlstra, Pawan Gupta,
Waiman Long, Dave Hansen, Ingo Molnar, Michael Ellerman,
linuxppc-dev@lists.ozlabs.org, Andrew Cooper, Mark Rutland,
Kirill A . Shutemov
On Thu, 21 Nov 2024 at 16:12, Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> The asm looks good, but the C exploded a bit... why not just have an
> inline get_user()?
That was originally one of my goals for the "unsafe" ones - if done
right, they'd be the proper building blocks for a get_user(), and we'd
only really need one single good implementation.
But it really does blow up the code size quite noticeably. The old
sign-based thing wasn't quite so bad, and was one of the main reasons
I really didn't like having to switch to the big constant thing, but
with the constant, the "get_user()" part is basically 27 bytes per
location.
The uninlined call is 5 bytes.
(And both then have the error handling - the inlined one can use the
asm goto to *maybe* make up for some of it because it avoids tests,
but I suspect it ends up being pretty similar in the end).
So I'm not really sure inlining makes sense - except if you have code
that you've profiled.
Part of the problem is that you can't really just make an inline
function. You need to make one for each size. Which we've done, but it
gets real messy real quick. I don't want to have yet another "switch
(sizeof..)" thing.
Or you'd make it a macro (which makes dealing with different types
easy), but then it would have to be a statement expression to return
the error, and to take advantage of that exception handling, the error
handling gets messed up real quick too.
End result: the
if (can_do_masked_user_access())
from = masked_user_access_begin(from);
else if (!user_read_access_begin(from, sizeof(*from)))
goto enable_and_error;
unsafe_get_user(val, from, Efault);
user_access_end();
pattern is very good for generating fine code, but it's rather nasty
to encapsulate as one single macro somewhere. It really ends up having
those two error cases: the one that just returns the error, and the
one that has to do user_access_end().
[ Time passes ]
Ugh. I tried it. It looks like this:
#define inlined_get_user(res, ptr) ({ \
__label__ fail2, fail1; \
__auto_type __up = (ptr); \
int __ret = 0; \
if (can_do_masked_user_access()) \
__up = masked_user_access_begin(__up); \
else if (!user_read_access_begin(__up, sizeof(*__up))) \
goto fail1; \
unsafe_get_user(res, ptr, fail2); \
user_access_end(); \
if (0) { \
fail2: user_access_end(); \
fail1: __ret = -EFAULT; \
} \
__ret; })
and maybe that works. It generated ok code in this case, where I did
int futex_get_value_locked(u32 *dest, u32 __user *from)
{
int ret;
pagefault_disable();
ret = inlined_get_user(*dest, from);
pagefault_enable();
return ret;
}
but I'm not convinced it's better than open-coding it. We have some
ugly macros in the kernel, but that may be one of the ugliest I've
ever written.
> > How would you speed it up?
>
> I'm not sure if you saw the example code snippet I posted up-thread,
> here it is below.
Oh, ok, no I hadn't seen that one.
Yeah, it looks like that would work. What a horrendous crock. But your
point that it would find the nasty __get_user() cases is nicely made.
Linus
* Re: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-11-22 1:02 ` Linus Torvalds
@ 2024-11-22 3:11 ` Josh Poimboeuf
2024-11-22 3:57 ` Linus Torvalds
0 siblings, 1 reply; 29+ messages in thread
From: Josh Poimboeuf @ 2024-11-22 3:11 UTC (permalink / raw)
To: Linus Torvalds
Cc: David Laight, x86@kernel.org, linux-kernel@vger.kernel.org,
Thomas Gleixner, Borislav Petkov, Peter Zijlstra, Pawan Gupta,
Waiman Long, Dave Hansen, Ingo Molnar, Michael Ellerman,
linuxppc-dev@lists.ozlabs.org, Andrew Cooper, Mark Rutland,
Kirill A . Shutemov
On Thu, Nov 21, 2024 at 05:02:06PM -0800, Linus Torvalds wrote:
> [ Time passes ]
>
> Ugh. I tried it. It looks like this:
>
> #define inlined_get_user(res, ptr) ({ \
> __label__ fail2, fail1; \
> __auto_type __up = (ptr); \
> int __ret = 0; \
> if (can_do_masked_user_access()) \
> __up = masked_user_access_begin(__up); \
> else if (!user_read_access_begin(__up, sizeof(*__up))) \
> goto fail1; \
> unsafe_get_user(res, ptr, fail2); \
> user_access_end(); \
> if (0) { \
> fail2: user_access_end(); \
> fail1: __ret = -EFAULT; \
> } \
> __ret; })
That actually doesn't seem so bad, it's easy enough to follow the logic.
And it contains all the ugly/fidgety bits in one place so the callers' hands
don't have to get dirty.
We could easily use that macro in size-specific inline functions
selected by a macro with a sizeof(type) switch statement -- not so bad
IMO if they improve code usage and generation.
So all the user has to do is get_user_new_and_improved() -- resolving to
get_user_new_and_improved_x() -- and the compiler decides on the
inlining. Which on average is hopefully better than Joe Developer's
inlining decisions? Otherwise we've got bigger problems?
Then all the arches have to do is implement unsafe_*_user_{1,2,4,8} and
the "one good implementation" idea comes together?
BTW, looking at some other arches, I notice that get_user() is already
unconditionally inline for arm64, riscv, powerpc, and s390.
I also see that arm64 already defines get_user() to __get_user(), with
__get_user() having an access_ok().
It would be really nice to have the same behavior and shared code across
all the arches.
--
Josh
* Re: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-11-22 3:11 ` Josh Poimboeuf
@ 2024-11-22 3:57 ` Linus Torvalds
2024-11-22 9:06 ` Christophe Leroy
2024-11-22 19:13 ` Linus Torvalds
0 siblings, 2 replies; 29+ messages in thread
From: Linus Torvalds @ 2024-11-22 3:57 UTC (permalink / raw)
To: Josh Poimboeuf
Cc: David Laight, x86@kernel.org, linux-kernel@vger.kernel.org,
Thomas Gleixner, Borislav Petkov, Peter Zijlstra, Pawan Gupta,
Waiman Long, Dave Hansen, Ingo Molnar, Michael Ellerman,
linuxppc-dev@lists.ozlabs.org, Andrew Cooper, Mark Rutland,
Kirill A . Shutemov
On Thu, 21 Nov 2024 at 19:11, Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> On Thu, Nov 21, 2024 at 05:02:06PM -0800, Linus Torvalds wrote:
> > [ Time passes ]
> >
> > Ugh. I tried it. It looks like this:
> >
> > #define inlined_get_user(res, ptr) ({ \
> > __label__ fail2, fail1; \
> > __auto_type __up = (ptr); \
> > int __ret = 0; \
> > if (can_do_masked_user_access()) \
> > __up = masked_user_access_begin(__up); \
> > else if (!user_read_access_begin(__up, sizeof(*__up))) \
> > goto fail1; \
> > unsafe_get_user(res, ptr, fail2); \
That 'ptr' needs to be '__up' of course.
Other than that it actually seems to work.
And by "seems to work" I mean "I replaced get_user() with this macro
instead, and my default kernel continued to build fine".
I didn't actually *test* the end result, although i did look at a few
more cases of code generation, and visually it looks sane enough.
> That actually doesn't seem so bad, it's easy enough to follow the logic.
I'm not loving the "if (0)" with the labels inside of it. But it's the
only way I can see to make a statement expression like this work,
since you can't have a "return" inside of it.
> We could easily use that macro in size-specific inline functions
> selected by a macro with a sizeof(type) switch statement -- not so bad
> IMO if they improve code usage and generation.
The point of it being a macro is that then it doesn't need the
size-specific games at all, because it "just works" since the types
end up percolating inside the logic.
Of course, that depends on unsafe_get_user() itself having the
size-specific games in it, so that's a bit of a lie, but at least it
needs the games in just one place.
And yes, having inline wrappers anyway could then allow for the
compiler making the "inline or not" choice, but the compiler really
doesn't end up having any idea of whether something is more important
from a performance angle, so that wouldn't actually be a real
improvement.
> Then all the arches have to do is implement unsafe_*_user_{1,2,4,8} and
> the "one good implementation" idea comes together?
Yeah, except honestly, basically *none* of them do.
The list of architectures that actually implement the unsafe accessors
is sad: it's x86, powerpc, and arm64. That's it.
Which is - along with my bloat worry - what makes me go "it's not
worth fighting".
Also, honestly, it is *very* rare that "get_user()" and "put_user()"
are performance-critical or even noticeable. We have lots of important
user copies, and the path and argument copy (aka
"strncpy_from_user()") is very important, but very few other places
tend to be.
So I see "copy_from_user()" in profiles, and I see
"strncpy_from_user()", and I've seen a few special cases that I've
converted (look at get_sigset_argpack(), for example - it shows up on
some select-heavy loads).
And now you found another one in that futex code, and that is indeed special.
But honestly, most get_user() cases are really in things like random ioctls etc.
Which is why I suspect we'd be better off just doing the important
ones one by one.
And doing the important ones individually may sound really nasty, but
if they are important, it does kind of make sense.
For example, one place I suspect *is* worth looking at is the execve()
argument handling. But to optimize that isn't about inlining
get_user(). It's about doing more than one word for each user access,
particularly with CLAC/STAC being as slow as they often are.
So what you actually would want to do is to combine these two
ret = -EFAULT;
str = get_user_arg_ptr(argv, argc);
if (IS_ERR(str))
goto out;
len = strnlen_user(str, MAX_ARG_STRLEN);
if (!len)
goto out;
into one thing, if you really cared enough. I've looked at it, and
always gone "I don't _quite_ care that much", even though argument
handling definitely shows up on some benchmarks (partly because it
mostly shows up on fairly artificial ones - the rest of execve is so
expensive that even when we waste some time on argument handling, it's
not noticeable enough to be worth spending a huge amount of effort on).
But you could also look at the pure argv counting code (aka "count()"
in fs/exec.c). That *also* shows up quite a bit, and there batching
things might be a much easier thing. But again, it's not about
inlining get_user(), it's about doing multiple accesses without
turning user accesses on and off all the time.
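A hedged sketch of that batching idea (not kernel code; compat handling,
MAX_ARG_STRINGS and the real count() details are omitted, and the function
name and chunk size below are made up) could look roughly like:

static int count_argv_batched(const char __user *const __user *argv, int max)
{
	int n = 0;

	while (n < max) {
		const char __user *const __user *p = argv + n;
		int chunk = min(max - n, 64);
		int i;

		/* one access_ok() + STAC/CLAC pair per chunk, not per pointer */
		if (!user_read_access_begin(p, chunk * sizeof(*p)))
			return -EFAULT;
		for (i = 0; i < chunk; i++) {
			const char __user *str;

			unsafe_get_user(str, &p[i], Efault);
			if (!str) {
				user_access_end();
				return n + i;
			}
		}
		user_access_end();
		n += chunk;
	}
	return n;

Efault:
	user_access_end();
	return -EFAULT;
}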
Anyway, that was a long way of saying: I really think we should just
special-case the (few) important cases that get reported. Because any
*big* improvements will come not from just inlining.
Linus
* Re: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-11-22 3:57 ` Linus Torvalds
@ 2024-11-22 9:06 ` Christophe Leroy
2024-11-22 18:16 ` Linus Torvalds
2024-11-22 19:13 ` Linus Torvalds
1 sibling, 1 reply; 29+ messages in thread
From: Christophe Leroy @ 2024-11-22 9:06 UTC (permalink / raw)
To: Linus Torvalds, Josh Poimboeuf
Cc: David Laight, x86@kernel.org, linux-kernel@vger.kernel.org,
Thomas Gleixner, Borislav Petkov, Peter Zijlstra, Pawan Gupta,
Waiman Long, Dave Hansen, Ingo Molnar, Michael Ellerman,
linuxppc-dev@lists.ozlabs.org, Andrew Cooper, Mark Rutland,
Kirill A . Shutemov
Le 22/11/2024 à 04:57, Linus Torvalds a écrit :
>
> I'm not loving the "if (0)" with the labels inside of it. But it's the
> only way I can see to make a statement expression like this work,
> since you can't have a "return" inside of it.
>
On powerpc for this kind of stuff I have been using do { } while (0);
with a break; in the middle, see for instance __put_user() or
__get_user_size_allowed() in arch/powerpc/include/asm/uaccess.h
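Applied to the inlined_get_user() macro above, that might look roughly like
this (untested sketch; the asm-goto fault label is still needed for
unsafe_get_user()):

#define inlined_get_user(res, ptr) ({					\
	__label__ Efault;						\
	__auto_type __up = (ptr);					\
	int __ret = -EFAULT;						\
	do {								\
		if (can_do_masked_user_access())			\
			__up = masked_user_access_begin(__up);		\
		else if (!user_read_access_begin(__up, sizeof(*__up)))	\
			break;						\
		unsafe_get_user(res, __up, Efault);			\
		user_access_end();					\
		__ret = 0;						\
		break;							\
Efault:									\
		user_access_end();					\
	} while (0);							\
	__ret; })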
Christophe
* Re: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-11-22 9:06 ` Christophe Leroy
@ 2024-11-22 18:16 ` Linus Torvalds
0 siblings, 0 replies; 29+ messages in thread
From: Linus Torvalds @ 2024-11-22 18:16 UTC (permalink / raw)
To: Christophe Leroy
Cc: Josh Poimboeuf, David Laight, x86@kernel.org,
linux-kernel@vger.kernel.org, Thomas Gleixner, Borislav Petkov,
Peter Zijlstra, Pawan Gupta, Waiman Long, Dave Hansen,
Ingo Molnar, Michael Ellerman, linuxppc-dev@lists.ozlabs.org,
Andrew Cooper, Mark Rutland, Kirill A . Shutemov
On Fri, 22 Nov 2024 at 01:20, Christophe Leroy
<christophe.leroy@csgroup.eu> wrote:
>
> On powerpc for this kind of stuff I have been using do { } while (0);
> with a break; in the middle, see for instance __put_user() or
> __get_user_size_allowed() in arch/powerpc/include/asm/uaccess.h
Oh, that's indeed a nicer syntax. I'll try to keep that in mind for
next time I come up with disgusting macros (although as mentioned, in
this case I think we're better off not playing that particular game at
all).
Linus
* Re: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-11-22 3:57 ` Linus Torvalds
2024-11-22 9:06 ` Christophe Leroy
@ 2024-11-22 19:13 ` Linus Torvalds
2024-11-22 19:35 ` Linus Torvalds
1 sibling, 1 reply; 29+ messages in thread
From: Linus Torvalds @ 2024-11-22 19:13 UTC (permalink / raw)
To: Josh Poimboeuf
Cc: David Laight, x86@kernel.org, linux-kernel@vger.kernel.org,
Thomas Gleixner, Borislav Petkov, Peter Zijlstra, Pawan Gupta,
Waiman Long, Dave Hansen, Ingo Molnar, Michael Ellerman,
linuxppc-dev@lists.ozlabs.org, Andrew Cooper, Mark Rutland,
Kirill A . Shutemov
[-- Attachment #1: Type: text/plain, Size: 1009 bytes --]
On Thu, 21 Nov 2024 at 19:57, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> Anyway, that was a long way of saying: I really think we should just
> special-case the (few) important cases that get reported. Because any
> *big* improvements will come not from just inlining.
Looking around at the futex code some more, I note:
- the cmpxchg case and futex ops use an explicit barrier too, which is bad
- we'd actually be better off inlining not just the user access, but
the whole futex_get_value_locked(), because then the compiler will be
able to do CSE on the user address masking, and only do it once
(several places do multiple different futex_get_value_locked() calls).
iow, I think the fix for the futex case ends up being a patch
something like the attached.
[ Annoyingly, we need "can_do_masked_user_access()" even on x86,
because the 32-bit case doesn't do the address masking trick ]
I've only compiled it so far, about to actually boot into it. Pray for me,
Linus
[-- Attachment #2: patch.diff --]
[-- Type: text/x-patch, Size: 4488 bytes --]
arch/x86/include/asm/futex.h | 8 ++++--
kernel/futex/core.c | 22 -----------------
kernel/futex/futex.h | 59 ++++++++++++++++++++++++++++++++++++++++++--
3 files changed, 63 insertions(+), 26 deletions(-)
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index 99d345b686fa..6e2458088800 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -48,7 +48,9 @@ do { \
static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
u32 __user *uaddr)
{
- if (!user_access_begin(uaddr, sizeof(u32)))
+ if (can_do_masked_user_access())
+ uaddr = masked_user_access_begin(uaddr);
+ else if (!user_access_begin(uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
@@ -84,7 +86,9 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
{
int ret = 0;
- if (!user_access_begin(uaddr, sizeof(u32)))
+ if (can_do_masked_user_access())
+ uaddr = masked_user_access_begin(uaddr);
+ else if (!user_access_begin(uaddr, sizeof(u32)))
return -EFAULT;
asm volatile("\n"
"1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 326bfe6549d7..9833fdb63e39 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -451,28 +451,6 @@ struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *
return NULL;
}
-int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
-{
- int ret;
-
- pagefault_disable();
- ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
- pagefault_enable();
-
- return ret;
-}
-
-int futex_get_value_locked(u32 *dest, u32 __user *from)
-{
- int ret;
-
- pagefault_disable();
- ret = __get_user(*dest, from);
- pagefault_enable();
-
- return ret ? -EFAULT : 0;
-}
-
/**
* wait_for_owner_exiting - Block until the owner has exited
* @ret: owner's current futex lock status
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 8b195d06f4e8..323572014b32 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -6,6 +6,7 @@
#include <linux/rtmutex.h>
#include <linux/sched/wake_q.h>
#include <linux/compat.h>
+#include <linux/uaccess.h>
#ifdef CONFIG_PREEMPT_RT
#include <linux/rcuwait.h>
@@ -225,10 +226,64 @@ extern bool __futex_wake_mark(struct futex_q *q);
extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q);
extern int fault_in_user_writeable(u32 __user *uaddr);
-extern int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval);
-extern int futex_get_value_locked(u32 *dest, u32 __user *from);
extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key);
+static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
+{
+ int ret;
+
+ pagefault_disable();
+ ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
+ pagefault_enable();
+
+ return ret;
+}
+
+/*
+ * This does a plain atomic user space read, and the user pointer has
+ * already been verified earlier by get_futex_key() to be both aligned
+ * and actually in user space, just like futex_atomic_cmpxchg_inatomic().
+ *
+ * We still want to avoid any speculation, and while __get_user() is
+ * the traditional model for this, it's actually slower than doing
+ * this manually these days.
+ *
+ * We could just have a per-architecture special function for it,
+ * the same way we do futex_atomic_cmpxchg_inatomic(), but rather
+ * than force everybody to do that, write it out long-hand using
+ * the low-level user-access infrastructure.
+ *
+ * This looks a bit overkill, but generally just results in a couple
+ * of instructions.
+ */
+static __always_inline int futex_read_inatomic(u32 *dest, u32 __user *from)
+{
+ u32 val;
+
+ if (can_do_masked_user_access())
+ from = masked_user_access_begin(from);
+ else if (!user_read_access_begin(from, sizeof(*from)))
+ return -EFAULT;
+ unsafe_get_user(val, from, Efault);
+ user_access_end();
+ *dest = val;
+ return 0;
+Efault:
+ user_access_end();
+ return -EFAULT;
+}
+
+static inline int futex_get_value_locked(u32 *dest, u32 __user *from)
+{
+ int ret;
+
+ pagefault_disable();
+ ret = futex_read_inatomic(dest, from);
+ pagefault_enable();
+
+ return ret;
+}
+
extern void __futex_unqueue(struct futex_q *q);
extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb);
extern int futex_unqueue(struct futex_q *q);
* Re: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-11-22 19:13 ` Linus Torvalds
@ 2024-11-22 19:35 ` Linus Torvalds
2024-11-24 16:11 ` David Laight
0 siblings, 1 reply; 29+ messages in thread
From: Linus Torvalds @ 2024-11-22 19:35 UTC (permalink / raw)
To: Josh Poimboeuf
Cc: David Laight, x86@kernel.org, linux-kernel@vger.kernel.org,
Thomas Gleixner, Borislav Petkov, Peter Zijlstra, Pawan Gupta,
Waiman Long, Dave Hansen, Ingo Molnar, Michael Ellerman,
linuxppc-dev@lists.ozlabs.org, Andrew Cooper, Mark Rutland,
Kirill A . Shutemov
On Fri, 22 Nov 2024 at 11:13, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> I've only compiled it so far, about to actually boot into it.
Looks fine. Sent out a proper patch with commit message etc at
https://lore.kernel.org/all/20241122193305.7316-1-torvalds@linux-foundation.org/
because it looks good to me. Comments?
Linus
* RE: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-11-22 19:35 ` Linus Torvalds
@ 2024-11-24 16:11 ` David Laight
2024-11-24 18:16 ` Linus Torvalds
0 siblings, 1 reply; 29+ messages in thread
From: David Laight @ 2024-11-24 16:11 UTC (permalink / raw)
To: 'Linus Torvalds', Josh Poimboeuf
Cc: x86@kernel.org, linux-kernel@vger.kernel.org, Thomas Gleixner,
Borislav Petkov, Peter Zijlstra, Pawan Gupta, Waiman Long,
Dave Hansen, Ingo Molnar, Michael Ellerman,
linuxppc-dev@lists.ozlabs.org, Andrew Cooper, Mark Rutland,
Kirill A . Shutemov
From: Linus Torvalds
> Sent: 22 November 2024 19:35
>
> On Fri, 22 Nov 2024 at 11:13, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
> >
> > I've only compiled it so far, about to actually boot into it.
>
> Looks fine. Sent out a proper patch with commit message etc at
>
> https://lore.kernel.org/all/20241122193305.7316-1-torvalds@linux-foundation.org/
>
> because it looks good to me. Comments?
+static __always_inline int futex_read_inatomic(u32 *dest, u32 __user *from)
+{
+ u32 val;
+
+ if (can_do_masked_user_access())
+ from = masked_user_access_begin(from);
+ else if (!user_read_access_begin(from, sizeof(*from)))
+ return -EFAULT;
+ unsafe_get_user(val, from, Efault);
+ user_access_end();
+ *dest = val;
+ return 0;
+Efault:
+ user_access_end();
+ return -EFAULT;
+}
+
+static inline int futex_get_value_locked(u32 *dest, u32 __user *from)
+{
+ int ret;
+
+ pagefault_disable();
+ ret = futex_read_inatomic(dest, from);
+ pagefault_enable();
+
+ return ret;
+}
Is there an 'unsafe_get_user_nofault()' that uses a trap handler
that won't fault in a page?
That would save the inc/dec done by pagefault_en/disable().
I'd also have thought that the trap handler for unsafe_get_user()
would jump to the Efault label having already done user_access_end().
But maybe it doesn't work out that way?
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
* Re: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-11-24 16:11 ` David Laight
@ 2024-11-24 18:16 ` Linus Torvalds
0 siblings, 0 replies; 29+ messages in thread
From: Linus Torvalds @ 2024-11-24 18:16 UTC (permalink / raw)
To: David Laight
Cc: Josh Poimboeuf, x86@kernel.org, linux-kernel@vger.kernel.org,
Thomas Gleixner, Borislav Petkov, Peter Zijlstra, Pawan Gupta,
Waiman Long, Dave Hansen, Ingo Molnar, Michael Ellerman,
linuxppc-dev@lists.ozlabs.org, Andrew Cooper, Mark Rutland,
Kirill A . Shutemov
On Sun, 24 Nov 2024 at 08:11, David Laight <David.Laight@aculab.com> wrote:
>
> Is there an 'unsafe_get_user_nofault()' that uses a trap handler
> that won't fault in a page?
Nope. I was thinking about the same thing, but we actually don't look
up the fault handler early - we only do it at failure time.
So the pagefault_disable() acts as the failure trigger that makes
us look up the fault handler. Without that, we'd never even check if
there's an exception note on the instruction.
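For readers following along, here is a rough C-level sketch of that
ordering (illustration only; the function and helper names are stand-ins,
not the real x86 fault handler):

static void sketch_handle_user_fault(struct pt_regs *regs, unsigned long addr)
{
	if (faulthandler_disabled()) {
		/*
		 * pagefault_disable() bumped current->pagefault_disabled,
		 * so we must not sleep to fault the page in.  Only now do
		 * we go looking for an exception-table entry covering the
		 * faulting instruction (which is what sends us to the
		 * Efault label in the futex code above).
		 */
		if (fixup_exception(regs, X86_TRAP_PF, 0, addr))
			return;
		/* No fixup entry: this would be a plain kernel oops. */
		return;
	}

	/* Normal path: fault the page in, possibly sleeping. */
	sketch_fault_page_in(regs, addr);	/* hypothetical helper */
}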
> I'd also have thought that the trap handler for unsafe_get_user()
> would jump to the Efault label having already done user_access_end().
> But maybe it doesn't work out that way?
I actually at one point had a local version that did exactly that,
because it allowed us to avoid doing the user_access_end in the
exception path.
It got ugly. In particular, it gets really ugly for the
"copy_to/from_user()" case where we want to be byte-accurate, and a
64-bit access fails, and we go back to doing the last few accesses one
byte at a time.
See the exception table in arch/x86/lib/copy_user_64.S where it jumps
to .Lcopy_user_tail for an example of this.
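For anyone not reading the assembly, a rough C-level sketch of that
byte-accurate tail fallback (hypothetical; the real logic lives in
copy_user_64.S, not in C):

static unsigned long sketch_copy_from_user(void *dst, const void __user *src,
					   unsigned long len)
{
	unsigned long done = 0;

	/* Fast path: word-sized copies until one of them faults. */
	while (len - done >= sizeof(long)) {
		unsigned long v;

		if (__get_user(v, (unsigned long __user *)(src + done)))
			goto tail;	/* like jumping to .Lcopy_user_tail */
		*(unsigned long *)(dst + done) = v;
		done += sizeof(long);
	}
tail:
	/* Slow path: byte at a time, so the uncopied count is exact. */
	while (done < len) {
		unsigned char c;

		if (__get_user(c, (unsigned char __user *)(src + done)))
			break;
		*(unsigned char *)(dst + done) = c;
		done++;
	}

	return len - done;	/* bytes NOT copied */
}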
Yes, yes, you could just do a "stac" again in the exception path to
undo the fact that the fault handler would have turned off user
accesses again...
But look at that copy_user_64 code again and you'll see that it's
actually a generic replacement for "rep movs" with fault handling, and
can be used for the "copy_from_kernel_nofault" cases too.
So I decided that it was just too ugly for words to have the fault
handler basically change the state of the faultee that way.
Linus
^ permalink raw reply [flat|nested] 29+ messages in thread
* RE: [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user()
2024-11-21 22:16 ` Linus Torvalds
2024-11-22 0:12 ` Josh Poimboeuf
@ 2024-11-22 9:38 ` David Laight
1 sibling, 0 replies; 29+ messages in thread
From: David Laight @ 2024-11-22 9:38 UTC (permalink / raw)
To: 'Linus Torvalds', Josh Poimboeuf
Cc: x86@kernel.org, linux-kernel@vger.kernel.org, Thomas Gleixner,
Borislav Petkov, Peter Zijlstra, Pawan Gupta, Waiman Long,
Dave Hansen, Ingo Molnar, Michael Ellerman,
linuxppc-dev@lists.ozlabs.org, Andrew Cooper, Mark Rutland,
Kirill A . Shutemov
From: Linus Torvalds
> Sent: 21 November 2024 22:16
>
> On Thu, 21 Nov 2024 at 13:40, Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> >
> > The profile is showing futex_get_value_locked():
>
> Ahh.
>
> > That has several callers, so we can probably just use get_user() there?
>
> Yeah, that's the simplest thing. That thing isn't even some inline
> function, so the real cost is the call.
>
> That said, exactly because it's not inlined, and calls are expensive,
> and this is apparently really critical, we can just do it with the
> full "unsafe_get_user()" model.
>
> It's not so complicated. The attached patch is untested, but I did
> check that it generates almost perfect code:
>
> mov %gs:0x0,%rax # current
> incl 0x1a9c(%rax) # current->pagefault_disable++
> movabs $0x123456789abcdef,%rcx # magic virtual address size
> cmp %rsi,%rcx # address masking
> sbb %rcx,%rcx
> or %rsi,%rcx
> stac # enable user space access
> mov (%rcx),%ecx # get the value
> clac # disable user space access
> decl 0x1a9c(%rax) # current->pagefault_disable--
> mov %ecx,(%rdi) # save the value
> xor %eax,%eax # return 0
> ret
>
> (with the error case for the page fault all out-of-line).
I presume you are assuming an earlier access_ok() call?
IIRC all x86 from 286 onwards fault accesses that cross the ~0 to 0 boundary.
So you don't need to have called access_ok() prior to the above
for non-byte accesses.
Even for byte accesses (where ~0 would otherwise be a valid address), do
the address mask before pagefault_disable++, and the error test is then
'jc label' after the 'cmp'.
Don't you also get better code for an 'asm output with goto' form?
While it requires the caller to have a 'label: return -EFAULT;' somewhere,
that is quite common in kernel code.
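As a sketch of what that form could look like (hypothetical; it assumes
CONFIG_CC_HAS_ASM_GOTO_OUTPUT, and the get_user_goto() name is made up --
this is not the existing unsafe_get_user() implementation):

#define get_user_goto(x, ptr, label)					\
	asm_goto_output("1:	movl %[umem], %[out]\n"			\
			_ASM_EXTABLE_UA(1b, %l2)			\
			: [out] "=r" (x)				\
			: [umem] "m" (*(__force u32 *)(ptr))		\
			: : label)

/* The caller then carries the error label itself: */
static int sketch_read_u32(u32 *dst, u32 __user *from)
{
	u32 val;

	if (can_do_masked_user_access())
		from = masked_user_access_begin(from);
	else if (!user_read_access_begin(from, sizeof(*from)))
		return -EFAULT;
	get_user_goto(val, from, Efault);
	user_access_end();
	*dst = val;
	return 0;
Efault:
	user_access_end();
	return -EFAULT;
}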
> So this should be _faster_ than the old __get_user(), because while
> the address masking is not needed, it's cheaper than the function call
> used to be and the error handling is better.
Perhaps get_user() should be the function call and __get_user() inlined.
Both including address validation but different calling conventions?
(After fixing the code that doesn't want address masking.)
...
> Now, we could possibly say "just remove the fence in __get_user()
> entirely", but that would involve moving it to access_ok().
How valid would it be to assume an explicit access_ok() was far
enough away from the get_user() that you couldn't speculate all the
way to something that used the read value to perform another
kernel access?
> And then it wouldn't actually speed anything up (except the horrid
> architecture-specific kernel uses that then don't call access_ok() at
> all - and we don't care about *those*).
Find and kill :-)
David
>
> Linus
^ permalink raw reply [flat|nested] 29+ messages in thread
* [PATCH v3 3/6] x86/uaccess: Avoid barrier_nospec() in 32-bit copy_from_user()
2024-10-29 1:56 [PATCH v3 0/6] x86/uaccess: avoid barrier_nospec() Josh Poimboeuf
2024-10-29 1:56 ` [PATCH v3 1/6] x86/uaccess: Avoid barrier_nospec() in 64-bit copy_from_user() Josh Poimboeuf
2024-10-29 1:56 ` [PATCH v3 2/6] x86/uaccess: Avoid barrier_nospec() in 64-bit __get_user() Josh Poimboeuf
@ 2024-10-29 1:56 ` Josh Poimboeuf
2024-10-29 1:56 ` [PATCH v3 4/6] x86/uaccess: Convert 32-bit get_user() to unconditional pointer masking Josh Poimboeuf
` (2 subsequent siblings)
5 siblings, 0 replies; 29+ messages in thread
From: Josh Poimboeuf @ 2024-10-29 1:56 UTC (permalink / raw)
To: x86
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Pawan Gupta, Waiman Long, Dave Hansen, Ingo Molnar,
Linus Torvalds, Michael Ellerman, linuxppc-dev, Andrew Cooper,
Mark Rutland, Kirill A . Shutemov
The barrier_nospec() in 32-bit copy_from_user() is slow. Instead use
pointer masking to force the user pointer to all 1's if a previous
access_ok() mispredicted true for an invalid address.
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
---
arch/x86/include/asm/uaccess.h | 34 +++++++++++++++++++++++++++++++
arch/x86/include/asm/uaccess_32.h | 2 +-
arch/x86/include/asm/uaccess_64.h | 29 +-------------------------
3 files changed, 36 insertions(+), 29 deletions(-)
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 3a7755c1a441..e7ac97d42bc2 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -15,6 +15,40 @@
#include <asm/smap.h>
#include <asm/extable.h>
#include <asm/tlbflush.h>
+#include <asm/runtime-const.h>
+
+#ifdef CONFIG_X86_64
+/*
+ * Virtual variable: there's no actual backing store for this,
+ * it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)'
+ */
+extern unsigned long USER_PTR_MAX;
+# define USER_PTR_MAX_CONST runtime_const_ptr(USER_PTR_MAX)
+#else
+# define USER_PTR_MAX_CONST TASK_SIZE_MAX-1
+#endif
+
+/*
+ * Masking the user address is an alternative to a conditional
+ * user_access_begin that can avoid the fencing. This only works
+ * for dense accesses starting at the address.
+ */
+static inline void __user *mask_user_address(const void __user *ptr)
+{
+ unsigned long mask;
+ asm("cmp %1,%0\n\t"
+ "sbb %0,%0"
+ :"=r" (mask)
+ :"r" (ptr),
+ "0" (USER_PTR_MAX_CONST));
+ return (__force void __user *)(mask | (__force unsigned long)ptr);
+}
+
+#define masked_user_access_begin(x) ({ \
+ __auto_type __masked_ptr = (x); \
+ __masked_ptr = mask_user_address(__masked_ptr); \
+ __uaccess_begin(); __masked_ptr; })
+
#ifdef CONFIG_X86_32
# include <asm/uaccess_32.h>
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 8393ba104b2c..6ec2d73f8bba 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -23,7 +23,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
static __always_inline unsigned long
raw_copy_from_user(void *to, const void __user *from, unsigned long n)
{
- barrier_nospec();
+ from = mask_user_address(from);
return __copy_user_ll(to, (__force const void *)from, n);
}
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 7ce84090f0ec..dfb78154ac26 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -12,13 +12,6 @@
#include <asm/cpufeatures.h>
#include <asm/page.h>
#include <asm/percpu.h>
-#include <asm/runtime-const.h>
-
-/*
- * Virtual variable: there's no actual backing store for this,
- * it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)'
- */
-extern unsigned long USER_PTR_MAX;
#ifdef CONFIG_ADDRESS_MASKING
/*
@@ -54,27 +47,7 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
#endif
#define valid_user_address(x) \
- ((__force unsigned long)(x) <= runtime_const_ptr(USER_PTR_MAX))
-
-/*
- * Masking the user address is an alternative to a conditional
- * user_access_begin that can avoid the fencing. This only works
- * for dense accesses starting at the address.
- */
-static inline void __user *mask_user_address(const void __user *ptr)
-{
- unsigned long mask;
- asm("cmp %1,%0\n\t"
- "sbb %0,%0"
- :"=r" (mask)
- :"r" (ptr),
- "0" (runtime_const_ptr(USER_PTR_MAX)));
- return (__force void __user *)(mask | (__force unsigned long)ptr);
-}
-#define masked_user_access_begin(x) ({ \
- __auto_type __masked_ptr = (x); \
- __masked_ptr = mask_user_address(__masked_ptr); \
- __uaccess_begin(); __masked_ptr; })
+ ((__force unsigned long)(x) <= USER_PTR_MAX_CONST)
/*
* User pointers can have tag bits on x86-64. This scheme tolerates
--
2.47.0
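As background for the cmp/sbb/or sequence in mask_user_address() above, its
effect can be reproduced with plain C arithmetic (a standalone userspace
illustration, not kernel code; the limit value below is made up):

#include <stdint.h>
#include <stdio.h>

static uintptr_t mask_addr(uintptr_t ptr, uintptr_t user_ptr_max)
{
	/* cmp + sbb: mask becomes all 1's iff ptr > user_ptr_max */
	uintptr_t mask = (ptr > user_ptr_max) ? ~(uintptr_t)0 : 0;

	/* or: valid pointers pass through, invalid ones become ~0 */
	return ptr | mask;
}

int main(void)
{
	uintptr_t max = 0x00007ffffffff000ul;	/* stand-in for USER_PTR_MAX */

	printf("%#lx\n", (unsigned long)mask_addr(0x1000, max));
	printf("%#lx\n", (unsigned long)mask_addr(0xffff888000000000ul, max));
	return 0;
}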
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v3 4/6] x86/uaccess: Convert 32-bit get_user() to unconditional pointer masking
2024-10-29 1:56 [PATCH v3 0/6] x86/uaccess: avoid barrier_nospec() Josh Poimboeuf
` (2 preceding siblings ...)
2024-10-29 1:56 ` [PATCH v3 3/6] x86/uaccess: Avoid barrier_nospec() in 32-bit copy_from_user() Josh Poimboeuf
@ 2024-10-29 1:56 ` Josh Poimboeuf
2024-10-29 1:56 ` [PATCH v3 5/6] x86/uaccess: Avoid barrier_nospec() in 32-bit __get_user() Josh Poimboeuf
2024-10-29 1:56 ` [PATCH v3 6/6] x86/uaccess: Converge [__]get_user() implementations Josh Poimboeuf
5 siblings, 0 replies; 29+ messages in thread
From: Josh Poimboeuf @ 2024-10-29 1:56 UTC (permalink / raw)
To: x86
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Pawan Gupta, Waiman Long, Dave Hansen, Ingo Molnar,
Linus Torvalds, Michael Ellerman, linuxppc-dev, Andrew Cooper,
Mark Rutland, Kirill A . Shutemov
Convert the 32-bit get_user() implementations to use the new
unconditional masking scheme for consistency with 64-bit.
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
---
arch/x86/lib/getuser.S | 33 ++++++++++++++-------------------
1 file changed, 14 insertions(+), 19 deletions(-)
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 998d5be6b794..5bce27670baa 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -37,22 +37,19 @@
#define ASM_BARRIER_NOSPEC ALTERNATIVE "", "lfence", X86_FEATURE_LFENCE_RDTSC
-.macro check_range size:req
+.macro mask_user_address size:req
.if IS_ENABLED(CONFIG_X86_64)
movq $0x0123456789abcdef,%rdx
1:
.pushsection runtime_ptr_USER_PTR_MAX,"a"
.long 1b - 8 - .
.popsection
- cmp %rax, %rdx
- sbb %rdx, %rdx
- or %rdx, %rax
.else
- cmp $TASK_SIZE_MAX-\size+1, %eax
- jae .Lbad_get_user
- sbb %edx, %edx /* array_index_mask_nospec() */
- and %edx, %eax
+ mov $TASK_SIZE_MAX-\size, %edx
.endif
+ cmp %_ASM_AX, %_ASM_DX
+ sbb %_ASM_DX, %_ASM_DX
+ or %_ASM_DX, %_ASM_AX
.endm
.macro UACCESS op src dst
@@ -63,7 +60,7 @@
.text
SYM_FUNC_START(__get_user_1)
- check_range size=1
+ mask_user_address size=1
ASM_STAC
UACCESS movzbl (%_ASM_AX),%edx
xor %eax,%eax
@@ -73,7 +70,7 @@ SYM_FUNC_END(__get_user_1)
EXPORT_SYMBOL(__get_user_1)
SYM_FUNC_START(__get_user_2)
- check_range size=2
+ mask_user_address size=2
ASM_STAC
UACCESS movzwl (%_ASM_AX),%edx
xor %eax,%eax
@@ -83,7 +80,7 @@ SYM_FUNC_END(__get_user_2)
EXPORT_SYMBOL(__get_user_2)
SYM_FUNC_START(__get_user_4)
- check_range size=4
+ mask_user_address size=4
ASM_STAC
UACCESS movl (%_ASM_AX),%edx
xor %eax,%eax
@@ -93,14 +90,12 @@ SYM_FUNC_END(__get_user_4)
EXPORT_SYMBOL(__get_user_4)
SYM_FUNC_START(__get_user_8)
-#ifndef CONFIG_X86_64
- xor %ecx,%ecx
-#endif
- check_range size=8
+ mask_user_address size=8
ASM_STAC
#ifdef CONFIG_X86_64
UACCESS movq (%_ASM_AX),%rdx
#else
+ xor %ecx,%ecx
UACCESS movl (%_ASM_AX),%edx
UACCESS movl 4(%_ASM_AX),%ecx
#endif
@@ -113,7 +108,7 @@ EXPORT_SYMBOL(__get_user_8)
/* .. and the same for __get_user, just without the range checks */
SYM_FUNC_START(__get_user_nocheck_1)
#ifdef CONFIG_X86_64
- check_range size=1
+ mask_user_address size=1
#else
ASM_BARRIER_NOSPEC
#endif
@@ -127,7 +122,7 @@ EXPORT_SYMBOL(__get_user_nocheck_1)
SYM_FUNC_START(__get_user_nocheck_2)
#ifdef CONFIG_X86_64
- check_range size=2
+ mask_user_address size=2
#else
ASM_BARRIER_NOSPEC
#endif
@@ -141,7 +136,7 @@ EXPORT_SYMBOL(__get_user_nocheck_2)
SYM_FUNC_START(__get_user_nocheck_4)
#ifdef CONFIG_X86_64
- check_range size=4
+ mask_user_address size=4
#else
ASM_BARRIER_NOSPEC
#endif
@@ -155,7 +150,7 @@ EXPORT_SYMBOL(__get_user_nocheck_4)
SYM_FUNC_START(__get_user_nocheck_8)
#ifdef CONFIG_X86_64
- check_range size=8
+ mask_user_address size=8
#else
ASM_BARRIER_NOSPEC
#endif
--
2.47.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v3 5/6] x86/uaccess: Avoid barrier_nospec() in 32-bit __get_user()
2024-10-29 1:56 [PATCH v3 0/6] x86/uaccess: avoid barrier_nospec() Josh Poimboeuf
` (3 preceding siblings ...)
2024-10-29 1:56 ` [PATCH v3 4/6] x86/uaccess: Convert 32-bit get_user() to unconditional pointer masking Josh Poimboeuf
@ 2024-10-29 1:56 ` Josh Poimboeuf
2024-10-29 1:56 ` [PATCH v3 6/6] x86/uaccess: Converge [__]get_user() implementations Josh Poimboeuf
5 siblings, 0 replies; 29+ messages in thread
From: Josh Poimboeuf @ 2024-10-29 1:56 UTC (permalink / raw)
To: x86
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Pawan Gupta, Waiman Long, Dave Hansen, Ingo Molnar,
Linus Torvalds, Michael Ellerman, linuxppc-dev, Andrew Cooper,
Mark Rutland, Kirill A . Shutemov
The barrier_nospec() in 32-bit __get_user() is slow. Instead use
pointer masking to force the user pointer to all 1's if the access_ok()
mispredicted true for an invalid address.
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
---
arch/x86/lib/getuser.S | 18 ------------------
1 file changed, 18 deletions(-)
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 5bce27670baa..7da4fc75eba9 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -35,8 +35,6 @@
#include <asm/asm.h>
#include <asm/smap.h>
-#define ASM_BARRIER_NOSPEC ALTERNATIVE "", "lfence", X86_FEATURE_LFENCE_RDTSC
-
.macro mask_user_address size:req
.if IS_ENABLED(CONFIG_X86_64)
movq $0x0123456789abcdef,%rdx
@@ -107,11 +105,7 @@ EXPORT_SYMBOL(__get_user_8)
/* .. and the same for __get_user, just without the range checks */
SYM_FUNC_START(__get_user_nocheck_1)
-#ifdef CONFIG_X86_64
mask_user_address size=1
-#else
- ASM_BARRIER_NOSPEC
-#endif
ASM_STAC
UACCESS movzbl (%_ASM_AX),%edx
xor %eax,%eax
@@ -121,11 +115,7 @@ SYM_FUNC_END(__get_user_nocheck_1)
EXPORT_SYMBOL(__get_user_nocheck_1)
SYM_FUNC_START(__get_user_nocheck_2)
-#ifdef CONFIG_X86_64
mask_user_address size=2
-#else
- ASM_BARRIER_NOSPEC
-#endif
ASM_STAC
UACCESS movzwl (%_ASM_AX),%edx
xor %eax,%eax
@@ -135,11 +125,7 @@ SYM_FUNC_END(__get_user_nocheck_2)
EXPORT_SYMBOL(__get_user_nocheck_2)
SYM_FUNC_START(__get_user_nocheck_4)
-#ifdef CONFIG_X86_64
mask_user_address size=4
-#else
- ASM_BARRIER_NOSPEC
-#endif
ASM_STAC
UACCESS movl (%_ASM_AX),%edx
xor %eax,%eax
@@ -149,11 +135,7 @@ SYM_FUNC_END(__get_user_nocheck_4)
EXPORT_SYMBOL(__get_user_nocheck_4)
SYM_FUNC_START(__get_user_nocheck_8)
-#ifdef CONFIG_X86_64
mask_user_address size=8
-#else
- ASM_BARRIER_NOSPEC
-#endif
ASM_STAC
#ifdef CONFIG_X86_64
UACCESS movq (%_ASM_AX),%rdx
--
2.47.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH v3 6/6] x86/uaccess: Converge [__]get_user() implementations
2024-10-29 1:56 [PATCH v3 0/6] x86/uaccess: avoid barrier_nospec() Josh Poimboeuf
` (4 preceding siblings ...)
2024-10-29 1:56 ` [PATCH v3 5/6] x86/uaccess: Avoid barrier_nospec() in 32-bit __get_user() Josh Poimboeuf
@ 2024-10-29 1:56 ` Josh Poimboeuf
5 siblings, 0 replies; 29+ messages in thread
From: Josh Poimboeuf @ 2024-10-29 1:56 UTC (permalink / raw)
To: x86
Cc: linux-kernel, Thomas Gleixner, Borislav Petkov, Peter Zijlstra,
Pawan Gupta, Waiman Long, Dave Hansen, Ingo Molnar,
Linus Torvalds, Michael Ellerman, linuxppc-dev, Andrew Cooper,
Mark Rutland, Kirill A . Shutemov
The x86 implementations of get_user() and __get_user() are now
identical. Merge their implementations and make the __get_user()
implementations aliases of their get_user() counterparts.
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
---
arch/x86/lib/getuser.S | 58 +++++++++---------------------------------
1 file changed, 12 insertions(+), 46 deletions(-)
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 7da4fc75eba9..6f4dcb80dd46 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -103,53 +103,19 @@ SYM_FUNC_START(__get_user_8)
SYM_FUNC_END(__get_user_8)
EXPORT_SYMBOL(__get_user_8)
-/* .. and the same for __get_user, just without the range checks */
-SYM_FUNC_START(__get_user_nocheck_1)
- mask_user_address size=1
- ASM_STAC
- UACCESS movzbl (%_ASM_AX),%edx
- xor %eax,%eax
- ASM_CLAC
- RET
-SYM_FUNC_END(__get_user_nocheck_1)
-EXPORT_SYMBOL(__get_user_nocheck_1)
-
-SYM_FUNC_START(__get_user_nocheck_2)
- mask_user_address size=2
- ASM_STAC
- UACCESS movzwl (%_ASM_AX),%edx
- xor %eax,%eax
- ASM_CLAC
- RET
-SYM_FUNC_END(__get_user_nocheck_2)
-EXPORT_SYMBOL(__get_user_nocheck_2)
-
-SYM_FUNC_START(__get_user_nocheck_4)
- mask_user_address size=4
- ASM_STAC
- UACCESS movl (%_ASM_AX),%edx
- xor %eax,%eax
- ASM_CLAC
- RET
-SYM_FUNC_END(__get_user_nocheck_4)
-EXPORT_SYMBOL(__get_user_nocheck_4)
-
-SYM_FUNC_START(__get_user_nocheck_8)
- mask_user_address size=8
- ASM_STAC
-#ifdef CONFIG_X86_64
- UACCESS movq (%_ASM_AX),%rdx
-#else
- xor %ecx,%ecx
- UACCESS movl (%_ASM_AX),%edx
- UACCESS movl 4(%_ASM_AX),%ecx
-#endif
- xor %eax,%eax
- ASM_CLAC
- RET
-SYM_FUNC_END(__get_user_nocheck_8)
-EXPORT_SYMBOL(__get_user_nocheck_8)
+/*
+ * On x86-64, get_user() does address masking rather than a conditional bounds
+ * check so there's no functional difference compared to __get_user().
+ */
+SYM_FUNC_ALIAS(__get_user_nocheck_1, __get_user_1);
+SYM_FUNC_ALIAS(__get_user_nocheck_2, __get_user_2);
+SYM_FUNC_ALIAS(__get_user_nocheck_4, __get_user_4);
+SYM_FUNC_ALIAS(__get_user_nocheck_8, __get_user_8);
+EXPORT_SYMBOL(__get_user_nocheck_1);
+EXPORT_SYMBOL(__get_user_nocheck_2);
+EXPORT_SYMBOL(__get_user_nocheck_4);
+EXPORT_SYMBOL(__get_user_nocheck_8);
SYM_CODE_START_LOCAL(__get_user_handle_exception)
ASM_CLAC
--
2.47.0
^ permalink raw reply related [flat|nested] 29+ messages in thread