public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: david laight <david.laight@runbox.com>
To: Uros Bizjak <ubizjak@gmail.com>
Cc: x86@kernel.org, linux-kernel@vger.kernel.org,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@kernel.org>, Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	"H. Peter Anvin" <hpa@zytor.com>
Subject: Re: [PATCH] x86: Optimize __get_user_asm() assembly
Date: Wed, 26 Nov 2025 15:18:48 +0000	[thread overview]
Message-ID: <20251126151848.2f57d5d4@pumpkin> (raw)
In-Reply-To: <20251126132352.448052-1-ubizjak@gmail.com>

On Wed, 26 Nov 2025 14:23:32 +0100
Uros Bizjak <ubizjak@gmail.com> wrote:

> On x86, byte and word moves (MOVB, MOVW) write only to the low
> portion of a 32-bit register, leaving the upper bits unchanged.
> Modern compilers therefore prefer using MOVZBL and MOVZWL to load
> 8-bit and 16-bit values with zero-extension to the full register
> width.
> 
> Update __get_user_asm() to follow this convention by explicitly
> zero-extending 8-bit and 16-bit loads from memory.
> 
> An additional benefit of this change is that it enables the full
> integer register set to be used for 8-bit loads. Also, it
> eliminates the need for manual zero-extension of 8-bit values.
> 
> There is only a minimal increase in code size:

Interesting, where does that come from.
I'd have thought it should shrink it.
The mov[sz]x is one byte longer.
Perhaps a lot of 8-bit values are never zero-extended?

I'm trying to remember what the magic 'k' is for.
Does gas treat %krxx as %dxx ?
Which would matter if 'x' is 64bit when using "=r" for 32bit reads.
But, in that case, I think you need it on the 'movl' as well.

There is the slight problem (which affects for of the asm) is that
gcc (I think) can't assume that the high 32bits of a register result
from an inline asm function are zero.
But if you return 'u64' then everything at the call site has to
be extended as well.
I wonder (not looked at the generated code yet) whether casting
the u64 result from the function to u32 has the effect of avoiding
the zero extend if the value is needed as a 64bit one.
This may (or may not) affect get_user() but will affect the 'bit scan'
functions.

This means you might get better code if 'x' is always u64 (with gas
generating instructions that write to the low 32 bits - zero the high ones)
but with a cast to u32 before any value can be used (esp when inlined).

Of course, I might be smoking substances again...

	David

> 
>       text     data     bss      dec     hex filename
>   28258526  4810554  740932 33810012  03e65c vmlinux-old.o
>   28258550  4810554  740932 33810036  03e674 vmlinux-new.o
> 
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@kernel.org>
> Cc: Borislav Petkov <bp@alien8.de>
> Cc: Dave Hansen <dave.hansen@linux.intel.com>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> -
> ---
>  arch/x86/include/asm/uaccess.h | 40 +++++++++++++++-------------------
>  1 file changed, 17 insertions(+), 23 deletions(-)
> 
> diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
> index 367297b188c3..cb463c1301f6 100644
> --- a/arch/x86/include/asm/uaccess.h
> +++ b/arch/x86/include/asm/uaccess.h
> @@ -260,30 +260,27 @@ do {									\
>  	unsigned int __gu_low, __gu_high;				\
>  	const unsigned int __user *__gu_ptr;				\
>  	__gu_ptr = (const void __user *)(ptr);				\
> -	__get_user_asm(__gu_low, __gu_ptr, "l", "=r", label);		\
> -	__get_user_asm(__gu_high, __gu_ptr+1, "l", "=r", label);	\
> +	__get_user_asm(__gu_low, __gu_ptr, "movl", "", label);		\
> +	__get_user_asm(__gu_high, __gu_ptr+1, "movl", "", label);	\
>  	(x) = ((unsigned long long)__gu_high << 32) | __gu_low;		\
>  } while (0)
>  #else
>  #define __get_user_asm_u64(x, ptr, label)				\
> -	__get_user_asm(x, ptr, "q", "=r", label)
> +	__get_user_asm(x, ptr, "movq", "", label)
>  #endif
>  
>  #define __get_user_size(x, ptr, size, label)				\
>  do {									\
>  	__chk_user_ptr(ptr);						\
>  	switch (size) {							\
> -	case 1:	{							\
> -		unsigned char x_u8__;					\
> -		__get_user_asm(x_u8__, ptr, "b", "=q", label);		\
> -		(x) = x_u8__;						\
> +	case 1:								\
> +		__get_user_asm(x, ptr, "movzbl", "k", label);		\
>  		break;							\
> -	}								\
>  	case 2:								\
> -		__get_user_asm(x, ptr, "w", "=r", label);		\
> +		__get_user_asm(x, ptr, "movzwl", "k", label);		\
>  		break;							\
>  	case 4:								\
> -		__get_user_asm(x, ptr, "l", "=r", label);		\
> +		__get_user_asm(x, ptr, "movl", "", label);		\
>  		break;							\
>  	case 8:								\
>  		__get_user_asm_u64(x, ptr, label);			\
> @@ -294,11 +291,11 @@ do {									\
>  	instrument_get_user(x);						\
>  } while (0)
>  
> -#define __get_user_asm(x, addr, itype, ltype, label)			\
> +#define __get_user_asm(x, addr, insn, opmod, label)			\
>  	asm_goto_output("\n"						\
> -		     "1:	mov"itype" %[umem],%[output]\n"		\
> +		     "1:	" insn " %[umem],%" opmod "[output]\n"	\
>  		     _ASM_EXTABLE_UA(1b, %l2)				\
> -		     : [output] ltype(x)				\
> +		     : [output] "=r" (x)				\
>  		     : [umem] "m" (__m(addr))				\
>  		     : : label)
>  
> @@ -326,26 +323,23 @@ do {									\
>  })
>  
>  #else
> -#define __get_user_asm_u64(x, ptr, retval) \
> -	 __get_user_asm(x, ptr, retval, "q")
> +#define __get_user_asm_u64(x, ptr, retval)				\
> +	__get_user_asm(x, ptr, "movq", "", retval)
>  #endif
>  
>  #define __get_user_size(x, ptr, size, retval)				\
>  do {									\
> -	unsigned char x_u8__;						\
> -									\
>  	retval = 0;							\
>  	__chk_user_ptr(ptr);						\
>  	switch (size) {							\
>  	case 1:								\
> -		__get_user_asm(x_u8__, ptr, retval, "b");		\
> -		(x) = x_u8__;						\
> +		 __get_user_asm(x, ptr, "movzbl", "k", retval);		\
>  		break;							\
>  	case 2:								\
> -		__get_user_asm(x, ptr, retval, "w");			\
> +		__get_user_asm(x, ptr, "movzwl", "k", retval);		\
>  		break;							\
>  	case 4:								\
> -		__get_user_asm(x, ptr, retval, "l");			\
> +		__get_user_asm(x, ptr, "movl", "", retval);		\
>  		break;							\
>  	case 8:								\
>  		__get_user_asm_u64(x, ptr, retval);			\
> @@ -355,9 +349,9 @@ do {									\
>  	}								\
>  } while (0)
>  
> -#define __get_user_asm(x, addr, err, itype)				\
> +#define __get_user_asm(x, addr, insn, opmod, err)			\
>  	asm volatile("\n"						\
> -		     "1:	mov"itype" %[umem],%[output]\n"		\
> +		     "1:	" insn " %[umem],%" opmod "[output]\n"	\
>  		     "2:\n"						\
>  		     _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG | \
>  					   EX_FLAG_CLEAR_AX,		\


  reply	other threads:[~2025-11-26 15:19 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-26 13:23 [PATCH] x86: Optimize __get_user_asm() assembly Uros Bizjak
2025-11-26 15:18 ` david laight [this message]
2025-11-26 16:22   ` Uros Bizjak
2025-11-26 19:23     ` david laight
2025-11-26 20:29       ` Uros Bizjak
2025-11-28 10:37         ` david laight

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251126151848.2f57d5d4@pumpkin \
    --to=david.laight@runbox.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=ubizjak@gmail.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox