From: David Laight <david.laight.linux@gmail.com>
To: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>,
Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
Dave Hansen <dave.hansen@linux.intel.com>,
x86@kernel.org, "H. Peter Anvin" <hpa@zytor.com>,
Peter Zijlstra <peterz@infradead.org>,
Ard Biesheuvel <ardb@kernel.org>,
"Paul E. McKenney" <paulmck@kernel.org>,
Josh Poimboeuf <jpoimboe@kernel.org>,
Xiongwei Song <xiongwei.song@windriver.com>,
Xin Li <xin3.li@intel.com>,
"Mike Rapoport (IBM)" <rppt@kernel.org>,
Brijesh Singh <brijesh.singh@amd.com>,
Michael Roth <michael.roth@amd.com>,
Tony Luck <tony.luck@intel.com>,
Alexey Kardashevskiy <aik@amd.com>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
Jonathan Corbet <corbet@lwn.net>,
Sohil Mehta <sohil.mehta@intel.com>,
Ingo Molnar <mingo@kernel.org>,
Pawan Gupta <pawan.kumar.gupta@linux.intel.com>,
Daniel Sneddon <daniel.sneddon@linux.intel.com>,
Kai Huang <kai.huang@intel.com>,
Sandipan Das <sandipan.das@amd.com>,
Breno Leitao <leitao@debian.org>,
Rick Edgecombe <rick.p.edgecombe@intel.com>,
Alexei Starovoitov <ast@kernel.org>, Hou Tao <houtao1@huawei.com>,
Juergen Gross <jgross@suse.com>,
Vegard Nossum <vegard.nossum@oracle.com>,
Kees Cook <kees@kernel.org>, Eric Biggers <ebiggers@google.com>,
Jason Gunthorpe <jgg@ziepe.ca>,
"Masami Hiramatsu (Google)" <mhiramat@kernel.org>,
Andrew Morton <akpm@linux-foundation.org>,
Luis Chamberlain <mcgrof@kernel.org>,
Yuntao Wang <ytcoode@gmail.com>,
Rasmus Villemoes <linux@rasmusvillemoes.dk>,
Christophe Leroy <christophe.leroy@csgroup.eu>,
Tejun Heo <tj@kernel.org>, Changbin Du <changbin.du@huawei.com>,
Huang Shijie <shijie@os.amperecomputing.com>,
Geert Uytterhoeven <geert+renesas@glider.be>,
Namhyung Kim <namhyung@kernel.org>,
Arnaldo Carvalho de Melo <acme@redhat.com>,
linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-efi@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [PATCHv8 02/17] x86/asm: Introduce inline memcpy and memset
Date: Thu, 3 Jul 2025 09:44:17 +0100 [thread overview]
Message-ID: <20250703094417.165e5893@pumpkin> (raw)
In-Reply-To: <20250701095849.2360685-3-kirill.shutemov@linux.intel.com>
On Tue, 1 Jul 2025 12:58:31 +0300
"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> wrote:
> Extract memcpy and memset functions from copy_user_generic() and
> __clear_user().
>
> They can be used as inline memcpy and memset instead of the GCC builtins
> whenever necessary. LASS requires them to handle text_poke.
Except they contain the fault handlers, so they aren't generic calls.
>
> Originally-by: Peter Zijlstra <peterz@infradead.org>
> Link: https://lore.kernel.org/all/20241029184840.GJ14555@noisy.programming.kicks-ass.net/
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---
> arch/x86/include/asm/string.h | 46 +++++++++++++++++++++++++++++++
> arch/x86/include/asm/uaccess_64.h | 38 +++++++------------------
> arch/x86/lib/clear_page_64.S | 13 +++++++--
> 3 files changed, 67 insertions(+), 30 deletions(-)
>
> diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h
> index c3c2c1914d65..17f6b5bfa8c1 100644
> --- a/arch/x86/include/asm/string.h
> +++ b/arch/x86/include/asm/string.h
> @@ -1,6 +1,52 @@
> /* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_X86_STRING_H
> +#define _ASM_X86_STRING_H
> +
> +#include <asm/asm.h>
> +#include <asm/alternative.h>
> +#include <asm/cpufeatures.h>
> +
> #ifdef CONFIG_X86_32
> # include <asm/string_32.h>
> #else
> # include <asm/string_64.h>
> #endif
> +
> +#ifdef CONFIG_X86_64
> +#define ALT_64(orig, alt, feat) ALTERNATIVE(orig, alt, feat)
> +#else
> +#define ALT_64(orig, alt, feat) orig "\n"
> +#endif
> +
> +static __always_inline void *__inline_memcpy(void *to, const void *from, size_t len)
> +{
> + void *ret = to;
> +
> + asm volatile("1:\n\t"
> + ALT_64("rep movsb",
> + "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM))
> + "2:\n\t"
> + _ASM_EXTABLE_UA(1b, 2b)
> + : "+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
> + : : "memory", _ASM_AX);
> +
> + return ret + len;
> +}
> +
> +static __always_inline void *__inline_memset(void *addr, int v, size_t len)
> +{
> + void *ret = addr;
> +
> + asm volatile("1:\n\t"
> + ALT_64("rep stosb",
> + "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRM))
> + "2:\n\t"
> + _ASM_EXTABLE_UA(1b, 2b)
> + : "+c" (len), "+D" (addr), ASM_CALL_CONSTRAINT
> + : "a" ((uint8_t)v)
You shouldn't need the (uint8_t) cast (and shouldn't that be (u8) anyway?).
At best it makes no difference; at worst it adds code to mask with 0xff.
> + : "memory", _ASM_SI, _ASM_DX);
> +
> + return ret + len;
> +}
> +
> +#endif /* _ASM_X86_STRING_H */
> diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
> index c8a5ae35c871..eb531e13e659 100644
> --- a/arch/x86/include/asm/uaccess_64.h
> +++ b/arch/x86/include/asm/uaccess_64.h
> @@ -13,6 +13,7 @@
> #include <asm/page.h>
> #include <asm/percpu.h>
> #include <asm/runtime-const.h>
> +#include <asm/string.h>
>
> /*
> * Virtual variable: there's no actual backing store for this,
> @@ -118,21 +119,12 @@ rep_movs_alternative(void *to, const void *from, unsigned len);
> static __always_inline __must_check unsigned long
> copy_user_generic(void *to, const void *from, unsigned long len)
> {
> + void *ret;
> +
> stac();
> - /*
> - * If CPU has FSRM feature, use 'rep movs'.
> - * Otherwise, use rep_movs_alternative.
> - */
> - asm volatile(
> - "1:\n\t"
> - ALTERNATIVE("rep movsb",
> - "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM))
> - "2:\n"
> - _ASM_EXTABLE_UA(1b, 2b)
> - :"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
> - : : "memory", "rax");
> + ret = __inline_memcpy(to, from, len);
> clac();
> - return len;
> + return ret - to;
> }
>
> static __always_inline __must_check unsigned long
> @@ -178,25 +170,15 @@ rep_stos_alternative(void __user *addr, unsigned long len);
>
> static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size)
> {
> + void *ptr = (__force void *)addr;
> + void *ret;
> +
> might_fault();
> stac();
> -
> - /*
> - * No memory constraint because it doesn't change any memory gcc
> - * knows about.
> - */
> - asm volatile(
> - "1:\n\t"
> - ALTERNATIVE("rep stosb",
> - "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRS))
> - "2:\n"
> - _ASM_EXTABLE_UA(1b, 2b)
> - : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT
> - : "a" (0));
> -
> + ret = __inline_memset(ptr, 0, size);
> clac();
>
> - return size;
> + return ret - ptr;
> }
>
> static __always_inline unsigned long clear_user(void __user *to, unsigned long n)
> diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
> index a508e4a8c66a..47b613690f84 100644
> --- a/arch/x86/lib/clear_page_64.S
> +++ b/arch/x86/lib/clear_page_64.S
> @@ -55,17 +55,26 @@ SYM_FUNC_END(clear_page_erms)
> EXPORT_SYMBOL_GPL(clear_page_erms)
>
> /*
> - * Default clear user-space.
> + * Default memset.
> * Input:
> * rdi destination
> + * rsi scratch
> * rcx count
> - * rax is zero
> + * al is value
> *
> * Output:
> * rcx: uncleared bytes or 0 if successful.
> + * rdx: clobbered
> */
> SYM_FUNC_START(rep_stos_alternative)
> ANNOTATE_NOENDBR
> +
> + movzbq %al, %rsi
> + movabs $0x0101010101010101, %rax
> +
> + /* RDX:RAX = RAX * RSI */
> + mulq %rsi
NAK - you can't do that here.
Neither %rsi nor %rdx can be trashed.
The function has a very explicit calling convention.
It is also almost certainly a waste of time:
pretty much all the calls will be for a constant 0x00.
Rename it all to memzero() ...
David
> +
> cmpq $64,%rcx
> jae .Lunrolled
>
next prev parent reply other threads:[~2025-07-03 8:44 UTC|newest]
Thread overview: 60+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-01 9:58 [PATCHv8 00/17] x86: Enable Linear Address Space Separation support Kirill A. Shutemov
2025-07-01 9:58 ` [PATCHv8 01/17] x86/cpu: Enumerate the LASS feature bits Kirill A. Shutemov
2025-07-01 9:58 ` [PATCHv8 02/17] x86/asm: Introduce inline memcpy and memset Kirill A. Shutemov
2025-07-03 8:44 ` David Laight [this message]
2025-07-03 10:39 ` Kirill A. Shutemov
2025-07-03 12:15 ` David Laight
2025-07-03 13:33 ` Vegard Nossum
2025-07-03 16:52 ` David Laight
2025-07-03 14:10 ` Kirill A. Shutemov
2025-07-03 17:02 ` David Laight
2025-07-03 17:13 ` Dave Hansen
2025-07-04 9:04 ` Kirill A. Shutemov
2025-07-06 9:13 ` David Laight
2025-07-07 8:02 ` Kirill A. Shutemov
2025-07-07 9:33 ` David Laight
2025-07-01 9:58 ` [PATCHv8 03/17] x86/alternatives: Disable LASS when patching kernel alternatives Kirill A. Shutemov
2025-07-01 18:44 ` Sohil Mehta
2025-07-01 9:58 ` [PATCHv8 04/17] x86/cpu: Defer CR pinning setup until after EFI initialization Kirill A. Shutemov
2025-07-01 19:03 ` Sohil Mehta
2025-07-02 9:47 ` Kirill A. Shutemov
2025-07-01 23:10 ` Dave Hansen
2025-07-02 10:05 ` Kirill A. Shutemov
2025-07-04 12:23 ` Kirill A. Shutemov
2025-07-01 9:58 ` [PATCHv8 05/17] efi: Disable LASS around set_virtual_address_map() EFI call Kirill A. Shutemov
2025-07-01 9:58 ` [PATCHv8 06/17] x86/vsyscall: Do not require X86_PF_INSTR to emulate vsyscall Kirill A. Shutemov
2025-07-01 9:58 ` [PATCHv8 07/17] x86/vsyscall: Reorganize the #PF emulation code Kirill A. Shutemov
2025-07-01 9:58 ` [PATCHv8 08/17] x86/traps: Consolidate user fixups in exc_general_protection() Kirill A. Shutemov
2025-07-01 9:58 ` [PATCHv8 09/17] x86/vsyscall: Add vsyscall emulation for #GP Kirill A. Shutemov
2025-07-01 9:58 ` [PATCHv8 10/17] x86/vsyscall: Disable LASS if vsyscall mode is set to EMULATE Kirill A. Shutemov
2025-07-01 9:58 ` [PATCHv8 11/17] x86/cpu: Set LASS CR4 bit as pinning sensitive Kirill A. Shutemov
2025-07-01 22:51 ` Sohil Mehta
2025-07-01 9:58 ` [PATCHv8 12/17] x86/traps: Communicate a LASS violation in #GP message Kirill A. Shutemov
2025-07-02 0:36 ` Sohil Mehta
2025-07-02 10:10 ` Kirill A. Shutemov
2025-07-01 9:58 ` [PATCHv8 13/17] x86/traps: Generalize #GP address decode and hint code Kirill A. Shutemov
2025-07-02 0:54 ` Sohil Mehta
2025-07-01 9:58 ` [PATCHv8 14/17] x86/traps: Handle LASS thrown #SS Kirill A. Shutemov
2025-07-02 1:35 ` Sohil Mehta
2025-07-02 2:00 ` H. Peter Anvin
2025-07-02 2:06 ` H. Peter Anvin
2025-07-02 10:17 ` Kirill A. Shutemov
2025-07-02 14:37 ` H. Peter Anvin
2025-07-02 14:47 ` Kirill A. Shutemov
2025-07-02 17:10 ` H. Peter Anvin
2025-07-02 23:42 ` Andrew Cooper
2025-07-03 0:44 ` H. Peter Anvin
2025-07-06 9:22 ` David Laight
2025-07-06 15:07 ` H. Peter Anvin
2025-07-02 13:27 ` Kirill A. Shutemov
2025-07-02 17:56 ` Sohil Mehta
2025-07-03 10:40 ` Kirill A. Shutemov
2025-07-02 20:05 ` Sohil Mehta
2025-07-03 11:31 ` Kirill A. Shutemov
2025-07-03 20:12 ` Sohil Mehta
2025-07-04 9:23 ` Kirill A. Shutemov
2025-07-01 9:58 ` [PATCHv8 15/17] x86/cpu: Make LAM depend on LASS Kirill A. Shutemov
2025-07-01 23:03 ` Sohil Mehta
2025-07-01 9:58 ` [PATCHv8 16/17] x86/cpu: Enable LASS during CPU initialization Kirill A. Shutemov
2025-07-01 9:58 ` [PATCHv8 17/17] x86: Re-enable Linear Address Masking Kirill A. Shutemov
2025-07-01 23:13 ` Sohil Mehta
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250703094417.165e5893@pumpkin \
--to=david.laight.linux@gmail.com \
--cc=acme@redhat.com \
--cc=aik@amd.com \
--cc=akpm@linux-foundation.org \
--cc=alexander.shishkin@linux.intel.com \
--cc=ardb@kernel.org \
--cc=ast@kernel.org \
--cc=bp@alien8.de \
--cc=brijesh.singh@amd.com \
--cc=changbin.du@huawei.com \
--cc=christophe.leroy@csgroup.eu \
--cc=corbet@lwn.net \
--cc=daniel.sneddon@linux.intel.com \
--cc=dave.hansen@linux.intel.com \
--cc=ebiggers@google.com \
--cc=geert+renesas@glider.be \
--cc=houtao1@huawei.com \
--cc=hpa@zytor.com \
--cc=jgg@ziepe.ca \
--cc=jgross@suse.com \
--cc=jpoimboe@kernel.org \
--cc=kai.huang@intel.com \
--cc=kees@kernel.org \
--cc=kirill.shutemov@linux.intel.com \
--cc=leitao@debian.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-efi@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux@rasmusvillemoes.dk \
--cc=luto@kernel.org \
--cc=mcgrof@kernel.org \
--cc=mhiramat@kernel.org \
--cc=michael.roth@amd.com \
--cc=mingo@kernel.org \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=paulmck@kernel.org \
--cc=pawan.kumar.gupta@linux.intel.com \
--cc=peterz@infradead.org \
--cc=rick.p.edgecombe@intel.com \
--cc=rppt@kernel.org \
--cc=sandipan.das@amd.com \
--cc=shijie@os.amperecomputing.com \
--cc=sohil.mehta@intel.com \
--cc=tglx@linutronix.de \
--cc=tj@kernel.org \
--cc=tony.luck@intel.com \
--cc=vegard.nossum@oracle.com \
--cc=x86@kernel.org \
--cc=xin3.li@intel.com \
--cc=xiongwei.song@windriver.com \
--cc=ytcoode@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.