LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH v1 9/9] uaccess: Convert small fixed size copy_{to/from}_user() to scoped user access
From: Christophe Leroy (CS GROUP) @ 2026-04-27 17:13 UTC (permalink / raw)
  To: Yury Norov, Andrew Morton, Linus Torvalds, David Laight,
	Thomas Gleixner
  Cc: Christophe Leroy (CS GROUP), linux-alpha, linux-kernel,
	linux-snps-arc, linux-arm-kernel, linux-mips, linuxppc-dev, kvm,
	linux-riscv, linux-s390, sparclinux, linux-um, dmaengine,
	linux-efi, linux-fsi, amd-gfx, dri-devel, intel-gfx, linux-wpan,
	netdev, linux-wireless, linux-spi, linux-media, linux-staging,
	linux-serial, linux-usb, xen-devel, linux-fsdevel, ocfs2-devel,
	bpf, kasan-dev, linux-mm, linux-x25, rust-for-linux, linux-sound,
	sound-open-firmware, linux-csky, linux-hexagon, loongarch,
	linux-m68k, linux-openrisc, linux-parisc, linux-sh, linux-arch
In-Reply-To: <cover.1777306795.git.chleroy@kernel.org>

copy_{to/from}_user() are heavy functions optimised for copying large
blocks of memory between user and kernel space.

When the number of bytes to be copied is known at build time and small,
using scoped user access removes the burden of that optimisation.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 include/linux/uaccess.h | 47 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 33b7d0f5f808..3ac544527af2 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -50,6 +50,8 @@
  #define mask_user_address(src) (src)
 #endif
 
+#define SMALL_COPY_USER		64
+
 /*
  * Architectures should provide two primitives (raw_copy_{to,from}_user())
  * and get rid of their private instances of copy_{to,from}_user() and
@@ -191,6 +193,9 @@ _inline_copy_from_user(void *to, const void __user *from, unsigned long n)
 	return res;
 }
 
+static __always_inline __must_check unsigned long
+_small_copy_from_user(void *to, const void __user *from, unsigned long n);
+
 extern __must_check unsigned long
 _copy_from_user(void *, const void __user *, unsigned long);
 
@@ -207,6 +212,9 @@ _inline_copy_to_user(void __user *to, const void *from, unsigned long n)
 	return n;
 }
 
+static __always_inline __must_check unsigned long
+_small_copy_to_user(void __user *to, const void *from, unsigned long n);
+
 extern __must_check unsigned long
 _copy_to_user(void __user *, const void *, unsigned long);
 
@@ -215,6 +223,8 @@ copy_from_user_common(void *to, const void __user *from, unsigned long n, bool p
 {
 	if (!check_copy_size(to, n, false))
 		return n;
+	if (!partial && __builtin_constant_p(n) && n <= SMALL_COPY_USER)
+		return _small_copy_from_user(to, from, n);
 	if (IS_ENABLED(ARCH_WANTS_NOINLINE_COPY_USER))
 		return _copy_from_user(to, from, n);
 	else
@@ -239,6 +249,8 @@ copy_to_user_common(void __user *to, const void *from, unsigned long n, bool par
 	if (!check_copy_size(from, n, true))
 		return n;
 
+	if (!partial && __builtin_constant_p(n) && n <= SMALL_COPY_USER)
+		return _small_copy_to_user(to, from, n);
 	if (IS_ENABLED(ARCH_WANTS_NOINLINE_COPY_USER))
 		return _copy_to_user(to, from, n);
 	else
@@ -838,6 +850,41 @@ for (bool done = false; !done; done = true)					\
 #define scoped_user_rw_access(uptr, elbl)				\
 	scoped_user_rw_access_size(uptr, sizeof(*(uptr)), elbl)
 
+static __always_inline __must_check unsigned long
+_small_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	might_fault();
+	instrument_copy_from_user_before(to, from, n);
+	scoped_user_read_access_size(from, n, failed) {
+		/*
+		 * Ensure that bad access_ok() speculation will not lead
+		 * to nasty side effects *after* the copy is finished:
+		 */
+		if (!can_do_masked_user_access())
+			barrier_nospec();
+		unsafe_copy_from_user(to, from, n, failed);
+	}
+	instrument_copy_from_user_after(to, from, n, 0);
+	return 0;
+failed:
+	instrument_copy_from_user_after(to, from, n, n);
+	return n;
+}
+
+static __always_inline __must_check unsigned long
+_small_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	might_fault();
+	if (should_fail_usercopy())
+		return n;
+	instrument_copy_to_user(to, from, n);
+	scoped_user_write_access_size(to, n, failed)
+		unsafe_copy_to_user(to, from, n, failed);
+	return 0;
+failed:
+	return n;
+}
+
 /**
  * get_user_inline - Read user data inlined
  * @val:	The variable to store the value read from user memory
-- 
2.49.0



^ permalink raw reply related

* Re: [RFC PATCH v1 7/9] x86: Add unsafe_copy_from_user()
From: Yury Norov @ 2026-04-27 17:58 UTC (permalink / raw)
  To: Christophe Leroy (CS GROUP)
  Cc: Andrew Morton, Linus Torvalds, David Laight, Thomas Gleixner,
	linux-alpha, linux-kernel, linux-snps-arc, linux-arm-kernel,
	linux-mips, linuxppc-dev, kvm, linux-riscv, linux-s390,
	sparclinux, linux-um, dmaengine, linux-efi, linux-fsi, amd-gfx,
	dri-devel, intel-gfx, linux-wpan, netdev, linux-wireless,
	linux-spi, linux-media, linux-staging, linux-serial, linux-usb,
	xen-devel, linux-fsdevel, ocfs2-devel, bpf, kasan-dev, linux-mm,
	linux-x25, rust-for-linux, linux-sound, sound-open-firmware,
	linux-csky, linux-hexagon, loongarch, linux-m68k, linux-openrisc,
	linux-parisc, linux-sh, linux-arch
In-Reply-To: <0ee46bb228d97163fbdc14f2a7c52b93d8bc34ce.1777306795.git.chleroy@kernel.org>

On Mon, Apr 27, 2026 at 07:13:48PM +0200, Christophe Leroy (CS GROUP) wrote:
> At the time being, x86 and arm64 are missing unsafe_copy_from_user().

No, they don't. They (should) rely on a generic implementation from
linux/uaccess.h, like every other arch, except for  PPC and RISCV.

But they #define arch_unsafe_get_user, and the unsafe_copy_from_user()
becomes undefined conditionally on that.

So please, fix that bug instead of introducing another arch flavor.
We'd always choose generic version, unless there's strong evidence
that arch one is better.  


Thanks,
Yury
 
> Add it.
> 
> Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
> ---
>  arch/x86/include/asm/uaccess.h | 29 ++++++++++++++++++++++++-----
>  1 file changed, 24 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
> index 3a0dd3c2b233..10c458ffa399 100644
> --- a/arch/x86/include/asm/uaccess.h
> +++ b/arch/x86/include/asm/uaccess.h
> @@ -598,7 +598,7 @@ _label:									\
>   * We want the unsafe accessors to always be inlined and use
>   * the error labels - thus the macro games.
>   */
> -#define unsafe_copy_loop(dst, src, len, type, label)				\
> +#define unsafe_put_loop(dst, src, len, type, label)				\
>  	while (len >= sizeof(type)) {						\
>  		unsafe_put_user(*(type *)(src),(type __user *)(dst),label);	\
>  		dst += sizeof(type);						\
> @@ -611,10 +611,29 @@ do {									\
>  	char __user *__ucu_dst = (_dst);				\
>  	const char *__ucu_src = (_src);					\
>  	size_t __ucu_len = (_len);					\
> -	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label);	\
> -	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label);	\
> -	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label);	\
> -	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label);	\
> +	unsafe_put_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label);	\
> +	unsafe_put_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label);	\
> +	unsafe_put_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label);	\
> +	unsafe_put_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label);	\
> +} while (0)
> +
> +#define unsafe_get_loop(dst, src, len, type, label)				\
> +	while (len >= sizeof(type)) {						\
> +		unsafe_get_user(*(type __user *)(src),(type *)(dst),label);	\
> +		dst += sizeof(type);						\
> +		src += sizeof(type);						\
> +		len -= sizeof(type);						\
> +	}
> +
> +#define unsafe_copy_from_user(_dst,_src,_len,label)			\
> +do {									\
> +	char *__ucu_dst = (_dst);					\
> +	const char __user *__ucu_src = (_src);				\
> +	size_t __ucu_len = (_len);					\
> +	unsafe_get_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label);	\
> +	unsafe_get_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label);	\
> +	unsafe_get_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label);	\
> +	unsafe_get_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label);	\
>  } while (0)
>  
>  #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
> -- 
> 2.49.0
> 


^ permalink raw reply

* Re: [RFC PATCH v1 5/9] uaccess: Switch to copy_{to/from}_user_partial() when relevant
From: Alice Ryhl @ 2026-04-27 18:07 UTC (permalink / raw)
  To: Christophe Leroy (CS GROUP)
  Cc: Yury Norov, Andrew Morton, Linus Torvalds, David Laight,
	Thomas Gleixner, linux-alpha, linux-kernel, linux-snps-arc,
	linux-arm-kernel, linux-mips, linuxppc-dev, kvm, linux-riscv,
	linux-s390, sparclinux, linux-um, dmaengine, linux-efi, linux-fsi,
	amd-gfx, dri-devel, intel-gfx, linux-wpan, netdev, linux-wireless,
	linux-spi, linux-media, linux-staging, linux-serial, linux-usb,
	xen-devel, linux-fsdevel, ocfs2-devel, bpf, kasan-dev, linux-mm,
	linux-x25, rust-for-linux, linux-sound, sound-open-firmware,
	linux-csky, linux-hexagon, loongarch, linux-m68k, linux-openrisc,
	linux-parisc, linux-sh, linux-arch
In-Reply-To: <289b424e243ba2c4139ea04009cf8b9c448a87ff.1777306795.git.chleroy@kernel.org>

On Mon, Apr 27, 2026 at 07:13:46PM +0200, Christophe Leroy (CS GROUP) wrote:
> diff --git a/rust/helpers/uaccess.c b/rust/helpers/uaccess.c
> index 01de4fbbcc84..710e07cd60ae 100644
> --- a/rust/helpers/uaccess.c
> +++ b/rust/helpers/uaccess.c
> @@ -5,13 +5,13 @@
>  __rust_helper unsigned long
>  rust_helper_copy_from_user(void *to, const void __user *from, unsigned long n)
>  {
> -	return copy_from_user(to, from, n);
> +	return copy_from_user_partial(to, from, n);
>  }
>  
>  __rust_helper unsigned long
>  rust_helper_copy_to_user(void __user *to, const void *from, unsigned long n)
>  {
> -	return copy_to_user(to, from, n);
> +	return copy_to_user_partial(to, from, n);
>  }

No Rust code uses the return value for anything other than comparing it
with zero, so you can keep these as copy_[from|to]_user() without
issues.

Thanks, Alice


^ permalink raw reply

* Re: [RFC PATCH v1 7/9] x86: Add unsafe_copy_from_user()
From: Christophe Leroy (CS GROUP) @ 2026-04-27 18:20 UTC (permalink / raw)
  To: Yury Norov
  Cc: Andrew Morton, Linus Torvalds, David Laight, Thomas Gleixner,
	linux-alpha, linux-kernel, linux-snps-arc, linux-arm-kernel,
	linux-mips, linuxppc-dev, kvm, linux-riscv, linux-s390,
	sparclinux, linux-um, dmaengine, linux-efi, linux-fsi, amd-gfx,
	dri-devel, intel-gfx, linux-wpan, netdev, linux-wireless,
	linux-spi, linux-media, linux-staging, linux-serial, linux-usb,
	xen-devel, linux-fsdevel, ocfs2-devel, bpf, kasan-dev, linux-mm,
	linux-x25, rust-for-linux, linux-sound, sound-open-firmware,
	linux-csky, linux-hexagon, loongarch, linux-m68k, linux-openrisc,
	linux-parisc, linux-sh, linux-arch
In-Reply-To: <ae-j2_QirCySZD02@yury>



Le 27/04/2026 à 19:58, Yury Norov a écrit :
> On Mon, Apr 27, 2026 at 07:13:48PM +0200, Christophe Leroy (CS GROUP) wrote:
>> At the time being, x86 and arm64 are missing unsafe_copy_from_user().
> 
> No, they don't. They (should) rely on a generic implementation from
> linux/uaccess.h, like every other arch, except for  PPC and RISCV.
> 
> But they #define arch_unsafe_get_user, and the unsafe_copy_from_user()
> becomes undefined conditionally on that.
> 
> So please, fix that bug instead of introducing another arch flavor.
> We'd always choose generic version, unless there's strong evidence
> that arch one is better.

But they both implement the exact same unsafe_copy_to_user(). What is 
the difference here ?

Should that function become generic too ?

Christophe

> 
> 
> Thanks,
> Yury
>   
>> Add it.
>>
>> Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
>> ---
>>   arch/x86/include/asm/uaccess.h | 29 ++++++++++++++++++++++++-----
>>   1 file changed, 24 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
>> index 3a0dd3c2b233..10c458ffa399 100644
>> --- a/arch/x86/include/asm/uaccess.h
>> +++ b/arch/x86/include/asm/uaccess.h
>> @@ -598,7 +598,7 @@ _label:									\
>>    * We want the unsafe accessors to always be inlined and use
>>    * the error labels - thus the macro games.
>>    */
>> -#define unsafe_copy_loop(dst, src, len, type, label)				\
>> +#define unsafe_put_loop(dst, src, len, type, label)				\
>>   	while (len >= sizeof(type)) {						\
>>   		unsafe_put_user(*(type *)(src),(type __user *)(dst),label);	\
>>   		dst += sizeof(type);						\
>> @@ -611,10 +611,29 @@ do {									\
>>   	char __user *__ucu_dst = (_dst);				\
>>   	const char *__ucu_src = (_src);					\
>>   	size_t __ucu_len = (_len);					\
>> -	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label);	\
>> -	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label);	\
>> -	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label);	\
>> -	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label);	\
>> +	unsafe_put_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label);	\
>> +	unsafe_put_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label);	\
>> +	unsafe_put_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label);	\
>> +	unsafe_put_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label);	\
>> +} while (0)
>> +
>> +#define unsafe_get_loop(dst, src, len, type, label)				\
>> +	while (len >= sizeof(type)) {						\
>> +		unsafe_get_user(*(type __user *)(src),(type *)(dst),label);	\
>> +		dst += sizeof(type);						\
>> +		src += sizeof(type);						\
>> +		len -= sizeof(type);						\
>> +	}
>> +
>> +#define unsafe_copy_from_user(_dst,_src,_len,label)			\
>> +do {									\
>> +	char *__ucu_dst = (_dst);					\
>> +	const char __user *__ucu_src = (_src);				\
>> +	size_t __ucu_len = (_len);					\
>> +	unsafe_get_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label);	\
>> +	unsafe_get_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label);	\
>> +	unsafe_get_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label);	\
>> +	unsafe_get_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label);	\
>>   } while (0)
>>   
>>   #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
>> -- 
>> 2.49.0
>>



^ permalink raw reply

* Re: [RFC PATCH v1 2/9] uaccess: Convert INLINE_COPY_{TO/FROM}_USER to kconfig and reduce ifdefery
From: Yury Norov @ 2026-04-27 18:39 UTC (permalink / raw)
  To: Christophe Leroy (CS GROUP)
  Cc: Andrew Morton, Linus Torvalds, David Laight, Thomas Gleixner,
	linux-alpha, Yury Norov, linux-kernel, linux-snps-arc,
	linux-arm-kernel, linux-mips, linuxppc-dev, kvm, linux-riscv,
	linux-s390, sparclinux, linux-um, dmaengine, linux-efi, linux-fsi,
	amd-gfx, dri-devel, intel-gfx, linux-wpan, netdev, linux-wireless,
	linux-spi, linux-media, linux-staging, linux-serial, linux-usb,
	xen-devel, linux-fsdevel, ocfs2-devel, bpf, kasan-dev, linux-mm,
	linux-x25, rust-for-linux, linux-sound, sound-open-firmware,
	linux-csky, linux-hexagon, loongarch, linux-m68k, linux-openrisc,
	linux-parisc, linux-sh, linux-arch
In-Reply-To: <9fe875d2f55af59c12708336c571a46038528678.1777306795.git.chleroy@kernel.org>

On Mon, Apr 27, 2026 at 07:13:43PM +0200, Christophe Leroy (CS GROUP) wrote:
> Among the 21 architectures supported by the kernel, 16 define both
> INLINE_COPY_TO_USER and INLINE_COPY_FROM_USER while the 5 other ones
> don't define any of the two.
> 
> To simplify and reduce risk of mistakes, convert them to a single
> kconfig item named CONFIG_ARCH_WANTS_NOINLINE_COPY which will be

We've got a special word for it: outline. Can you name it
CONFIG_OUTLINE_USERCOPY, or similar?

> selected by the 5 architectures that don't want inlined copy.
> 
> To minimise complication in a later patch, also remove
> ifdefery and replace it with IS_ENABLED().
> 
> Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>

Andrew has taken my consolidation patch for INLINE_COPY_USER:

https://lore.kernel.org/all/20260427085814.7ca0b134603b8d5813e23396@linux-foundation.org/

Please base your series on top of it.

I'm not sure this patch is relevant to the goal of your series. Maybe
send it separately?

Thanks,
Yury


^ permalink raw reply

* Re: [RFC PATCH v1 0/9] uaccess: Convert small fixed size copy_{to/from}_user() to scoped user access
From: Helge Deller @ 2026-04-27 19:01 UTC (permalink / raw)
  To: Christophe Leroy (CS GROUP), Yury Norov, Andrew Morton,
	Linus Torvalds, David Laight, Thomas Gleixner
  Cc: linux-alpha, linux-kernel, linux-snps-arc, linux-arm-kernel,
	linux-mips, linuxppc-dev, kvm, linux-riscv, linux-s390,
	sparclinux, linux-um, dmaengine, linux-efi, linux-fsi, amd-gfx,
	dri-devel, intel-gfx, linux-wpan, netdev, linux-wireless,
	linux-spi, linux-media, linux-staging, linux-serial, linux-usb,
	xen-devel, linux-fsdevel, ocfs2-devel, bpf, kasan-dev, linux-mm,
	linux-x25, rust-for-linux, linux-sound, sound-open-firmware,
	linux-csky, linux-hexagon, loongarch, linux-m68k, linux-openrisc,
	linux-parisc, linux-sh, linux-arch
In-Reply-To: <cover.1777306795.git.chleroy@kernel.org>

Hello Christophe,

On 4/27/26 19:13, Christophe Leroy (CS GROUP) wrote:
> A lot of copy_from_user() and copy_to_user() perform copies of small
> fixed size pieces of data between kernel and userspace, and don't
> care about partial copies.
> 
> copy_from_user() and copy_to_user() are big functions optimised for
> copying large amount of data, with cache management, etc ... 

They take care of much more: alignment, exception handling (e.g. if a user page
is read-only and the kernel writes to it), various rules about when to return
faults (e.g. on some arches reading from page 0 is allowed, on others it is not),
and much more. I've seen so many strange things during the last few years,
and you would need to get all of that right if you want to make "small" versions
of those functions.

> This is often overkill for small copies that could just be inlined
> instead.

Aren't put_user() and get_user() for that?
And if you inline you need to take care of faults as well, so indirectly
you will add more fault handlers (or fault pointers) to the generated code,
effectively making the kernel bigger.

> What makes things a bit more tricky is that those copy functions
> are designed to handle partial copies in case of page fault. But among
> the 6000 callers of those functions, only 2% really care about the
> quantity of no-copied data that those functions return. All other ones
> fails as soon as the returned value is not 0, returning -EACCESS.
> 
> So first step in this series is to introduce variants called
> copy_from_user_partial() and copy_to_user_partial() which will be
> called by the 2% users that care about the partial copy, then the
> original copy_from_user() and copy_to_user() are changed to return
> -EFAULT when the copy fails.
> 
> Then the second step is to implement copy of small fixed-size data
> with scoped user access instead of calling the arch specific heavy
> user copy functions.

I'm not against your idea or your patch, but I wonder if you
really gain much from it.
Have you done some size or speed comparisons ?

Helge

> Patch 5, can be split in different patches for each archicture or
> subsystem, but let's get a first feedback and agree on the principle.
> 
> Christophe Leroy (CS GROUP) (9):
>    uaccess: Split check_zeroed_user() out of usercopy.c
>    uaccess: Convert INLINE_COPY_{TO/FROM}_USER to kconfig and reduce
>      ifdefery
>    x86/umip: Be stricter in fixup_umip_exception()
>    uaccess: Introduce copy_{to/from}_user_partial()
>    uaccess: Switch to copy_{to/from}_user_partial() when relevant
>    uaccess: Change copy_{to/from}_user to return -EFAULT
>    x86: Add unsafe_copy_from_user()
>    arm64: Add unsafe_copy_from_user()
>    uaccess: Convert small fixed size copy_{to/from}_user() to scoped user
>      access
> 
>   arch/alpha/Kconfig                            |   1 +
>   arch/alpha/kernel/osf_sys.c                   |   4 +-
>   arch/alpha/kernel/termios.c                   |   2 +-
>   arch/arc/include/asm/uaccess.h                |   3 -
>   arch/arc/kernel/disasm.c                      |   2 +-
>   arch/arm/include/asm/uaccess.h                |   2 -
>   arch/arm64/include/asm/gcs.h                  |   2 +-
>   arch/arm64/include/asm/uaccess.h              |  30 +++--
>   arch/arm64/kernel/signal32.c                  |   2 +-
>   arch/csky/Kconfig                             |   1 +
>   arch/hexagon/include/asm/uaccess.h            |   3 -
>   arch/loongarch/include/asm/uaccess.h          |   3 -
>   arch/m68k/include/asm/uaccess.h               |   3 -
>   arch/microblaze/include/asm/uaccess.h         |   2 -
>   arch/mips/include/asm/uaccess.h               |   3 -
>   arch/mips/kernel/rtlx.c                       |   8 +-
>   arch/mips/kernel/vpe.c                        |   2 +-
>   arch/nios2/include/asm/uaccess.h              |   2 -
>   arch/openrisc/include/asm/uaccess.h           |   2 -
>   arch/parisc/include/asm/uaccess.h             |   3 -
>   arch/powerpc/Kconfig                          |   1 +
>   arch/powerpc/kvm/book3s_64_mmu_hv.c           |   4 +-
>   arch/powerpc/kvm/book3s_64_mmu_radix.c        |   4 +-
>   arch/powerpc/kvm/book3s_hv.c                  |   2 +-
>   arch/riscv/Kconfig                            |   1 +
>   arch/riscv/kernel/signal.c                    |   2 +-
>   arch/s390/include/asm/idals.h                 |   8 +-
>   arch/s390/include/asm/uaccess.h               |   3 -
>   arch/sh/include/asm/uaccess.h                 |   2 -
>   arch/sparc/include/asm/uaccess_32.h           |   3 -
>   arch/sparc/include/asm/uaccess_64.h           |   2 -
>   arch/sparc/kernel/termios.c                   |   2 +-
>   arch/um/include/asm/uaccess.h                 |   3 -
>   arch/um/kernel/process.c                      |   2 +-
>   arch/x86/Kconfig                              |   1 +
>   arch/x86/include/asm/uaccess.h                |  29 ++++-
>   arch/x86/kernel/umip.c                        |   2 +-
>   arch/x86/lib/insn-eval.c                      |   2 +-
>   arch/x86/um/signal.c                          |   2 +-
>   arch/xtensa/include/asm/uaccess.h             |   2 -
>   drivers/android/binder_alloc.c                |   2 +-
>   drivers/comedi/comedi_fops.c                  |   4 +-
>   drivers/dma/idxd/cdev.c                       |   2 +-
>   drivers/firmware/efi/test/efi_test.c          |   2 +-
>   drivers/fsi/fsi-scom.c                        |   2 +-
>   .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c |   2 +-
>   drivers/gpu/drm/i915/gt/intel_sseu.c          |   4 +-
>   drivers/gpu/drm/i915/i915_gem.c               |   4 +-
>   drivers/hwtracing/intel_th/msu.c              |   2 +-
>   drivers/misc/ibmvmc.c                         |   2 +-
>   drivers/misc/vmw_vmci/vmci_host.c             |   2 +-
>   drivers/most/most_cdev.c                      |   2 +-
>   drivers/net/ieee802154/ca8210.c               |   4 +-
>   drivers/net/wireless/ath/wil6210/debugfs.c    |   2 +-
>   .../intel/iwlwifi/pcie/gen1_2/trans.c         |   2 +-
>   drivers/net/wireless/ti/wlcore/debugfs.c      |   2 +-
>   drivers/ps3/ps3-lpm.c                         |   2 +-
>   drivers/s390/crypto/zcrypt_api.h              |   4 +-
>   drivers/spi/spidev.c                          |   2 +-
>   .../staging/media/atomisp/pci/atomisp_cmd.c   |   8 +-
>   drivers/tty/tty_ioctl.c                       |  14 +--
>   drivers/tty/vt/vc_screen.c                    |   4 +-
>   drivers/usb/gadget/function/f_hid.c           |   4 +-
>   drivers/usb/gadget/function/f_printer.c       |   2 +-
>   drivers/vfio/vfio_iommu_type1.c               |   4 +-
>   drivers/xen/xenbus/xenbus_dev_frontend.c      |   2 +-
>   fs/namespace.c                                |   2 +-
>   fs/ocfs2/dlmfs/dlmfs.c                        |   2 +-
>   fs/proc/base.c                                |   4 +-
>   include/asm-generic/uaccess.h                 |   2 -
>   include/linux/bpfptr.h                        |   2 +-
>   include/linux/sockptr.h                       |   4 +-
>   include/linux/uaccess.h                       | 107 ++++++++++++++----
>   ipc/msg.c                                     |   8 +-
>   ipc/sem.c                                     |   8 +-
>   ipc/shm.c                                     |  18 +--
>   kernel/regset.c                               |   2 +-
>   kernel/sys.c                                  |   4 +-
>   lib/Kconfig                                   |   3 +
>   lib/Makefile                                  |   4 +-
>   lib/kfifo.c                                   |   8 +-
>   lib/{usercopy.c => usercheck.c}               |  22 ----
>   lib/usercopy.c                                |  66 -----------
>   mm/kasan/kasan_test_c.c                       |   4 +-
>   mm/memory.c                                   |   2 +-
>   net/x25/af_x25.c                              |   2 +-
>   rust/helpers/uaccess.c                        |   6 +-
>   sound/pci/emu10k1/emufx.c                     |   4 +-
>   sound/pci/rme9652/hdsp.c                      |   6 +-
>   sound/soc/intel/avs/probes.c                  |   6 +-
>   sound/soc/sof/compress.c                      |  12 +-
>   sound/soc/sof/sof-client-probes.c             |   6 +-
>   92 files changed, 269 insertions(+), 288 deletions(-)
>   copy lib/{usercopy.c => usercheck.c} (73%)
> 



^ permalink raw reply

* Re: [RFC PATCH v1 5/9] uaccess: Switch to copy_{to/from}_user_partial() when relevant
From: Linus Torvalds @ 2026-04-27 19:01 UTC (permalink / raw)
  To: Christophe Leroy (CS GROUP)
  Cc: Yury Norov, Andrew Morton, David Laight, Thomas Gleixner,
	linux-alpha, linux-kernel, linux-snps-arc, linux-arm-kernel,
	linux-mips, linuxppc-dev, kvm, linux-riscv, linux-s390,
	sparclinux, linux-um, dmaengine, linux-efi, linux-fsi, amd-gfx,
	dri-devel, intel-gfx, linux-wpan, netdev, linux-wireless,
	linux-spi, linux-media, linux-staging, linux-serial, linux-usb,
	xen-devel, linux-fsdevel, ocfs2-devel, bpf, kasan-dev, linux-mm,
	linux-x25, rust-for-linux, linux-sound, sound-open-firmware,
	linux-csky, linux-hexagon, loongarch, linux-m68k, linux-openrisc,
	linux-parisc, linux-sh, linux-arch
In-Reply-To: <289b424e243ba2c4139ea04009cf8b9c448a87ff.1777306795.git.chleroy@kernel.org>

[-- Attachment #1: Type: text/plain, Size: 6419 bytes --]

On Mon, 27 Apr 2026 at 10:18, Christophe Leroy (CS GROUP)
<chleroy@kernel.org> wrote:
>
> In a subsequent patch, copy_{to/from}_user() will be modified to
> return -EFAULT when copy fails.

Please don't do this.

This is a maintenance nightmare, and changes pretty much three decades
of semantics, and will cause *very* subtle backporting issues if
somebody happens to rely on the old / new behavior.

I understand the reasoning for the change, but I really don't think
the pain of creating yet another user copy interface is worth it.

We already have a lot of different versions of user copies for
different reasons, and while they all tend to have a good reason (and
some not-so-good, but historical reasons) for existing, this one
doesn't seem worth it.

The main - perhaps only - reason for this "partial" version is that
you want to do that "automatically inlined and optimized fixed-sized
case".

But here's the thing: I think you can already do that. Yes, it
requires some improvements to unsafe_copy_from_user(), but *that*
interface doesn't have three decades of history associated with it,
_and_ you're extending on that one anyway in this series.

"unsafe_copy_from_user()" is very odd, is meant only for small simple
copies that can be inlined and it's special-cased for 'objtool' anyway
(because objtool would have complained about an out-of-line call,
although it could have been special-cased other ways).

In other words: unsafe_copy_from_user() is *very* close to what you
want for that "Oh, I noticed that it's a small fixed-size copy, so I
want to special-case copy-from-user for that".

The _only_ issue with unsafe_copy_from_user() is that you can't see
that there were partial successes. But if *that* was fixed, then this
whole "create a new copy_from_user interface" issue would just go
away.

So please - let's just change unsafe_copy_from_user() to be usable for
the partial case.

And the thing is, all the existing unsafe_copy_from_user()
implementations already effectively *have* the "how much did I not
copy" internally, and they actually do extra work to hide it, ie they
have things like that

        int _i;

that is "how many bytes have I copied" in the powerpc implementation,
or the x86 code does

        size_t __ucu_len = (_len);

where that "ucu_len" is updated as you go along and is literally the
"how many bytes are left to copy" return value that is missing from
this interface.

So what I would suggest is

 - introduce a new user accessor helper that is used for *both*
unsafe_copy_to/from_user() *and* the "inline small constant-sized
normal copy_to/from_user()" calls

 - it's the same thing as the existing  unsafe_copy_to/from_user()
implementation, except it exposes how many bytes are left to be copied
to the exception label.

IOW, it would look something like

     #define unsafe_copy_to_user_outlen(_dst,_src,_len,label)...

which is exactly the same as the current unsafe_copy_to_user(),
*except* it changes "_len" as it goes along.

And then you use that for both the "real" unsafe_copy_user and for the
"small constant values" case.

Just as an example, attached is a completely stupid rough draft of a
patch that does this for x86 and only for unsafe_copy_to_user().

And I made a very very hacky change to kernel/sys.c to see what the
code generation looks like.

This is what it results in on x86 with clang (with all the magic
.section data edited out):

        ... edited out the code to generate the times
        ... this is the actual user copy:
        # HERE!
        movabsq $81985529216486895, %rcx        # imm = 0x123456789ABCDEF
        cmpq    %rcx, %rbx
        cmovaq  %rcx, %rbx
        stac
        movq    %r13, (%rbx)                    # exception to .LBB45_8
        movq    %r14, 8(%rbx)                   # exception to .LBB45_8
        movq    %r15, 16(%rbx)                  # exception to .LBB45_8
        movq    %rax, 24(%rbx)                  # exception to .LBB45_8
        clac
.LBB45_6:
        movq    jiffies(%rip), %rdi
        callq   jiffies_64_to_clock_t
.LBB45_7:
        addq    $16, %rsp
        popq    %rbx
        popq    %r12
        popq    %r13
        popq    %r14
        popq    %r15
        retq
.LBB45_8:
        clac
        movq    $-14, %rax
        jmp     .LBB45_7

and notice how the compiler noticed that the 'outlen' isn't actually
used, and turned the exception label into just a "return -EFAULT" and
never actually generated any code for updating remaining lengths?

That actually looks pretty much optimal for a 32-byte user copy.

And it didn't involve changing the semantics at all.

Just to check, I changed that "times()" system call to return the
number of bytes uncopied instead (to emulate the "I actually want to
know what's left" case), and it generated this:

        # HERE!
        movabsq $81985529216486895, %rcx        # imm = 0x123456789ABCDEF
        cmpq    %rcx, %rbx
        cmovaq  %rcx, %rbx
        stac
        movl    $32, %ecx
        movq    %r13, (%rbx)                    # exception to .LBB45_7
        movl    $24, %ecx
        movq    %r15, 8(%rbx)                   # exception to .LBB45_7
        movl    $16, %ecx
        movq    %r14, 16(%rbx)                  # exception to .LBB45_7
        movl    $8, %ecx
        movq    %rax, 24(%rbx)                  # exception to .LBB45_7
        clac
        xorl    %ecx, %ecx
.LBB45_8:
        movq    %rcx, %rax
        addq    $16, %rsp
        popq    %rbx
        popq    %r12
        popq    %r13
        popq    %r14
        popq    %r15
        retq
.LBB45_6:
        movq    jiffies(%rip), %rdi
        jmp     jiffies_64_to_clock_t           # TAILCALL
.LBB45_7:
        clac
        jmp     .LBB45_8

so it all seems to work - although obviously the above is *not* the normal case.

NOTE NOTE NOTE! The attached patch is entirely untested. I obviously
did some "test code generation" with it, but I only *looked* at the
result, and maybe it has some fundamental problem that I just didn't
notice. So treat this as a "how about this approach" patch, not as
anything more serious than that.

And the kernel/sys.c hack is very obviously just that: a complete
hack for testing.

A real patch would do that "for small constant-sized copies, turn
copy_to_user() automatically into _small_copy_to_user()".

The attached is *not* a real patch. Treat it with the contempt it deserves.

             Linus

[-- Attachment #2: patch.diff --]
[-- Type: text/x-patch, Size: 2637 bytes --]

 arch/x86/include/asm/uaccess.h | 17 +++++++++++------
 include/linux/uaccess.h        | 16 ++++++++++++++++
 kernel/sys.c                   |  3 ++-
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 3a0dd3c2b233..3b2c57c91418 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -606,15 +606,20 @@ _label:									\
 		len -= sizeof(type);						\
 	}
 
-#define unsafe_copy_to_user(_dst,_src,_len,label)			\
+#define unsafe_copy_to_user_outlen(_dst,_src,_len,label)	\
 do {									\
 	char __user *__ucu_dst = (_dst);				\
 	const char *__ucu_src = (_src);					\
-	size_t __ucu_len = (_len);					\
-	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label);	\
-	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label);	\
-	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label);	\
-	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label);	\
+	unsafe_copy_loop(__ucu_dst, __ucu_src, _len, u64, label);	\
+	unsafe_copy_loop(__ucu_dst, __ucu_src, _len, u32, label);	\
+	unsafe_copy_loop(__ucu_dst, __ucu_src, _len, u16, label);	\
+	unsafe_copy_loop(__ucu_dst, __ucu_src, _len, u8, label);	\
+} while (0)
+
+#define unsafe_copy_to_user(_dst,_src,_len,label)			\
+do {									\
+	size_t __ucu_len = _len;					\
+	unsafe_copy_to_user_outlen(_dst,_src,__ucu_len,label);		\
 } while (0)
 
 #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 56328601218c..1a70ef70784c 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -874,4 +874,20 @@ void __noreturn usercopy_abort(const char *name, const char *detail,
 			       unsigned long len);
 #endif
 
+static __always_inline __must_check unsigned long
+_small_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	size_t uncopied = n;
+
+	might_fault();
+	if (should_fail_usercopy())
+		return n;
+	instrument_copy_to_user(to, from, n);
+	scoped_user_write_access_size(to, n, failed)
+		unsafe_copy_to_user_outlen(to, from, uncopied, failed);
+	return 0;
+failed:
+       return uncopied;
+}
+
 #endif		/* __LINUX_UACCESS_H__ */
diff --git a/kernel/sys.c b/kernel/sys.c
index 62e842055cc9..65b2d0103a73 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1067,7 +1067,8 @@ SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
 		struct tms tmp;
 
 		do_sys_times(&tmp);
-		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
+		asm volatile("# HERE!");
+		if (_small_copy_to_user(tbuf, &tmp, sizeof(struct tms)))
 			return -EFAULT;
 	}
 	force_successful_syscall_return();

^ permalink raw reply related

* Re: [RFC PATCH v1 7/9] x86: Add unsafe_copy_from_user()
From: Yury Norov @ 2026-04-27 19:19 UTC (permalink / raw)
  To: Christophe Leroy (CS GROUP)
  Cc: Andrew Morton, Linus Torvalds, David Laight, Thomas Gleixner,
	linux-alpha, Yury Norov, linux-kernel, linux-snps-arc,
	linux-arm-kernel, linux-mips, linuxppc-dev, kvm, linux-riscv,
	linux-s390, sparclinux, linux-um, dmaengine, linux-efi, linux-fsi,
	amd-gfx, dri-devel, intel-gfx, linux-wpan, netdev, linux-wireless,
	linux-spi, linux-media, linux-staging, linux-serial, linux-usb,
	xen-devel, linux-fsdevel, ocfs2-devel, bpf, kasan-dev, linux-mm,
	linux-x25, rust-for-linux, linux-sound, sound-open-firmware,
	linux-csky, linux-hexagon, loongarch, linux-m68k, linux-openrisc,
	linux-parisc, linux-sh, linux-arch
In-Reply-To: <63a4d0f6-0eb3-48cd-9f98-bf7b223b2606@kernel.org>

On Mon, Apr 27, 2026 at 08:20:38PM +0200, Christophe Leroy (CS GROUP) wrote:
> 
> 
> Le 27/04/2026 à 19:58, Yury Norov a écrit :
> > On Mon, Apr 27, 2026 at 07:13:48PM +0200, Christophe Leroy (CS GROUP) wrote:
> > > At the time being, x86 and arm64 are missing unsafe_copy_from_user().
> > 
> > No, they don't. They (should) rely on a generic implementation from
> > linux/uaccess.h, like every other arch, except for  PPC and RISCV.
> > 
> > But they #define arch_unsafe_get_user, and the unsafe_copy_from_user()
> > becomes undefined conditionally on that.
> > 
> > So please, fix that bug instead of introducing another arch flavor.
> > We'd always choose generic version, unless there's strong evidence
> > that arch one is better.
> 
> But they both implement the exact same unsafe_copy_to_user(). What is the
> difference here ?
>
> Should that function become generic too ?

This is what Linus said when he added the x86 implementation for copy_from_user()
in c512c69187197:

  Note that it only does this [arch version] for the copying _to_ user space,
  and we still don't have a unsafe version of copy_from_user().
  
  That's partly because we have no current users of it, but also partly
  because the copy_from_user() case is slightly different and cannot
  efficiently be implemented in terms of a unsafe_get_user() loop (because
  gcc can't do asm goto with outputs).

In the unsafe_copy_to_user() case, arch versions were justified. As I
said, I'm not against an arch version of unsafe_copy_from_user(), but it
should be explained very well.


^ permalink raw reply

* Re: [RFC PATCH v1 9/9] uaccess: Convert small fixed size copy_{to/from}_user() to scoped user access
From: Yury Norov @ 2026-04-27 20:12 UTC (permalink / raw)
  To: Christophe Leroy (CS GROUP)
  Cc: Andrew Morton, Linus Torvalds, David Laight, Thomas Gleixner,
	linux-alpha, Yury Norov, linux-kernel, linux-snps-arc,
	linux-arm-kernel, linux-mips, linuxppc-dev, kvm, linux-riscv,
	linux-s390, sparclinux, linux-um, dmaengine, linux-efi, linux-fsi,
	amd-gfx, dri-devel, intel-gfx, linux-wpan, netdev, linux-wireless,
	linux-spi, linux-media, linux-staging, linux-serial, linux-usb,
	xen-devel, linux-fsdevel, ocfs2-devel, bpf, kasan-dev, linux-mm,
	linux-x25, rust-for-linux, linux-sound, sound-open-firmware,
	linux-csky, linux-hexagon, loongarch, linux-m68k, linux-openrisc,
	linux-parisc, linux-sh, linux-arch
In-Reply-To: <8780eb2ef80575931a339e5225bc80eb13e9be6c.1777306795.git.chleroy@kernel.org>

On Mon, Apr 27, 2026 at 07:13:50PM +0200, Christophe Leroy (CS GROUP) wrote:
> copy_{to/from}_user() is a heavy function optimised for copy of large
> blocs of memory between user and kernel space.
> 
> When the number of bytes to be copied is known at build time and small,
> using scoped user access removes the burden of that optimisation.
> 
> Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
> ---
>  include/linux/uaccess.h | 47 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 47 insertions(+)
> 
> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
> index 33b7d0f5f808..3ac544527af2 100644
> --- a/include/linux/uaccess.h
> +++ b/include/linux/uaccess.h
> @@ -50,6 +50,8 @@
>   #define mask_user_address(src) (src)
>  #endif
>  
> +#define SMALL_COPY_USER		64

Let's make it L1_CACHE_BYTES, and maybe configurable?



^ permalink raw reply

* Re: [RFC PATCH v1 2/9] uaccess: Convert INLINE_COPY_{TO/FROM}_USER to kconfig and reduce ifdefery
From: Andrew Cooper @ 2026-04-27 20:39 UTC (permalink / raw)
  To: Yury Norov, Christophe Leroy (CS GROUP)
  Cc: Andrew Cooper, Andrew Morton, Linus Torvalds, David Laight,
	Thomas Gleixner, linux-alpha, Yury Norov, linux-kernel,
	linux-snps-arc, linux-arm-kernel, linux-mips, linuxppc-dev, kvm,
	linux-riscv, linux-s390, sparclinux, linux-um, dmaengine,
	linux-efi, linux-fsi, amd-gfx, dri-devel, intel-gfx, linux-wpan,
	netdev, linux-wireless, linux-spi, linux-media, linux-staging,
	linux-serial, linux-usb, xen-devel, linux-fsdevel, ocfs2-devel,
	bpf, kasan-dev, linux-mm, linux-x25, rust-for-linux, linux-sound,
	sound-open-firmware, linux-csky, linux-hexagon, loongarch,
	linux-m68k, linux-openrisc, linux-parisc, linux-sh, linux-arch
In-Reply-To: <ae-tVFVfx72oCC_i@yury>

On 27/04/2026 7:39 pm, Yury Norov wrote:
> On Mon, Apr 27, 2026 at 07:13:43PM +0200, Christophe Leroy (CS GROUP) wrote:
>> Among the 21 architectures supported by the kernel, 16 define both
>> INLINE_COPY_TO_USER and INLINE_COPY_FROM_USER while the 5 other ones
>> don't define any of the two.
>>
>> To simplify and reduce risk of mistakes, convert them to a single
>> kconfig item named CONFIG_ARCH_WANTS_NOINLINE_COPY which will be
> We've got a special word for it: outline. Can you name it
> CONFIG_OUTLINE_USERCOPY, or similar?

You can't swap the "in" for "out" like this.  "out of line" is the
opposite of "inline" in this context, while "outline" means something
different and unrelated.

~Andrew


^ permalink raw reply

* Re: [RFC PATCH v1 2/9] uaccess: Convert INLINE_COPY_{TO/FROM}_USER to kconfig and reduce ifdefery
From: Yury Norov @ 2026-04-27 20:47 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Christophe Leroy (CS GROUP), Andrew Morton, Linus Torvalds,
	David Laight, Thomas Gleixner, linux-alpha, Yury Norov,
	linux-kernel, linux-snps-arc, linux-arm-kernel, linux-mips,
	linuxppc-dev, kvm, linux-riscv, linux-s390, sparclinux, linux-um,
	dmaengine, linux-efi, linux-fsi, amd-gfx, dri-devel, intel-gfx,
	linux-wpan, netdev, linux-wireless, linux-spi, linux-media,
	linux-staging, linux-serial, linux-usb, xen-devel, linux-fsdevel,
	ocfs2-devel, bpf, kasan-dev, linux-mm, linux-x25, rust-for-linux,
	linux-sound, sound-open-firmware, linux-csky, linux-hexagon,
	loongarch, linux-m68k, linux-openrisc, linux-parisc, linux-sh,
	linux-arch
In-Reply-To: <f54c3c2b-33da-42a0-80b7-0f6615d930ce@citrix.com>

On Mon, Apr 27, 2026 at 09:39:33PM +0100, Andrew Cooper wrote:
> On 27/04/2026 7:39 pm, Yury Norov wrote:
> > On Mon, Apr 27, 2026 at 07:13:43PM +0200, Christophe Leroy (CS GROUP) wrote:
> >> Among the 21 architectures supported by the kernel, 16 define both
> >> INLINE_COPY_TO_USER and INLINE_COPY_FROM_USER while the 5 other ones
> >> don't define any of the two.
> >>
> >> To simplify and reduce risk of mistakes, convert them to a single
> >> kconfig item named CONFIG_ARCH_WANTS_NOINLINE_COPY which will be
> > We've got a special word for it: outline. Can you name it
> > CONFIG_OUTLINE_USERCOPY, or similar?
> 
> You can't swap the "in" for "out" like this.  "out of line" is the
> opposite of "inline" in this context, while "outline" means something
> different and unrelated.

Check KASAN_OUTLINE vs KASAN_INLINE for example


^ permalink raw reply

* Re: [RFC PATCH v1 5/9] uaccess: Switch to copy_{to/from}_user_partial() when relevant
From: David Laight @ 2026-04-27 21:29 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Christophe Leroy (CS GROUP), Yury Norov, Andrew Morton,
	Thomas Gleixner, linux-alpha, linux-kernel, linux-snps-arc,
	linux-arm-kernel, linux-mips, linuxppc-dev, kvm, linux-riscv,
	linux-s390, sparclinux, linux-um, dmaengine, linux-efi, linux-fsi,
	amd-gfx, dri-devel, intel-gfx, linux-wpan, netdev, linux-wireless,
	linux-spi, linux-media, linux-staging, linux-serial, linux-usb,
	xen-devel, linux-fsdevel, ocfs2-devel, bpf, kasan-dev, linux-mm,
	linux-x25, rust-for-linux, linux-sound, sound-open-firmware,
	linux-csky, linux-hexagon, loongarch, linux-m68k, linux-openrisc,
	linux-parisc, linux-sh, linux-arch
In-Reply-To: <CAHk-=whC1DZojwdMB1=sJWG2=dsCdfyU8N6tDE1qx50HRZ-WJQ@mail.gmail.com>

On Mon, 27 Apr 2026 12:01:23 -0700
Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Mon, 27 Apr 2026 at 10:18, Christophe Leroy (CS GROUP)
> <chleroy@kernel.org> wrote:
> >
> > In a subsequent patch, copy_{to/from}_user() will be modified to
> > return -EFAULT when copy fails.  
> 
> Please don't do this.
> 
> This is a maintenance nightmare, and changes pretty much three decades
> of semantics, and will cause *very* subtle backporting issues if
> somebody happens to rely on the old / new behavior.
> 
> I understand the reasoning for the change, but I really don't think
> the pain of creating yet another user copy interface is worth it.
> 
> We already have a lot of different versions of user copies for
> different reasons, and while they all tend to have a good reason (and
> some not-so-good, but historical reasons) for existing, this one
> doesn't seem worth it.
> 
> The main - perhaps only - reason for this "partial" version is that
> you want to do that "automatically inlined and optimized fixed-sized
> case".
> 
> But here's the thing: I think you can already do that. Yes, it
> requires some improvements to unsafe_copy_from_user(), but *that*
> interface doesn't have three decades of history associated with it,
> _and_ you're extending on that one anyway in this series.
> 
> "unsafe_copy_from_user()" is very odd, is meant only for small simple
> copies that can be inlined and it's special-cased for 'objtool' anyway
> (because objtool would have complained about an out-of-line call,
> although it could have been special-cased other ways).
> 
> In other words: unsafe_copy_from_user() is *very* close to what you
> want for that "Oh, I noticed that it's a small fixed-size copy, so I
> want to special-case copy-from-user for that".
> 
> The _only_ issue with unsafe_copy_from_user() is that you can't see
> that there were partial successes. But if *that* was fixed, then this
> whole "create a new copy_from_user interface" issue would just go
> away.
> 
> So please - let's just change unsafe_copy_from_user() to be usable for
> the partial case.
> 
> And the thing is, all the existing unsafe_copy_from_user()
> implementations already effectively *have* the "how much did I not
> copy" internally, and they actually do extra work to hide it, ie they
> have things like that
> 
>         int _i;
> 
> that is "how many bytes have I copied" in the powerpc implementation,
> or the x86 code does
> 
>         size_t __ucu_len = (_len);
> 
> where that "ucu_len" is updated as you go along and is literally the
> "how many bytes are left to copy" return value that is missing from
> this interface.
> 
> So what I would suggest is
> 
>  - introduce a new user accessor helper that is used for *both*
> unsafe_copy_to/from_user() *and* the "inline small constant-sized
> normal copy_to/from_user()" calls
> 
>  - it's the same thing as the existing  unsafe_copy_to/from_user()
> implementation, except it exposes how many bytes are left to be copied
> to the exception label.

I think there is a slight difference in that the normal copy_to_user()
will determine the exact offset of the error by retrying with byte copies.

There is also the issue of misaligned copies.

Then there is the 'bugbear' of hardened user copies.
Chasing down the stack to find whether the kernel buffer crosses
a stack frame is probably more expensive than the copy for the typically
small copies that will use on-stack buffers.

	David

> 
> IOW, it would look something like
> 
>      #define unsafe_copy_to_user_outlen(_dst,_src,_len,label)...
> 
> which is exactly the same as the current unsafe_copy_to_user(),
> *except* it changes "_len" as it goes along.
> 
> And then you use that for both the "real" unsafe_copy_user and for the
> "small constant values" case.
> 
> Just as an example, attached is a completely stupid rough draft of a
> patch that does this for x86 and only for unsafe_copy_to_user().
> 
> And I made a very very hacky change to kernel/sys.c to see what the
> code generation looks like.
> 
> This is what it results in on x86 with clang (with all the magic
> .section data edited out):
> 
>         ... edited out the code to generate the times
>         ... this is the actual user copy:
>         # HERE!
>         movabsq $81985529216486895, %rcx        # imm = 0x123456789ABCDEF
>         cmpq    %rcx, %rbx
>         cmovaq  %rcx, %rbx
>         stac
>         movq    %r13, (%rbx)                    # exception to .LBB45_8
>         movq    %r14, 8(%rbx)                   # exception to .LBB45_8
>         movq    %r15, 16(%rbx)                  # exception to .LBB45_8
>         movq    %rax, 24(%rbx)                  # exception to .LBB45_8
>         clac
> .LBB45_6:
>         movq    jiffies(%rip), %rdi
>         callq   jiffies_64_to_clock_t
> .LBB45_7:
>         addq    $16, %rsp
>         popq    %rbx
>         popq    %r12
>         popq    %r13
>         popq    %r14
>         popq    %r15
>         retq
> .LBB45_8:
>         clac
>         movq    $-14, %rax
>         jmp     .LBB45_7
> 
> and notice how the compiler noticed that the 'outlen' isn't actually
> used, and turned the exception label into just a "return -EFAULT" and
> never actually generated any code for updating remaining lengths?
> 
> That actually looks pretty much optimal for a 32-byte user copy.
> 
> And it didn't involve changing the semantics at all.
> 
> Just to check, I changed that "times()" system call to return the
> number of bytes uncopied instead (to emulate the "I actually want to
> know what's left" case), and it generated this:
> 
>         # HERE!
>         movabsq $81985529216486895, %rcx        # imm = 0x123456789ABCDEF
>         cmpq    %rcx, %rbx
>         cmovaq  %rcx, %rbx
>         stac
>         movl    $32, %ecx
>         movq    %r13, (%rbx)                    # exception to .LBB45_7
>         movl    $24, %ecx
>         movq    %r15, 8(%rbx)                   # exception to .LBB45_7
>         movl    $16, %ecx
>         movq    %r14, 16(%rbx)                  # exception to .LBB45_7
>         movl    $8, %ecx
>         movq    %rax, 24(%rbx)                  # exception to .LBB45_7
>         clac
>         xorl    %ecx, %ecx
> .LBB45_8:
>         movq    %rcx, %rax
>         addq    $16, %rsp
>         popq    %rbx
>         popq    %r12
>         popq    %r13
>         popq    %r14
>         popq    %r15
>         retq
> .LBB45_6:
>         movq    jiffies(%rip), %rdi
>         jmp     jiffies_64_to_clock_t           # TAILCALL
> .LBB45_7:
>         clac
>         jmp     .LBB45_8
> 
> so it all seems to work - although obviously the above is *not* the normal case.
> 
> NOTE NOTE NOTE! The attached patch is entirely untested. I obviously
> did some "test code generation" with it, but I only *looked* at the
> result, and maybe it has some fundamental problem that I just didn't
> notice. So treat this as a "how about this approach" patch, not as
> anything more serious than that.
> 
> And the kernel/sys.c hack is very obviously just that: a complete
> hack for testing.
> 
> A real patch would do that "for small constant-sized copies, turn
> copy_to_user() automatically into _small_copy_to_user()".
> 
> The attached is *not* a real patch. Treat it with the contempt it deserves.
> 
>              Linus



^ permalink raw reply

* Re: [RFC PATCH v1 5/9] uaccess: Switch to copy_{to/from}_user_partial() when relevant
From: Linus Torvalds @ 2026-04-27 21:39 UTC (permalink / raw)
  To: David Laight
  Cc: Christophe Leroy (CS GROUP), Yury Norov, Andrew Morton,
	Thomas Gleixner, linux-alpha, linux-kernel, linux-snps-arc,
	linux-arm-kernel, linux-mips, linuxppc-dev, kvm, linux-riscv,
	linux-s390, sparclinux, linux-um, dmaengine, linux-efi, linux-fsi,
	amd-gfx, dri-devel, intel-gfx, linux-wpan, netdev, linux-wireless,
	linux-spi, linux-media, linux-staging, linux-serial, linux-usb,
	xen-devel, linux-fsdevel, ocfs2-devel, bpf, kasan-dev, linux-mm,
	linux-x25, rust-for-linux, linux-sound, sound-open-firmware,
	linux-csky, linux-hexagon, loongarch, linux-m68k, linux-openrisc,
	linux-parisc, linux-sh, linux-arch
In-Reply-To: <20260427222914.1cb2dd3b@pumpkin>

On Mon, 27 Apr 2026 at 14:29, David Laight <david.laight.linux@gmail.com> wrote:
>
> I think there is a slight difference in that the normal copy_to_user()
> will determine the exact offset of the error by retrying with byte copies.

I have this dim memory that we decided that you can't rely on byte
exactness anyway, because not all architectures gave that guarantee
for the user copies.

But that thing came up many years ago, I might mis-remember.

            Linus


^ permalink raw reply

* Re: [RFC PATCH v1 7/9] x86: Add unsafe_copy_from_user()
From: Linus Torvalds @ 2026-04-27 21:52 UTC (permalink / raw)
  To: Yury Norov
  Cc: Christophe Leroy (CS GROUP), Andrew Morton, David Laight,
	Thomas Gleixner, linux-alpha, Yury Norov, linux-kernel,
	linux-snps-arc, linux-arm-kernel, linux-mips, linuxppc-dev, kvm,
	linux-riscv, linux-s390, sparclinux, linux-um, dmaengine,
	linux-efi, linux-fsi, amd-gfx, dri-devel, intel-gfx, linux-wpan,
	netdev, linux-wireless, linux-spi, linux-media, linux-staging,
	linux-serial, linux-usb, xen-devel, linux-fsdevel, ocfs2-devel,
	bpf, kasan-dev, linux-mm, linux-x25, rust-for-linux, linux-sound,
	sound-open-firmware, linux-csky, linux-hexagon, loongarch,
	linux-m68k, linux-openrisc, linux-parisc, linux-sh, linux-arch
In-Reply-To: <ae-2yLWSGnfeTvh1@yury>

On Mon, 27 Apr 2026 at 12:19, Yury Norov <ynorov@nvidia.com> wrote:
>
> This is what Linus said when added x86 implementation for copy_from_user()
> in c512c69187197:

Note that some things have happily changed in the six+ years since...

>   That's partly because we have no current users of it, but also partly
>   because the copy_from_user() case is slightly different and cannot
>   efficiently be implemented in terms of a unsafe_get_user() loop (because
>   gcc can't do asm goto with outputs).

now everybody can do asm goto with outputs.

Yes, it's disabled on older versions, so it's not *always* available,
but all modern versions do it. And if you care about performance, you
won't be using an old compiler.

             Linus


^ permalink raw reply

* Re: [RFC PATCH v1 7/9] x86: Add unsafe_copy_from_user()
From: Yury Norov @ 2026-04-27 22:30 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Christophe Leroy (CS GROUP), Andrew Morton, David Laight,
	Thomas Gleixner, linux-alpha, Yury Norov, linux-kernel,
	linux-snps-arc, linux-arm-kernel, linux-mips, linuxppc-dev, kvm,
	linux-riscv, linux-s390, sparclinux, linux-um, dmaengine,
	linux-efi, linux-fsi, amd-gfx, dri-devel, intel-gfx, linux-wpan,
	netdev, linux-wireless, linux-spi, linux-media, linux-staging,
	linux-serial, linux-usb, xen-devel, linux-fsdevel, ocfs2-devel,
	bpf, kasan-dev, linux-mm, linux-x25, rust-for-linux, linux-sound,
	sound-open-firmware, linux-csky, linux-hexagon, loongarch,
	linux-m68k, linux-openrisc, linux-parisc, linux-sh, linux-arch
In-Reply-To: <CAHk-=wgPrLy0FR3sEWBYQuNAac1axDASYMnTuPuxEU0WytzL7w@mail.gmail.com>

On Mon, Apr 27, 2026 at 02:52:05PM -0700, Linus Torvalds wrote:
> On Mon, 27 Apr 2026 at 12:19, Yury Norov <ynorov@nvidia.com> wrote:
> >
> > This is what Linus said when added x86 implementation for copy_from_user()
> > in c512c69187197:
> 
> Note that some things have happily changed in the six+ years since...
> 
> >   That's partly because we have no current users of it, but also partly
> >   because the copy_from_user() case is slightly different and cannot
> >   efficiently be implemented in terms of a unsafe_get_user() loop (because
> >   gcc can't do asm goto with outputs).
> 
> now everybody can do asm goto with outputs.
> 
> Yes, it's disabled on older versions, so it's not *always* available,
> but all modern versions do it. And if you care about performance, you
> won't be using an old compiler.

The minimal GCC version is 8.1, and asm goto with outputs is supported
since GCC-11. That would break the build, if we just switch to using it
without a "CC_IS_GCC && (GCC_VERSION >= 110100)" guard.

Is it worth maintaining two versions of the function? I don't know...

Thanks,
Yury


^ permalink raw reply

* Re: [mainline][BUG] Observed Workqueue lockups on offline CPUs.
From: Boqun Feng @ 2026-04-27 15:43 UTC (permalink / raw)
  To: Samir M
  Cc: Paul E . McKenney, Boqun Feng, LKML, Tejun Heo, RCU, linuxppc-dev,
	Shrikanth Hegde
In-Reply-To: <688280dc-78a2-4796-9eaf-e1c058836012@linux.ibm.com>

On Mon, Apr 27, 2026 at 05:00:10PM +0530, Samir M wrote:
> 

Hi Samir,

> On 27/04/26 3:32 pm, Samir M wrote:
> > Hi Paul,
> > 
> > I've been testing the latest upstream kernel on a PowerPC system and
> > encountered workqueue lockup issues that I've bisected to commit
> > 61bbcfb50514 ("srcu: Push srcu_node allocation to GP when
> > non-preemptible").
> > After booting, I'm seeing workqueue lockup warnings for CPUs 81-96,
> > which are offline on my system. The workqueues remain stuck for over 237
> > seconds:
> > 
> > [  243.309302][    C0] BUG: workqueue lockup - pool cpus=81 node=0
> > flags=0x4 nice=0 stuck for 237s!
> > [  243.309311][    C0] BUG: workqueue lockup - pool cpus=82 node=0
> > flags=0x4 nice=0 stuck for 237s!
> > [  243.309318][    C0] BUG: workqueue lockup - pool cpus=83 node=0
> > flags=0x4 nice=0 stuck for 237s!
> > [  243.309326][    C0] BUG: workqueue lockup - pool cpus=84 node=0
> > flags=0x4 nice=0 stuck for 237s!
> > [  243.309333][    C0] BUG: workqueue lockup - pool cpus=85 node=0
> > flags=0x4 nice=0 stuck for 237s!
> > [  243.309341][    C0] BUG: workqueue lockup - pool cpus=86 node=0
> > flags=0x4 nice=0 stuck for 237s!
> > [  243.309348][    C0] BUG: workqueue lockup - pool cpus=87 node=0
> > flags=0x4 nice=0 stuck for 237s!
> > [  243.309355][    C0] BUG: workqueue lockup - pool cpus=88 node=0
> > flags=0x4 nice=0 stuck for 237s!
> > [  243.309363][    C0] BUG: workqueue lockup - pool cpus=89 node=0
> > flags=0x4 nice=0 stuck for 237s!
> > [  243.309370][    C0] BUG: workqueue lockup - pool cpus=90 node=0
> > flags=0x4 nice=0 stuck for 237s!
> > [  243.309377][    C0] BUG: workqueue lockup - pool cpus=91 node=0
> > flags=0x4 nice=0 stuck for 237s!
> > [  243.309384][    C0] BUG: workqueue lockup - pool cpus=92 node=0
> > flags=0x4 nice=0 stuck for 237s!
> > [  243.309392][    C0] BUG: workqueue lockup - pool cpus=93 node=0
> > flags=0x4 nice=0 stuck for 237s!
> > [  243.309399][    C0] BUG: workqueue lockup - pool cpus=94 node=0
> > flags=0x4 nice=0 stuck for 237s!
> > [  243.309406][    C0] BUG: workqueue lockup - pool cpus=95 node=0
> > flags=0x4 nice=0 stuck for 237s!
> > [  243.309413][    C0] BUG: workqueue lockup - pool cpus=96 node=0
> > flags=0x4 nice=0 stuck for 237s!
> > 
> > Git bisect identified this as the first bad commit:
> > 
> > commit 61bbcfb50514a8a94e035a7349697a3790ab4783
> > Author: Paul E. McKenney <paulmck@kernel.org>
> > Date:   Fri Mar 20 20:29:20 2026 -0700
> > 
> >     srcu: Push srcu_node allocation to GP when non-preemptible
> > 
> >     When the srcutree.convert_to_big and srcutree.big_cpu_lim kernel boot
> >     parameters specify initialization-time allocation of the srcu_node
> >     tree for statically allocated srcu_struct structures (for example, in
> >     DEFINE_SRCU() at build time instead of init_srcu_struct() at
> > runtime),
> >     init_srcu_struct_nodes() will attempt to dynamically allocate this
> > tree
> >     at the first run-time update-side use of this srcu_struct structure,
> >     but while holding a raw spinlock. Because the memory allocator can
> >     acquire non-raw spinlocks, this can result in lockdep splats.
> > 
> >     This commit therefore uses the same SRCU_SIZE_ALLOC trick that is
> > used
> >     when the first run-time update-side use of this srcu_struct structure
> >     happens before srcu_init() is called. The actual allocation then
> > takes
> >     place from workqueue context at the ends of upcoming SRCU grace
> > periods.
> > 
> >     [boqun: Adjust the sha1 of the Fixes tag]
> > 
> >     Fixes: 175b45ed343a ("srcu: Use raw spinlocks so call_srcu() can be
> > used under preempt_disable()")
> >     Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> >     Signed-off-by: Boqun Feng <boqun@kernel.org>
> > 
> >  kernel/rcu/srcutree.c | 7 +++++--
> >  1 file changed, 5 insertions(+), 2 deletions(-)
> > 
> > Reverting this commit resolves the issue.
> > 
> > The problem appears to be that the workqueue is attempting to execute on
> > offline CPUs. The commit moves SRCU node allocation to workqueue context
> > to avoid lockdep issues with memory allocation under raw spinlocks,
> > which makes sense. However, it seems the workqueue scheduling doesn't
> > properly account for CPU online/offline state in this code path.
> > 
> > My test environment:
> > - Architecture: PowerPC
> > - Kernel version: Latest upstream (7.1-rc1)
> > - CPUs 81-96 are offline at boot time
> > 
> > I suspect the issue might be related to:
> > 1. Workqueue not checking CPU online status before scheduling SRCU
> > allocation work
> > 2. Missing CPU hotplug awareness in the new workqueue-based allocation
> > path
> > 3. Possible race condition with CPU hotplug events
> > 
> > Would it make sense to use queue_work_on() with explicit online CPU
> > selection, or add CPU hotplug handlers for this workqueue? I'm not
> > deeply familiar with the workqueue internals, so I might be missing
> > something.
> > Please let me know if you need any additional details or if you'd like
> > me to test any patches.
> > 
> > If you happen to fix the above issue, then please add below tag.
> > Reported-by: Samir M <samir@linux.ibm.com>
> > 
> > 
> > Thanks,
> > Samir
> 
> Hi Paul,
> 
> 
> I worked on fixing the issue and introduced the changes below. With these
> updates, I no longer observe any workqueue lockup messages for offline CPUs.
> Could you please review the changes and share your feedback?
> 
> The commit 61bbcfb50514 ("srcu: Push srcu_node allocation to GP when
> non-preemptible") introduced workqueue lockups on systems with offline
> CPUs. The issue occurs because srcu_queue_delayed_work_on() calls
> queue_work_on() with sdp->cpu, which may be offline, causing the
> workqueue to spin indefinitely on that CPU.
> 
> This patch fixes the issue by checking if the target CPU is online
> before queuing work on it. If the CPU is offline, we fall back to
> using queue_work() which will schedule the work on any available
> online CPU.
> 
> Fixes: 61bbcfb50514 ("srcu: Push srcu_node allocation to GP when
> non-preemptible")
> 
> Signed-off-by: Samir <samir@linux.ibm.com>

Thanks for the patch, but I wonder: have you checked this email thread:

	https://lore.kernel.org/rcu/ttd89ul@ub.hpns/	

Paul had a fix [1], and TJ had a "fix" [2] on workqueue side.

In general I think we discovered that as long as a CPU has been onlined
once, it's OK to queue the work on that CPU (which may be offlined) even
with our TJ's patch (whether we should do that is a different problem
;-)). Please do check whether Paul's fix works for your case, thanks!

[1]: https://lore.kernel.org/rcu/ed1fa6cd-7343-4ca3-8b9d-d699ca496f83@paulmck-laptop/
[2]: https://lore.kernel.org/rcu/adlHKowvhn8AGXCc@slm.duckdns.org/

Regards,
Boqun

> ---
>  kernel/rcu/srcutree.c | 7 ++++++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> index 0d01cd8c4b4a..55a90dd4a030 100644
> --- a/kernel/rcu/srcutree.c
> +++ b/kernel/rcu/srcutree.c
> @@ -869,10 +869,15 @@ static void srcu_delay_timer(struct timer_list *t)
>  static void srcu_queue_delayed_work_on(struct srcu_data *sdp,
>  unsigned long delay)
>  {
> -       if (!delay) {
> +       if (!delay && cpu_online(sdp->cpu)) {
>                 queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
>                 return;
> +       } else if (!delay) {
> +               /* CPU is offline, queue on any available CPU */
> +               queue_work(rcu_gp_wq, &sdp->work);
> +               return;
> +       }
> 
>         timer_reduce(&sdp->delay_work, jiffies + delay);
>  }
> --
> 
> 
> Thanks,
> Samir


^ permalink raw reply

* Re: [PATCH v6 00/24] PCI: Convert all dynamic sysfs attributes to static
From: Krzysztof Wilczyński @ 2026-04-27 23:34 UTC (permalink / raw)
  To: Shivaprasad G Bhat
  Cc: Bjorn Helgaas, Bjorn Helgaas, Manivannan Sadhasivam,
	Lorenzo Pieralisi, Magnus Lindholm, Matt Turner,
	Richard Henderson, Christophe Leroy, Madhavan Srinivasan,
	Michael Ellerman, Nicholas Piggin, Dexuan Cui,
	Krzysztof Hałasa, Lukas Wunner, Oliver O'Halloran,
	Saurabh Singh Sengar, Shuan He, Srivatsa Bhat, Ilpo Järvinen,
	linux-pci, linux-alpha, linuxppc-dev
In-Reply-To: <56ce57ca-b5ce-415c-b17e-bdcddf93770e@linux.ibm.com>

Hello,

> > I sadly do not own any Alpha or PowerPC hardware, so when I was testing
> > these architectures while working on the series, it would be only under
> > QEMU.
> 
> I tested the patches on PPC64 machines running as pSeries(on PowerVM and
> KVM) also PowerNV.
> 
> The sysfs attributes looks normal on boot, and on hotplug/unplug of devices
> and SRIOV use cases.
> 
> I see no warnings/errors in dmesg during PCI scans, initialization.

I appreciate that you took the time to test this on a real hardware.
 
> Tested-By: Shivaprasad G Bhat <sbhat@linux.ibm.com>

Thank you!

	Krzysztof


^ permalink raw reply

* Re: [PATCH v3] ibmveth: Disable GSO for packets with small MSS
From: patchwork-bot+netdevbpf @ 2026-04-28  2:20 UTC (permalink / raw)
  To: Mingming Cao
  Cc: netdev, davem, kuba, edumazet, pabeni, horms, bjking1, maddy, mpe,
	linuxppc-dev, stable, shaik.abdulla1, naveedaus
In-Reply-To: <20260424162917.65725-1-mmc@linux.ibm.com>

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Fri, 24 Apr 2026 09:29:17 -0700 you wrote:
> Some physical adapters on Power systems do not support segmentation
> offload when the MSS is less than 224 bytes. Attempting to send such
> packets causes the adapter to freeze, stopping all traffic until
> manually reset.
> 
> Implement ndo_features_check to disable GSO for packets with small MSS
> values. The network stack will perform software segmentation instead.
> 
> [...]

Here is the summary with links:
  - [v3] ibmveth: Disable GSO for packets with small MSS
    https://git.kernel.org/netdev/net/c/cc427d24ac64

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html




^ permalink raw reply

* Re: [PATCH v7 4/6] mm/sparse-vmemmap: Fix DAX vmemmap accounting with optimization
From: Muchun Song @ 2026-04-28  2:21 UTC (permalink / raw)
  To: David Hildenbrand (Arm)
  Cc: Muchun Song, Andrew Morton, Oscar Salvador, Michael Ellerman,
	Madhavan Srinivasan, Lorenzo Stoakes, Liam R . Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Nicholas Piggin, Christophe Leroy, aneesh.kumar, joao.m.martins,
	linux-mm, linuxppc-dev, linux-kernel, stable
In-Reply-To: <09298afa-9a36-4f29-a8e1-d4750c338df2@kernel.org>



> On Apr 27, 2026, at 18:17, David Hildenbrand (Arm) <david@kernel.org> wrote:
> 
> On 4/26/26 11:26, Muchun Song wrote:
>> When vmemmap optimization is enabled for DAX, the nr_memmap_pages
>> counter in /proc/vmstat is incorrect. The current code always accounts
>> for the full, non-optimized vmemmap size, but vmemmap optimization
>> reduces the actual number of vmemmap pages by reusing tail pages. This
>> causes the system to overcount vmemmap usage, leading to inaccurate
>> page statistics in /proc/vmstat.
>> 
>> Fix this by introducing section_nr_vmemmap_pages(), which returns the exact
>> vmemmap page count for a given pfn range based on whether optimization
>> is in effect.
>> 
>> Fixes: 15995a352474 ("mm: report per-page metadata information")
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
>> Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
>> Acked-by: Oscar Salvador <osalvador@suse.de>
>> ---
>> v6 -> v7:
>> - Refine the alignment assertions in section_nr_vmemmap_pages().
>> ---
>> mm/sparse-vmemmap.c | 34 ++++++++++++++++++++++++++++++----
>> 1 file changed, 30 insertions(+), 4 deletions(-)
>> 
>> diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
>> index 3340f6d30b01..01f448607bad 100644
>> --- a/mm/sparse-vmemmap.c
>> +++ b/mm/sparse-vmemmap.c
>> @@ -652,6 +652,31 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
>> }
>> }
>> 
>> +static int __meminit section_nr_vmemmap_pages(unsigned long pfn, unsigned long nr_pages,
>> + 		struct vmem_altmap *altmap, struct dev_pagemap *pgmap)
>> +{
>> + 	const unsigned int order = pgmap ? pgmap->vmemmap_shift : 0;
>> + 	const unsigned long pages_per_compound = 1UL << order;
>> +
>> + 	VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | nr_pages, PAGES_PER_SUBSECTION));
>> +
>> + 	if (!vmemmap_can_optimize(altmap, pgmap))
>> + 		return DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE);
>> +
>> + 	if (order < PFN_SECTION_SHIFT) {
>> + 		VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | nr_pages, pages_per_compound));
>> + 		return VMEMMAP_RESERVE_NR * nr_pages / pages_per_compound;
>> + 	}
>> +
>> + 	VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | nr_pages, PAGES_PER_SECTION));
>> + 	VM_WARN_ON_ONCE(nr_pages > PAGES_PER_SECTION);
> 
> I would just have done that at the very top, as this check applies to all cases.

My initial reasoning was that the current formula holds for compound pages smaller
than the section size, and we only need to impose limits when the page size exceeds
it. While the current callers of section_nr_vmemmap_pages() don't pass sizes larger
than a section, this will change in the future (see [1]).

I might have been overthinking the future-proofing, which led to this specific
implementation. However, I’m inclined to keep it as is for now, as I'll be updating
that series [1] soon and it will involve further changes to section_nr_vmemmap_pages().
That said, I'd love to hear your thoughts before I proceed.

[1] https://lore.kernel.org/linux-mm/20260405125240.2558577-43-songmuchun@bytedance.com/


> 
> Acked-by: David Hildenbrand (Arm) <david@kernel.org>

Thanks.

> 
> -- 
> Cheers,
> 
> David




^ permalink raw reply

* Re: [PATCH 5/5] fpga: m10bmc-sec: switch show_canceled_csk() to using sysfs_emit()
From: Yury Norov @ 2026-04-28  2:56 UTC (permalink / raw)
  To: Xu Yilun
  Cc: linux-kernel, Christophe Leroy (CS GROUP), Peter Zijlstra (Intel),
	Rafael J. Wysocki, Alexander Shishkin, Daniel Lezcano,
	Ingo Molnar, James Clark, Kees Cook, Lukasz Luba,
	Madhavan Srinivasan, Michael Ellerman, Mike Leach, Moritz Fischer,
	Nicholas Piggin, Russ Weight, Shrikanth Hegde, Suki K Poulose,
	Tom Rix, Thomas Weißschuh, Xu Yilun, Yury Norov, Zhang Rui,
	coresight, linux-arm-kernel, linux-fpga, linux-pm, linuxppc-dev,
	Jakub Kicinski
In-Reply-To: <acON/EU/GFvxRO1e@yilunxu-OptiPlex-7050>

On Wed, Mar 25, 2026 at 03:25:48PM +0800, Xu Yilun wrote:
> On Tue, Mar 24, 2026 at 02:38:04PM -0400, Yury Norov wrote:
> > On Tue, Mar 24, 2026 at 05:15:33PM +0800, Xu Yilun wrote:
> > > On Tue, Mar 03, 2026 at 03:08:41PM -0500, Yury Norov wrote:
> > > > Switch show_canceled_csk() to use the proper sysfs_emit("%*pbl").
> > > > 
> > > > Reviewed-by: Russ Weight <russ.weight@linux.dev>
> > > > Suggested-by: Thomas Weißschuh <linux@weissschuh.net>
> > > > Signed-off-by: Yury Norov <ynorov@nvidia.com>
> > > > ---
> > > >  drivers/fpga/intel-m10-bmc-sec-update.c | 3 ++-
> > > >  1 file changed, 2 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/drivers/fpga/intel-m10-bmc-sec-update.c b/drivers/fpga/intel-m10-bmc-sec-update.c
> > > > index 10f678b9ed36..ae364c6636eb 100644
> > > > --- a/drivers/fpga/intel-m10-bmc-sec-update.c
> > > > +++ b/drivers/fpga/intel-m10-bmc-sec-update.c
> > > > @@ -10,6 +10,7 @@
> > > >  #include <linux/firmware.h>
> > > >  #include <linux/mfd/intel-m10-bmc.h>
> > > >  #include <linux/mod_devicetable.h>
> > > > +#include <linux/mm.h>
> > > 
> > > Why add this header file?
> > 
> > When I was preparing the series, I had build issues without this. But
> > now I checked it against -rc5, and it's clean. Would you like me to
> > resend?
> 
> No need. Given that I'll pick this patch alone to fpga for-next with the
> fix.
> 
> Reviewed-by: Xu Yilun <yilun.xu@intel.com>

Hi Xu,

This patch is still not applied. This is the last non-lib user of
the bitmap_print_to_pagebuf(), and it blocks me from removing the
function.

Are you still going to pick it yourself? I can move the patch with
bitmap-for-next if you prefer.

Thanks,
Yury


^ permalink raw reply

* [PATCH] ASoC: fsl_xcvr: Fix event generation for cached controls
From: Cássio Gabriel @ 2026-04-28  3:07 UTC (permalink / raw)
  To: Shengjiu Wang, Xiubo Li, Fabio Estevam, Nicolin Chen,
	Liam Girdwood, Mark Brown, Jaroslav Kysela, Takashi Iwai,
	Viorel Suman
  Cc: linux-sound, linuxppc-dev, linux-kernel, Cássio Gabriel

ALSA controls should return 1 from a put callback when the control
value changes. fsl_xcvr_capds_put() and fsl_xcvr_tx_cs_put() both
update cached control data but always return 0, so ALSA suppresses
change notifications for the Capabilities Data Structure and playback
IEC958 channel status controls.

Compare the old and new cached values before copying the new data,
and return whether the control value changed.

Fixes: 28564486866f ("ASoC: fsl_xcvr: Add XCVR ASoC CPU DAI driver")
Signed-off-by: Cássio Gabriel <cassiogabrielcontato@gmail.com>
---
 sound/soc/fsl/fsl_xcvr.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c
index ee16cf681488..6677d3bf36ec 100644
--- a/sound/soc/fsl/fsl_xcvr.c
+++ b/sound/soc/fsl/fsl_xcvr.c
@@ -228,10 +228,14 @@ static int fsl_xcvr_capds_put(struct snd_kcontrol *kcontrol,
 {
 	struct snd_soc_dai *dai = snd_kcontrol_chip(kcontrol);
 	struct fsl_xcvr *xcvr = snd_soc_dai_get_drvdata(dai);
+	int changed;
 
-	memcpy(xcvr->cap_ds, ucontrol->value.bytes.data, FSL_XCVR_CAPDS_SIZE);
+	changed = memcmp(xcvr->cap_ds, ucontrol->value.bytes.data,
+			 sizeof(xcvr->cap_ds)) != 0;
+	memcpy(xcvr->cap_ds, ucontrol->value.bytes.data,
+	       sizeof(xcvr->cap_ds));
 
-	return 0;
+	return changed;
 }
 
 static struct snd_kcontrol_new fsl_xcvr_earc_capds_kctl = {
@@ -1040,10 +1044,15 @@ static int fsl_xcvr_tx_cs_put(struct snd_kcontrol *kcontrol,
 {
 	struct snd_soc_dai *dai = snd_kcontrol_chip(kcontrol);
 	struct fsl_xcvr *xcvr = snd_soc_dai_get_drvdata(dai);
+	int changed;
 
-	memcpy(xcvr->tx_iec958.status, ucontrol->value.iec958.status, 24);
+	changed = memcmp(xcvr->tx_iec958.status,
+			 ucontrol->value.iec958.status,
+			 sizeof(xcvr->tx_iec958.status)) != 0;
+	memcpy(xcvr->tx_iec958.status, ucontrol->value.iec958.status,
+	       sizeof(xcvr->tx_iec958.status));
 
-	return 0;
+	return changed;
 }
 
 static struct snd_kcontrol_new fsl_xcvr_rx_ctls[] = {

---
base-commit: 98421d94a1a6dcc3e8582eb62bedeccecda93339
change-id: 20260427-asoc-fsl-xcvr-event-generation-2f57c539de28

Best regards,
--  
Cássio Gabriel <cassiogabrielcontato@gmail.com>



^ permalink raw reply related

* Re: [PATCH v12 00/15] arm64/riscv: Add support for crashkernel CMA reservation
From: Jinjie Ruan @ 2026-04-28  3:30 UTC (permalink / raw)
  To: corbet, skhan, catalin.marinas, will, chenhuacai, kernel, maddy,
	mpe, npiggin, chleroy, pjw, palmer, aou, alex, tglx, mingo, bp,
	dave.hansen, hpa, robh, saravanak, akpm, bhe, vgoyal, dyoung,
	rdunlap, peterz, pawan.kumar.gupta, feng.tang, dapeng1.mi, kees,
	elver, paulmck, lirongqing, rppt, leitao, ardb, jbohac, cfsworks,
	tangyouling, sourabhjain, ritesh.list, hbathini, eajames, guoren,
	songshuaishuai, kevin.brodsky, vishal.moola, junhui.liu, coxu,
	fuqiang.wang, liaoyuanhong, takahiro.akashi, james.morse,
	lizhengyu3, x86, linux-doc, linux-kernel, linux-arm-kernel,
	loongarch, linuxppc-dev, linux-riscv, devicetree, kexec
In-Reply-To: <20260402072701.628293-1-ruanjinjie@huawei.com>



On 4/2/2026 3:26 PM, Jinjie Ruan wrote:
> The crash memory allocation and the exclusion of crashk_res, crashk_low_res
> and crashk_cma memory are almost identical across different architectures.
> This patch set handles them in crash core in a general way, which eliminates
> a lot of duplicated code.
> 
> And add support for crashkernel CMA reservation for arm64 and riscv.
> 
> Rebased on v7.0-rc1.
> 
> Basic second kernel boot tests were performed on QEMU platforms for x86,
> ARM64, and RISC-V architectures with the following parameters:
> 
> 	"cma=256M crashkernel=256M crashkernel=64M,cma"
> 
> Changes in v12:
> - Remove the unused "nr_mem_ranges" for x86.
> - Add "Fix crashk_low_res not exclude bug" test log.
> - Provide a separate patch for each architecture for using
>   crash_prepare_headers(), which will make the review more convenient.
> - Add Reviewed-by and Tested-by.
> - Link to v11: https://lore.kernel.org/all/20260328074013.3589544-1-ruanjinjie@huawei.com/
> 
> Changes in v11:
> - Avoid silently dropping crash memory if the crash kernel is built without
>   CONFIG_CMA.
> - Remove unnecessary "cmem->nr_ranges = 0" for arch_crash_populate_cmem()
>   as we use kvzalloc().
> - Provide a separate patch for each architecture to fix the existing
>   buffer overflow issue.
> - Add Acked-bys for arm64.
> 
> Changes in v10:
> - Fix crashk_low_res not excluded bug in the existing
>   RISC-V code.
> - Fix an existing memory leak issue in the existing PowerPC code.
> - Fix the ordering issue of adding CMA ranges to
>   "linux,usable-memory-range".
> - Fix an existing concurrency issue. A concurrent memory hotplug may occur
>   between reading memblock and attempting to fill cmem during kexec_load()
>   for almost all existing architectures.
> - Link to v9: https://lore.kernel.org/all/20260323072745.2481719-1-ruanjinjie@huawei.com/
> 
> Changes in v9:
> - Collect Reviewed-by and Acked-by, and prepare for Sashiko AI review.
> - Link to v8: https://lore.kernel.org/all/20260302035315.3892241-1-ruanjinjie@huawei.com/
> 
> Changes in v8:
> - Fix the build issues reported by kernel test robot and Sourabh.
> - Link to v7: https://lore.kernel.org/all/20260226130437.1867658-1-ruanjinjie@huawei.com/
> 
> Changes in v7:
> - Correct the inclusion of CMA-reserved ranges for kdump kernel in of/kexec
>   for arm64 and riscv.
> - Add Acked-by.
> - Link to v6: https://lore.kernel.org/all/20260224085342.387996-1-ruanjinjie@huawei.com/
> 
> Changes in v6:
> - Update the crash core exclude code as Mike suggested.
> - Rebased on v7.0-rc1.
> - Add acked-by.
> - Link to v5: https://lore.kernel.org/all/20260212101001.343158-1-ruanjinjie@huawei.com/
> 
> Jinjie Ruan (14):
>   riscv: kexec_file: Fix crashk_low_res not exclude bug
>   powerpc/crash: Fix possible memory leak in update_crash_elfcorehdr()
>   x86/kexec: Fix potential buffer overflow in prepare_elf_headers()
>   arm64: kexec_file: Fix potential buffer overflow in
>     prepare_elf_headers()
>   riscv: kexec_file: Fix potential buffer overflow in
>     prepare_elf_headers()
>   LoongArch: kexec: Fix potential buffer overflow in
>     prepare_elf_headers()
>   crash: Add crash_prepare_headers() to exclude crash kernel memory
>   arm64: kexec_file: Use crash_prepare_headers() helper to simplify code
>   x86/kexec: Use crash_prepare_headers() helper to simplify code
>   riscv: kexec_file: Use crash_prepare_headers() helper to simplify code
>   LoongArch: kexec: Use crash_prepare_headers() helper to simplify code
>   crash: Use crash_exclude_core_ranges() on powerpc
>   arm64: kexec: Add support for crashkernel CMA reservation
>   riscv: kexec: Add support for crashkernel CMA reservation

Hi everyone,

It seems the patches for crash core rework, powerpc, riscv, and arm64
are mostly ready to be merged. Could any maintainer help pick these up,
or should I rebase the entire series against v7.1-rc1?

> 
> Sourabh Jain (1):
>   powerpc/crash: sort crash memory ranges before preparing elfcorehdr
> 
>  .../admin-guide/kernel-parameters.txt         |  16 +--
>  arch/arm64/kernel/machine_kexec_file.c        |  43 +++-----
>  arch/arm64/mm/init.c                          |   5 +-
>  arch/loongarch/kernel/machine_kexec_file.c    |  43 +++-----
>  arch/powerpc/include/asm/kexec_ranges.h       |   1 -
>  arch/powerpc/kexec/crash.c                    |   7 +-
>  arch/powerpc/kexec/ranges.c                   | 101 +-----------------
>  arch/riscv/kernel/machine_kexec_file.c        |  42 +++-----
>  arch/riscv/mm/init.c                          |   5 +-
>  arch/x86/kernel/crash.c                       |  92 +++-------------
>  drivers/of/fdt.c                              |   9 +-
>  drivers/of/kexec.c                            |   9 ++
>  include/linux/crash_core.h                    |   9 ++
>  include/linux/crash_reserve.h                 |   4 +-
>  kernel/crash_core.c                           |  89 ++++++++++++++-
>  15 files changed, 193 insertions(+), 282 deletions(-)
> 



^ permalink raw reply

* Re: [PATCH 5/5] fpga: m10bmc-sec: switch show_canceled_csk() to using sysfs_emit()
From: Xu Yilun @ 2026-04-28  3:33 UTC (permalink / raw)
  To: Yury Norov
  Cc: linux-kernel, Christophe Leroy (CS GROUP), Peter Zijlstra (Intel),
	Rafael J. Wysocki, Alexander Shishkin, Daniel Lezcano,
	Ingo Molnar, James Clark, Kees Cook, Lukasz Luba,
	Madhavan Srinivasan, Michael Ellerman, Mike Leach, Moritz Fischer,
	Nicholas Piggin, Russ Weight, Shrikanth Hegde, Suki K Poulose,
	Tom Rix, Thomas Weißschuh, Xu Yilun, Yury Norov, Zhang Rui,
	coresight, linux-arm-kernel, linux-fpga, linux-pm, linuxppc-dev,
	Jakub Kicinski
In-Reply-To: <afAh1hq_mkRN3q2t@yury>

On Mon, Apr 27, 2026 at 10:56:22PM -0400, Yury Norov wrote:
> On Wed, Mar 25, 2026 at 03:25:48PM +0800, Xu Yilun wrote:
> > On Tue, Mar 24, 2026 at 02:38:04PM -0400, Yury Norov wrote:
> > > On Tue, Mar 24, 2026 at 05:15:33PM +0800, Xu Yilun wrote:
> > > > On Tue, Mar 03, 2026 at 03:08:41PM -0500, Yury Norov wrote:
> > > > > Switch show_canceled_csk() to use the proper sysfs_emit("%*pbl").
> > > > > 
> > > > > Reviewed-by: Russ Weight <russ.weight@linux.dev>
> > > > > Suggested-by: Thomas Weißschuh <linux@weissschuh.net>
> > > > > Signed-off-by: Yury Norov <ynorov@nvidia.com>
> > > > > ---
> > > > >  drivers/fpga/intel-m10-bmc-sec-update.c | 3 ++-
> > > > >  1 file changed, 2 insertions(+), 1 deletion(-)
> > > > > 
> > > > > diff --git a/drivers/fpga/intel-m10-bmc-sec-update.c b/drivers/fpga/intel-m10-bmc-sec-update.c
> > > > > index 10f678b9ed36..ae364c6636eb 100644
> > > > > --- a/drivers/fpga/intel-m10-bmc-sec-update.c
> > > > > +++ b/drivers/fpga/intel-m10-bmc-sec-update.c
> > > > > @@ -10,6 +10,7 @@
> > > > >  #include <linux/firmware.h>
> > > > >  #include <linux/mfd/intel-m10-bmc.h>
> > > > >  #include <linux/mod_devicetable.h>
> > > > > +#include <linux/mm.h>
> > > > 
> > > > Why add this header file?
> > > 
> > > When I was preparing the series, I had build issues without this. But
> > > now I checked it against -rc5, and it's clean. Would you like me to
> > > resend?
> > 
> > No need. Given that I'll pick this patch alone to fpga for-next with the
> > fix.
> > 
> > Reviewed-by: Xu Yilun <yilun.xu@intel.com>
> 
> Hi Xu,
> 
> This patch is still not applied. This is the last non-lib user of

I see it is already in v7.1-rc1, what's your expectation?

> the bitmap_print_to_pagebuf(), and it blocks me from removing the
> function.
> 
> Are you still going to pick it yourself? I can move the patch with
> bitmap-for-next if you prefer.
> 
> Thanks,
> Yury


^ permalink raw reply

* [PATCH v5] char/nvram: Remove redundant nvram_mutex
From: Venkat Rao Bagalkote @ 2026-04-28  6:15 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: linux-kernel, linux-kbuild, linuxppc-dev, Arnd Bergmann,
	Christophe Leroy, Ritesh Harjani, Madhavan Srinivasan,
	Tellakula Yeswanth Krishna, Venkat Rao Bagalkote

The global nvram_mutex in drivers/char/nvram.c is redundant and unused,
and this triggers compiler warnings on some configurations.

All platform-specific nvram operations already provide their own internal
synchronization, meaning the wrapper-level mutex does not provide any
additional safety.

Remove the nvram_mutex definition along with all remaining lock/unlock
users across PPC32, x86, and m68k code paths, and rely entirely on the
per-architecture nvram implementations for locking.

Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Tellakula Yeswanth Krishna <yeswanth@linux.ibm.com>
Signed-off-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
---
Changes since v4:
- No code changes
- Resent after v7.1-rc1 as suggested by Arnd Bergmann

 drivers/char/nvram.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c
index 9eff426a9286..e89cc1f1c89e 100644
--- a/drivers/char/nvram.c
+++ b/drivers/char/nvram.c
@@ -53,7 +53,6 @@
 #include <asm/nvram.h>
 #endif
 
-static DEFINE_MUTEX(nvram_mutex);
 static DEFINE_SPINLOCK(nvram_state_lock);
 static int nvram_open_cnt;	/* #times opened */
 static int nvram_open_mode;	/* special open modes */
@@ -310,11 +309,8 @@ static long nvram_misc_ioctl(struct file *file, unsigned int cmd,
 		break;
 #ifdef CONFIG_PPC32
 	case IOC_NVRAM_SYNC:
-		if (ppc_md.nvram_sync != NULL) {
-			mutex_lock(&nvram_mutex);
+		if (ppc_md.nvram_sync)
 			ppc_md.nvram_sync();
-			mutex_unlock(&nvram_mutex);
-		}
 		ret = 0;
 		break;
 #endif
@@ -324,11 +320,8 @@ static long nvram_misc_ioctl(struct file *file, unsigned int cmd,
 		if (!capable(CAP_SYS_ADMIN))
 			return -EACCES;
 
-		if (arch_nvram_ops.initialize != NULL) {
-			mutex_lock(&nvram_mutex);
+		if (arch_nvram_ops.initialize)
 			ret = arch_nvram_ops.initialize();
-			mutex_unlock(&nvram_mutex);
-		}
 		break;
 	case NVRAM_SETCKS:
 		/* just set checksum, contents unchanged (maybe useful after
@@ -336,11 +329,8 @@ static long nvram_misc_ioctl(struct file *file, unsigned int cmd,
 		if (!capable(CAP_SYS_ADMIN))
 			return -EACCES;
 
-		if (arch_nvram_ops.set_checksum != NULL) {
-			mutex_lock(&nvram_mutex);
+		if (arch_nvram_ops.set_checksum)
 			ret = arch_nvram_ops.set_checksum();
-			mutex_unlock(&nvram_mutex);
-		}
 		break;
 #endif /* CONFIG_X86 || CONFIG_M68K */
 	}
-- 
2.45.2



^ permalink raw reply related

* Re: [PATCH 09/49] mm: panic on memory allocation failure in sparse_init_nid()
From: Mike Rapoport @ 2026-04-28  6:56 UTC (permalink / raw)
  To: Muchun Song
  Cc: Andrew Morton, David Hildenbrand, Muchun Song, Oscar Salvador,
	Michael Ellerman, Madhavan Srinivasan, Lorenzo Stoakes,
	Liam R . Howlett, Vlastimil Babka, Suren Baghdasaryan,
	Michal Hocko, Nicholas Piggin, Christophe Leroy, aneesh.kumar,
	joao.m.martins, linux-mm, linuxppc-dev, linux-kernel
In-Reply-To: <20260405125240.2558577-10-songmuchun@bytedance.com>

On Sun, Apr 05, 2026 at 08:52:00PM +0800, Muchun Song wrote:
> When vmemmap pages allocation or usemap allocation fails, sparse_init_nid()
> currently only marks the corresponding section as non-present. However,
> subsequent code like memmap_init() iterating over PFNs does not check for
> non-present sections, leading to invalid memory access (additionally,
> subsection_map_init() accesses the unallocated usemap as well).
> 
> It is complex to audit and fix all boot-time PFN iterators to handle these
> partially initialized sections correctly. Since vmemmap and usemap allocation
> failures are extremely rare during early boot, the more appropriate approach
> is to expose the problem as early as possible.
> 
> Therefore, use BUG_ON() to panic immediately if allocation fails, instead of
> attempting a partial recovery that leads to obscure crashes later.
> 
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>

Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>

> ---
>  mm/sparse.c | 37 ++++++++-----------------------------
>  1 file changed, 8 insertions(+), 29 deletions(-)
> 
> diff --git a/mm/sparse.c b/mm/sparse.c
> index effdac6b0ab1..5c12b979a618 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -354,19 +354,15 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
>  				   unsigned long map_count)
>  {
>  	unsigned long pnum;
> -	struct page *map;
> -	struct mem_section *ms;
> -
> -	if (sparse_usage_init(nid, map_count)) {
> -		pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
> -		goto failed;
> -	}
>  
> +	if (sparse_usage_init(nid, map_count))
> +		panic("The node[%d] usemap allocation failed\n", nid);

Please consider using memblock_alloc_or_panic() in sparse_usage_init(), it
would simplify the code even more.

>  	sparse_buffer_init(map_count * section_map_size(), nid);
>  
>  	sparse_vmemmap_init_nid_early(nid);
>  
>  	for_each_present_section_nr(pnum_begin, pnum) {
> +		struct mem_section *ms;
>  		unsigned long pfn = section_nr_to_pfn(pnum);
>  
>  		if (pnum >= pnum_end)
> @@ -374,16 +370,12 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
>  
>  		ms = __nr_to_section(pnum);
>  		if (!preinited_vmemmap_section(ms)) {
> +			struct page *map;
> +
>  			map = __populate_section_memmap(pfn, PAGES_PER_SECTION,
> -					nid, NULL, NULL);
> -			if (!map) {
> -				pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
> -				       __func__, nid);
> -				pnum_begin = pnum;
> -				sparse_usage_fini();
> -				sparse_buffer_fini();
> -				goto failed;
> -			}
> +							nid, NULL, NULL);
> +			if (!map)
> +				panic("Populate section (%ld) on node[%d] failed\n", pnum, nid);
>  			memmap_boot_pages_add(DIV_ROUND_UP(PAGES_PER_SECTION * sizeof(struct page),
>  							   PAGE_SIZE));
>  			sparse_init_early_section(nid, map, pnum, 0);
> @@ -391,19 +383,6 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
>  	}
>  	sparse_usage_fini();
>  	sparse_buffer_fini();
> -	return;
> -failed:
> -	/*
> -	 * We failed to allocate, mark all the following pnums as not present,
> -	 * except the ones already initialized earlier.
> -	 */
> -	for_each_present_section_nr(pnum_begin, pnum) {
> -		if (pnum >= pnum_end)
> -			break;
> -		ms = __nr_to_section(pnum);
> -		if (!preinited_vmemmap_section(ms))
> -			ms->section_mem_map = 0;
> -	}
>  }
>  
>  /*
> -- 
> 2.20.1
> 

-- 
Sincerely yours,
Mike.


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox