LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH v1 9/9] uaccess: Convert small fixed size copy_{to/from}_user() to scoped user access
From: Christophe Leroy (CS GROUP) @ 2026-04-27 17:13 UTC (permalink / raw)
  To: Yury Norov, Andrew Morton, Linus Torvalds, David Laight,
	Thomas Gleixner
  Cc: Christophe Leroy (CS GROUP), linux-alpha, linux-kernel,
	linux-snps-arc, linux-arm-kernel, linux-mips, linuxppc-dev, kvm,
	linux-riscv, linux-s390, sparclinux, linux-um, dmaengine,
	linux-efi, linux-fsi, amd-gfx, dri-devel, intel-gfx, linux-wpan,
	netdev, linux-wireless, linux-spi, linux-media, linux-staging,
	linux-serial, linux-usb, xen-devel, linux-fsdevel, ocfs2-devel,
	bpf, kasan-dev, linux-mm, linux-x25, rust-for-linux, linux-sound,
	sound-open-firmware, linux-csky, linux-hexagon, loongarch,
	linux-m68k, linux-openrisc, linux-parisc, linux-sh, linux-arch
In-Reply-To: <cover.1777306795.git.chleroy@kernel.org>

copy_{to/from}_user() is a heavy function optimised for copying large
blocks of memory between user and kernel space.

When the number of bytes to be copied is known at build time and small,
using scoped user access removes the burden of that optimisation.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 include/linux/uaccess.h | 47 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 33b7d0f5f808..3ac544527af2 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -50,6 +50,8 @@
  #define mask_user_address(src) (src)
 #endif
 
+#define SMALL_COPY_USER		64
+
 /*
  * Architectures should provide two primitives (raw_copy_{to,from}_user())
  * and get rid of their private instances of copy_{to,from}_user() and
@@ -191,6 +193,9 @@ _inline_copy_from_user(void *to, const void __user *from, unsigned long n)
 	return res;
 }
 
+static __always_inline __must_check unsigned long
+_small_copy_from_user(void *to, const void __user *from, unsigned long n);
+
 extern __must_check unsigned long
 _copy_from_user(void *, const void __user *, unsigned long);
 
@@ -207,6 +212,9 @@ _inline_copy_to_user(void __user *to, const void *from, unsigned long n)
 	return n;
 }
 
+static __always_inline __must_check unsigned long
+_small_copy_to_user(void __user *to, const void *from, unsigned long n);
+
 extern __must_check unsigned long
 _copy_to_user(void __user *, const void *, unsigned long);
 
@@ -215,6 +223,8 @@ copy_from_user_common(void *to, const void __user *from, unsigned long n, bool p
 {
 	if (!check_copy_size(to, n, false))
 		return n;
+	if (!partial && __builtin_constant_p(n) && n <= SMALL_COPY_USER)
+		return _small_copy_from_user(to, from, n);
 	if (IS_ENABLED(ARCH_WANTS_NOINLINE_COPY_USER))
 		return _copy_from_user(to, from, n);
 	else
@@ -239,6 +249,8 @@ copy_to_user_common(void __user *to, const void *from, unsigned long n, bool par
 	if (!check_copy_size(from, n, true))
 		return n;
 
+	if (!partial && __builtin_constant_p(n) && n <= SMALL_COPY_USER)
+		return _small_copy_to_user(to, from, n);
 	if (IS_ENABLED(ARCH_WANTS_NOINLINE_COPY_USER))
 		return _copy_to_user(to, from, n);
 	else
@@ -838,6 +850,41 @@ for (bool done = false; !done; done = true)					\
 #define scoped_user_rw_access(uptr, elbl)				\
 	scoped_user_rw_access_size(uptr, sizeof(*(uptr)), elbl)
 
+static __always_inline __must_check unsigned long
+_small_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	might_fault();
+	instrument_copy_from_user_before(to, from, n);
+	scoped_user_read_access_size(from, n, failed) {
+		/*
+		 * Ensure that bad access_ok() speculation will not lead
+		 * to nasty side effects *after* the copy is finished:
+		 */
+		if (!can_do_masked_user_access())
+			barrier_nospec();
+		unsafe_copy_from_user(to, from, n, failed);
+	}
+	instrument_copy_from_user_after(to, from, n, 0);
+	return 0;
+failed:
+	instrument_copy_from_user_after(to, from, n, n);
+	return n;
+}
+
+static __always_inline __must_check unsigned long
+_small_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	might_fault();
+	if (should_fail_usercopy())
+		return n;
+	instrument_copy_to_user(to, from, n);
+	scoped_user_write_access_size(to, n, failed)
+		unsafe_copy_to_user(to, from, n, failed);
+	return 0;
+failed:
+	return n;
+}
+
 /**
  * get_user_inline - Read user data inlined
  * @val:	The variable to store the value read from user memory
-- 
2.49.0



^ permalink raw reply related

* [RFC PATCH v1 8/9] arm64: Add unsafe_copy_from_user()
From: Christophe Leroy (CS GROUP) @ 2026-04-27 17:13 UTC (permalink / raw)
  To: Yury Norov, Andrew Morton, Linus Torvalds, David Laight,
	Thomas Gleixner
  Cc: Christophe Leroy (CS GROUP), linux-alpha, linux-kernel,
	linux-snps-arc, linux-arm-kernel, linux-mips, linuxppc-dev, kvm,
	linux-riscv, linux-s390, sparclinux, linux-um, dmaengine,
	linux-efi, linux-fsi, amd-gfx, dri-devel, intel-gfx, linux-wpan,
	netdev, linux-wireless, linux-spi, linux-media, linux-staging,
	linux-serial, linux-usb, xen-devel, linux-fsdevel, ocfs2-devel,
	bpf, kasan-dev, linux-mm, linux-x25, rust-for-linux, linux-sound,
	sound-open-firmware, linux-csky, linux-hexagon, loongarch,
	linux-m68k, linux-openrisc, linux-parisc, linux-sh, linux-arch
In-Reply-To: <cover.1777306795.git.chleroy@kernel.org>

For the time being, x86 and arm64 are missing unsafe_copy_from_user().

Add it.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/arm64/include/asm/uaccess.h | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 1e20ec91b56f..adfdb52cd82b 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -437,7 +437,7 @@ static inline void user_access_restore(unsigned long enabled) { }
  * We want the unsafe accessors to always be inlined and use
  * the error labels - thus the macro games.
  */
-#define unsafe_copy_loop(dst, src, len, type, label)				\
+#define unsafe_put_loop(dst, src, len, type, label)				\
 	while (len >= sizeof(type)) {						\
 		unsafe_put_user(*(type *)(src),(type __user *)(dst),label);	\
 		dst += sizeof(type);						\
@@ -450,10 +450,29 @@ do {									\
 	char __user *__ucu_dst = (_dst);				\
 	const char *__ucu_src = (_src);					\
 	size_t __ucu_len = (_len);					\
-	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label);	\
-	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label);	\
-	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label);	\
-	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label);	\
+	unsafe_put_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label);	\
+	unsafe_put_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label);	\
+	unsafe_put_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label);	\
+	unsafe_put_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label);	\
+} while (0)
+
+#define unsafe_get_loop(dst, src, len, type, label)				\
+	while (len >= sizeof(type)) {						\
+		unsafe_get_user(*(type *)(dst),(type __user *)(src),label);	\
+		dst += sizeof(type);						\
+		src += sizeof(type);						\
+		len -= sizeof(type);						\
+	}
+
+#define unsafe_copy_from_user(_dst,_src,_len,label)			\
+do {									\
+	char *__ucu_dst = (_dst);					\
+	const char __user *__ucu_src = (_src);				\
+	size_t __ucu_len = (_len);					\
+	unsafe_get_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label);	\
+	unsafe_get_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label);	\
+	unsafe_get_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label);	\
+	unsafe_get_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label);	\
 } while (0)
 
 extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned long n);
-- 
2.49.0



^ permalink raw reply related

* [RFC PATCH v1 7/9] x86: Add unsafe_copy_from_user()
From: Christophe Leroy (CS GROUP) @ 2026-04-27 17:13 UTC (permalink / raw)
  To: Yury Norov, Andrew Morton, Linus Torvalds, David Laight,
	Thomas Gleixner
  Cc: Christophe Leroy (CS GROUP), linux-alpha, linux-kernel,
	linux-snps-arc, linux-arm-kernel, linux-mips, linuxppc-dev, kvm,
	linux-riscv, linux-s390, sparclinux, linux-um, dmaengine,
	linux-efi, linux-fsi, amd-gfx, dri-devel, intel-gfx, linux-wpan,
	netdev, linux-wireless, linux-spi, linux-media, linux-staging,
	linux-serial, linux-usb, xen-devel, linux-fsdevel, ocfs2-devel,
	bpf, kasan-dev, linux-mm, linux-x25, rust-for-linux, linux-sound,
	sound-open-firmware, linux-csky, linux-hexagon, loongarch,
	linux-m68k, linux-openrisc, linux-parisc, linux-sh, linux-arch
In-Reply-To: <cover.1777306795.git.chleroy@kernel.org>

For the time being, x86 and arm64 are missing unsafe_copy_from_user().

Add it.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/x86/include/asm/uaccess.h | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 3a0dd3c2b233..10c458ffa399 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -598,7 +598,7 @@ _label:									\
  * We want the unsafe accessors to always be inlined and use
  * the error labels - thus the macro games.
  */
-#define unsafe_copy_loop(dst, src, len, type, label)				\
+#define unsafe_put_loop(dst, src, len, type, label)				\
 	while (len >= sizeof(type)) {						\
 		unsafe_put_user(*(type *)(src),(type __user *)(dst),label);	\
 		dst += sizeof(type);						\
@@ -611,10 +611,29 @@ do {									\
 	char __user *__ucu_dst = (_dst);				\
 	const char *__ucu_src = (_src);					\
 	size_t __ucu_len = (_len);					\
-	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label);	\
-	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label);	\
-	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label);	\
-	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label);	\
+	unsafe_put_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label);	\
+	unsafe_put_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label);	\
+	unsafe_put_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label);	\
+	unsafe_put_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label);	\
+} while (0)
+
+#define unsafe_get_loop(dst, src, len, type, label)				\
+	while (len >= sizeof(type)) {						\
+		unsafe_get_user(*(type *)(dst),(type __user *)(src),label);	\
+		dst += sizeof(type);						\
+		src += sizeof(type);						\
+		len -= sizeof(type);						\
+	}
+
+#define unsafe_copy_from_user(_dst,_src,_len,label)			\
+do {									\
+	char *__ucu_dst = (_dst);					\
+	const char __user *__ucu_src = (_src);				\
+	size_t __ucu_len = (_len);					\
+	unsafe_get_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label);	\
+	unsafe_get_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label);	\
+	unsafe_get_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label);	\
+	unsafe_get_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label);	\
 } while (0)
 
 #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
-- 
2.49.0



^ permalink raw reply related

* [RFC PATCH v1 6/9] uaccess: Change copy_{to/from}_user to return -EFAULT
From: Christophe Leroy (CS GROUP) @ 2026-04-27 17:13 UTC (permalink / raw)
  To: Yury Norov, Andrew Morton, Linus Torvalds, David Laight,
	Thomas Gleixner
  Cc: Christophe Leroy (CS GROUP), linux-alpha, linux-kernel,
	linux-snps-arc, linux-arm-kernel, linux-mips, linuxppc-dev, kvm,
	linux-riscv, linux-s390, sparclinux, linux-um, dmaengine,
	linux-efi, linux-fsi, amd-gfx, dri-devel, intel-gfx, linux-wpan,
	netdev, linux-wireless, linux-spi, linux-media, linux-staging,
	linux-serial, linux-usb, xen-devel, linux-fsdevel, ocfs2-devel,
	bpf, kasan-dev, linux-mm, linux-x25, rust-for-linux, linux-sound,
	sound-open-firmware, linux-csky, linux-hexagon, loongarch,
	linux-m68k, linux-openrisc, linux-parisc, linux-sh, linux-arch
In-Reply-To: <cover.1777306795.git.chleroy@kernel.org>

Now that copy_{to/from}_user_partial() are used by the callers which
expect a partial copy with the number of bytes not copied as the return
value, change copy_{to/from}_user() to return an int, and return -EFAULT
when the copy is not complete.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 include/linux/uaccess.h | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 2d37173782b3..33b7d0f5f808 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -211,7 +211,7 @@ extern __must_check unsigned long
 _copy_to_user(void __user *, const void *, unsigned long);
 
 static __always_inline unsigned long __must_check
-copy_from_user(void *to, const void __user *from, unsigned long n)
+copy_from_user_common(void *to, const void __user *from, unsigned long n, bool partial)
 {
 	if (!check_copy_size(to, n, false))
 		return n;
@@ -221,10 +221,20 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
 		return _inline_copy_from_user(to, from, n);
 }
 
-#define copy_from_user_partial copy_from_user
+static __always_inline unsigned long __must_check
+copy_from_user_partial(void *to, const void __user *from, unsigned long n)
+{
+	return copy_from_user_common(to, from, n, true);
+}
+
+static __always_inline int __must_check
+copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	return copy_from_user_common(to, from, n, false) ? -EFAULT : 0;
+}
 
 static __always_inline unsigned long __must_check
-copy_to_user(void __user *to, const void *from, unsigned long n)
+copy_to_user_common(void __user *to, const void *from, unsigned long n, bool partial)
 {
 	if (!check_copy_size(from, n, true))
 		return n;
@@ -235,7 +245,17 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
 		return _inline_copy_to_user(to, from, n);
 }
 
-#define copy_to_user_partial copy_to_user
+static __always_inline unsigned long __must_check
+copy_to_user_partial(void __user *to, const void *from, unsigned long n)
+{
+	return copy_to_user_common(to, from, n, true);
+}
+
+static __always_inline int __must_check
+copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	return copy_to_user_common(to, from, n, false) ? -EFAULT : 0;
+}
 
 #ifndef copy_mc_to_kernel
 /*
-- 
2.49.0



^ permalink raw reply related

* [RFC PATCH v1 5/9] uaccess: Switch to copy_{to/from}_user_partial() when relevant
From: Christophe Leroy (CS GROUP) @ 2026-04-27 17:13 UTC (permalink / raw)
  To: Yury Norov, Andrew Morton, Linus Torvalds, David Laight,
	Thomas Gleixner
  Cc: Christophe Leroy (CS GROUP), linux-alpha, linux-kernel,
	linux-snps-arc, linux-arm-kernel, linux-mips, linuxppc-dev, kvm,
	linux-riscv, linux-s390, sparclinux, linux-um, dmaengine,
	linux-efi, linux-fsi, amd-gfx, dri-devel, intel-gfx, linux-wpan,
	netdev, linux-wireless, linux-spi, linux-media, linux-staging,
	linux-serial, linux-usb, xen-devel, linux-fsdevel, ocfs2-devel,
	bpf, kasan-dev, linux-mm, linux-x25, rust-for-linux, linux-sound,
	sound-open-firmware, linux-csky, linux-hexagon, loongarch,
	linux-m68k, linux-openrisc, linux-parisc, linux-sh, linux-arch
In-Reply-To: <cover.1777306795.git.chleroy@kernel.org>

In a subsequent patch, copy_{to/from}_user() will be modified to
return -EFAULT when copy fails.

Among the 6000 calls to copy_{to/from}_user(), around 2% rely on
copy_{to/from}_user() doing partial copies and returning the number of
bytes not copied. Change those users to copy_{to/from}_user_partial().

This change was done based on whether callers assign the returned value
to a variable or just check whether the return value is 0 or not.

Several of them only use it for debugging, to print the number of bytes
not copied. Those could maybe be changed to stop reporting that number
and not be converted to partial copy.

Some non-trivial handling might have been unnecessarily converted. This
is not a problem and they can be converted back later for better
performance.

The callers were located with the following commands, then reviewed one
by one:

	sed -i s/"return copy_to_user("/"return copy_to_user_partial("/g `git grep -l "return copy_to_user("`
	sed -i s/" = copy_to_user("/" = copy_to_user_partial("/g `git grep -l " = copy_to_user("`
	sed -i s/" += copy_to_user("/" += copy_to_user_partial("/g `git grep -l " += copy_to_user("`
	sed -i s/" -= copy_to_user("/" -= copy_to_user_partial("/g `git grep -l " -= copy_to_user("`

Then the same was done with copy_from_user().

During the review, patterns like the following were rejected and kept
as is:

-	return copy_to_user(osf_stat, &tmp, sizeof(tmp)) ? -EFAULT : 0;
+	return copy_to_user_partial(osf_stat, &tmp, sizeof(tmp)) ? -EFAULT : 0;

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/alpha/kernel/osf_sys.c                    |  4 ++--
 arch/alpha/kernel/termios.c                    |  2 +-
 arch/arc/kernel/disasm.c                       |  2 +-
 arch/arm64/include/asm/gcs.h                   |  2 +-
 arch/arm64/kernel/signal32.c                   |  2 +-
 arch/mips/kernel/rtlx.c                        |  8 ++++----
 arch/mips/kernel/vpe.c                         |  2 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c            |  4 ++--
 arch/powerpc/kvm/book3s_64_mmu_radix.c         |  4 ++--
 arch/powerpc/kvm/book3s_hv.c                   |  2 +-
 arch/riscv/kernel/signal.c                     |  2 +-
 arch/s390/include/asm/idals.h                  |  8 ++++----
 arch/sparc/kernel/termios.c                    |  2 +-
 arch/um/kernel/process.c                       |  2 +-
 arch/x86/lib/insn-eval.c                       |  2 +-
 arch/x86/um/signal.c                           |  2 +-
 drivers/android/binder_alloc.c                 |  2 +-
 drivers/comedi/comedi_fops.c                   |  4 ++--
 drivers/dma/idxd/cdev.c                        |  2 +-
 drivers/firmware/efi/test/efi_test.c           |  2 +-
 drivers/fsi/fsi-scom.c                         |  2 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c  |  2 +-
 drivers/gpu/drm/i915/gt/intel_sseu.c           |  4 ++--
 drivers/gpu/drm/i915/i915_gem.c                |  4 ++--
 drivers/hwtracing/intel_th/msu.c               |  2 +-
 drivers/misc/ibmvmc.c                          |  2 +-
 drivers/misc/vmw_vmci/vmci_host.c              |  2 +-
 drivers/most/most_cdev.c                       |  2 +-
 drivers/net/ieee802154/ca8210.c                |  4 ++--
 drivers/net/wireless/ath/wil6210/debugfs.c     |  2 +-
 .../wireless/intel/iwlwifi/pcie/gen1_2/trans.c |  2 +-
 drivers/net/wireless/ti/wlcore/debugfs.c       |  2 +-
 drivers/ps3/ps3-lpm.c                          |  2 +-
 drivers/s390/crypto/zcrypt_api.h               |  4 ++--
 drivers/spi/spidev.c                           |  2 +-
 .../staging/media/atomisp/pci/atomisp_cmd.c    |  8 ++++----
 drivers/tty/tty_ioctl.c                        | 14 +++++++-------
 drivers/tty/vt/vc_screen.c                     |  4 ++--
 drivers/usb/gadget/function/f_hid.c            |  4 ++--
 drivers/usb/gadget/function/f_printer.c        |  2 +-
 drivers/vfio/vfio_iommu_type1.c                |  4 ++--
 drivers/xen/xenbus/xenbus_dev_frontend.c       |  2 +-
 fs/namespace.c                                 |  2 +-
 fs/ocfs2/dlmfs/dlmfs.c                         |  2 +-
 fs/proc/base.c                                 |  4 ++--
 include/linux/bpfptr.h                         |  2 +-
 include/linux/sockptr.h                        |  4 ++--
 ipc/msg.c                                      |  8 ++++----
 ipc/sem.c                                      |  8 ++++----
 ipc/shm.c                                      | 18 +++++++++---------
 kernel/regset.c                                |  2 +-
 kernel/sys.c                                   |  4 ++--
 lib/kfifo.c                                    |  8 ++++----
 mm/kasan/kasan_test_c.c                        |  4 ++--
 mm/memory.c                                    |  2 +-
 net/x25/af_x25.c                               |  2 +-
 rust/helpers/uaccess.c                         |  4 ++--
 sound/pci/emu10k1/emufx.c                      |  4 ++--
 sound/pci/rme9652/hdsp.c                       |  6 +++---
 sound/soc/intel/avs/probes.c                   |  6 +++---
 sound/soc/sof/compress.c                       | 12 ++++++------
 sound/soc/sof/sof-client-probes.c              |  6 +++---
 62 files changed, 122 insertions(+), 122 deletions(-)

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 7b6543d2cca3..c8ea39fdbb9f 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -944,7 +944,7 @@ get_tv32(struct timespec64 *o, struct timeval32 __user *i)
 static inline long
 put_tv32(struct timeval32 __user *o, struct timespec64 *i)
 {
-	return copy_to_user(o, &(struct timeval32){
+	return copy_to_user_partial(o, &(struct timeval32){
 				.tv_sec = i->tv_sec,
 				.tv_usec = i->tv_nsec / NSEC_PER_USEC},
 			    sizeof(struct timeval32));
@@ -953,7 +953,7 @@ put_tv32(struct timeval32 __user *o, struct timespec64 *i)
 static inline long
 put_tv_to_tv32(struct timeval32 __user *o, struct __kernel_old_timeval *i)
 {
-	return copy_to_user(o, &(struct timeval32){
+	return copy_to_user_partial(o, &(struct timeval32){
 				.tv_sec = i->tv_sec,
 				.tv_usec = i->tv_usec},
 			    sizeof(struct timeval32));
diff --git a/arch/alpha/kernel/termios.c b/arch/alpha/kernel/termios.c
index a4c29a22edf7..a3693c29a0fd 100644
--- a/arch/alpha/kernel/termios.c
+++ b/arch/alpha/kernel/termios.c
@@ -52,5 +52,5 @@ int kernel_termios_to_user_termio(struct termio __user *termio,
 	v.c_cc[_VEOL2]  = termios->c_cc[VEOL2];
 	v.c_cc[_VSWTC]  = termios->c_cc[VSWTC];
 
-	return copy_to_user(termio, &v, sizeof(struct termio));
+	return copy_to_user_partial(termio, &v, sizeof(struct termio));
 }
diff --git a/arch/arc/kernel/disasm.c b/arch/arc/kernel/disasm.c
index ccc7e8c39eb3..a3ef9d079e7f 100644
--- a/arch/arc/kernel/disasm.c
+++ b/arch/arc/kernel/disasm.c
@@ -34,7 +34,7 @@ void __kprobes disasm_instr(unsigned long addr, struct disasm_state *state,
 	/* This fetches the upper part of the 32 bit instruction
 	 * in both the cases of Little Endian or Big Endian configurations. */
 	if (userspace) {
-		bytes_not_copied = copy_from_user(ins_buf,
+		bytes_not_copied = copy_from_user_partial(ins_buf,
 						(const void __user *) addr, 8);
 		if (bytes_not_copied > 6)
 			goto fault;
diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h
index 8fa0707069e8..7ee23a8130b0 100644
--- a/arch/arm64/include/asm/gcs.h
+++ b/arch/arm64/include/asm/gcs.h
@@ -139,7 +139,7 @@ static inline u64 get_user_gcs(unsigned long __user *addr, int *err)
 
 	/* Ensure previous GCS operation are visible before we read the page */
 	gcsb_dsync();
-	ret = copy_from_user(&load, addr, sizeof(load));
+	ret = copy_from_user_partial(&load, addr, sizeof(load));
 	if (ret != 0)
 		*err = ret;
 	return load;
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index bb3b526ff43f..7016d2a3bb76 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -53,7 +53,7 @@ static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
 	cset.sig[0] = set->sig[0] & 0xffffffffull;
 	cset.sig[1] = set->sig[0] >> 32;
 
-	return copy_to_user(uset, &cset, sizeof(*uset));
+	return copy_to_user_partial(uset, &cset, sizeof(*uset));
 }
 
 static inline int get_sigset_t(sigset_t *set,
diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c
index 18c509c59f33..bc468064194d 100644
--- a/arch/mips/kernel/rtlx.c
+++ b/arch/mips/kernel/rtlx.c
@@ -262,13 +262,13 @@ ssize_t rtlx_read(int index, void __user *buff, size_t count)
 	/* then how much from the read pointer onwards */
 	fl = min(count, (size_t)lx->buffer_size - lx->lx_read);
 
-	failed = copy_to_user(buff, lx->lx_buffer + lx->lx_read, fl);
+	failed = copy_to_user_partial(buff, lx->lx_buffer + lx->lx_read, fl);
 	if (failed)
 		goto out;
 
 	/* and if there is anything left at the beginning of the buffer */
 	if (count - fl)
-		failed = copy_to_user(buff + fl, lx->lx_buffer, count - fl);
+		failed = copy_to_user_partial(buff + fl, lx->lx_buffer, count - fl);
 
 out:
 	count -= failed;
@@ -304,13 +304,13 @@ ssize_t rtlx_write(int index, const void __user *buffer, size_t count)
 	/* first bit from write pointer to the end of the buffer, or count */
 	fl = min(count, (size_t) rt->buffer_size - rt->rt_write);
 
-	failed = copy_from_user(rt->rt_buffer + rt->rt_write, buffer, fl);
+	failed = copy_from_user_partial(rt->rt_buffer + rt->rt_write, buffer, fl);
 	if (failed)
 		goto out;
 
 	/* if there's any left copy to the beginning of the buffer */
 	if (count - fl)
-		failed = copy_from_user(rt->rt_buffer, buffer + fl, count - fl);
+		failed = copy_from_user_partial(rt->rt_buffer, buffer + fl, count - fl);
 
 out:
 	count -= failed;
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index b05ee21a1d67..5a8d72d6c80c 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -854,7 +854,7 @@ static ssize_t vpe_write(struct file *file, const char __user *buffer,
 		return -ENOMEM;
 	}
 
-	count -= copy_from_user(v->pbuffer + v->len, buffer, count);
+	count -= copy_from_user_partial(v->pbuffer + v->len, buffer, count);
 	if (!count)
 		return -EFAULT;
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2ccb3d138f46..1c43c7b8e801 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -2028,7 +2028,7 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
 		n = p->chars_left;
 		if (n > len)
 			n = len;
-		r = copy_to_user(buf, p->buf + p->buf_index, n);
+		r = copy_to_user_partial(buf, p->buf + p->buf_index, n);
 		n -= r;
 		p->chars_left -= n;
 		p->buf_index += n;
@@ -2068,7 +2068,7 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
 		p->chars_left = n;
 		if (n > len)
 			n = len;
-		r = copy_to_user(buf, p->buf, n);
+		r = copy_to_user_partial(buf, p->buf, n);
 		n -= r;
 		p->chars_left -= n;
 		p->buf_index = n;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 933fc7cb9afc..0a27e018d27b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -1307,7 +1307,7 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf,
 		n = p->chars_left;
 		if (n > len)
 			n = len;
-		r = copy_to_user(buf, p->buf + p->buf_index, n);
+		r = copy_to_user_partial(buf, p->buf + p->buf_index, n);
 		n -= r;
 		p->chars_left -= n;
 		p->buf_index += n;
@@ -1407,7 +1407,7 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf,
 		p->chars_left = n;
 		if (n > len)
 			n = len;
-		r = copy_to_user(buf, p->buf, n);
+		r = copy_to_user_partial(buf, p->buf, n);
 		n -= r;
 		p->chars_left -= n;
 		p->buf_index = n;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 61dbeea317f3..4c7a8f687c99 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2916,7 +2916,7 @@ static ssize_t debugfs_timings_read(struct file *file, char __user *buf,
 		return 0;
 	if (len > p->buflen - pos)
 		len = p->buflen - pos;
-	n = copy_to_user(buf, p->buf + pos, len);
+	n = copy_to_user_partial(buf, p->buf + pos, len);
 	if (n) {
 		if (n == len)
 			return -EFAULT;
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 59784dc117e4..4630dbad7428 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -139,7 +139,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
 	 * Copy the whole vector content from user space datap. Use
 	 * copy_from_user to prevent information leak.
 	 */
-	return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize);
+	return copy_from_user_partial(current->thread.vstate.datap, datap, riscv_v_vsize);
 }
 
 static long save_cfiss_state(struct pt_regs *regs, void __user *sc_cfi)
diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
index 06e1ec2afd5a..d86f4eb1ce42 100644
--- a/arch/s390/include/asm/idals.h
+++ b/arch/s390/include/asm/idals.h
@@ -301,14 +301,14 @@ static inline size_t idal_buffer_to_user(struct idal_buffer *ib, void __user *to
 	BUG_ON(count > ib->size);
 	for (i = 0; count > IDA_BLOCK_SIZE; i++) {
 		vaddr = dma64_to_virt(ib->data[i]);
-		left = copy_to_user(to, vaddr, IDA_BLOCK_SIZE);
+		left = copy_to_user_partial(to, vaddr, IDA_BLOCK_SIZE);
 		if (left)
 			return left + count - IDA_BLOCK_SIZE;
 		to = (void __user *)to + IDA_BLOCK_SIZE;
 		count -= IDA_BLOCK_SIZE;
 	}
 	vaddr = dma64_to_virt(ib->data[i]);
-	return copy_to_user(to, vaddr, count);
+	return copy_to_user_partial(to, vaddr, count);
 }
 
 /*
@@ -323,14 +323,14 @@ static inline size_t idal_buffer_from_user(struct idal_buffer *ib, const void __
 	BUG_ON(count > ib->size);
 	for (i = 0; count > IDA_BLOCK_SIZE; i++) {
 		vaddr = dma64_to_virt(ib->data[i]);
-		left = copy_from_user(vaddr, from, IDA_BLOCK_SIZE);
+		left = copy_from_user_partial(vaddr, from, IDA_BLOCK_SIZE);
 		if (left)
 			return left + count - IDA_BLOCK_SIZE;
 		from = (void __user *)from + IDA_BLOCK_SIZE;
 		count -= IDA_BLOCK_SIZE;
 	}
 	vaddr = dma64_to_virt(ib->data[i]);
-	return copy_from_user(vaddr, from, count);
+	return copy_from_user_partial(vaddr, from, count);
 }
 
 #endif
diff --git a/arch/sparc/kernel/termios.c b/arch/sparc/kernel/termios.c
index ee64965c27cd..db9c07b7d5ee 100644
--- a/arch/sparc/kernel/termios.c
+++ b/arch/sparc/kernel/termios.c
@@ -27,7 +27,7 @@ int kernel_termios_to_user_termio(struct termio __user *termio,
 		v.c_cc[_VMIN] = termios->c_cc[VMIN];
 		v.c_cc[_VTIME] = termios->c_cc[VTIME];
 	}
-	return copy_to_user(termio, &v, sizeof(struct termio));
+	return copy_to_user_partial(termio, &v, sizeof(struct termio));
 }
 
 int user_termios_to_kernel_termios(struct ktermios *k,
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 63b38a3f73f7..d41625dfa00b 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -252,7 +252,7 @@ EXPORT_SYMBOL(uml_strdup);
 
 int copy_from_user_proc(void *to, void __user *from, int size)
 {
-	return copy_from_user(to, from, size);
+	return copy_from_user_partial(to, from, size);
 }
 
 int singlestepping(void)
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index e03eeec55cfe..e7cb03ab26f1 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -1512,7 +1512,7 @@ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
 	if (insn_get_effective_ip(regs, &ip))
 		return -EINVAL;
 
-	not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE);
+	not_copied = copy_from_user_partial(buf, (void __user *)ip, MAX_INSN_SIZE);
 
 	return MAX_INSN_SIZE - not_copied;
 }
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 2934e170b0fe..e0fab7c1625b 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -40,7 +40,7 @@ static int copy_sc_from_user(struct pt_regs *regs,
 	/* Always make any pending restarted system calls return -EINTR */
 	current->restart_block.fn = do_no_restart_syscall;
 
-	err = copy_from_user(&sc, from, sizeof(sc));
+	err = copy_from_user_partial(&sc, from, sizeof(sc));
 	if (err)
 		return err;
 
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index e4488ad86a65..8ba9c57b489c 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -1346,7 +1346,7 @@ binder_alloc_copy_user_to_buffer(struct binder_alloc *alloc,
 					     buffer_offset, &pgoff);
 		size = min_t(size_t, bytes, PAGE_SIZE - pgoff);
 		kptr = kmap_local_page(page) + pgoff;
-		ret = copy_from_user(kptr, from, size);
+		ret = copy_from_user_partial(kptr, from, size);
 		kunmap_local(kptr);
 		if (ret)
 			return bytes - size + ret;
diff --git a/drivers/comedi/comedi_fops.c b/drivers/comedi/comedi_fops.c
index c09bbe04be6c..272fdc54fb81 100644
--- a/drivers/comedi/comedi_fops.c
+++ b/drivers/comedi/comedi_fops.c
@@ -2659,7 +2659,7 @@ static unsigned int comedi_buf_copy_to_user(struct comedi_subdevice *s,
 		unsigned int copy_amount = min(n, PAGE_SIZE - offset);
 		unsigned int uncopied;
 
-		uncopied = copy_to_user(dest, buf_page_list[page].virt_addr +
+		uncopied = copy_to_user_partial(dest, buf_page_list[page].virt_addr +
 					offset, copy_amount);
 		copy_amount -= uncopied;
 		n -= copy_amount;
@@ -2687,7 +2687,7 @@ static unsigned int comedi_buf_copy_from_user(struct comedi_subdevice *s,
 		unsigned int copy_amount = min(n, PAGE_SIZE - offset);
 		unsigned int uncopied;
 
-		uncopied = copy_from_user(buf_page_list[page].virt_addr +
+		uncopied = copy_from_user_partial(buf_page_list[page].virt_addr +
 					  offset, src, copy_amount);
 		copy_amount -= uncopied;
 		n -= copy_amount;
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index 0366c7cf3502..ac79bab6d6c3 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -751,7 +751,7 @@ int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr,
 	 * to addr in the mm.
 	 */
 	kthread_use_mm(mm);
-	left = copy_to_user((void __user *)addr + status_size, cr + status_size,
+	left = copy_to_user_partial((void __user *)addr + status_size, cr + status_size,
 			    len - status_size);
 	/*
 	 * Copy status only after the rest of completion record is copied
diff --git a/drivers/firmware/efi/test/efi_test.c b/drivers/firmware/efi/test/efi_test.c
index d54d6a671326..43b280ceb955 100644
--- a/drivers/firmware/efi/test/efi_test.c
+++ b/drivers/firmware/efi/test/efi_test.c
@@ -133,7 +133,7 @@ copy_ucs2_to_user_len(efi_char16_t __user *dst, efi_char16_t *src, size_t len)
 	if (!src)
 		return 0;
 
-	return copy_to_user(dst, src, len);
+	return copy_to_user_partial(dst, src, len);
 }
 
 static long efi_runtime_get_variable(unsigned long arg)
diff --git a/drivers/fsi/fsi-scom.c b/drivers/fsi/fsi-scom.c
index bb4d3700c934..370ec75b20e6 100644
--- a/drivers/fsi/fsi-scom.c
+++ b/drivers/fsi/fsi-scom.c
@@ -332,7 +332,7 @@ static ssize_t scom_read(struct file *filep, char __user *buf, size_t len,
 		return rc;
 	}
 
-	rc = copy_to_user(buf, &val, len);
+	rc = copy_to_user_partial(buf, &val, len);
 	if (rc)
 		dev_dbg(dev, "copy to user failed:%d\n", rc);
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 2409ac72b166..712605ec7ecc 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -1346,7 +1346,7 @@ static ssize_t dp_sdp_message_debugfs_write(struct file *f, const char __user *b
 
 	acrtc_state = to_dm_crtc_state(connector->base.state->crtc->state);
 
-	r = copy_from_user(data, buf, write_size);
+	r = copy_from_user_partial(data, buf, write_size);
 
 	write_size -= r;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 656a499b2706..3f5b450a914a 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -114,7 +114,7 @@ int intel_sseu_copy_eumask_to_user(void __user *to,
 		}
 	}
 
-	return copy_to_user(to, eu_mask, len);
+	return copy_to_user_partial(to, eu_mask, len);
 }
 
 /**
@@ -146,7 +146,7 @@ int intel_sseu_copy_ssmask_to_user(void __user *to,
 		}
 	}
 
-	return copy_to_user(to, ss_mask, len);
+	return copy_to_user_partial(to, ss_mask, len);
 }
 
 static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a432daf8038a..c1c2e762498f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -291,7 +291,7 @@ gtt_user_read(struct io_mapping *mapping,
 	io_mapping_unmap_atomic(vaddr);
 	if (unwritten) {
 		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
-		unwritten = copy_to_user(user_data,
+		unwritten = copy_to_user_partial(user_data,
 					 (void __force *)vaddr + offset,
 					 length);
 		io_mapping_unmap(vaddr);
@@ -525,7 +525,7 @@ ggtt_write(struct io_mapping *mapping,
 	io_mapping_unmap_atomic(vaddr);
 	if (unwritten) {
 		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
-		unwritten = copy_from_user((void __force *)vaddr + offset,
+		unwritten = copy_from_user_partial((void __force *)vaddr + offset,
 					   user_data, length);
 		io_mapping_unmap(vaddr);
 	}
diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c
index a82cf74f39ad..9b97b71b44f1 100644
--- a/drivers/hwtracing/intel_th/msu.c
+++ b/drivers/hwtracing/intel_th/msu.c
@@ -1457,7 +1457,7 @@ static unsigned long msc_win_to_user(void *data, void *src, size_t len)
 	struct msc_win_to_user_struct *u = data;
 	unsigned long ret;
 
-	ret = copy_to_user(u->buf + u->offset, src, len);
+	ret = copy_to_user_partial(u->buf + u->offset, src, len);
 	u->offset += len - ret;
 
 	return ret;
diff --git a/drivers/misc/ibmvmc.c b/drivers/misc/ibmvmc.c
index beb18c34f20d..e1d99354dd29 100644
--- a/drivers/misc/ibmvmc.c
+++ b/drivers/misc/ibmvmc.c
@@ -1112,7 +1112,7 @@ static ssize_t ibmvmc_write(struct file *file, const char *buffer,
 	while (c > 0) {
 		bytes = min_t(size_t, c, vmc_buffer->size);
 
-		bytes -= copy_from_user(buf, p, bytes);
+		bytes -= copy_from_user_partial(buf, p, bytes);
 		if (!bytes) {
 			ret = -EFAULT;
 			goto out;
diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c
index b71ca1bf0a20..bd502edbc173 100644
--- a/drivers/misc/vmw_vmci/vmci_host.c
+++ b/drivers/misc/vmw_vmci/vmci_host.c
@@ -213,7 +213,7 @@ static int drv_cp_harray_to_user(void __user *user_buf_uva,
 
 	*user_buf_size = array_size * sizeof(*handles);
 	if (*user_buf_size)
-		*retval = copy_to_user(user_buf_uva,
+		*retval = copy_to_user_partial(user_buf_uva,
 				       vmci_handle_arr_get_handles
 				       (handle_array), *user_buf_size);
 
diff --git a/drivers/most/most_cdev.c b/drivers/most/most_cdev.c
index 5df508d8d60a..969c865ccbef 100644
--- a/drivers/most/most_cdev.c
+++ b/drivers/most/most_cdev.c
@@ -265,7 +265,7 @@ comp_read(struct file *filp, char __user *buf, size_t count, loff_t *offset)
 			count,
 			mbo->processed_length - c->mbo_offs);
 
-	not_copied = copy_to_user(buf,
+	not_copied = copy_to_user_partial(buf,
 				  mbo->virt_address + c->mbo_offs,
 				  to_copy);
 
diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
index ed4178155a5d..d474a008c73e 100644
--- a/drivers/net/ieee802154/ca8210.c
+++ b/drivers/net/ieee802154/ca8210.c
@@ -2460,7 +2460,7 @@ static ssize_t ca8210_test_int_user_write(
 		return -EBADE;
 	}
 
-	ret = copy_from_user(command, in_buf, len);
+	ret = copy_from_user_partial(command, in_buf, len);
 	if (ret) {
 		dev_err(
 			&priv->spi->dev,
@@ -2548,7 +2548,7 @@ static ssize_t ca8210_test_int_user_read(
 	cmdlen = fifo_buffer[1];
 	bytes_not_copied = cmdlen + 2;
 
-	bytes_not_copied = copy_to_user(buf, fifo_buffer, bytes_not_copied);
+	bytes_not_copied = copy_to_user_partial(buf, fifo_buffer, bytes_not_copied);
 	if (bytes_not_copied > 0) {
 		dev_err(
 			&priv->spi->dev,
diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c
index b8cb736a7185..f2130248fb7f 100644
--- a/drivers/net/wireless/ath/wil6210/debugfs.c
+++ b/drivers/net/wireless/ath/wil6210/debugfs.c
@@ -659,7 +659,7 @@ static ssize_t wil_read_file_ioblob(struct file *file, char __user *user_buf,
 	wil_memcpy_fromio_32(buf, (const void __iomem *)
 			     wil_blob->blob.data + aligned_pos, aligned_count);
 
-	ret = copy_to_user(user_buf, buf + unaligned_bytes, count);
+	ret = copy_to_user_partial(user_buf, buf + unaligned_bytes, count);
 
 	wil_mem_access_unlock(wil);
 	wil_pm_runtime_put(wil);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c
index a05f60f9224b..66ddaa0d8e36 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c
@@ -3060,7 +3060,7 @@ static bool iwl_write_to_user_buf(char __user *user_buf, ssize_t count,
 	if (*size > buf_size_left)
 		*size = buf_size_left;
 
-	*size -= copy_to_user(user_buf, buf, *size);
+	*size -= copy_to_user_partial(user_buf, buf, *size);
 	*bytes_copied += *size;
 
 	if (buf_size_left == *size)
diff --git a/drivers/net/wireless/ti/wlcore/debugfs.c b/drivers/net/wireless/ti/wlcore/debugfs.c
index bbfd2725215b..d359baea5100 100644
--- a/drivers/net/wireless/ti/wlcore/debugfs.c
+++ b/drivers/net/wireless/ti/wlcore/debugfs.c
@@ -1088,7 +1088,7 @@ static ssize_t dev_mem_read(struct file *file,
 	mutex_unlock(&wl->mutex);
 
 	if (ret == 0) {
-		ret = copy_to_user(user_buf, buf, bytes);
+		ret = copy_to_user_partial(user_buf, buf, bytes);
 		if (ret < bytes) {
 			bytes -= ret;
 			*ppos += bytes;
diff --git a/drivers/ps3/ps3-lpm.c b/drivers/ps3/ps3-lpm.c
index f8d8f607134a..5a2b150cda49 100644
--- a/drivers/ps3/ps3-lpm.c
+++ b/drivers/ps3/ps3-lpm.c
@@ -999,7 +999,7 @@ int ps3_lpm_copy_tb_to_user(unsigned long offset, void __user *buf,
 			return result == LV1_WRONG_STATE ? -EBUSY : -EINVAL;
 		}
 
-		result = copy_to_user(buf, lpm_priv->tb_cache, tmp);
+		result = copy_to_user_partial(buf, lpm_priv->tb_cache, tmp);
 
 		if (result) {
 			dev_dbg(sbd_core(), "%s:%u: 0x%llx bytes at 0x%p\n",
diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h
index 6ef8850a42df..61a5de90c354 100644
--- a/drivers/s390/crypto/zcrypt_api.h
+++ b/drivers/s390/crypto/zcrypt_api.h
@@ -185,7 +185,7 @@ static inline unsigned long z_copy_from_user(bool userspace,
 					     unsigned long n)
 {
 	if (likely(userspace))
-		return copy_from_user(to, from, n);
+		return copy_from_user_partial(to, from, n);
 	memcpy(to, (void __force *)from, n);
 	return 0;
 }
@@ -196,7 +196,7 @@ static inline unsigned long z_copy_to_user(bool userspace,
 					   unsigned long n)
 {
 	if (likely(userspace))
-		return copy_to_user(to, from, n);
+		return copy_to_user_partial(to, from, n);
 	memcpy((void __force *)to, from, n);
 	return 0;
 }
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 638221178384..5b42fabcf4c4 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -157,7 +157,7 @@ spidev_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
 	if (status > 0) {
 		unsigned long	missing;
 
-		missing = copy_to_user(buf, spidev->rx_buffer, status);
+		missing = copy_to_user_partial(buf, spidev->rx_buffer, status);
 		if (missing == status)
 			status = -EFAULT;
 		else
diff --git a/drivers/staging/media/atomisp/pci/atomisp_cmd.c b/drivers/staging/media/atomisp/pci/atomisp_cmd.c
index fec369575d88..10a7aff375a9 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_cmd.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_cmd.c
@@ -1491,7 +1491,7 @@ int atomisp_gdc_cac_table(struct atomisp_sub_device *asd, int flag,
 		}
 
 		for (i = 0; i < IA_CSS_MORPH_TABLE_NUM_PLANES; i++) {
-			ret = copy_from_user(tab->coordinates_x[i],
+			ret = copy_from_user_partial(tab->coordinates_x[i],
 					     config->coordinates_x[i],
 					     config->height * config->width *
 					     sizeof(*config->coordinates_x[i]));
@@ -1502,7 +1502,7 @@ int atomisp_gdc_cac_table(struct atomisp_sub_device *asd, int flag,
 				atomisp_css_morph_table_free(tab);
 				return -EFAULT;
 			}
-			ret = copy_from_user(tab->coordinates_y[i],
+			ret = copy_from_user_partial(tab->coordinates_y[i],
 					     config->coordinates_y[i],
 					     config->height * config->width *
 					     sizeof(*config->coordinates_y[i]));
@@ -1709,7 +1709,7 @@ int atomisp_3a_stat(struct atomisp_sub_device *asd, int flag,
 	config->exp_id = s3a_buf->s3a_data->exp_id;
 	config->isp_config_id = s3a_buf->s3a_data->isp_config_id;
 
-	ret = copy_to_user(config->data, asd->params.s3a_user_stat->data,
+	ret = copy_to_user_partial(config->data, asd->params.s3a_user_stat->data,
 			   asd->params.s3a_output_bytes);
 	if (ret) {
 		dev_err(isp->dev, "copy to user failed: copied %lu bytes\n",
@@ -2031,7 +2031,7 @@ static unsigned int long copy_from_compatible(void *to, const void *from,
 	unsigned long n, bool from_user)
 {
 	if (from_user)
-		return copy_from_user(to, (void __user *)from, n);
+		return copy_from_user_partial(to, (void __user *)from, n);
 	else
 		memcpy(to, from, n);
 	return 0;
diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c
index 90c70d8d14e3..cdc274c0ff81 100644
--- a/drivers/tty/tty_ioctl.c
+++ b/drivers/tty/tty_ioctl.c
@@ -388,29 +388,29 @@ __weak int kernel_termios_to_user_termio(struct termio __user *termio,
 	v.c_lflag = termios->c_lflag;
 	v.c_line = termios->c_line;
 	memcpy(v.c_cc, termios->c_cc, NCC);
-	return copy_to_user(termio, &v, sizeof(struct termio));
+	return copy_to_user_partial(termio, &v, sizeof(struct termio));
 }
 
 #ifdef TCGETS2
 __weak int user_termios_to_kernel_termios(struct ktermios *k,
 						 struct termios2 __user *u)
 {
-	return copy_from_user(k, u, sizeof(struct termios2));
+	return copy_from_user_partial(k, u, sizeof(struct termios2));
 }
 __weak int kernel_termios_to_user_termios(struct termios2 __user *u,
 						 struct ktermios *k)
 {
-	return copy_to_user(u, k, sizeof(struct termios2));
+	return copy_to_user_partial(u, k, sizeof(struct termios2));
 }
 __weak int user_termios_to_kernel_termios_1(struct ktermios *k,
 						   struct termios __user *u)
 {
-	return copy_from_user(k, u, sizeof(struct termios));
+	return copy_from_user_partial(k, u, sizeof(struct termios));
 }
 __weak int kernel_termios_to_user_termios_1(struct termios __user *u,
 						   struct ktermios *k)
 {
-	return copy_to_user(u, k, sizeof(struct termios));
+	return copy_to_user_partial(u, k, sizeof(struct termios));
 }
 
 #else
@@ -418,12 +418,12 @@ __weak int kernel_termios_to_user_termios_1(struct termios __user *u,
 __weak int user_termios_to_kernel_termios(struct ktermios *k,
 						 struct termios __user *u)
 {
-	return copy_from_user(k, u, sizeof(struct termios));
+	return copy_from_user_partial(k, u, sizeof(struct termios));
 }
 __weak int kernel_termios_to_user_termios(struct termios __user *u,
 						 struct ktermios *k)
 {
-	return copy_to_user(u, k, sizeof(struct termios));
+	return copy_to_user_partial(u, k, sizeof(struct termios));
 }
 #endif /* TCGETS2 */
 
diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c
index 4d2d46c95fef..e54c708149c3 100644
--- a/drivers/tty/vt/vc_screen.c
+++ b/drivers/tty/vt/vc_screen.c
@@ -450,7 +450,7 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 		 */
 
 		console_unlock();
-		ret = copy_to_user(buf, con_buf + skip, this_round);
+		ret = copy_to_user_partial(buf, con_buf + skip, this_round);
 		console_lock();
 
 		if (ret) {
@@ -630,7 +630,7 @@ vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 		 * in the write data from userspace safely.
 		 */
 		console_unlock();
-		ret = copy_from_user(con_buf, buf, this_round);
+		ret = copy_from_user_partial(con_buf, buf, this_round);
 		console_lock();
 
 		if (ret) {
diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
index c5a12a6760ea..f22dd3697a46 100644
--- a/drivers/usb/gadget/function/f_hid.c
+++ b/drivers/usb/gadget/function/f_hid.c
@@ -349,7 +349,7 @@ static ssize_t f_hidg_intout_read(struct file *file, char __user *buffer,
 	spin_unlock_irqrestore(&hidg->read_spinlock, flags);
 
 	/* copy to user outside spinlock */
-	count -= copy_to_user(buffer, req->buf + list->pos, count);
+	count -= copy_to_user_partial(buffer, req->buf + list->pos, count);
 	list->pos += count;
 
 	/*
@@ -410,7 +410,7 @@ static ssize_t f_hidg_ssreport_read(struct file *file, char __user *buffer,
 	spin_unlock_irqrestore(&hidg->read_spinlock, flags);
 
 	if (tmp_buf != NULL) {
-		count -= copy_to_user(buffer, tmp_buf, count);
+		count -= copy_to_user_partial(buffer, tmp_buf, count);
 		kfree(tmp_buf);
 	} else {
 		count = -ENOMEM;
diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c
index e4f7828ae75d..4fbed987b639 100644
--- a/drivers/usb/gadget/function/f_printer.c
+++ b/drivers/usb/gadget/function/f_printer.c
@@ -525,7 +525,7 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr)
 		else
 			size = len;
 
-		size -= copy_to_user(buf, current_rx_buf, size);
+		size -= copy_to_user_partial(buf, current_rx_buf, size);
 		bytes_copied += size;
 		len -= size;
 		buf += size;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c8151ba54de3..ad74a891aa80 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -3173,7 +3173,7 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
 	vaddr = dma->vaddr + offset;
 
 	if (write) {
-		*copied = copy_to_user((void __user *)vaddr, data,
+		*copied = copy_to_user_partial((void __user *)vaddr, data,
 					 count) ? 0 : count;
 		if (*copied && iommu->dirty_page_tracking) {
 			unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
@@ -3186,7 +3186,7 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
 				   (offset >> pgshift) + 1);
 		}
 	} else
-		*copied = copy_from_user(data, (void __user *)vaddr,
+		*copied = copy_from_user_partial(data, (void __user *)vaddr,
 					   count) ? 0 : count;
 	if (kthread)
 		kthread_unuse_mm(mm);
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index 61db6932a9d2..b1db90dac1d1 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -150,7 +150,7 @@ static ssize_t xenbus_file_read(struct file *filp,
 	while (i < len) {
 		size_t sz = min_t(size_t, len - i, rb->len - rb->cons);
 
-		ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz);
+		ret = copy_to_user_partial(ubuf + i, &rb->msg[rb->cons], sz);
 
 		i += sz - ret;
 		rb->cons += sz - ret;
diff --git a/fs/namespace.c b/fs/namespace.c
index fe919abd2f01..27afb73fef20 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4033,7 +4033,7 @@ static void *copy_mount_options(const void __user * data)
 	if (!copy)
 		return ERR_PTR(-ENOMEM);
 
-	left = copy_from_user(copy, data, PAGE_SIZE);
+	left = copy_from_user_partial(copy, data, PAGE_SIZE);
 
 	/*
 	 * Not all architectures have an exact copy_from_user(). Resort to
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 5821e33df78f..97c0b391b98e 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -255,7 +255,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
 	if (!count)
 		return 0;
 
-	bytes_left = copy_from_user(lvb_buf, buf, count);
+	bytes_left = copy_from_user_partial(lvb_buf, buf, count);
 	count -= bytes_left;
 	if (count)
 		user_dlm_write_lvb(inode, lvb_buf, count);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d9acfa89c894..49577662ae70 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -278,7 +278,7 @@ static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf,
 			len -= pos;
 			if (len > count)
 				len = count;
-			len -= copy_to_user(buf, page+pos, len);
+			len -= copy_to_user_partial(buf, page+pos, len);
 			if (!len)
 				len = -EFAULT;
 			ret = len;
@@ -359,7 +359,7 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
 		got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
 		if (got <= 0)
 			break;
-		got -= copy_to_user(buf, page, got);
+		got -= copy_to_user_partial(buf, page, got);
 		if (unlikely(!got)) {
 			if (!len)
 				len = -EFAULT;
diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h
index f6e0795db484..e4444d0f0cfe 100644
--- a/include/linux/bpfptr.h
+++ b/include/linux/bpfptr.h
@@ -50,7 +50,7 @@ static inline int copy_from_bpfptr_offset(void *dst, bpfptr_t src,
 					  size_t offset, size_t size)
 {
 	if (!bpfptr_is_kernel(src))
-		return copy_from_user(dst, src.user + offset, size);
+		return copy_from_user_partial(dst, src.user + offset, size);
 	return copy_from_kernel_nofault(dst, src.kernel + offset, size);
 }
 
diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
index 3e6c8e9d67ae..52ddddfe728d 100644
--- a/include/linux/sockptr.h
+++ b/include/linux/sockptr.h
@@ -45,7 +45,7 @@ static inline int copy_from_sockptr_offset(void *dst, sockptr_t src,
 		size_t offset, size_t size)
 {
 	if (!sockptr_is_kernel(src))
-		return copy_from_user(dst, src.user + offset, size);
+		return copy_from_user_partial(dst, src.user + offset, size);
 	memcpy(dst, src.kernel + offset, size);
 	return 0;
 }
@@ -111,7 +111,7 @@ static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset,
 		const void *src, size_t size)
 {
 	if (!sockptr_is_kernel(dst))
-		return copy_to_user(dst.user + offset, src, size);
+		return copy_to_user_partial(dst.user + offset, src, size);
 	memcpy(dst.kernel + offset, src, size);
 	return 0;
 }
diff --git a/ipc/msg.c b/ipc/msg.c
index 62996b97f0ac..39848238219d 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -322,7 +322,7 @@ copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
 {
 	switch (version) {
 	case IPC_64:
-		return copy_to_user(buf, in, sizeof(*in));
+		return copy_to_user_partial(buf, in, sizeof(*in));
 	case IPC_OLD:
 	{
 		struct msqid_ds out;
@@ -355,7 +355,7 @@ copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
 		out.msg_lspid		= in->msg_lspid;
 		out.msg_lrpid		= in->msg_lrpid;
 
-		return copy_to_user(buf, &out, sizeof(out));
+		return copy_to_user_partial(buf, &out, sizeof(out));
 	}
 	default:
 		return -EINVAL;
@@ -712,7 +712,7 @@ static int copy_compat_msqid_to_user(void __user *buf, struct msqid64_ds *in,
 		v.msg_qbytes = in->msg_qbytes;
 		v.msg_lspid = in->msg_lspid;
 		v.msg_lrpid = in->msg_lrpid;
-		return copy_to_user(buf, &v, sizeof(v));
+		return copy_to_user_partial(buf, &v, sizeof(v));
 	} else {
 		struct compat_msqid_ds v;
 		memset(&v, 0, sizeof(v));
@@ -725,7 +725,7 @@ static int copy_compat_msqid_to_user(void __user *buf, struct msqid64_ds *in,
 		v.msg_qbytes = in->msg_qbytes;
 		v.msg_lspid = in->msg_lspid;
 		v.msg_lrpid = in->msg_lrpid;
-		return copy_to_user(buf, &v, sizeof(v));
+		return copy_to_user_partial(buf, &v, sizeof(v));
 	}
 }
 
diff --git a/ipc/sem.c b/ipc/sem.c
index 6cdf862b1f5c..3b56086ba07d 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1196,7 +1196,7 @@ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in,
 {
 	switch (version) {
 	case IPC_64:
-		return copy_to_user(buf, in, sizeof(*in));
+		return copy_to_user_partial(buf, in, sizeof(*in));
 	case IPC_OLD:
 	    {
 		struct semid_ds out;
@@ -1209,7 +1209,7 @@ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in,
 		out.sem_ctime	= in->sem_ctime;
 		out.sem_nsems	= in->sem_nsems;
 
-		return copy_to_user(buf, &out, sizeof(out));
+		return copy_to_user_partial(buf, &out, sizeof(out));
 	    }
 	default:
 		return -EINVAL;
@@ -1759,7 +1759,7 @@ static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in,
 		v.sem_ctime	 = lower_32_bits(in->sem_ctime);
 		v.sem_ctime_high = upper_32_bits(in->sem_ctime);
 		v.sem_nsems = in->sem_nsems;
-		return copy_to_user(buf, &v, sizeof(v));
+		return copy_to_user_partial(buf, &v, sizeof(v));
 	} else {
 		struct compat_semid_ds v;
 		memset(&v, 0, sizeof(v));
@@ -1767,7 +1767,7 @@ static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in,
 		v.sem_otime = in->sem_otime;
 		v.sem_ctime = in->sem_ctime;
 		v.sem_nsems = in->sem_nsems;
-		return copy_to_user(buf, &v, sizeof(v));
+		return copy_to_user_partial(buf, &v, sizeof(v));
 	}
 }
 
diff --git a/ipc/shm.c b/ipc/shm.c
index a95dae447707..1eb53c3df3b9 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -853,7 +853,7 @@ static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_
 {
 	switch (version) {
 	case IPC_64:
-		return copy_to_user(buf, in, sizeof(*in));
+		return copy_to_user_partial(buf, in, sizeof(*in));
 	case IPC_OLD:
 	    {
 		struct shmid_ds out;
@@ -868,7 +868,7 @@ static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_
 		out.shm_lpid	= in->shm_lpid;
 		out.shm_nattch	= in->shm_nattch;
 
-		return copy_to_user(buf, &out, sizeof(out));
+		return copy_to_user_partial(buf, &out, sizeof(out));
 	    }
 	default:
 		return -EINVAL;
@@ -905,7 +905,7 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf
 {
 	switch (version) {
 	case IPC_64:
-		return copy_to_user(buf, in, sizeof(*in));
+		return copy_to_user_partial(buf, in, sizeof(*in));
 	case IPC_OLD:
 	    {
 		struct shminfo out;
@@ -920,7 +920,7 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf
 		out.shmseg	= in->shmseg;
 		out.shmall	= in->shmall;
 
-		return copy_to_user(buf, &out, sizeof(out));
+		return copy_to_user_partial(buf, &out, sizeof(out));
 	    }
 	default:
 		return -EINVAL;
@@ -1359,7 +1359,7 @@ static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in,
 		info.shmmni = in->shmmni;
 		info.shmseg = in->shmseg;
 		info.shmall = in->shmall;
-		return copy_to_user(buf, &info, sizeof(info));
+		return copy_to_user_partial(buf, &info, sizeof(info));
 	} else {
 		struct shminfo info;
 		memset(&info, 0, sizeof(info));
@@ -1368,7 +1368,7 @@ static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in,
 		info.shmmni = in->shmmni;
 		info.shmseg = in->shmseg;
 		info.shmall = in->shmall;
-		return copy_to_user(buf, &info, sizeof(info));
+		return copy_to_user_partial(buf, &info, sizeof(info));
 	}
 }
 
@@ -1384,7 +1384,7 @@ static int put_compat_shm_info(struct shm_info *ip,
 	info.shm_swp = ip->shm_swp;
 	info.swap_attempts = ip->swap_attempts;
 	info.swap_successes = ip->swap_successes;
-	return copy_to_user(uip, &info, sizeof(info));
+	return copy_to_user_partial(uip, &info, sizeof(info));
 }
 
 static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
@@ -1404,7 +1404,7 @@ static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
 		v.shm_nattch = in->shm_nattch;
 		v.shm_cpid = in->shm_cpid;
 		v.shm_lpid = in->shm_lpid;
-		return copy_to_user(buf, &v, sizeof(v));
+		return copy_to_user_partial(buf, &v, sizeof(v));
 	} else {
 		struct compat_shmid_ds v;
 		memset(&v, 0, sizeof(v));
@@ -1417,7 +1417,7 @@ static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
 		v.shm_nattch = in->shm_nattch;
 		v.shm_cpid = in->shm_cpid;
 		v.shm_lpid = in->shm_lpid;
-		return copy_to_user(buf, &v, sizeof(v));
+		return copy_to_user_partial(buf, &v, sizeof(v));
 	}
 }
 
diff --git a/kernel/regset.c b/kernel/regset.c
index b2871fa68b2a..29c6d19c3465 100644
--- a/kernel/regset.c
+++ b/kernel/regset.c
@@ -70,7 +70,7 @@ int copy_regset_to_user(struct task_struct *target,
 
 	ret = regset_get_alloc(target, regset, size, &buf);
 	if (ret > 0)
-		ret = copy_to_user(data, buf, ret) ? -EFAULT : 0;
+		ret = copy_to_user_partial(data, buf, ret) ? -EFAULT : 0;
 	kvfree(buf);
 	return ret;
 }
diff --git a/kernel/sys.c b/kernel/sys.c
index 62e842055cc9..8e1ce8c26884 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1343,7 +1343,7 @@ static int override_release(char __user *release, size_t len)
 		v = LINUX_VERSION_PATCHLEVEL + 60;
 		copy = clamp_t(size_t, len, 1, sizeof(buf));
 		copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
-		ret = copy_to_user(release, buf, copy + 1);
+		ret = copy_to_user_partial(release, buf, copy + 1);
 	}
 	return ret;
 }
@@ -1567,7 +1567,7 @@ SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 
 	ret = do_prlimit(current, resource, NULL, &value);
 	if (!ret)
-		ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
+		ret = copy_to_user_partial(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
 
 	return ret;
 }
diff --git a/lib/kfifo.c b/lib/kfifo.c
index 2633f9cc336c..00c19a321aae 100644
--- a/lib/kfifo.c
+++ b/lib/kfifo.c
@@ -203,11 +203,11 @@ static unsigned long kfifo_copy_from_user(struct __kfifo *fifo,
 	}
 	l = min(len, size - off);
 
-	ret = copy_from_user(fifo->data + off, from, l);
+	ret = copy_from_user_partial(fifo->data + off, from, l);
 	if (unlikely(ret))
 		ret = DIV_ROUND_UP(ret + len - l, esize);
 	else {
-		ret = copy_from_user(fifo->data, from + l, len - l);
+		ret = copy_from_user_partial(fifo->data, from + l, len - l);
 		if (unlikely(ret))
 			ret = DIV_ROUND_UP(ret, esize);
 	}
@@ -263,11 +263,11 @@ static unsigned long kfifo_copy_to_user(struct __kfifo *fifo, void __user *to,
 	}
 	l = min(len, size - off);
 
-	ret = copy_to_user(to, fifo->data + off, l);
+	ret = copy_to_user_partial(to, fifo->data + off, l);
 	if (unlikely(ret))
 		ret = DIV_ROUND_UP(ret + len - l, esize);
 	else {
-		ret = copy_to_user(to + l, fifo->data, len - l);
+		ret = copy_to_user_partial(to + l, fifo->data, len - l);
 		if (unlikely(ret))
 			ret = DIV_ROUND_UP(ret, esize);
 	}
diff --git a/mm/kasan/kasan_test_c.c b/mm/kasan/kasan_test_c.c
index 32d06cbf6a31..a4d19fc1068a 100644
--- a/mm/kasan/kasan_test_c.c
+++ b/mm/kasan/kasan_test_c.c
@@ -2169,9 +2169,9 @@ static void copy_user_test_oob(struct kunit *test)
 	usermem = (char __user *)useraddr;
 
 	KUNIT_EXPECT_KASAN_FAIL(test,
-		unused = copy_from_user(kmem, usermem, size + 1));
+		unused = copy_from_user_partial(kmem, usermem, size + 1));
 	KUNIT_EXPECT_KASAN_FAIL_READ(test,
-		unused = copy_to_user(usermem, kmem, size + 1));
+		unused = copy_to_user_partial(usermem, kmem, size + 1));
 	KUNIT_EXPECT_KASAN_FAIL(test,
 		unused = __copy_from_user(kmem, usermem, size + 1));
 	KUNIT_EXPECT_KASAN_FAIL_READ(test,
diff --git a/mm/memory.c b/mm/memory.c
index ea6568571131..5a2f7543a2da 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -7529,7 +7529,7 @@ long copy_folio_from_user(struct folio *dst_folio,
 		kaddr = kmap_local_page(subpage);
 		if (!allow_pagefault)
 			pagefault_disable();
-		rc = copy_from_user(kaddr, usr_src + i * PAGE_SIZE, PAGE_SIZE);
+		rc = copy_from_user_partial(kaddr, usr_src + i * PAGE_SIZE, PAGE_SIZE);
 		if (!allow_pagefault)
 			pagefault_enable();
 		kunmap_local(kaddr);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index af8762b24039..7327c98b206a 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -471,7 +471,7 @@ static int x25_getsockopt(struct socket *sock, int level, int optname,
 		goto out;
 
 	val = test_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags);
-	rc = copy_to_user(optval, &val, len) ? -EFAULT : 0;
+	rc = copy_to_user_partial(optval, &val, len) ? -EFAULT : 0;
 out:
 	return rc;
 }
diff --git a/rust/helpers/uaccess.c b/rust/helpers/uaccess.c
index 01de4fbbcc84..710e07cd60ae 100644
--- a/rust/helpers/uaccess.c
+++ b/rust/helpers/uaccess.c
@@ -5,13 +5,13 @@
 __rust_helper unsigned long
 rust_helper_copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	return copy_from_user(to, from, n);
+	return copy_from_user_partial(to, from, n);
 }
 
 __rust_helper unsigned long
 rust_helper_copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	return copy_to_user(to, from, n);
+	return copy_to_user_partial(to, from, n);
 }
 
 #ifndef CONFIG_ARCH_WANTS_NOINLINE_COPY_USER
diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c
index 08e0556bf161..3941bf9666a9 100644
--- a/sound/pci/emu10k1/emufx.c
+++ b/sound/pci/emu10k1/emufx.c
@@ -739,10 +739,10 @@ static int copy_gctl_to_user(struct snd_emu10k1 *emu,
 
 	_dst = (struct snd_emu10k1_fx8010_control_gpr __user *)dst;
 	if (emu->support_tlv)
-		return copy_to_user(&_dst[idx], src, sizeof(*src));
+		return copy_to_user_partial(&_dst[idx], src, sizeof(*src));
 	
 	octl = (struct snd_emu10k1_fx8010_control_old_gpr __user *)dst;
-	return copy_to_user(&octl[idx], src, sizeof(*octl));
+	return copy_to_user_partial(&octl[idx], src, sizeof(*octl));
 }
 
 static int copy_ctl_elem_id(const struct emu10k1_ctl_elem_id *list, int i,
diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c
index 31cc2d91c8d2..d5842d8a8509 100644
--- a/sound/pci/rme9652/hdsp.c
+++ b/sound/pci/rme9652/hdsp.c
@@ -4541,7 +4541,7 @@ static int snd_hdsp_capture_release(struct snd_pcm_substream *substream)
 static inline int copy_u32_le(void __user *dest, void __iomem *src)
 {
 	u32 val = readl(src);
-	return copy_to_user(dest, &val, 4);
+	return copy_to_user_partial(dest, &val, 4);
 }
 
 static inline int copy_u64_le(void __user *dest, void __iomem *src_low, void __iomem *src_high)
@@ -4551,7 +4551,7 @@ static inline int copy_u64_le(void __user *dest, void __iomem *src_low, void __i
 	rms_low = readl(src_low);
 	rms_high = readl(src_high);
 	rms = ((u64)rms_high << 32) | rms_low;
-	return copy_to_user(dest, &rms, 8);
+	return copy_to_user_partial(dest, &rms, 8);
 }
 
 static inline int copy_u48_le(void __user *dest, void __iomem *src_low, void __iomem *src_high)
@@ -4561,7 +4561,7 @@ static inline int copy_u48_le(void __user *dest, void __iomem *src_low, void __i
 	rms_low = readl(src_low) & 0xffffff00;
 	rms_high = readl(src_high) & 0xffffff00;
 	rms = ((u64)rms_high << 32) | rms_low;
-	return copy_to_user(dest, &rms, 8);
+	return copy_to_user_partial(dest, &rms, 8);
 }
 
 static int hdsp_9652_get_peak(struct hdsp *hdsp, struct hdsp_peak_rms __user *peak_rms)
diff --git a/sound/soc/intel/avs/probes.c b/sound/soc/intel/avs/probes.c
index 099119ad28b3..bc2871d3e18c 100644
--- a/sound/soc/intel/avs/probes.c
+++ b/sound/soc/intel/avs/probes.c
@@ -244,10 +244,10 @@ static int avs_probe_compr_copy(struct snd_soc_component *comp, struct snd_compr
 	n = rtd->buffer_size - offset;
 
 	if (count < n) {
-		ret = copy_to_user(buf, ptr, count);
+		ret = copy_to_user_partial(buf, ptr, count);
 	} else {
-		ret = copy_to_user(buf, ptr, n);
-		ret += copy_to_user(buf + n, rtd->dma_area, count - n);
+		ret = copy_to_user_partial(buf, ptr, n);
+		ret += copy_to_user_partial(buf + n, rtd->dma_area, count - n);
 	}
 
 	if (ret)
diff --git a/sound/soc/sof/compress.c b/sound/soc/sof/compress.c
index 93f2376585db..d54be8a188ec 100644
--- a/sound/soc/sof/compress.c
+++ b/sound/soc/sof/compress.c
@@ -324,10 +324,10 @@ static int sof_compr_copy_playback(struct snd_compr_runtime *rtd,
 	n = rtd->buffer_size - offset;
 
 	if (count < n) {
-		ret = copy_from_user(ptr, buf, count);
+		ret = copy_from_user_partial(ptr, buf, count);
 	} else {
-		ret = copy_from_user(ptr, buf, n);
-		ret += copy_from_user(rtd->dma_area, buf + n, count - n);
+		ret = copy_from_user_partial(ptr, buf, n);
+		ret += copy_from_user_partial(rtd->dma_area, buf + n, count - n);
 	}
 
 	return count - ret;
@@ -345,10 +345,10 @@ static int sof_compr_copy_capture(struct snd_compr_runtime *rtd,
 	n = rtd->buffer_size - offset;
 
 	if (count < n) {
-		ret = copy_to_user(buf, ptr, count);
+		ret = copy_to_user_partial(buf, ptr, count);
 	} else {
-		ret = copy_to_user(buf, ptr, n);
-		ret += copy_to_user(buf + n, rtd->dma_area, count - n);
+		ret = copy_to_user_partial(buf, ptr, n);
+		ret += copy_to_user_partial(buf + n, rtd->dma_area, count - n);
 	}
 
 	return count - ret;
diff --git a/sound/soc/sof/sof-client-probes.c b/sound/soc/sof/sof-client-probes.c
index 124f55508159..4c5f4f016ff8 100644
--- a/sound/soc/sof/sof-client-probes.c
+++ b/sound/soc/sof/sof-client-probes.c
@@ -184,10 +184,10 @@ static int sof_probes_compr_copy(struct snd_soc_component *component,
 	n = rtd->buffer_size - offset;
 
 	if (count < n) {
-		ret = copy_to_user(buf, ptr, count);
+		ret = copy_to_user_partial(buf, ptr, count);
 	} else {
-		ret = copy_to_user(buf, ptr, n);
-		ret += copy_to_user(buf + n, rtd->dma_area, count - n);
+		ret = copy_to_user_partial(buf, ptr, n);
+		ret += copy_to_user_partial(buf + n, rtd->dma_area, count - n);
 	}
 
 	if (ret)
-- 
2.49.0



^ permalink raw reply related

* [RFC PATCH v1 4/9] uaccess: Introduce copy_{to/from}_user_partial()
From: Christophe Leroy (CS GROUP) @ 2026-04-27 17:13 UTC (permalink / raw)
  To: Yury Norov, Andrew Morton, Linus Torvalds, David Laight,
	Thomas Gleixner
  Cc: Christophe Leroy (CS GROUP), linux-alpha, linux-kernel,
	linux-snps-arc, linux-arm-kernel, linux-mips, linuxppc-dev, kvm,
	linux-riscv, linux-s390, sparclinux, linux-um, dmaengine,
	linux-efi, linux-fsi, amd-gfx, dri-devel, intel-gfx, linux-wpan,
	netdev, linux-wireless, linux-spi, linux-media, linux-staging,
	linux-serial, linux-usb, xen-devel, linux-fsdevel, ocfs2-devel,
	bpf, kasan-dev, linux-mm, linux-x25, rust-for-linux, linux-sound,
	sound-open-firmware, linux-csky, linux-hexagon, loongarch,
	linux-m68k, linux-openrisc, linux-parisc, linux-sh, linux-arch
In-Reply-To: <cover.1777306795.git.chleroy@kernel.org>

Today there are approximately 3000 calls to copy_to_user() and
3000 calls to copy_from_user().

The majority of callers of copy_{to/from}_user() don't care about the
return value: they only check whether it is 0 or not, and when it is
not 0 they handle it as -EFAULT.

In order to allow better optimisation of copy_{to/from}_user() when
the size of the copy is known at build time, create new functions
named copy_{to/from}_user_partial() to be used by the few callers
that are interested in partial copies and need to know how many
bytes remain at the end of the copy.

For the time being it is just the same as copy_{to/from}_user().

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 include/linux/uaccess.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index bd1201c81d94..2d37173782b3 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -221,6 +221,8 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
 		return _inline_copy_from_user(to, from, n);
 }
 
+#define copy_from_user_partial copy_from_user
+
 static __always_inline unsigned long __must_check
 copy_to_user(void __user *to, const void *from, unsigned long n)
 {
@@ -233,6 +235,8 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
 		return _inline_copy_to_user(to, from, n);
 }
 
+#define copy_to_user_partial copy_to_user
+
 #ifndef copy_mc_to_kernel
 /*
  * Without arch opt-in this generic copy_mc_to_kernel() will not handle
-- 
2.49.0



^ permalink raw reply related

* [RFC PATCH v1 3/9] x86/umip: Be stricter in fixup_umip_exception()
From: Christophe Leroy (CS GROUP) @ 2026-04-27 17:13 UTC (permalink / raw)
  To: Yury Norov, Andrew Morton, Linus Torvalds, David Laight,
	Thomas Gleixner
  Cc: Christophe Leroy (CS GROUP), linux-alpha, linux-kernel,
	linux-snps-arc, linux-arm-kernel, linux-mips, linuxppc-dev, kvm,
	linux-riscv, linux-s390, sparclinux, linux-um, dmaengine,
	linux-efi, linux-fsi, amd-gfx, dri-devel, intel-gfx, linux-wpan,
	netdev, linux-wireless, linux-spi, linux-media, linux-staging,
	linux-serial, linux-usb, xen-devel, linux-fsdevel, ocfs2-devel,
	bpf, kasan-dev, linux-mm, linux-x25, rust-for-linux, linux-sound,
	sound-open-firmware, linux-csky, linux-hexagon, loongarch,
	linux-m68k, linux-openrisc, linux-parisc, linux-sh, linux-arch
In-Reply-To: <cover.1777306795.git.chleroy@kernel.org>

fixup_umip_exception() calls copy_to_user() and checks whether
the returned value is strictly positive.

A subsequent patch will change the return of copy_to_user() to
return -EFAULT in case of error.

Change the test to check that the result is not 0.

For the time being copy_to_user() returns an unsigned value so
'strictly positive' is the same as 'not 0'.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/x86/kernel/umip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 3ce99cbcf187..dfff28ea1dea 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -409,7 +409,7 @@ bool fixup_umip_exception(struct pt_regs *regs)
 			return false;
 
 		nr_copied = copy_to_user(uaddr, dummy_data, dummy_data_size);
-		if (nr_copied  > 0) {
+		if (nr_copied) {
 			/*
 			 * If copy fails, send a signal and tell caller that
 			 * fault was fixed up.
-- 
2.49.0



^ permalink raw reply related

* [RFC PATCH v1 2/9] uaccess: Convert INLINE_COPY_{TO/FROM}_USER to kconfig and reduce ifdefery
From: Christophe Leroy (CS GROUP) @ 2026-04-27 17:13 UTC (permalink / raw)
  To: Yury Norov, Andrew Morton, Linus Torvalds, David Laight,
	Thomas Gleixner
  Cc: Christophe Leroy (CS GROUP), linux-alpha, linux-kernel,
	linux-snps-arc, linux-arm-kernel, linux-mips, linuxppc-dev, kvm,
	linux-riscv, linux-s390, sparclinux, linux-um, dmaengine,
	linux-efi, linux-fsi, amd-gfx, dri-devel, intel-gfx, linux-wpan,
	netdev, linux-wireless, linux-spi, linux-media, linux-staging,
	linux-serial, linux-usb, xen-devel, linux-fsdevel, ocfs2-devel,
	bpf, kasan-dev, linux-mm, linux-x25, rust-for-linux, linux-sound,
	sound-open-firmware, linux-csky, linux-hexagon, loongarch,
	linux-m68k, linux-openrisc, linux-parisc, linux-sh, linux-arch
In-Reply-To: <cover.1777306795.git.chleroy@kernel.org>

Among the 21 architectures supported by the kernel, 16 define both
INLINE_COPY_TO_USER and INLINE_COPY_FROM_USER while the 5 other ones
don't define any of the two.

To simplify and reduce risk of mistakes, convert them to a single
kconfig item named CONFIG_ARCH_WANTS_NOINLINE_COPY_USER which will be
selected by the 5 architectures that don't want inlined copy.

To minimise complication in a later patch, also remove
ifdefery and replace it with IS_ENABLED().

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/alpha/Kconfig                    |  1 +
 arch/arc/include/asm/uaccess.h        |  3 ---
 arch/arm/include/asm/uaccess.h        |  2 --
 arch/arm64/include/asm/uaccess.h      |  3 ---
 arch/csky/Kconfig                     |  1 +
 arch/hexagon/include/asm/uaccess.h    |  3 ---
 arch/loongarch/include/asm/uaccess.h  |  3 ---
 arch/m68k/include/asm/uaccess.h       |  3 ---
 arch/microblaze/include/asm/uaccess.h |  2 --
 arch/mips/include/asm/uaccess.h       |  3 ---
 arch/nios2/include/asm/uaccess.h      |  2 --
 arch/openrisc/include/asm/uaccess.h   |  2 --
 arch/parisc/include/asm/uaccess.h     |  3 ---
 arch/powerpc/Kconfig                  |  1 +
 arch/riscv/Kconfig                    |  1 +
 arch/s390/include/asm/uaccess.h       |  3 ---
 arch/sh/include/asm/uaccess.h         |  2 --
 arch/sparc/include/asm/uaccess_32.h   |  3 ---
 arch/sparc/include/asm/uaccess_64.h   |  2 --
 arch/um/include/asm/uaccess.h         |  3 ---
 arch/x86/Kconfig                      |  1 +
 arch/xtensa/include/asm/uaccess.h     |  2 --
 include/asm-generic/uaccess.h         |  2 --
 include/linux/uaccess.h               | 32 ++++++++++++---------------
 lib/Kconfig                           |  3 +++
 lib/Makefile                          |  3 ++-
 lib/usercopy.c                        |  4 ----
 rust/helpers/uaccess.c                |  2 +-
 28 files changed, 25 insertions(+), 70 deletions(-)

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 7b7dafe7d9df..65e533cead6b 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -11,6 +11,7 @@ config ALPHA
 	select ARCH_NO_PREEMPT
 	select ARCH_NO_SG_CHAIN
 	select ARCH_USE_CMPXCHG_LOCKREF
+	select ARCH_WANTS_NOINLINE_COPY_USER
 	select FORCE_PCI
 	select PCI_DOMAINS if PCI
 	select PCI_SYSCALL if PCI
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index 1e8809ea000a..e8b161b37a03 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -628,9 +628,6 @@ static inline unsigned long __clear_user(void __user *to, unsigned long n)
 	return res;
 }
 
-#define INLINE_COPY_TO_USER
-#define INLINE_COPY_FROM_USER
-
 #define __clear_user			__clear_user
 
 #include <asm-generic/uaccess.h>
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index d6ae80b5df36..7280c162bb71 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -616,8 +616,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 }
 #define __clear_user(addr, n)		(memset((void __force *)addr, 0, n), 0)
 #endif
-#define INLINE_COPY_TO_USER
-#define INLINE_COPY_FROM_USER
 
 static inline unsigned long __must_check clear_user(void __user *to, unsigned long n)
 {
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index b0c83a08dda9..1e20ec91b56f 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -456,9 +456,6 @@ do {									\
 	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label);	\
 } while (0)
 
-#define INLINE_COPY_TO_USER
-#define INLINE_COPY_FROM_USER
-
 extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned long n);
 static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n)
 {
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 4331313a42ff..d010d7eb47bf 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -40,6 +40,7 @@ config CSKY
 	select ARCH_NEED_CMPXCHG_1_EMU
 	select ARCH_WANT_FRAME_POINTERS if !CPU_CK610 && $(cc-option,-mbacktrace)
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+	select ARCH_WANTS_NOINLINE_COPY_USER
 	select COMMON_CLK
 	select CLKSRC_MMIO
 	select CSKY_MPINTC if CPU_CK860
diff --git a/arch/hexagon/include/asm/uaccess.h b/arch/hexagon/include/asm/uaccess.h
index bff77efc0d9a..4bf863217636 100644
--- a/arch/hexagon/include/asm/uaccess.h
+++ b/arch/hexagon/include/asm/uaccess.h
@@ -26,9 +26,6 @@ unsigned long raw_copy_from_user(void *to, const void __user *from,
 				     unsigned long n);
 unsigned long raw_copy_to_user(void __user *to, const void *from,
 				   unsigned long n);
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
 __kernel_size_t __clear_user_hexagon(void __user *dest, unsigned long count);
 #define __clear_user(a, s) __clear_user_hexagon((a), (s))
 
diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h
index 438269313e78..72a04ac88549 100644
--- a/arch/loongarch/include/asm/uaccess.h
+++ b/arch/loongarch/include/asm/uaccess.h
@@ -292,9 +292,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 	return __copy_user((__force void *)to, from, n);
 }
 
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
 /*
  * __clear_user: - Zero a block of memory in user space, with less checking.
  * @addr: Destination address, in user space.
diff --git a/arch/m68k/include/asm/uaccess.h b/arch/m68k/include/asm/uaccess.h
index 64914872a5c9..20e249a6ad07 100644
--- a/arch/m68k/include/asm/uaccess.h
+++ b/arch/m68k/include/asm/uaccess.h
@@ -377,9 +377,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 		return __constant_copy_to_user(to, from, n);
 	return __generic_copy_to_user(to, from, n);
 }
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
 #define __get_kernel_nofault(dst, src, type, err_label)			\
 do {									\
 	type *__gk_dst = (type *)(dst);					\
diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h
index 3aab2f17e046..3355f541e12a 100644
--- a/arch/microblaze/include/asm/uaccess.h
+++ b/arch/microblaze/include/asm/uaccess.h
@@ -250,8 +250,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	return __copy_tofrom_user(to, (__force const void __user *)from, n);
 }
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
 
 /*
  * Copy a null terminated string from userspace.
diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h
index c0cede273c7c..8714caefbac8 100644
--- a/arch/mips/include/asm/uaccess.h
+++ b/arch/mips/include/asm/uaccess.h
@@ -433,9 +433,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 	return __cu_len_r;
 }
 
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
 extern __kernel_size_t __bzero(void __user *addr, __kernel_size_t size);
 
 /*
diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h
index 6ccc9a232c23..46d7312a1c96 100644
--- a/arch/nios2/include/asm/uaccess.h
+++ b/arch/nios2/include/asm/uaccess.h
@@ -57,8 +57,6 @@ extern unsigned long
 raw_copy_from_user(void *to, const void __user *from, unsigned long n);
 extern unsigned long
 raw_copy_to_user(void __user *to, const void *from, unsigned long n);
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
 
 extern long strncpy_from_user(char *__to, const char __user *__from,
 			      long __len);
diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h
index d6500a374e18..c84effde867a 100644
--- a/arch/openrisc/include/asm/uaccess.h
+++ b/arch/openrisc/include/asm/uaccess.h
@@ -218,8 +218,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long size)
 {
 	return __copy_tofrom_user((__force void *)to, from, size);
 }
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
 
 extern unsigned long __clear_user(void __user *addr, unsigned long size);
 
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 6c531d2c847e..1dd6a1dd653f 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -197,7 +197,4 @@ unsigned long __must_check raw_copy_to_user(void __user *dst, const void *src,
 					    unsigned long len);
 unsigned long __must_check raw_copy_from_user(void *dst, const void __user *src,
 					    unsigned long len);
-#define INLINE_COPY_TO_USER
-#define INLINE_COPY_FROM_USER
-
 #endif /* __PARISC_UACCESS_H */
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e93df95b79e7..6816f402fe3d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -187,6 +187,7 @@ config PPC
 	select ARCH_WANT_LD_ORPHAN_WARN
 	select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP	if PPC_RADIX_MMU
 	select ARCH_WANTS_MODULES_DATA_IN_VMALLOC	if PPC_BOOK3S_32 || PPC_8xx
+	select ARCH_WANTS_NOINLINE_COPY_USER
 	select ARCH_WEAK_RELEASE_ACQUIRE
 	select AUDIT_ARCH_COMPAT_GENERIC
 	select BINFMT_ELF
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index d235396c4514..492b920c1a51 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -88,6 +88,7 @@ config RISCV
 	select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
 	select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
 	select ARCH_WANTS_NO_INSTR
+	select ARCH_WANTS_NOINLINE_COPY_USER if MMU
 	select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select ARCH_WEAK_RELEASE_ACQUIRE if ARCH_USE_QUEUED_SPINLOCKS
 	select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index dff035372601..2e0472c20da0 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -30,9 +30,6 @@ void debug_user_asce(int exit);
 #define uaccess_kmsan_or_inline __always_inline
 #endif
 
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
 static uaccess_kmsan_or_inline __must_check unsigned long
 raw_copy_from_user(void *to, const void __user *from, unsigned long size)
 {
diff --git a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h
index a79609eb14be..0cd75308e6d3 100644
--- a/arch/sh/include/asm/uaccess.h
+++ b/arch/sh/include/asm/uaccess.h
@@ -95,8 +95,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	return __copy_user((__force void *)to, from, n);
 }
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
 
 /*
  * Clear the area and return remaining number of bytes
diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h
index 43284b6ec46a..e01f43c6421c 100644
--- a/arch/sparc/include/asm/uaccess_32.h
+++ b/arch/sparc/include/asm/uaccess_32.h
@@ -190,9 +190,6 @@ static inline unsigned long raw_copy_from_user(void *to, const void __user *from
 	return __copy_user((__force void __user *) to, from, n);
 }
 
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
 static inline unsigned long __clear_user(void __user *addr, unsigned long size)
 {
 	unsigned long ret;
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index b825a5dd0210..62ee0b074fec 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -231,8 +231,6 @@ unsigned long __must_check raw_copy_from_user(void *to,
 unsigned long __must_check raw_copy_to_user(void __user *to,
 					   const void *from,
 					   unsigned long size);
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
 
 unsigned long __must_check raw_copy_in_user(void __user *to,
 					   const void __user *from,
diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h
index 0df9ea4abda8..1e14260c7f0f 100644
--- a/arch/um/include/asm/uaccess.h
+++ b/arch/um/include/asm/uaccess.h
@@ -27,9 +27,6 @@ static inline int __access_ok(const void __user *ptr, unsigned long size);
 #define __access_ok __access_ok
 #define __clear_user __clear_user
 
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
 #include <asm-generic/uaccess.h>
 
 static inline int __access_ok(const void __user *ptr, unsigned long size)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f3f7cb01d69d..c1e58d8c6864 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -143,6 +143,7 @@ config X86
 	select ARCH_WANTS_CLOCKSOURCE_READ_INLINE	if X86_64
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select ARCH_WANTS_NO_INSTR
+	select ARCH_WANTS_NOINLINE_COPY_USER
 	select ARCH_WANT_GENERAL_HUGETLB
 	select ARCH_WANT_HUGE_PMD_SHARE		if X86_64
 	select ARCH_WANT_LD_ORPHAN_WARN
diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h
index 56aec6d504fe..f9e1623a7be9 100644
--- a/arch/xtensa/include/asm/uaccess.h
+++ b/arch/xtensa/include/asm/uaccess.h
@@ -237,8 +237,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 	prefetch(from);
 	return __xtensa_copy_user((__force void *)to, from, n);
 }
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
 
 /*
  * We need to return the number of bytes not cleared.  Our memset()
diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h
index b276f783494c..fb33a71fd24e 100644
--- a/include/asm-generic/uaccess.h
+++ b/include/asm-generic/uaccess.h
@@ -91,8 +91,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 	memcpy((void __force *)to, from, n);
 	return 0;
 }
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
 #endif /* CONFIG_UACCESS_MEMCPY */
 
 /*
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 56328601218c..bd1201c81d94 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -83,8 +83,8 @@
  * with that.  They should not be used directly; they are used to implement
  * the 6 functions (copy_{to,from}_user(), __copy_{to,from}_user_inatomic())
  * that are used instead.  Out of those, __... ones are inlined.  Plain
- * copy_{to,from}_user() might or might not be inlined.  If you want them
- * inlined, have asm/uaccess.h define INLINE_COPY_{TO,FROM}_USER.
+ * copy_{to,from}_user() might or might not be inlined.  If you don't want them
+ * inlined, select CONFIG_ARCH_WANTS_NOINLINE_COPY_USER.
  *
  * NOTE: only copy_from_user() zero-pads the destination in case of short copy.
  * Neither __copy_from_user() nor __copy_from_user_inatomic() zero anything
@@ -157,8 +157,8 @@ __copy_to_user(void __user *to, const void *from, unsigned long n)
 }
 
 /*
- * Architectures that #define INLINE_COPY_TO_USER use this function
- * directly in the normal copy_to/from_user(), the other ones go
+ * Architectures that don't select CONFIG_ARCH_WANTS_NOINLINE_COPY_USER use
+ * this function directly in the normal copy_to/from_user(), the other ones go
  * through an extern _copy_to/from_user(), which expands the same code
  * here.
  */
@@ -190,10 +190,9 @@ _inline_copy_from_user(void *to, const void __user *from, unsigned long n)
 	memset(to + (n - res), 0, res);
 	return res;
 }
-#ifndef INLINE_COPY_FROM_USER
+
 extern __must_check unsigned long
 _copy_from_user(void *, const void __user *, unsigned long);
-#endif
 
 static inline __must_check unsigned long
 _inline_copy_to_user(void __user *to, const void *from, unsigned long n)
@@ -207,21 +206,19 @@ _inline_copy_to_user(void __user *to, const void *from, unsigned long n)
 	}
 	return n;
 }
-#ifndef INLINE_COPY_TO_USER
+
 extern __must_check unsigned long
 _copy_to_user(void __user *, const void *, unsigned long);
-#endif
 
 static __always_inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	if (!check_copy_size(to, n, false))
 		return n;
-#ifdef INLINE_COPY_FROM_USER
-	return _inline_copy_from_user(to, from, n);
-#else
-	return _copy_from_user(to, from, n);
-#endif
+	if (IS_ENABLED(CONFIG_ARCH_WANTS_NOINLINE_COPY_USER))
+		return _copy_from_user(to, from, n);
+	else
+		return _inline_copy_from_user(to, from, n);
 }
 
 static __always_inline unsigned long __must_check
@@ -230,11 +227,10 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
 	if (!check_copy_size(from, n, true))
 		return n;
 
-#ifdef INLINE_COPY_TO_USER
-	return _inline_copy_to_user(to, from, n);
-#else
-	return _copy_to_user(to, from, n);
-#endif
+	if (IS_ENABLED(CONFIG_ARCH_WANTS_NOINLINE_COPY_USER))
+		return _copy_to_user(to, from, n);
+	else
+		return _inline_copy_to_user(to, from, n);
 }
 
 #ifndef copy_mc_to_kernel
diff --git a/lib/Kconfig b/lib/Kconfig
index 00a9509636c1..a2e07d4dd2bf 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -68,6 +68,9 @@ config ARCH_HAS_STRNCPY_FROM_USER
 config ARCH_HAS_STRNLEN_USER
 	bool
 
+config ARCH_WANTS_NOINLINE_COPY_USER
+	bool
+
 config GENERIC_STRNCPY_FROM_USER
 	def_bool !ARCH_HAS_STRNCPY_FROM_USER
 
diff --git a/lib/Makefile b/lib/Makefile
index 7c0334d7675b..f4d577910671 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -57,9 +57,10 @@ obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \
 	 list_sort.o uuid.o iov_iter.o clz_ctz.o \
 	 bsearch.o find_bit.o llist.o lwq.o memweight.o kfifo.o \
 	 percpu-refcount.o rhashtable.o base64.o \
-	 once.o refcount.o rcuref.o usercopy.o errseq.o bucket_locks.o \
+	 once.o refcount.o rcuref.o errseq.o bucket_locks.o \
 	 generic-radix-tree.o bitmap-str.o
 obj-y += usercheck.o
+obj-$(CONFIG_ARCH_WANTS_NOINLINE_COPY_USER) += usercopy.o
 obj-y += string_helpers.o
 obj-y += hexdump.o
 obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
diff --git a/lib/usercopy.c b/lib/usercopy.c
index 7a93f56d81dd..d2deb4b0a3c5 100644
--- a/lib/usercopy.c
+++ b/lib/usercopy.c
@@ -4,18 +4,14 @@
 
 /* out-of-line parts */
 
-#if !defined(INLINE_COPY_FROM_USER)
 unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	return _inline_copy_from_user(to, from, n);
 }
 EXPORT_SYMBOL(_copy_from_user);
-#endif
 
-#if !defined(INLINE_COPY_TO_USER)
 unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	return _inline_copy_to_user(to, from, n);
 }
 EXPORT_SYMBOL(_copy_to_user);
-#endif
diff --git a/rust/helpers/uaccess.c b/rust/helpers/uaccess.c
index d9625b9ee046..01de4fbbcc84 100644
--- a/rust/helpers/uaccess.c
+++ b/rust/helpers/uaccess.c
@@ -14,7 +14,7 @@ rust_helper_copy_to_user(void __user *to, const void *from, unsigned long n)
 	return copy_to_user(to, from, n);
 }
 
-#ifdef INLINE_COPY_FROM_USER
+#ifndef CONFIG_ARCH_WANTS_NOINLINE_COPY_USER
 __rust_helper
 unsigned long rust_helper__copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-- 
2.49.0



^ permalink raw reply related

* [RFC PATCH v1 1/9] uaccess: Split check_zeroed_user() out of usercopy.c
From: Christophe Leroy (CS GROUP) @ 2026-04-27 17:13 UTC (permalink / raw)
  To: Yury Norov, Andrew Morton, Linus Torvalds, David Laight,
	Thomas Gleixner
  Cc: Christophe Leroy (CS GROUP), linux-alpha, linux-kernel,
	linux-snps-arc, linux-arm-kernel, linux-mips, linuxppc-dev, kvm,
	linux-riscv, linux-s390, sparclinux, linux-um, dmaengine,
	linux-efi, linux-fsi, amd-gfx, dri-devel, intel-gfx, linux-wpan,
	netdev, linux-wireless, linux-spi, linux-media, linux-staging,
	linux-serial, linux-usb, xen-devel, linux-fsdevel, ocfs2-devel,
	bpf, kasan-dev, linux-mm, linux-x25, rust-for-linux, linux-sound,
	sound-open-firmware, linux-csky, linux-hexagon, loongarch,
	linux-m68k, linux-openrisc, linux-parisc, linux-sh, linux-arch
In-Reply-To: <cover.1777306795.git.chleroy@kernel.org>

Until commit f5a1a536fa14 ("lib: introduce copy_struct_from_user()
helper"), lib/usercopy.c contained only the out-of-line versions
of the user copy functions.

That commit added function check_zeroed_user() into the same file.
Move that function into a new file named usercheck.c, so that the next
patch can change usercopy.c build to a conditional build.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 lib/Makefile                    |  1 +
 lib/{usercopy.c => usercheck.c} | 22 ------------
 lib/usercopy.c                  | 62 ---------------------------------
 3 files changed, 1 insertion(+), 84 deletions(-)
 copy lib/{usercopy.c => usercheck.c} (73%)

diff --git a/lib/Makefile b/lib/Makefile
index f33a24bf1c19..7c0334d7675b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -59,6 +59,7 @@ obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \
 	 percpu-refcount.o rhashtable.o base64.o \
 	 once.o refcount.o rcuref.o usercopy.o errseq.o bucket_locks.o \
 	 generic-radix-tree.o bitmap-str.o
+obj-y += usercheck.o
 obj-y += string_helpers.o
 obj-y += hexdump.o
 obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
diff --git a/lib/usercopy.c b/lib/usercheck.c
similarity index 73%
copy from lib/usercopy.c
copy to lib/usercheck.c
index b00a3a957de6..15b0d9a18435 100644
--- a/lib/usercopy.c
+++ b/lib/usercheck.c
@@ -2,32 +2,10 @@
 #include <linux/compiler.h>
 #include <linux/errno.h>
 #include <linux/export.h>
-#include <linux/fault-inject-usercopy.h>
-#include <linux/instrumented.h>
 #include <linux/kernel.h>
-#include <linux/nospec.h>
-#include <linux/string.h>
 #include <linux/uaccess.h>
 #include <linux/wordpart.h>
 
-/* out-of-line parts */
-
-#if !defined(INLINE_COPY_FROM_USER)
-unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n)
-{
-	return _inline_copy_from_user(to, from, n);
-}
-EXPORT_SYMBOL(_copy_from_user);
-#endif
-
-#if !defined(INLINE_COPY_TO_USER)
-unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n)
-{
-	return _inline_copy_to_user(to, from, n);
-}
-EXPORT_SYMBOL(_copy_to_user);
-#endif
-
 /**
  * check_zeroed_user: check if a userspace buffer only contains zero bytes
  * @from: Source address, in userspace.
diff --git a/lib/usercopy.c b/lib/usercopy.c
index b00a3a957de6..7a93f56d81dd 100644
--- a/lib/usercopy.c
+++ b/lib/usercopy.c
@@ -1,14 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/compiler.h>
-#include <linux/errno.h>
 #include <linux/export.h>
-#include <linux/fault-inject-usercopy.h>
-#include <linux/instrumented.h>
-#include <linux/kernel.h>
-#include <linux/nospec.h>
-#include <linux/string.h>
 #include <linux/uaccess.h>
-#include <linux/wordpart.h>
 
 /* out-of-line parts */
 
@@ -27,57 +19,3 @@ unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n)
 }
 EXPORT_SYMBOL(_copy_to_user);
 #endif
-
-/**
- * check_zeroed_user: check if a userspace buffer only contains zero bytes
- * @from: Source address, in userspace.
- * @size: Size of buffer.
- *
- * This is effectively shorthand for "memchr_inv(from, 0, size) == NULL" for
- * userspace addresses (and is more efficient because we don't care where the
- * first non-zero byte is).
- *
- * Returns:
- *  * 0: There were non-zero bytes present in the buffer.
- *  * 1: The buffer was full of zero bytes.
- *  * -EFAULT: access to userspace failed.
- */
-int check_zeroed_user(const void __user *from, size_t size)
-{
-	unsigned long val;
-	uintptr_t align = (uintptr_t) from % sizeof(unsigned long);
-
-	if (unlikely(size == 0))
-		return 1;
-
-	from -= align;
-	size += align;
-
-	if (!user_read_access_begin(from, size))
-		return -EFAULT;
-
-	unsafe_get_user(val, (unsigned long __user *) from, err_fault);
-	if (align)
-		val &= ~aligned_byte_mask(align);
-
-	while (size > sizeof(unsigned long)) {
-		if (unlikely(val))
-			goto done;
-
-		from += sizeof(unsigned long);
-		size -= sizeof(unsigned long);
-
-		unsafe_get_user(val, (unsigned long __user *) from, err_fault);
-	}
-
-	if (size < sizeof(unsigned long))
-		val &= aligned_byte_mask(size);
-
-done:
-	user_read_access_end();
-	return (val == 0);
-err_fault:
-	user_read_access_end();
-	return -EFAULT;
-}
-EXPORT_SYMBOL(check_zeroed_user);
-- 
2.49.0



^ permalink raw reply related

* [RFC PATCH v1 0/9] uaccess: Convert small fixed size copy_{to/from}_user() to scoped user access
From: Christophe Leroy (CS GROUP) @ 2026-04-27 17:13 UTC (permalink / raw)
  To: Yury Norov, Andrew Morton, Linus Torvalds, David Laight,
	Thomas Gleixner
  Cc: Christophe Leroy (CS GROUP), linux-alpha, linux-kernel,
	linux-snps-arc, linux-arm-kernel, linux-mips, linuxppc-dev, kvm,
	linux-riscv, linux-s390, sparclinux, linux-um, dmaengine,
	linux-efi, linux-fsi, amd-gfx, dri-devel, intel-gfx, linux-wpan,
	netdev, linux-wireless, linux-spi, linux-media, linux-staging,
	linux-serial, linux-usb, xen-devel, linux-fsdevel, ocfs2-devel,
	bpf, kasan-dev, linux-mm, linux-x25, rust-for-linux, linux-sound,
	sound-open-firmware, linux-csky, linux-hexagon, loongarch,
	linux-m68k, linux-openrisc, linux-parisc, linux-sh, linux-arch

A lot of copy_from_user() and copy_to_user() perform copies of small
fixed size pieces of data between kernel and userspace, and don't
care about partial copies.

copy_from_user() and copy_to_user() are big functions optimised for
copying large amounts of data, with cache management, etc. This is
often overkill for small copies that could just be inlined instead.

What makes things a bit more tricky is that those copy functions
are designed to handle partial copies in case of a page fault. But among
the 6000 callers of those functions, only 2% really care about the
quantity of non-copied data that those functions return. All the others
fail as soon as the returned value is not 0, returning -EFAULT.

So the first step in this series is to introduce variants called
copy_from_user_partial() and copy_to_user_partial(), which will be
called by the 2% of users that care about the partial copy; then the
original copy_from_user() and copy_to_user() are changed to return
-EFAULT when the copy fails.

Then the second step is to implement copy of small fixed-size data
with scoped user access instead of calling the arch specific heavy
user copy functions.

Patch 5 can be split into separate patches for each architecture or
subsystem, but let's get some initial feedback and agree on the principle.

Christophe Leroy (CS GROUP) (9):
  uaccess: Split check_zeroed_user() out of usercopy.c
  uaccess: Convert INLINE_COPY_{TO/FROM}_USER to kconfig and reduce
    ifdefery
  x86/umip: Be stricter in fixup_umip_exception()
  uaccess: Introduce copy_{to/from}_user_partial()
  uaccess: Switch to copy_{to/from}_user_partial() when relevant
  uaccess: Change copy_{to/from}_user to return -EFAULT
  x86: Add unsafe_copy_from_user()
  arm64: Add unsafe_copy_from_user()
  uaccess: Convert small fixed size copy_{to/from}_user() to scoped user
    access

 arch/alpha/Kconfig                            |   1 +
 arch/alpha/kernel/osf_sys.c                   |   4 +-
 arch/alpha/kernel/termios.c                   |   2 +-
 arch/arc/include/asm/uaccess.h                |   3 -
 arch/arc/kernel/disasm.c                      |   2 +-
 arch/arm/include/asm/uaccess.h                |   2 -
 arch/arm64/include/asm/gcs.h                  |   2 +-
 arch/arm64/include/asm/uaccess.h              |  30 +++--
 arch/arm64/kernel/signal32.c                  |   2 +-
 arch/csky/Kconfig                             |   1 +
 arch/hexagon/include/asm/uaccess.h            |   3 -
 arch/loongarch/include/asm/uaccess.h          |   3 -
 arch/m68k/include/asm/uaccess.h               |   3 -
 arch/microblaze/include/asm/uaccess.h         |   2 -
 arch/mips/include/asm/uaccess.h               |   3 -
 arch/mips/kernel/rtlx.c                       |   8 +-
 arch/mips/kernel/vpe.c                        |   2 +-
 arch/nios2/include/asm/uaccess.h              |   2 -
 arch/openrisc/include/asm/uaccess.h           |   2 -
 arch/parisc/include/asm/uaccess.h             |   3 -
 arch/powerpc/Kconfig                          |   1 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c           |   4 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c        |   4 +-
 arch/powerpc/kvm/book3s_hv.c                  |   2 +-
 arch/riscv/Kconfig                            |   1 +
 arch/riscv/kernel/signal.c                    |   2 +-
 arch/s390/include/asm/idals.h                 |   8 +-
 arch/s390/include/asm/uaccess.h               |   3 -
 arch/sh/include/asm/uaccess.h                 |   2 -
 arch/sparc/include/asm/uaccess_32.h           |   3 -
 arch/sparc/include/asm/uaccess_64.h           |   2 -
 arch/sparc/kernel/termios.c                   |   2 +-
 arch/um/include/asm/uaccess.h                 |   3 -
 arch/um/kernel/process.c                      |   2 +-
 arch/x86/Kconfig                              |   1 +
 arch/x86/include/asm/uaccess.h                |  29 ++++-
 arch/x86/kernel/umip.c                        |   2 +-
 arch/x86/lib/insn-eval.c                      |   2 +-
 arch/x86/um/signal.c                          |   2 +-
 arch/xtensa/include/asm/uaccess.h             |   2 -
 drivers/android/binder_alloc.c                |   2 +-
 drivers/comedi/comedi_fops.c                  |   4 +-
 drivers/dma/idxd/cdev.c                       |   2 +-
 drivers/firmware/efi/test/efi_test.c          |   2 +-
 drivers/fsi/fsi-scom.c                        |   2 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c |   2 +-
 drivers/gpu/drm/i915/gt/intel_sseu.c          |   4 +-
 drivers/gpu/drm/i915/i915_gem.c               |   4 +-
 drivers/hwtracing/intel_th/msu.c              |   2 +-
 drivers/misc/ibmvmc.c                         |   2 +-
 drivers/misc/vmw_vmci/vmci_host.c             |   2 +-
 drivers/most/most_cdev.c                      |   2 +-
 drivers/net/ieee802154/ca8210.c               |   4 +-
 drivers/net/wireless/ath/wil6210/debugfs.c    |   2 +-
 .../intel/iwlwifi/pcie/gen1_2/trans.c         |   2 +-
 drivers/net/wireless/ti/wlcore/debugfs.c      |   2 +-
 drivers/ps3/ps3-lpm.c                         |   2 +-
 drivers/s390/crypto/zcrypt_api.h              |   4 +-
 drivers/spi/spidev.c                          |   2 +-
 .../staging/media/atomisp/pci/atomisp_cmd.c   |   8 +-
 drivers/tty/tty_ioctl.c                       |  14 +--
 drivers/tty/vt/vc_screen.c                    |   4 +-
 drivers/usb/gadget/function/f_hid.c           |   4 +-
 drivers/usb/gadget/function/f_printer.c       |   2 +-
 drivers/vfio/vfio_iommu_type1.c               |   4 +-
 drivers/xen/xenbus/xenbus_dev_frontend.c      |   2 +-
 fs/namespace.c                                |   2 +-
 fs/ocfs2/dlmfs/dlmfs.c                        |   2 +-
 fs/proc/base.c                                |   4 +-
 include/asm-generic/uaccess.h                 |   2 -
 include/linux/bpfptr.h                        |   2 +-
 include/linux/sockptr.h                       |   4 +-
 include/linux/uaccess.h                       | 107 ++++++++++++++----
 ipc/msg.c                                     |   8 +-
 ipc/sem.c                                     |   8 +-
 ipc/shm.c                                     |  18 +--
 kernel/regset.c                               |   2 +-
 kernel/sys.c                                  |   4 +-
 lib/Kconfig                                   |   3 +
 lib/Makefile                                  |   4 +-
 lib/kfifo.c                                   |   8 +-
 lib/{usercopy.c => usercheck.c}               |  22 ----
 lib/usercopy.c                                |  66 -----------
 mm/kasan/kasan_test_c.c                       |   4 +-
 mm/memory.c                                   |   2 +-
 net/x25/af_x25.c                              |   2 +-
 rust/helpers/uaccess.c                        |   6 +-
 sound/pci/emu10k1/emufx.c                     |   4 +-
 sound/pci/rme9652/hdsp.c                      |   6 +-
 sound/soc/intel/avs/probes.c                  |   6 +-
 sound/soc/sof/compress.c                      |  12 +-
 sound/soc/sof/sof-client-probes.c             |   6 +-
 92 files changed, 269 insertions(+), 288 deletions(-)
 copy lib/{usercopy.c => usercheck.c} (73%)

-- 
2.49.0



^ permalink raw reply

* Re: [PATCH v2] powerpc/powermac: Remove pmac_low_i2c_{lock,unlock}()
From: Bart Van Assche @ 2026-04-27 17:10 UTC (permalink / raw)
  To: Madhavan Srinivasan, Michael Ellerman
  Cc: Nicholas Piggin, Christophe Leroy, linuxppc-dev, Thomas Gleixner,
	Ingo Molnar, Kees Cook
In-Reply-To: <20260316174747.3871924-1-bvanassche@acm.org>

On 3/16/26 10:47 AM, Bart Van Assche wrote:
> Commit a28d3af2a26c ("[PATCH] 2/5 powerpc: Rework PowerMac i2c part 2")
> removed the last calls to the pmac_low_i2c_{lock,unlock}() functions.
> Hence, remove these two functions.

It seems like this patch hasn't made it into Linux kernel v7.1-rc1. Has
it been sent to the right maintainer?

Thanks,

Bart.


^ permalink raw reply

* Re: [Linaro-mm-sig] Re: [PATCH V13 5/7] rust: Make __udivdi3() and __umoddi3() panic
From: Arnd Bergmann @ 2026-04-27 16:05 UTC (permalink / raw)
  To: Link Mauve, David Laight
  Cc: Mukesh Kumar Chaurasiya (IBM), Madhavan Srinivasan,
	Michael Ellerman, Nicholas Piggin, Christophe Leroy,
	Peter Zijlstra, Josh Poimboeuf, Jason Baron, Alice Ryhl,
	Steven Rostedt, Ard Biesheuvel, Sumit Semwal,
	Christian König, Miguel Ojeda, boqun, Gary Guo,
	Björn Roy Baron, Benno Lossin, Andreas Hindborg,
	Trevor Gross, Danilo Krummrich, Nathan Chancellor,
	Nick Desaulniers, Bill Wendling, Justin Stitt, tamird,
	Nicolas Schier, simona.vetter, linuxppc-dev, linux-kernel,
	linux-media, dri-devel, linaro-mm-sig, rust-for-linux, llvm
In-Reply-To: <aes7L3M1d_LvvGKa@luna>

On Fri, Apr 24, 2026, at 11:43, Link Mauve wrote:
> On Fri, Apr 24, 2026 at 10:14:25AM +0100, David Laight wrote:
>> On Fri, 24 Apr 2026 11:17:40 +0530
>> "Mukesh Kumar Chaurasiya (IBM)" <mkchauras@gmail.com> wrote:
>> 
>> > From: Link Mauve <linkmauve@linkmauve.fr>
>> > 
>> > The core crate currently depends on these two functions for i64/u64/
>> > i128/u128/core::time::Duration formatting, but we shouldn’t use that in
>> > the kernel so let’s panic if they are ever called.
>> 
>> Ugg.
>> Surely you can make it a link-time failure?
>> Or change the underlying code to avoid the divide.
>
> This is currently a link-time failure, which means if any of the core
> crate or drm_panic_qr are linked in, the kernel will fail to link.
>
> And since we rely on core, that means no kernel can be built on PowerPC
> without this patch.
>
> A possible solution would be to change the core crate to not divide
> u64s, but the last time I tried to do that I couldn’t quite figure out
> how to do formatting without it, maybe I will just open an issue against
> Rust.

I think you just need to rewrite the patch description, it appears
that everyone misunderstands what you are doing in the patch:

Like on arm32, normal kernel code will continue to cause
a link failure the same way in Rust as it does in C, and your
patch does not change that. When you (or Link Mauve) say "let's panic",
I assume this was just a mistake in summarizing the logic
that turns the unreachable code from the Rust core crate
into a runtime assertion since it would be impossible to link
the kernel otherwise.

Silently turning a 64-bit division into a panic() for normal
code would of course be unacceptable, but that's not what
you are doing here.

    Arnd


^ permalink raw reply

* Re: [PATCH v2] pseries/kexec: skip resetting CPUs added by firmware but not started by the kernel
From: Anushree Mathur @ 2026-04-27 10:48 UTC (permalink / raw)
  To: Vishal Chourasia, Shivang Upadhyay
  Cc: Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
	Christophe Leroy, Srikar Dronamraju, Shrikanth Hegde,
	Nysal Jan K.A., Ritesh Harjani, Sourabh Jain, Anushree Mathur,
	linux-kernel, linuxppc-dev
In-Reply-To: <0732de44-964c-4e0f-b4fd-dcc631ba70fa@linux.ibm.com>



On 07/04/26 3:55 PM, Vishal Chourasia wrote:
> On 07/04/26 15:49, Shivang Upadhyay wrote:
>> Hi,
>> Thanks for your review.
>>
>> On Mon, 2026-04-06 at 14:22 +0530, Vishal Chourasia wrote:
>>> Hi Shivang,
>>>
>>> Thanks for working on this issue.
>>> A few questions and concerns about the approach:
>>>
>>> 1. Was this issue only observed with QEMU-based virtualization, or
>>> does
>>> it also reproduce on PowerVM/phyp? The commit message and sample logs
>>> don't clarify this. If this is QEMU-specific, I think we should fix
>>> this
>>> in QEMU rather than working around it in the kernel.
>> Currently this is only happening in Qemu (both tcg and kvm mode). But I
>> think this should be reproducible on phyp also. Ill confirm wheather it
>> is really the case or not.
>>
>>> 2. The approach taken here moves away from the PAPR interface. The
>>> kernel currently uses H_SIGNAL_SYS_RESET_ALL_OTHERS, which is the
>>> architecturally defined hcall for this purpose. Replacing it with a
>>> per-CPU loop that checks internal kernel state (paca cpu_start)
>>> breaks
>>> the clean abstraction between guest and
>>> QEMU's sPAPR implementation should behave the same way. The
>>> hypervisor
>> Yeah it is a valid concern about ownership for this resets. Ill try to
>> see if this fix is possible in qemu itself.
>>
>>> (QEMU) should maintain a list of CPUs that have been
>>> activated/online/started and given to the guest. When
>>> H_SIGNAL_SYS_RESET_ALL_OTHERS is called, QEMU should only reset those
>>> CPUs that the guest has actually started. Unless the guest makes the
>>> RTAS start-cpu call for a CPU, QEMU should not include that CPU in
>>> the
>>> set of CPUs to be reset.
>>>
>>> I think discussing this would help determine the right fix location.
>>>
>>> Can you refer to the following commit in QEMU to see if help in this
>>> case.
>>>
>>> commit fb802acdc8b162084e9e60d42aeba79097d14d2b
>>> Author: Nicholas Piggin <npiggin@gmail.com>
>>> Date:   Tue Mar 18 15:03:48 2025 +1000
>>>
>>>       ppc/spapr: Fix RTAS stopped state
>>>
>> Thanks for this reference. cpu->quiesced state was introduced in this
>> patch, for modelling "RTAS stopped" state.
>>
>> as per the commit message:
>> A KVM spapr guest boots with all secondary CPUs defined to be in the
>> RTAS stopped" state. In this state, the CPU is only responsive to the
>> start-cpu RTAS call.
>>
>> So, we should be able to use this to check wheather cpu is started or
>> not. Only other concern here would be about phyp's implementation for
>> this.
>
> Yes, something like this.
>
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
>
> index 032805a8d0..8c51372cf8 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -1105,6 +1105,9 @@ static target_ulong 
> h_signal_sys_reset(PowerPCCPU *cpu,
>                      continue;
>                  }
>              }
> +
> +            if (c->env.quiesced) continue;
> +
>              run_on_cpu(cs, spapr_do_system_reset_on_cpu, 
> RUN_ON_CPU_NULL);
>          }
>          return H_SUCCESS;
>
>>
>> Thanks.
>> ~Shivang.
>>
>> Hi Vishal/Shivang,
>>
>> I have tested this qemu patch mentioned here, after applying this the 
>> issue is getting fixed.
>>
>> Thank you!
>> Anushree Mathur
>>



^ permalink raw reply

* Re: [PATCH v2] pseries/kexec: skip resetting CPUs added by firmware but not started by the kernel
From: Anushree Mathur @ 2026-04-27 10:55 UTC (permalink / raw)
  To: Vishal Chourasia, Shivang Upadhyay
  Cc: Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
	Christophe Leroy, Srikar Dronamraju, Shrikanth Hegde,
	Nysal Jan K.A., Ritesh Harjani, Sourabh Jain, Anushree Mathur,
	linux-kernel, linuxppc-dev
In-Reply-To: <0732de44-964c-4e0f-b4fd-dcc631ba70fa@linux.ibm.com>



On 07/04/26 3:55 PM, Vishal Chourasia wrote:
> On 07/04/26 15:49, Shivang Upadhyay wrote:
>> Hi,
>> Thanks for your review.
>>
>> On Mon, 2026-04-06 at 14:22 +0530, Vishal Chourasia wrote:
>>> Hi Shivang,
>>>
>>> Thanks for working on this issue.
>>> A few questions and concerns about the approach:
>>>
>>> 1. Was this issue only observed with QEMU-based virtualization, or
>>> does
>>> it also reproduce on PowerVM/phyp? The commit message and sample logs
>>> don't clarify this. If this is QEMU-specific, I think we should fix
>>> this
>>> in QEMU rather than working around it in the kernel.
>> Currently this is only happening in Qemu (both tcg and kvm mode). But I
>> think this should be reproducible on phyp also. Ill confirm wheather it
>> is really the case or not.
>>
>>> 2. The approach taken here moves away from the PAPR interface. The
>>> kernel currently uses H_SIGNAL_SYS_RESET_ALL_OTHERS, which is the
>>> architecturally defined hcall for this purpose. Replacing it with a
>>> per-CPU loop that checks internal kernel state (paca cpu_start)
>>> breaks
>>> the clean abstraction between guest and
>>> QEMU's sPAPR implementation should behave the same way. The
>>> hypervisor
>> Yeah it is a valid concern about ownership for this resets. Ill try to
>> see if this fix is possible in qemu itself.
>>
>>> (QEMU) should maintain a list of CPUs that have been
>>> activated/online/started and given to the guest. When
>>> H_SIGNAL_SYS_RESET_ALL_OTHERS is called, QEMU should only reset those
>>> CPUs that the guest has actually started. Unless the guest makes the
>>> RTAS start-cpu call for a CPU, QEMU should not include that CPU in
>>> the
>>> set of CPUs to be reset.
>>>
>>> I think discussing this would help determine the right fix location.
>>>
>>> Can you refer to the following commit in QEMU to see if help in this
>>> case.
>>>
>>> commit fb802acdc8b162084e9e60d42aeba79097d14d2b
>>> Author: Nicholas Piggin <npiggin@gmail.com>
>>> Date:   Tue Mar 18 15:03:48 2025 +1000
>>>
>>>       ppc/spapr: Fix RTAS stopped state
>>>
>> Thanks for this reference. cpu->quiesced state was introduced in this
>> patch, for modelling "RTAS stopped" state.
>>
>> as per the commit message:
>> A KVM spapr guest boots with all secondary CPUs defined to be in the
>> RTAS stopped" state. In this state, the CPU is only responsive to the
>> start-cpu RTAS call.
>>
>> So, we should be able to use this to check wheather cpu is started or
>> not. Only other concern here would be about phyp's implementation for
>> this.
>
> Yes, something like this.
>
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
>
> index 032805a8d0..8c51372cf8 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -1105,6 +1105,9 @@ static target_ulong 
> h_signal_sys_reset(PowerPCCPU *cpu,
>                      continue;
>                  }
>              }
> +
> +            if (c->env.quiesced) continue;
> +
>              run_on_cpu(cs, spapr_do_system_reset_on_cpu, 
> RUN_ON_CPU_NULL);
>          }
>          return H_SUCCESS;
>
>>
>> Thanks.
>> ~Shivang.
>>
>>
>>


Hi Vishal/Shivang,

I have tested the QEMU patch mentioned here; after applying it, the
issue is fixed. I will share all the logs once the new patch is
sent out to the QEMU mailing list!

Thank you!
Anushree Mathur



^ permalink raw reply

* Re: [PATCH v6 00/24] PCI: Convert all dynamic sysfs attributes to static
From: Shivaprasad G Bhat @ 2026-04-27 14:09 UTC (permalink / raw)
  To: Krzysztof Wilczyński, Bjorn Helgaas
  Cc: Bjorn Helgaas, Manivannan Sadhasivam, Lorenzo Pieralisi,
	Magnus Lindholm, Matt Turner, Richard Henderson, Christophe Leroy,
	Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
	Dexuan Cui, Krzysztof Hałasa, Lukas Wunner,
	Oliver O'Halloran, Saurabh Singh Sengar, Shuan He,
	Srivatsa Bhat, Ilpo Järvinen, linux-pci, linux-alpha,
	linuxppc-dev
In-Reply-To: <20260423172200.GA2271460@rocinante>

Hi Krzysztof,


On 4/23/26 10:52 PM, Krzysztof Wilczyński wrote:
> Hello,
>
>> This series converts every dynamically allocated PCI sysfs attribute to
>> a static const definition.  After the full series, pci_sysfs_init() and
>> sysfs_initialized are gone, and every sysfs file is created by the
>> driver model at device_add() time.
> A note on testing:
>
>    0-day bot (recent test runs; newer builds will arrive later):
>      - https://lore.kernel.org/linux-pci/202604231622.DgR0zih3-lkp@intel.com
>      - https://lore.kernel.org/linux-pci/202604161928.DzuHQmeM-lkp@intel.com
>      - https://lore.kernel.org/linux-pci/202604121312.sF0Ua4gP-lkp@intel.com
>      - https://lore.kernel.org/linux-pci/202604111631.lrwAylMM-lkp@intel.com
>      - https://lore.kernel.org/linux-pci/202603170336.zSLrDvlj-lkp@intel.com
>      - https://lore.kernel.org/linux-pci/202603122052.tMV5rzNq-lkp@intel.com
>      - https://lore.kernel.org/linux-pci/202603081334.b91RGVS6-lkp@intel.com
>      - https://lore.kernel.org/linux-pci/202603060207.pnGfKgGa-lkp@intel.com
>
>    KernelCI (for the "for-kernelci" branch):
>      - https://dashboard.kernelci.org/tree/linux-pci/for-kernelci/941dd7c3e16840724dc961f10b84e193d13cdb57
>      - https://dashboard.kernelci.org/tree/linux-pci/for-kernelci/683e66b11da157f730101f6919c7468a09cf3e3f
>      - https://dashboard.kernelci.org/tree/linux-pci/for-kernelci/209e2cfd205a8aad4bae32e6f82b96b20902aa74
>      - https://dashboard.kernelci.org/tree/linux-pci/for-kernelci/70293477e2c0ae8cbc250098818e726e1d658b53
>      - https://dashboard.kernelci.org/tree?ts=pci
>
>    Sashiko's feedback:
>      - https://sashiko.dev/#/patchset/20260422161407.118748-1-kwilczynski%40kernel.org
>      - https://sashiko.dev/#/patchset/20260416180107.777065-1-kwilczynski%40kernel.org
>      - https://sashiko.dev/#/patchset/20260411080148.471335-1-kwilczynski%40kernel.org
>      - https://sashiko.dev/#/patchset/20260410055040.39233-1-kwilczynski%40kernel.org
>
> I sadly do not own any Alpha or PowerPC hardware, so when I was testing
> these architectures while working on the series, it would be only under
> QEMU.

I tested the patches on PPC64 machines running as pSeries (on PowerVM and
KVM) and also as PowerNV.

The sysfs attributes look normal on boot, on hotplug/unplug of
devices, and in SRIOV use cases.

I see no warnings/errors in dmesg during PCI scans or initialization.


Tested-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>


Thanks,

Shivaprasad


> That said, Magnus Lindholm was able to test the series on the Alpha
> hardware he owns, see:
>
>    - https://lore.kernel.org/linux-pci/CA+=Fv5Q1tZQwnanw99NbvzT-QenfYz7vUdY02_TuPqHX32ZAiA@mail.gmail.com
>
> Lorenzo Pieralisi did some testing reported outside the mailing list (we
> talked on IRC), on the platform he had issues before, and while the issues
> were more with procfs races, similar to the sysfs ones this series aims to
> fix, he didn't notice regressions when having this series applied.
>
> Thank you!
>
>          Krzysztof
>


^ permalink raw reply

* Re: [PATCH V15 2/7] dma-resv: Fix undefined symbol when CONFIG_DMA_SHARED_BUFFER is disabled
From: Gary Guo @ 2026-04-27 13:39 UTC (permalink / raw)
  To: Mukesh Kumar Chaurasiya (IBM), maddy, mpe, npiggin, chleroy,
	peterz, jpoimboe, jbaron, aliceryhl, rostedt, ardb, ojeda, boqun,
	gary, bjorn3_gh, lossin, a.hindborg, tmgross, dakr, nathan,
	nick.desaulniers+lkml, morbo, justinstitt, daniel.almeida,
	fujita.tomonori, viresh.kumar, prafulrai522, gregkh, arnd, tamird,
	mark.rutland, lyude, lina+kernel, linuxppc-dev, linux-kernel,
	rust-for-linux, llvm
  Cc: Christian König
In-Reply-To: <20260426105932.2270364-3-mkchauras@gmail.com>

On Sun Apr 26, 2026 at 11:59 AM BST, Mukesh Kumar Chaurasiya (IBM) wrote:
> When building with LLVM=1 for architectures like powerpc where
> CONFIG_DMA_SHARED_BUFFER is not enabled, the build fails with:
> 
>   ld.lld: error: undefined symbol: dma_resv_reset_max_fences
>   >>> referenced by helpers.c
>   >>>               rust/helpers/helpers.o:(rust_helper_dma_resv_unlock)
> 
> The issue occurs because:
> 1. CONFIG_DEBUG_MUTEXES=y is enabled
> 2. CONFIG_DMA_SHARED_BUFFER is not enabled
> 3. dma_resv_reset_max_fences() is declared in the header when
>    CONFIG_DEBUG_MUTEXES is set
> 4. But the function is only compiled in drivers/dma-buf/dma-resv.c,
>    which is only built when CONFIG_DMA_SHARED_BUFFER is enabled
> 5. Rust helpers call dma_resv_unlock() which calls
>    dma_resv_reset_max_fences(), causing an undefined symbol
> 
> Fix this by compiling `dma-resv.c` file only when CONFIG_DMA_SHARED_BUFFER
> is enabled.
> 
> Fixes: 9b836641d3bf ("rust: helpers: Add bindings/wrappers for dma_resv_lock")
> Reviewed-by: Christian König <christian.koenig@amd.com>
> Signed-off-by: Mukesh Kumar Chaurasiya (IBM) <mkchauras@gmail.com>

Reviewed-by: Gary Guo <gary@garyguo.net>

> ---
>  rust/helpers/helpers.c | 2 ++
>  1 file changed, 2 insertions(+)



^ permalink raw reply

* Re: [PATCH V13 5/7] rust: Make __udivdi3() and __umoddi3() panic
From: Gary Guo @ 2026-04-27 13:04 UTC (permalink / raw)
  To: David Laight, Gary Guo
  Cc: Mukesh Kumar Chaurasiya, maddy, mpe, npiggin, chleroy, peterz,
	jpoimboe, jbaron, aliceryhl, rostedt, ardb, sumit.semwal,
	christian.koenig, ojeda, boqun, bjorn3_gh, lossin, a.hindborg,
	tmgross, dakr, nathan, nick.desaulniers+lkml, morbo, justinstitt,
	tamird, arnd, nsc, simona.vetter, linuxppc-dev, linux-kernel,
	linux-media, dri-devel, linaro-mm-sig, rust-for-linux, llvm,
	Link Mauve
In-Reply-To: <20260426230408.489c68c3@pumpkin>

On Sun Apr 26, 2026 at 11:04 PM BST, David Laight wrote:
> On Sun, 26 Apr 2026 15:20:31 +0100
> "Gary Guo" <gary@garyguo.net> wrote:
>
>> On Sun Apr 26, 2026 at 8:52 AM BST, Mukesh Kumar Chaurasiya wrote:
>> > On Fri, Apr 24, 2026 at 01:43:43PM +0100, Gary Guo wrote:  
>> >> On Fri Apr 24, 2026 at 6:47 AM BST, Mukesh Kumar Chaurasiya (IBM) wrote:  
>> >> > From: Link Mauve <linkmauve@linkmauve.fr>
>> >> >
>> >> > The core crate currently depends on these two functions for i64/u64/
>> >> > i128/u128/core::time::Duration formatting, but we shouldn’t use that in
>> >> > the kernel so let’s panic if they are ever called.
>> >> >
>> >> > This doesn’t yet fix drm_panic_qr.rs, which also uses __udivdi3 when
>> >> > CONFIG_CC_OPTIMIZE_FOR_SIZE=y, but at least makes the rest of the kernel
>> >> > build on PPC32.  
>> >> 
>> >> Can we always build libcore with `-C opt-level=2` even if
>> >> `CONFIG_CC_OPTIMIZE_FOR_SIZE` is specified? It feels like a better fix than
>> >> stubbing things out.
>> >> 
>> >> Best,
>> >> Gary
>> >>   
>> > The issue is not coming from libcore itself. It's the driver that's
>> > causing this.  
>> 
>> Sorry. I quoted the wrong part. I was asking if compiling libcore with O2 gets
>> rid of its use of the builtins, as that's what the change this commit is for.
>> 
>> Formatting of u64 will be needed, so we should make sure that these works as
>> intended.
>
> This code (from nolibc) will convert u64 to ascii in any base:
>
> [snip]
>
> Not hard to do without any divides at all.

I mean, that is exactly the logic LLVM uses when it lowers code where the
divisor is constant. It's just that it decides that the multiply-by-inverse
lowering shouldn't be done with `Os`.

In this case libcore is provided by Rust (think of it as freestanding headers);
that code is not part of the kernel. And currently they just use constant-divisor
division and have this task performed by LLVM. We could ask Rust to explicitly use
multiply-by-inverse to avoid generating __udivdi3 in the future, but even if we
do that it won't be available to already-released Rust compilers.

In the meantime we need to work around this.

Best,
Gary


^ permalink raw reply

* [PATCH v5 8/8] powerpc: Remove unused functions
From: Mukesh Kumar Chaurasiya (IBM) @ 2026-04-27 12:27 UTC (permalink / raw)
  To: maddy, mpe, npiggin, chleroy, ryabinin.a.a, glider, andreyknvl,
	dvyukov, vincenzo.frascino, oleg, kees, luto, wad, mchauras,
	sshegde, thuth, ruanjinjie, akpm, macro, ldv, charlie, deller,
	kevin.brodsky, ritesh.list, yeoreum.yun, agordeev, segher,
	mark.rutland, ryan.roberts, pmladek, feng.tang, peterz, kan.liang,
	linuxppc-dev, linux-kernel, kasan-dev
  Cc: Mukesh Kumar Chaurasiya, Samir M, David Gow, Venkat Rao Bagalkote
In-Reply-To: <20260427122742.210074-1-mkchauras@gmail.com>

From: Mukesh Kumar Chaurasiya <mkchauras@linux.ibm.com>

After enabling GENERIC_ENTRY, some functions are left unused.
Clean up all those functions, which include:
 - do_syscall_trace_enter
 - do_syscall_trace_leave
 - do_notify_resume
 - do_seccomp

Signed-off-by: Mukesh Kumar Chaurasiya <mkchauras@linux.ibm.com>
Tested-by: Samir M <samir@linux.ibm.com>
Tested-by: David Gow <davidgow@google.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
---
 arch/powerpc/include/asm/ptrace.h   |   3 -
 arch/powerpc/include/asm/signal.h   |   1 -
 arch/powerpc/kernel/ptrace/ptrace.c | 138 ----------------------------
 arch/powerpc/kernel/signal.c        |  17 ----
 4 files changed, 159 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 2e741ea57b80..fdeb97421785 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -177,9 +177,6 @@ extern unsigned long profile_pc(struct pt_regs *regs);
 #define profile_pc(regs) instruction_pointer(regs)
 #endif
 
-long do_syscall_trace_enter(struct pt_regs *regs);
-void do_syscall_trace_leave(struct pt_regs *regs);
-
 static inline void set_return_regs_changed(void)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h
index 922d43700fb4..21af92cdb237 100644
--- a/arch/powerpc/include/asm/signal.h
+++ b/arch/powerpc/include/asm/signal.h
@@ -7,7 +7,6 @@
 #include <uapi/asm/ptrace.h>
 
 struct pt_regs;
-void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
 
 unsigned long get_min_sigframe_size_32(void);
 unsigned long get_min_sigframe_size_64(void);
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
index f006a03a0211..316d4f5ead8e 100644
--- a/arch/powerpc/kernel/ptrace/ptrace.c
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -192,144 +192,6 @@ long arch_ptrace(struct task_struct *child, long request,
 	return ret;
 }
 
-#ifdef CONFIG_SECCOMP
-static int do_seccomp(struct pt_regs *regs)
-{
-	if (!test_thread_flag(TIF_SECCOMP))
-		return 0;
-
-	/*
-	 * The ABI we present to seccomp tracers is that r3 contains
-	 * the syscall return value and orig_gpr3 contains the first
-	 * syscall parameter. This is different to the ptrace ABI where
-	 * both r3 and orig_gpr3 contain the first syscall parameter.
-	 */
-	regs->gpr[3] = -ENOSYS;
-
-	/*
-	 * We use the __ version here because we have already checked
-	 * TIF_SECCOMP. If this fails, there is nothing left to do, we
-	 * have already loaded -ENOSYS into r3, or seccomp has put
-	 * something else in r3 (via SECCOMP_RET_ERRNO/TRACE).
-	 */
-	if (__secure_computing())
-		return -1;
-
-	/*
-	 * The syscall was allowed by seccomp, restore the register
-	 * state to what audit expects.
-	 * Note that we use orig_gpr3, which means a seccomp tracer can
-	 * modify the first syscall parameter (in orig_gpr3) and also
-	 * allow the syscall to proceed.
-	 */
-	regs->gpr[3] = regs->orig_gpr3;
-
-	return 0;
-}
-#else
-static inline int do_seccomp(struct pt_regs *regs) { return 0; }
-#endif /* CONFIG_SECCOMP */
-
-/**
- * do_syscall_trace_enter() - Do syscall tracing on kernel entry.
- * @regs: the pt_regs of the task to trace (current)
- *
- * Performs various types of tracing on syscall entry. This includes seccomp,
- * ptrace, syscall tracepoints and audit.
- *
- * The pt_regs are potentially visible to userspace via ptrace, so their
- * contents is ABI.
- *
- * One or more of the tracers may modify the contents of pt_regs, in particular
- * to modify arguments or even the syscall number itself.
- *
- * It's also possible that a tracer can choose to reject the system call. In
- * that case this function will return an illegal syscall number, and will put
- * an appropriate return value in regs->r3.
- *
- * Return: the (possibly changed) syscall number.
- */
-long do_syscall_trace_enter(struct pt_regs *regs)
-{
-	u32 flags;
-
-	flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
-
-	if (flags) {
-		int rc = ptrace_report_syscall_entry(regs);
-
-		if (unlikely(flags & _TIF_SYSCALL_EMU)) {
-			/*
-			 * A nonzero return code from
-			 * ptrace_report_syscall_entry() tells us to prevent
-			 * the syscall execution, but we are not going to
-			 * execute it anyway.
-			 *
-			 * Returning -1 will skip the syscall execution. We want
-			 * to avoid clobbering any registers, so we don't goto
-			 * the skip label below.
-			 */
-			return -1;
-		}
-
-		if (rc) {
-			/*
-			 * The tracer decided to abort the syscall. Note that
-			 * the tracer may also just change regs->gpr[0] to an
-			 * invalid syscall number, that is handled below on the
-			 * exit path.
-			 */
-			goto skip;
-		}
-	}
-
-	/* Run seccomp after ptrace; allow it to set gpr[3]. */
-	if (do_seccomp(regs))
-		return -1;
-
-	/* Avoid trace and audit when syscall is invalid. */
-	if (regs->gpr[0] >= NR_syscalls)
-		goto skip;
-
-	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
-		trace_sys_enter(regs, regs->gpr[0]);
-
-	if (!is_32bit_task())
-		audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
-				    regs->gpr[5], regs->gpr[6]);
-	else
-		audit_syscall_entry(regs->gpr[0],
-				    regs->gpr[3] & 0xffffffff,
-				    regs->gpr[4] & 0xffffffff,
-				    regs->gpr[5] & 0xffffffff,
-				    regs->gpr[6] & 0xffffffff);
-
-	/* Return the possibly modified but valid syscall number */
-	return regs->gpr[0];
-
-skip:
-	/*
-	 * If we are aborting explicitly, or if the syscall number is
-	 * now invalid, set the return value to -ENOSYS.
-	 */
-	regs->gpr[3] = -ENOSYS;
-	return -1;
-}
-
-void do_syscall_trace_leave(struct pt_regs *regs)
-{
-	int step;
-
-	audit_syscall_exit(regs);
-
-	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
-		trace_sys_exit(regs, regs->result);
-
-	step = test_thread_flag(TIF_SINGLESTEP);
-	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
-		ptrace_report_syscall_exit(regs, step);
-}
-
 void __init pt_regs_check(void);
 
 /*
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 9f1847b4742e..bb42a8b6c642 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -293,23 +293,6 @@ static void do_signal(struct task_struct *tsk)
 	signal_setup_done(ret, &ksig, test_thread_flag(TIF_SINGLESTEP));
 }
 
-void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
-{
-	if (thread_info_flags & _TIF_UPROBE)
-		uprobe_notify_resume(regs);
-
-	if (thread_info_flags & _TIF_PATCH_PENDING)
-		klp_update_patch_state(current);
-
-	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) {
-		BUG_ON(regs != current->thread.regs);
-		do_signal(current);
-	}
-
-	if (thread_info_flags & _TIF_NOTIFY_RESUME)
-		resume_user_mode_work(regs);
-}
-
 static unsigned long get_tm_stackpointer(struct task_struct *tsk)
 {
 	/* When in an active transaction that takes a signal, we need to be
-- 
2.53.0



^ permalink raw reply related

* [PATCH v5 7/8] powerpc: Enable GENERIC_ENTRY feature
From: Mukesh Kumar Chaurasiya (IBM) @ 2026-04-27 12:27 UTC (permalink / raw)
  To: maddy, mpe, npiggin, chleroy, ryabinin.a.a, glider, andreyknvl,
	dvyukov, vincenzo.frascino, oleg, kees, luto, wad, mchauras,
	sshegde, thuth, ruanjinjie, akpm, macro, ldv, charlie, deller,
	kevin.brodsky, ritesh.list, yeoreum.yun, agordeev, segher,
	mark.rutland, ryan.roberts, pmladek, feng.tang, peterz, kan.liang,
	linuxppc-dev, linux-kernel, kasan-dev
  Cc: Mukesh Kumar Chaurasiya, Samir M, David Gow, Venkat Rao Bagalkote
In-Reply-To: <20260427122742.210074-1-mkchauras@gmail.com>

From: Mukesh Kumar Chaurasiya <mkchauras@linux.ibm.com>

Enable the generic IRQ entry/exit infrastructure on PowerPC by selecting
GENERIC_ENTRY and integrating the architecture-specific interrupt and
syscall handlers with the generic entry/exit APIs.

This change replaces PowerPC’s local interrupt entry/exit handling with
calls to the generic irqentry_* helpers, aligning the architecture with
the common kernel entry model. The macros that define interrupt, async,
and NMI handlers are updated to use irqentry_enter()/irqentry_exit()
and irqentry_nmi_enter()/irqentry_nmi_exit() where applicable. Also
convert the PowerPC syscall entry and exit paths to use the generic
entry/exit framework and integrate them with the common syscall
handling routines.

Key updates include:
 - The architecture now selects GENERIC_ENTRY in Kconfig.
 - Replace interrupt_enter/exit_prepare() with arch_interrupt_* helpers.
 - Integrate irqentry_enter()/exit() in standard and async interrupt paths.
 - Integrate irqentry_nmi_enter()/exit() in NMI handlers.
 - Remove redundant irq_enter()/irq_exit() calls now handled generically.
 - Use irqentry_exit_cond_resched() for preemption checks.
 - interrupt.c and syscall.c are simplified to delegate context
   management and user exit handling to the generic entry path.
 - The new pt_regs field `exit_flags` introduced earlier is now used
   to carry per-syscall exit state flags (e.g. _TIF_RESTOREALL).
 - Remove unused code.

This change establishes the necessary wiring for PowerPC to use the
generic IRQ entry/exit framework while maintaining existing semantics.
This aligns PowerPC with the common entry code used by other
architectures and reduces duplicated logic around syscall tracing,
context tracking, and signal handling.

The performance benchmarks from perf bench basic syscall are below:

perf bench syscall usec/op (-ve is improvement)

| Syscall | Base        | New         | change % |
| ------- | ----------- | ----------- | -------- |
| basic   | 0.093543    | 0.093023    | -0.56    |
| execve  | 446.557781  | 450.107172  | +0.79    |
| fork    | 1142.204391 | 1156.377214 | +1.24    |
| getpgid | 0.097666    | 0.092677    | -5.11    |

perf bench syscall ops/sec (+ve is improvement)

| Syscall | Base     | New      | change % |
| ------- | -------- | -------- | -------- |
| basic   | 10690548 | 10750140 | +0.56    |
| execve  | 2239     | 2221     | -0.80    |
| fork    | 875      | 864      | -1.26    |
| getpgid | 10239026 | 10790324 | +5.38    |

IPI latency benchmark (-ve is improvement)

| Metric         | Base (ns)     | New (ns)      | % Change |
| -------------- | ------------- | ------------- | -------- |
| Dry run        | 583136.56     | 584136.35     | 0.17%    |
| Self IPI       | 4167393.42    | 4149093.90    | -0.44%   |
| Normal IPI     | 61769347.82   | 61753728.39   | -0.03%   |
| Broadcast IPI  | 2235584825.02 | 2227521401.45 | -0.36%   |
| Broadcast lock | 2164964433.31 | 2125658641.76 | -1.82%   |

That's very close to the earlier performance with arch-specific handling.

Signed-off-by: Mukesh Kumar Chaurasiya <mkchauras@linux.ibm.com>
Tested-by: Samir M <samir@linux.ibm.com>
Tested-by: David Gow <davidgow@google.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
---
 arch/powerpc/Kconfig                 |   1 +
 arch/powerpc/include/asm/interrupt.h | 384 +++++----------------------
 arch/powerpc/include/asm/kasan.h     |  15 +-
 arch/powerpc/kernel/interrupt.c      | 250 +++--------------
 arch/powerpc/kernel/ptrace/ptrace.c  |   3 -
 arch/powerpc/kernel/signal.c         |   8 +
 arch/powerpc/kernel/syscall.c        | 119 +--------
 7 files changed, 124 insertions(+), 656 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e93df95b79e7..81642206f7de 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -206,6 +206,7 @@ config PPC
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CPU_VULNERABILITIES	if PPC_BARRIER_NOSPEC
 	select GENERIC_EARLY_IOREMAP
+	select GENERIC_ENTRY
 	select GENERIC_GETTIMEOFDAY
 	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_IOREMAP
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 0e2cddf8bd21..fb42a664ae54 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -66,11 +66,9 @@
 
 #ifndef __ASSEMBLER__
 
-#include <linux/context_tracking.h>
-#include <linux/hardirq.h>
-#include <asm/cputime.h>
-#include <asm/firmware.h>
-#include <asm/ftrace.h>
+#include <linux/sched/debug.h> /* for show_regs */
+#include <linux/irq-entry-common.h>
+
 #include <asm/kprobes.h>
 #include <asm/runlatch.h>
 
@@ -88,308 +86,6 @@ do {									\
 #define INT_SOFT_MASK_BUG_ON(regs, cond)
 #endif
 
-#ifdef CONFIG_PPC_BOOK3S_64
-extern char __end_soft_masked[];
-bool search_kernel_soft_mask_table(unsigned long addr);
-unsigned long search_kernel_restart_table(unsigned long addr);
-
-DECLARE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
-
-static inline bool is_implicit_soft_masked(struct pt_regs *regs)
-{
-	if (user_mode(regs))
-		return false;
-
-	if (regs->nip >= (unsigned long)__end_soft_masked)
-		return false;
-
-	return search_kernel_soft_mask_table(regs->nip);
-}
-
-static inline void srr_regs_clobbered(void)
-{
-	local_paca->srr_valid = 0;
-	local_paca->hsrr_valid = 0;
-}
-#else
-static inline unsigned long search_kernel_restart_table(unsigned long addr)
-{
-	return 0;
-}
-
-static inline bool is_implicit_soft_masked(struct pt_regs *regs)
-{
-	return false;
-}
-
-static inline void srr_regs_clobbered(void)
-{
-}
-#endif
-
-static inline void nap_adjust_return(struct pt_regs *regs)
-{
-#ifdef CONFIG_PPC_970_NAP
-	if (unlikely(test_thread_local_flags(_TLF_NAPPING))) {
-		/* Can avoid a test-and-clear because NMIs do not call this */
-		clear_thread_local_flags(_TLF_NAPPING);
-		regs_set_return_ip(regs, (unsigned long)power4_idle_nap_return);
-	}
-#endif
-}
-
-static inline void booke_restore_dbcr0(void)
-{
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
-	unsigned long dbcr0 = current->thread.debug.dbcr0;
-
-	if (IS_ENABLED(CONFIG_PPC32) && unlikely(dbcr0 & DBCR0_IDM)) {
-		mtspr(SPRN_DBSR, -1);
-		mtspr(SPRN_DBCR0, global_dbcr0[smp_processor_id()]);
-	}
-#endif
-}
-
-static inline void interrupt_enter_prepare(struct pt_regs *regs)
-{
-#ifdef CONFIG_PPC64
-	irq_soft_mask_set(IRQS_ALL_DISABLED);
-
-	/*
-	 * If the interrupt was taken with HARD_DIS clear, then enable MSR[EE].
-	 * Asynchronous interrupts get here with HARD_DIS set (see below), so
-	 * this enables MSR[EE] for synchronous interrupts. IRQs remain
-	 * soft-masked. The interrupt handler may later call
-	 * interrupt_cond_local_irq_enable() to achieve a regular process
-	 * context.
-	 */
-	if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) {
-		INT_SOFT_MASK_BUG_ON(regs, !(regs->msr & MSR_EE));
-		__hard_irq_enable();
-	} else {
-		__hard_RI_enable();
-	}
-	/* Enable MSR[RI] early, to support kernel SLB and hash faults */
-#endif
-
-	if (!regs_irqs_disabled(regs))
-		trace_hardirqs_off();
-
-	if (user_mode(regs)) {
-		kuap_lock();
-		CT_WARN_ON(ct_state() != CT_STATE_USER);
-		user_exit_irqoff();
-
-		account_cpu_user_entry();
-		account_stolen_time();
-	} else {
-		kuap_save_and_lock(regs);
-		/*
-		 * CT_WARN_ON comes here via program_check_exception,
-		 * so avoid recursion.
-		 */
-		if (TRAP(regs) != INTERRUPT_PROGRAM)
-			CT_WARN_ON(ct_state() != CT_STATE_KERNEL &&
-				   ct_state() != CT_STATE_IDLE);
-		INT_SOFT_MASK_BUG_ON(regs, is_implicit_soft_masked(regs));
-		INT_SOFT_MASK_BUG_ON(regs, regs_irqs_disabled(regs) &&
-				     search_kernel_restart_table(regs->nip));
-	}
-	INT_SOFT_MASK_BUG_ON(regs, !regs_irqs_disabled(regs) &&
-			     !(regs->msr & MSR_EE));
-
-	booke_restore_dbcr0();
-}
-
-/*
- * Care should be taken to note that interrupt_exit_prepare and
- * interrupt_async_exit_prepare do not necessarily return immediately to
- * regs context (e.g., if regs is usermode, we don't necessarily return to
- * user mode). Other interrupts might be taken between here and return,
- * context switch / preemption may occur in the exit path after this, or a
- * signal may be delivered, etc.
- *
- * The real interrupt exit code is platform specific, e.g.,
- * interrupt_exit_user_prepare / interrupt_exit_kernel_prepare for 64s.
- *
- * However interrupt_nmi_exit_prepare does return directly to regs, because
- * NMIs do not do "exit work" or replay soft-masked interrupts.
- */
-static inline void interrupt_exit_prepare(struct pt_regs *regs)
-{
-}
-
-static inline void interrupt_async_enter_prepare(struct pt_regs *regs)
-{
-#ifdef CONFIG_PPC64
-	/* Ensure interrupt_enter_prepare does not enable MSR[EE] */
-	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
-#endif
-	interrupt_enter_prepare(regs);
-#ifdef CONFIG_PPC_BOOK3S_64
-	/*
-	 * RI=1 is set by interrupt_enter_prepare, so this thread flags access
-	 * has to come afterward (it can cause SLB faults).
-	 */
-	if (cpu_has_feature(CPU_FTR_CTRL) &&
-	    !test_thread_local_flags(_TLF_RUNLATCH))
-		__ppc64_runlatch_on();
-#endif
-	irq_enter();
-}
-
-static inline void interrupt_async_exit_prepare(struct pt_regs *regs)
-{
-	/*
-	 * Adjust at exit so the main handler sees the true NIA. This must
-	 * come before irq_exit() because irq_exit can enable interrupts, and
-	 * if another interrupt is taken before nap_adjust_return has run
-	 * here, then that interrupt would return directly to idle nap return.
-	 */
-	nap_adjust_return(regs);
-
-	irq_exit();
-	interrupt_exit_prepare(regs);
-}
-
-struct interrupt_nmi_state {
-#ifdef CONFIG_PPC64
-	u8 irq_soft_mask;
-	u8 irq_happened;
-	u8 ftrace_enabled;
-	u64 softe;
-#endif
-};
-
-static inline bool nmi_disables_ftrace(struct pt_regs *regs)
-{
-	/* Allow DEC and PMI to be traced when they are soft-NMI */
-	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
-		if (TRAP(regs) == INTERRUPT_DECREMENTER)
-		       return false;
-		if (TRAP(regs) == INTERRUPT_PERFMON)
-		       return false;
-	}
-	if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) {
-		if (TRAP(regs) == INTERRUPT_PERFMON)
-			return false;
-	}
-
-	return true;
-}
-
-static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
-{
-#ifdef CONFIG_PPC64
-	state->irq_soft_mask = local_paca->irq_soft_mask;
-	state->irq_happened = local_paca->irq_happened;
-	state->softe = regs->softe;
-
-	/*
-	 * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
-	 * the right thing, and set IRQ_HARD_DIS. We do not want to reconcile
-	 * because that goes through irq tracing which we don't want in NMI.
-	 */
-	local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
-	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
-
-	if (!(regs->msr & MSR_EE) || is_implicit_soft_masked(regs)) {
-		/*
-		 * Adjust regs->softe to be soft-masked if it had not been
-		 * reconcied (e.g., interrupt entry with MSR[EE]=0 but softe
-		 * not yet set disabled), or if it was in an implicit soft
-		 * masked state. This makes regs_irqs_disabled(regs)
-		 * behave as expected.
-		 */
-		regs->softe = IRQS_ALL_DISABLED;
-	}
-
-	__hard_RI_enable();
-
-	/* Don't do any per-CPU operations until interrupt state is fixed */
-
-	if (nmi_disables_ftrace(regs)) {
-		state->ftrace_enabled = this_cpu_get_ftrace_enabled();
-		this_cpu_set_ftrace_enabled(0);
-	}
-#endif
-
-	/* If data relocations are enabled, it's safe to use nmi_enter() */
-	if (mfmsr() & MSR_DR) {
-		nmi_enter();
-		return;
-	}
-
-	/*
-	 * But do not use nmi_enter() for pseries hash guest taking a real-mode
-	 * NMI because not everything it touches is within the RMA limit.
-	 */
-	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
-	    firmware_has_feature(FW_FEATURE_LPAR) &&
-	    !radix_enabled())
-		return;
-
-	/*
-	 * Likewise, don't use it if we have some form of instrumentation (like
-	 * KASAN shadow) that is not safe to access in real mode (even on radix)
-	 */
-	if (IS_ENABLED(CONFIG_KASAN))
-		return;
-
-	/*
-	 * Likewise, do not use it in real mode if percpu first chunk is not
-	 * embedded. With CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK enabled there
-	 * are chances where percpu allocation can come from vmalloc area.
-	 */
-	if (percpu_first_chunk_is_paged)
-		return;
-
-	/* Otherwise, it should be safe to call it */
-	nmi_enter();
-}
-
-static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
-{
-	if (mfmsr() & MSR_DR) {
-		// nmi_exit if relocations are on
-		nmi_exit();
-	} else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
-		   firmware_has_feature(FW_FEATURE_LPAR) &&
-		   !radix_enabled()) {
-		// no nmi_exit for a pseries hash guest taking a real mode exception
-	} else if (IS_ENABLED(CONFIG_KASAN)) {
-		// no nmi_exit for KASAN in real mode
-	} else if (percpu_first_chunk_is_paged) {
-		// no nmi_exit if percpu first chunk is not embedded
-	} else {
-		nmi_exit();
-	}
-
-	/*
-	 * nmi does not call nap_adjust_return because nmi should not create
-	 * new work to do (must use irq_work for that).
-	 */
-
-#ifdef CONFIG_PPC64
-#ifdef CONFIG_PPC_BOOK3S
-	if (regs_irqs_disabled(regs)) {
-		unsigned long rst = search_kernel_restart_table(regs->nip);
-		if (rst)
-			regs_set_return_ip(regs, rst);
-	}
-#endif
-
-	if (nmi_disables_ftrace(regs))
-		this_cpu_set_ftrace_enabled(state->ftrace_enabled);
-
-	/* Check we didn't change the pending interrupt mask. */
-	WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened);
-	regs->softe = state->softe;
-	local_paca->irq_happened = state->irq_happened;
-	local_paca->irq_soft_mask = state->irq_soft_mask;
-#endif
-}
-
 /*
  * Don't use noinstr here like x86, but rather add NOKPROBE_SYMBOL to each
  * function definition. The reason for this is the noinstr section is placed
@@ -470,11 +166,14 @@ static __always_inline void ____##func(struct pt_regs *regs);		\
 									\
 interrupt_handler void func(struct pt_regs *regs)			\
 {									\
-	interrupt_enter_prepare(regs);					\
-									\
+	irqentry_state_t state;						\
+	arch_interrupt_enter_prepare(regs);				\
+	state = irqentry_enter(regs);					\
+	instrumentation_begin();					\
 	____##func (regs);						\
-									\
-	interrupt_exit_prepare(regs);					\
+	instrumentation_end();						\
+	arch_interrupt_exit_prepare(regs);				\
+	irqentry_exit(regs, state);					\
 }									\
 NOKPROBE_SYMBOL(func);							\
 									\
@@ -504,12 +203,15 @@ static __always_inline long ____##func(struct pt_regs *regs);		\
 interrupt_handler long func(struct pt_regs *regs)			\
 {									\
 	long ret;							\
+	irqentry_state_t state;						\
 									\
-	interrupt_enter_prepare(regs);					\
-									\
+	arch_interrupt_enter_prepare(regs);				\
+	state = irqentry_enter(regs);					\
+	instrumentation_begin();					\
 	ret = ____##func (regs);					\
-									\
-	interrupt_exit_prepare(regs);					\
+	instrumentation_end();						\
+	arch_interrupt_exit_prepare(regs);				\
+	irqentry_exit(regs, state);					\
 									\
 	return ret;							\
 }									\
@@ -538,11 +240,16 @@ static __always_inline void ____##func(struct pt_regs *regs);		\
 									\
 interrupt_handler void func(struct pt_regs *regs)			\
 {									\
-	interrupt_async_enter_prepare(regs);				\
-									\
+	irqentry_state_t state;						\
+	arch_interrupt_async_enter_prepare(regs);			\
+	state = irqentry_enter(regs);					\
+	instrumentation_begin();					\
+	irq_enter_rcu();						\
 	____##func (regs);						\
-									\
-	interrupt_async_exit_prepare(regs);				\
+	irq_exit_rcu();							\
+	instrumentation_end();						\
+	arch_interrupt_async_exit_prepare(regs);			\
+	irqentry_exit(regs, state);					\
 }									\
 NOKPROBE_SYMBOL(func);							\
 									\
@@ -572,14 +279,43 @@ ____##func(struct pt_regs *regs);					\
 									\
 interrupt_handler long func(struct pt_regs *regs)			\
 {									\
-	struct interrupt_nmi_state state;				\
+	irqentry_state_t state;						\
+	struct interrupt_nmi_state nmi_state;				\
 	long ret;							\
 									\
-	interrupt_nmi_enter_prepare(regs, &state);			\
-									\
+	arch_interrupt_nmi_enter_prepare(regs, &nmi_state);		\
+	if (mfmsr() & MSR_DR) {						\
+		/* nmi_entry if relocations are on */			\
+		state = irqentry_nmi_enter(regs);			\
+	} else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&			\
+		   firmware_has_feature(FW_FEATURE_LPAR) &&		\
+		   !radix_enabled()) {					\
+		/* no nmi_entry for a pseries hash guest		\
+		 * taking a real mode exception */			\
+	} else if (IS_ENABLED(CONFIG_KASAN)) {				\
+		/* no nmi_entry for KASAN in real mode */		\
+	} else if (percpu_first_chunk_is_paged) {			\
+		/* no nmi_entry if percpu first chunk is not embedded */\
+	} else {							\
+		state = irqentry_nmi_enter(regs);			\
+	}								\
 	ret = ____##func (regs);					\
-									\
-	interrupt_nmi_exit_prepare(regs, &state);			\
+	arch_interrupt_nmi_exit_prepare(regs, &nmi_state);		\
+	if (mfmsr() & MSR_DR) {						\
+		/* nmi_exit if relocations are on */			\
+		irqentry_nmi_exit(regs, state);				\
+	} else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&			\
+		   firmware_has_feature(FW_FEATURE_LPAR) &&		\
+		   !radix_enabled()) {					\
+		/* no nmi_exit for a pseries hash guest			\
+		 * taking a real mode exception */			\
+	} else if (IS_ENABLED(CONFIG_KASAN)) {				\
+		/* no nmi_exit for KASAN in real mode */		\
+	} else if (percpu_first_chunk_is_paged) {			\
+		/* no nmi_exit if percpu first chunk is not embedded */	\
+	} else {							\
+		irqentry_nmi_exit(regs, state);				\
+	}								\
 									\
 	return ret;							\
 }									\
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
index 045804a86f98..a690e7da53c2 100644
--- a/arch/powerpc/include/asm/kasan.h
+++ b/arch/powerpc/include/asm/kasan.h
@@ -3,14 +3,19 @@
 #define __ASM_KASAN_H
 
 #if defined(CONFIG_KASAN) && !defined(CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX)
-#define _GLOBAL_KASAN(fn)	_GLOBAL(__##fn)
-#define _GLOBAL_TOC_KASAN(fn)	_GLOBAL_TOC(__##fn)
-#define EXPORT_SYMBOL_KASAN(fn)	EXPORT_SYMBOL(__##fn)
-#else
+#define _GLOBAL_KASAN(fn)			\
+	_GLOBAL(fn);				\
+	_GLOBAL(__##fn)
+#define _GLOBAL_TOC_KASAN(fn)			\
+	_GLOBAL_TOC(fn);			\
+	_GLOBAL_TOC(__##fn)
+#define EXPORT_SYMBOL_KASAN(fn)			\
+	EXPORT_SYMBOL(__##fn)
+#else /* CONFIG_KASAN && !CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX */
 #define _GLOBAL_KASAN(fn)	_GLOBAL(fn)
 #define _GLOBAL_TOC_KASAN(fn)	_GLOBAL_TOC(fn)
 #define EXPORT_SYMBOL_KASAN(fn)
-#endif
+#endif /* CONFIG_KASAN && !CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX */
 
 #ifndef __ASSEMBLER__
 
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 666eadb589a5..89a999be1352 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <linux/context_tracking.h>
+#include <linux/entry-common.h>
 #include <linux/err.h>
 #include <linux/compat.h>
 #include <linux/rseq.h>
@@ -25,10 +26,6 @@
 unsigned long global_dbcr0[NR_CPUS];
 #endif
 
-#if defined(CONFIG_PREEMPT_DYNAMIC)
-DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
-#endif
-
 #ifdef CONFIG_PPC_BOOK3S_64
 DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
 static inline bool exit_must_hard_disable(void)
@@ -78,181 +75,6 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
 	return true;
 }
 
-static notrace void booke_load_dbcr0(void)
-{
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
-	unsigned long dbcr0 = current->thread.debug.dbcr0;
-
-	if (likely(!(dbcr0 & DBCR0_IDM)))
-		return;
-
-	/*
-	 * Check to see if the dbcr0 register is set up to debug.
-	 * Use the internal debug mode bit to do this.
-	 */
-	mtmsr(mfmsr() & ~MSR_DE);
-	if (IS_ENABLED(CONFIG_PPC32)) {
-		isync();
-		global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0);
-	}
-	mtspr(SPRN_DBCR0, dbcr0);
-	mtspr(SPRN_DBSR, -1);
-#endif
-}
-
-static notrace void check_return_regs_valid(struct pt_regs *regs)
-{
-#ifdef CONFIG_PPC_BOOK3S_64
-	unsigned long trap, srr0, srr1;
-	static bool warned;
-	u8 *validp;
-	char *h;
-
-	if (trap_is_scv(regs))
-		return;
-
-	trap = TRAP(regs);
-	// EE in HV mode sets HSRRs like 0xea0
-	if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
-		trap = 0xea0;
-
-	switch (trap) {
-	case 0x980:
-	case INTERRUPT_H_DATA_STORAGE:
-	case 0xe20:
-	case 0xe40:
-	case INTERRUPT_HMI:
-	case 0xe80:
-	case 0xea0:
-	case INTERRUPT_H_FAC_UNAVAIL:
-	case 0x1200:
-	case 0x1500:
-	case 0x1600:
-	case 0x1800:
-		validp = &local_paca->hsrr_valid;
-		if (!READ_ONCE(*validp))
-			return;
-
-		srr0 = mfspr(SPRN_HSRR0);
-		srr1 = mfspr(SPRN_HSRR1);
-		h = "H";
-
-		break;
-	default:
-		validp = &local_paca->srr_valid;
-		if (!READ_ONCE(*validp))
-			return;
-
-		srr0 = mfspr(SPRN_SRR0);
-		srr1 = mfspr(SPRN_SRR1);
-		h = "";
-		break;
-	}
-
-	if (srr0 == regs->nip && srr1 == regs->msr)
-		return;
-
-	/*
-	 * A NMI / soft-NMI interrupt may have come in after we found
-	 * srr_valid and before the SRRs are loaded. The interrupt then
-	 * comes in and clobbers SRRs and clears srr_valid. Then we load
-	 * the SRRs here and test them above and find they don't match.
-	 *
-	 * Test validity again after that, to catch such false positives.
-	 *
-	 * This test in general will have some window for false negatives
-	 * and may not catch and fix all such cases if an NMI comes in
-	 * later and clobbers SRRs without clearing srr_valid, but hopefully
-	 * such things will get caught most of the time, statistically
-	 * enough to be able to get a warning out.
-	 */
-	if (!READ_ONCE(*validp))
-		return;
-
-	if (!data_race(warned)) {
-		data_race(warned = true);
-		printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
-		printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
-		show_regs(regs);
-	}
-
-	WRITE_ONCE(*validp, 0); /* fixup */
-#endif
-}
-
-static notrace unsigned long
-interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
-{
-	unsigned long ti_flags;
-
-again:
-	ti_flags = read_thread_flags();
-	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
-		local_irq_enable();
-		if (ti_flags & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) {
-			schedule();
-		} else {
-			/*
-			 * SIGPENDING must restore signal handler function
-			 * argument GPRs, and some non-volatiles (e.g., r1).
-			 * Restore all for now. This could be made lighter.
-			 */
-			if (ti_flags & _TIF_SIGPENDING)
-				ret |= _TIF_RESTOREALL;
-			do_notify_resume(regs, ti_flags);
-		}
-		local_irq_disable();
-		ti_flags = read_thread_flags();
-	}
-
-	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
-		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
-				unlikely((ti_flags & _TIF_RESTORE_TM))) {
-			restore_tm_state(regs);
-		} else {
-			unsigned long mathflags = MSR_FP;
-
-			if (cpu_has_feature(CPU_FTR_VSX))
-				mathflags |= MSR_VEC | MSR_VSX;
-			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
-				mathflags |= MSR_VEC;
-
-			/*
-			 * If userspace MSR has all available FP bits set,
-			 * then they are live and no need to restore. If not,
-			 * it means the regs were given up and restore_math
-			 * may decide to restore them (to avoid taking an FP
-			 * fault).
-			 */
-			if ((regs->msr & mathflags) != mathflags)
-				restore_math(regs);
-		}
-	}
-
-	check_return_regs_valid(regs);
-
-	user_enter_irqoff();
-	if (!prep_irq_for_enabled_exit(true)) {
-		user_exit_irqoff();
-		local_irq_enable();
-		local_irq_disable();
-		goto again;
-	}
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	local_paca->tm_scratch = regs->msr;
-#endif
-
-	booke_load_dbcr0();
-
-	account_cpu_user_exit();
-
-	/* Restore user access locks last */
-	kuap_user_restore(regs);
-
-	return ret;
-}
-
 /*
  * This should be called after a syscall returns, with r3 the return value
  * from the syscall. If this function returns non-zero, the system call
@@ -267,17 +89,12 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 					   long scv)
 {
 	unsigned long ti_flags;
-	unsigned long ret = 0;
 	bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
 
-	CT_WARN_ON(ct_state() == CT_STATE_USER);
-
 	kuap_assert_locked();
 
 	regs->result = r3;
-
-	/* Check whether the syscall is issued inside a restartable sequence */
-	rseq_syscall(regs);
+	regs->exit_flags = 0;
 
 	ti_flags = read_thread_flags();
 
@@ -290,7 +107,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 
 	if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
 		if (ti_flags & _TIF_RESTOREALL)
-			ret = _TIF_RESTOREALL;
+			regs->exit_flags = _TIF_RESTOREALL;
 		else
 			regs->gpr[3] = r3;
 		clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
@@ -299,18 +116,28 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 	}
 
 	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
-		do_syscall_trace_leave(regs);
-		ret |= _TIF_RESTOREALL;
+		regs->exit_flags |= _TIF_RESTOREALL;
 	}
 
-	local_irq_disable();
-	ret = interrupt_exit_user_prepare_main(ret, regs);
+	syscall_exit_to_user_mode(regs);
+
+again:
+	user_enter_irqoff();
+	if (!prep_irq_for_enabled_exit(true)) {
+		user_exit_irqoff();
+		local_irq_enable();
+		local_irq_disable();
+		goto again;
+	}
+
+	/* Restore user access locks last */
+	kuap_user_restore(regs);
 
 #ifdef CONFIG_PPC64
-	regs->exit_result = ret;
+	regs->exit_result = regs->exit_flags;
 #endif
 
-	return ret;
+	return regs->exit_flags;
 }
 
 #ifdef CONFIG_PPC64
@@ -330,13 +157,16 @@ notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *reg
 	set_kuap(AMR_KUAP_BLOCKED);
 #endif
 
-	trace_hardirqs_off();
-	user_exit_irqoff();
-	account_cpu_user_entry();
-
-	BUG_ON(!user_mode(regs));
+again:
+	user_enter_irqoff();
+	if (!prep_irq_for_enabled_exit(true)) {
+		user_exit_irqoff();
+		local_irq_enable();
+		local_irq_disable();
+		goto again;
+	}
 
-	regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs);
+	regs->exit_result |= regs->exit_flags;
 
 	return regs->exit_result;
 }
@@ -348,7 +178,6 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
 
 	BUG_ON(regs_is_unrecoverable(regs));
 	BUG_ON(regs_irqs_disabled(regs));
-	CT_WARN_ON(ct_state() == CT_STATE_USER);
 
 	/*
 	 * We don't need to restore AMR on the way back to userspace for KUAP.
@@ -357,8 +186,21 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
 	kuap_assert_locked();
 
 	local_irq_disable();
+	regs->exit_flags = 0;
+again:
+	check_return_regs_valid(regs);
+	user_enter_irqoff();
+	if (!prep_irq_for_enabled_exit(true)) {
+		user_exit_irqoff();
+		local_irq_enable();
+		local_irq_disable();
+		goto again;
+	}
+
+	/* Restore user access locks last */
+	kuap_user_restore(regs);
 
-	ret = interrupt_exit_user_prepare_main(0, regs);
+	ret = regs->exit_flags;
 
 #ifdef CONFIG_PPC64
 	regs->exit_result = ret;
@@ -400,13 +242,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
 		/* Returning to a kernel context with local irqs enabled. */
 		WARN_ON_ONCE(!(regs->msr & MSR_EE));
 again:
-		if (need_irq_preemption()) {
-			/* Return to preemptible kernel context */
-			if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) {
-				if (preempt_count() == 0)
-					preempt_schedule_irq();
-			}
-		}
 
 		check_return_regs_valid(regs);
 
@@ -479,7 +314,6 @@ notrace unsigned long interrupt_exit_user_restart(struct pt_regs *regs)
 #endif
 
 	trace_hardirqs_off();
-	user_exit_irqoff();
 	account_cpu_user_entry();
 
 	BUG_ON(!user_mode(regs));
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
index 2134b6d155ff..f006a03a0211 100644
--- a/arch/powerpc/kernel/ptrace/ptrace.c
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -21,9 +21,6 @@
 #include <asm/switch_to.h>
 #include <asm/debug.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/syscalls.h>
-
 #include "ptrace-decl.h"
 
 /*
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index aa17e62f3754..9f1847b4742e 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -6,6 +6,7 @@
  *    Extracted from signal_32.c and signal_64.c
  */
 
+#include <linux/entry-common.h>
 #include <linux/resume_user_mode.h>
 #include <linux/signal.h>
 #include <linux/uprobes.h>
@@ -368,3 +369,10 @@ void signal_fault(struct task_struct *tsk, struct pt_regs *regs,
 		printk_ratelimited(regs->msr & MSR_64BIT ? fm64 : fm32, tsk->comm,
 				   task_pid_nr(tsk), where, ptr, regs->nip, regs->link);
 }
+
+void arch_do_signal_or_restart(struct pt_regs *regs)
+{
+	BUG_ON(regs != current->thread.regs);
+	regs->exit_flags |= _TIF_RESTOREALL;
+	do_signal(current);
+}
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 52d6e10eab22..a9da2af6efa8 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -3,6 +3,7 @@
 #include <linux/compat.h>
 #include <linux/context_tracking.h>
 #include <linux/randomize_kstack.h>
+#include <linux/entry-common.h>
 
 #include <asm/interrupt.h>
 #include <asm/kup.h>
@@ -18,124 +19,10 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
 	long ret;
 	syscall_fn f;
 
-	kuap_lock();
-
-	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
-		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
-
-	trace_hardirqs_off(); /* finish reconciling */
-
-	CT_WARN_ON(ct_state() == CT_STATE_KERNEL);
-	user_exit_irqoff();
-
 	add_random_kstack_offset();
+	r0 = syscall_enter_from_user_mode(regs, r0);
 
-	BUG_ON(regs_is_unrecoverable(regs));
-	BUG_ON(!user_mode(regs));
-	BUG_ON(regs_irqs_disabled(regs));
-
-#ifdef CONFIG_PPC_PKEY
-	if (mmu_has_feature(MMU_FTR_PKEY)) {
-		unsigned long amr, iamr;
-		bool flush_needed = false;
-		/*
-		 * When entering from userspace we mostly have the AMR/IAMR
-		 * different from kernel default values. Hence don't compare.
-		 */
-		amr = mfspr(SPRN_AMR);
-		iamr = mfspr(SPRN_IAMR);
-		regs->amr  = amr;
-		regs->iamr = iamr;
-		if (mmu_has_feature(MMU_FTR_KUAP)) {
-			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
-			flush_needed = true;
-		}
-		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
-			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
-			flush_needed = true;
-		}
-		if (flush_needed)
-			isync();
-	} else
-#endif
-		kuap_assert_locked();
-
-	booke_restore_dbcr0();
-
-	account_cpu_user_entry();
-
-	account_stolen_time();
-
-	/*
-	 * This is not required for the syscall exit path, but makes the
-	 * stack frame look nicer. If this was initialised in the first stack
-	 * frame, or if the unwinder was taught the first stack frame always
-	 * returns to user with IRQS_ENABLED, this store could be avoided!
-	 */
-	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
-
-	/*
-	 * If system call is called with TM active, set _TIF_RESTOREALL to
-	 * prevent RFSCV being used to return to userspace, because POWER9
-	 * TM implementation has problems with this instruction returning to
-	 * transactional state. Final register values are not relevant because
-	 * the transaction will be aborted upon return anyway. Or in the case
-	 * of unsupported_scv SIGILL fault, the return state does not much
-	 * matter because it's an edge case.
-	 */
-	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
-			unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
-		set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
-
-	/*
-	 * If the system call was made with a transaction active, doom it and
-	 * return without performing the system call. Unless it was an
-	 * unsupported scv vector, in which case it's treated like an illegal
-	 * instruction.
-	 */
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
-	    !trap_is_unsupported_scv(regs)) {
-		/* Enable TM in the kernel, and disable EE (for scv) */
-		hard_irq_disable();
-		mtmsr(mfmsr() | MSR_TM);
-
-		/* tabort, this dooms the transaction, nothing else */
-		asm volatile(".long 0x7c00071d | ((%0) << 16)"
-				:: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
-
-		/*
-		 * Userspace will never see the return value. Execution will
-		 * resume after the tbegin. of the aborted transaction with the
-		 * checkpointed register state. A context switch could occur
-		 * or signal delivered to the process before resuming the
-		 * doomed transaction context, but that should all be handled
-		 * as expected.
-		 */
-		return -ENOSYS;
-	}
-#endif // CONFIG_PPC_TRANSACTIONAL_MEM
-
-	local_irq_enable();
-
-	if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
-		if (unlikely(trap_is_unsupported_scv(regs))) {
-			/* Unsupported scv vector */
-			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-			return regs->gpr[3];
-		}
-		/*
-		 * We use the return value of do_syscall_trace_enter() as the
-		 * syscall number. If the syscall was rejected for any reason
-		 * do_syscall_trace_enter() returns an invalid syscall number
-		 * and the test against NR_syscalls will fail and the return
-		 * value to be used is in regs->gpr[3].
-		 */
-		r0 = do_syscall_trace_enter(regs);
-		if (unlikely(r0 >= NR_syscalls))
-			return regs->gpr[3];
-
-	} else if (unlikely(r0 >= NR_syscalls)) {
+	if (unlikely(r0 >= NR_syscalls)) {
 		if (unlikely(trap_is_unsupported_scv(regs))) {
 			/* Unsupported scv vector */
 			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-- 
2.53.0



^ permalink raw reply related

* [PATCH v5 6/8] powerpc: Prepare for IRQ entry exit
From: Mukesh Kumar Chaurasiya (IBM) @ 2026-04-27 12:27 UTC (permalink / raw)
  To: maddy, mpe, npiggin, chleroy, ryabinin.a.a, glider, andreyknvl,
	dvyukov, vincenzo.frascino, oleg, kees, luto, wad, mchauras,
	sshegde, thuth, ruanjinjie, akpm, macro, ldv, charlie, deller,
	kevin.brodsky, ritesh.list, yeoreum.yun, agordeev, segher,
	mark.rutland, ryan.roberts, pmladek, feng.tang, peterz, kan.liang,
	linuxppc-dev, linux-kernel, kasan-dev
  Cc: Samir M, David Gow, Venkat Rao Bagalkote
In-Reply-To: <20260427122742.210074-1-mkchauras@gmail.com>

From: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>

Move interrupt entry and exit helper routines from interrupt.h into the
PowerPC-specific entry-common.h header as a preparatory step for enabling
the generic entry/exit framework.

This consolidation places all PowerPC interrupt entry/exit handling in a
single common header, aligning with the generic entry infrastructure.
The helpers provide architecture-specific handling for interrupt and NMI
entry/exit sequences, including:

 - arch_interrupt_enter/exit_prepare()
 - arch_interrupt_async_enter/exit_prepare()
 - arch_interrupt_nmi_enter/exit_prepare()
 - Supporting helpers such as nap_adjust_return(), check_return_regs_valid(),
   debug register maintenance, and soft mask handling.

The functions are copied verbatim from interrupt.h. Subsequent patches will
integrate these routines into the generic entry/exit flow.

No functional change intended.

Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
Tested-by: Samir M <samir@linux.ibm.com>
Tested-by: David Gow <davidgow@google.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
---
 arch/powerpc/include/asm/entry-common.h | 358 ++++++++++++++++++++++++
 1 file changed, 358 insertions(+)

diff --git a/arch/powerpc/include/asm/entry-common.h b/arch/powerpc/include/asm/entry-common.h
index ff0625e04778..de5601282755 100644
--- a/arch/powerpc/include/asm/entry-common.h
+++ b/arch/powerpc/include/asm/entry-common.h
@@ -5,10 +5,75 @@
 
 #include <asm/cputime.h>
 #include <asm/interrupt.h>
+#include <asm/runlatch.h>
 #include <asm/stacktrace.h>
 #include <asm/switch_to.h>
 #include <asm/tm.h>
 
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+/*
+ * WARN/BUG is handled with a program interrupt so minimise checks here to
+ * avoid recursion and maximise the chance of getting the first oops handled.
+ */
+#define INT_SOFT_MASK_BUG_ON(regs, cond)				\
+do {									\
+	if ((user_mode(regs) || (TRAP(regs) != INTERRUPT_PROGRAM)))	\
+		BUG_ON(cond);						\
+} while (0)
+#else
+#define INT_SOFT_MASK_BUG_ON(regs, cond)
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+extern char __end_soft_masked[];
+bool search_kernel_soft_mask_table(unsigned long addr);
+unsigned long search_kernel_restart_table(unsigned long addr);
+
+DECLARE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
+
+static inline bool is_implicit_soft_masked(struct pt_regs *regs)
+{
+	if (user_mode(regs))
+		return false;
+
+	if (regs->nip >= (unsigned long)__end_soft_masked)
+		return false;
+
+	return search_kernel_soft_mask_table(regs->nip);
+}
+
+static inline void srr_regs_clobbered(void)
+{
+	local_paca->srr_valid = 0;
+	local_paca->hsrr_valid = 0;
+}
+#else
+static inline unsigned long search_kernel_restart_table(unsigned long addr)
+{
+	return 0;
+}
+
+static inline bool is_implicit_soft_masked(struct pt_regs *regs)
+{
+	return false;
+}
+
+static inline void srr_regs_clobbered(void)
+{
+}
+#endif
+
+static inline void nap_adjust_return(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_970_NAP
+	if (unlikely(test_thread_local_flags(_TLF_NAPPING))) {
+		/* Can avoid a test-and-clear because NMIs do not call this */
+		clear_thread_local_flags(_TLF_NAPPING);
+		regs_set_return_ip(regs, (unsigned long)power4_idle_nap_return);
+	}
+#endif
+}
+
 static __always_inline void booke_load_dbcr0(void)
 {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -31,6 +96,299 @@ static __always_inline void booke_load_dbcr0(void)
 #endif
 }
 
+static inline void booke_restore_dbcr0(void)
+{
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+	unsigned long dbcr0 = current->thread.debug.dbcr0;
+
+	if (IS_ENABLED(CONFIG_PPC32) && unlikely(dbcr0 & DBCR0_IDM)) {
+		mtspr(SPRN_DBSR, -1);
+		mtspr(SPRN_DBCR0, global_dbcr0[smp_processor_id()]);
+	}
+#endif
+}
+
+static inline void check_return_regs_valid(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+	unsigned long trap, srr0, srr1;
+	static bool warned;
+	u8 *validp;
+	char *h;
+
+	if (trap_is_scv(regs))
+		return;
+
+	trap = TRAP(regs);
+	// EE in HV mode sets HSRRs like 0xea0
+	if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
+		trap = 0xea0;
+
+	switch (trap) {
+	case 0x980:
+	case INTERRUPT_H_DATA_STORAGE:
+	case 0xe20:
+	case 0xe40:
+	case INTERRUPT_HMI:
+	case 0xe80:
+	case 0xea0:
+	case INTERRUPT_H_FAC_UNAVAIL:
+	case 0x1200:
+	case 0x1500:
+	case 0x1600:
+	case 0x1800:
+		validp = &local_paca->hsrr_valid;
+		if (!READ_ONCE(*validp))
+			return;
+
+		srr0 = mfspr(SPRN_HSRR0);
+		srr1 = mfspr(SPRN_HSRR1);
+		h = "H";
+
+		break;
+	default:
+		validp = &local_paca->srr_valid;
+		if (!READ_ONCE(*validp))
+			return;
+
+		srr0 = mfspr(SPRN_SRR0);
+		srr1 = mfspr(SPRN_SRR1);
+		h = "";
+		break;
+	}
+
+	if (srr0 == regs->nip && srr1 == regs->msr)
+		return;
+
+	/*
+	 * A NMI / soft-NMI interrupt may have come in after we found
+	 * srr_valid and before the SRRs are loaded. The interrupt then
+	 * comes in and clobbers SRRs and clears srr_valid. Then we load
+	 * the SRRs here and test them above and find they don't match.
+	 *
+	 * Test validity again after that, to catch such false positives.
+	 *
+	 * This test in general will have some window for false negatives
+	 * and may not catch and fix all such cases if an NMI comes in
+	 * later and clobbers SRRs without clearing srr_valid, but hopefully
+	 * such things will get caught most of the time, statistically
+	 * enough to be able to get a warning out.
+	 */
+	if (!READ_ONCE(*validp))
+		return;
+
+	if (!data_race(warned)) {
+		data_race(warned = true);
+		pr_warn("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
+		pr_warn("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
+		show_regs(regs);
+	}
+
+	WRITE_ONCE(*validp, 0); /* fixup */
+#endif
+}
+
+static inline void arch_interrupt_enter_prepare(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC64
+	irq_soft_mask_set(IRQS_ALL_DISABLED);
+
+	/*
+	 * If the interrupt was taken with HARD_DIS clear, then enable MSR[EE].
+	 * Asynchronous interrupts get here with HARD_DIS set (see below), so
+	 * this enables MSR[EE] for synchronous interrupts. IRQs remain
+	 * soft-masked. The interrupt handler may later call
+	 * interrupt_cond_local_irq_enable() to achieve a regular process
+	 * context.
+	 */
+	if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) {
+		INT_SOFT_MASK_BUG_ON(regs, !(regs->msr & MSR_EE));
+		__hard_irq_enable();
+	} else {
+		__hard_RI_enable();
+	}
+	/* Enable MSR[RI] early, to support kernel SLB and hash faults */
+#endif
+
+	if (!regs_irqs_disabled(regs))
+		trace_hardirqs_off();
+
+	if (user_mode(regs)) {
+		kuap_lock();
+		account_cpu_user_entry();
+		account_stolen_time();
+	} else {
+		kuap_save_and_lock(regs);
+		/*
+		 * CT_WARN_ON comes here via program_check_exception,
+		 * so avoid recursion.
+		 */
+		if (TRAP(regs) != INTERRUPT_PROGRAM)
+			CT_WARN_ON(ct_state() != CT_STATE_KERNEL &&
+				   ct_state() != CT_STATE_IDLE);
+		INT_SOFT_MASK_BUG_ON(regs, is_implicit_soft_masked(regs));
+		INT_SOFT_MASK_BUG_ON(regs, regs_irqs_disabled(regs) &&
+				     search_kernel_restart_table(regs->nip));
+	}
+	INT_SOFT_MASK_BUG_ON(regs, !regs_irqs_disabled(regs) &&
+			     !(regs->msr & MSR_EE));
+
+	booke_restore_dbcr0();
+}
+
+/*
+ * Care should be taken to note that arch_interrupt_exit_prepare and
+ * arch_interrupt_async_exit_prepare do not necessarily return immediately to
+ * regs context (e.g., if regs is usermode, we don't necessarily return to
+ * user mode). Other interrupts might be taken between here and return,
+ * context switch / preemption may occur in the exit path after this, or a
+ * signal may be delivered, etc.
+ *
+ * The real interrupt exit code is platform specific, e.g.,
+ * interrupt_exit_user_prepare / interrupt_exit_kernel_prepare for 64s.
+ *
+ * However arch_interrupt_nmi_exit_prepare does return directly to regs, because
+ * NMIs do not do "exit work" or replay soft-masked interrupts.
+ */
+static inline void arch_interrupt_exit_prepare(struct pt_regs *regs)
+{
+	if (user_mode(regs)) {
+		BUG_ON(regs_is_unrecoverable(regs));
+		BUG_ON(regs_irqs_disabled(regs));
+		/*
+		 * We don't need to restore AMR on the way back to userspace for KUAP.
+		 * AMR can only have been unlocked if we interrupted the kernel.
+		 */
+		kuap_assert_locked();
+
+		local_irq_disable();
+	}
+}
+
+static inline void arch_interrupt_async_enter_prepare(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC64
+	/* Ensure arch_interrupt_enter_prepare does not enable MSR[EE] */
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+#endif
+	arch_interrupt_enter_prepare(regs);
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * RI=1 is set by arch_interrupt_enter_prepare, so this thread flags access
+	 * has to come afterward (it can cause SLB faults).
+	 */
+	if (cpu_has_feature(CPU_FTR_CTRL) &&
+	    !test_thread_local_flags(_TLF_RUNLATCH))
+		__ppc64_runlatch_on();
+#endif
+}
+
+static inline void arch_interrupt_async_exit_prepare(struct pt_regs *regs)
+{
+	/*
+	 * Adjust at exit so the main handler sees the true NIA. This must
+	 * come before irq_exit() because irq_exit can enable interrupts, and
+	 * if another interrupt is taken before nap_adjust_return has run
+	 * here, then that interrupt would return directly to idle nap return.
+	 */
+	nap_adjust_return(regs);
+
+	arch_interrupt_exit_prepare(regs);
+}
+
+struct interrupt_nmi_state {
+#ifdef CONFIG_PPC64
+	u8 irq_soft_mask;
+	u8 irq_happened;
+	u8 ftrace_enabled;
+	u64 softe;
+#endif
+};
+
+static inline bool nmi_disables_ftrace(struct pt_regs *regs)
+{
+	/* Allow DEC and PMI to be traced when they are soft-NMI */
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
+		if (TRAP(regs) == INTERRUPT_DECREMENTER)
+			return false;
+		if (TRAP(regs) == INTERRUPT_PERFMON)
+			return false;
+	}
+	if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) {
+		if (TRAP(regs) == INTERRUPT_PERFMON)
+			return false;
+	}
+
+	return true;
+}
+
+static inline void arch_interrupt_nmi_enter_prepare(struct pt_regs *regs,
+						    struct interrupt_nmi_state *state)
+{
+#ifdef CONFIG_PPC64
+	state->irq_soft_mask = local_paca->irq_soft_mask;
+	state->irq_happened = local_paca->irq_happened;
+	state->softe = regs->softe;
+
+	/*
+	 * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
+	 * the right thing, and set IRQ_HARD_DIS. We do not want to reconcile
+	 * because that goes through irq tracing which we don't want in NMI.
+	 */
+	local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+	if (!(regs->msr & MSR_EE) || is_implicit_soft_masked(regs)) {
+		/*
+		 * Adjust regs->softe to be soft-masked if it had not been
+		 * reconciled (e.g., interrupt entry with MSR[EE]=0 but softe
+		 * not yet set disabled), or if it was in an implicit soft
+		 * masked state. This makes regs_irqs_disabled(regs)
+		 * behave as expected.
+		 */
+		regs->softe = IRQS_ALL_DISABLED;
+	}
+
+	__hard_RI_enable();
+
+	/* Don't do any per-CPU operations until interrupt state is fixed */
+
+	if (nmi_disables_ftrace(regs)) {
+		state->ftrace_enabled = this_cpu_get_ftrace_enabled();
+		this_cpu_set_ftrace_enabled(0);
+	}
+#endif
+}
+
+static inline void arch_interrupt_nmi_exit_prepare(struct pt_regs *regs,
+						   struct interrupt_nmi_state *state)
+{
+	/*
+	 * nmi does not call nap_adjust_return because nmi should not create
+	 * new work to do (must use irq_work for that).
+	 */
+
+#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S
+	if (regs_irqs_disabled(regs)) {
+		unsigned long rst = search_kernel_restart_table(regs->nip);
+
+		if (rst)
+			regs_set_return_ip(regs, rst);
+	}
+#endif
+
+	if (nmi_disables_ftrace(regs))
+		this_cpu_set_ftrace_enabled(state->ftrace_enabled);
+
+	/* Check we didn't change the pending interrupt mask. */
+	WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened);
+	regs->softe = state->softe;
+	local_paca->irq_happened = state->irq_happened;
+	local_paca->irq_soft_mask = state->irq_soft_mask;
+#endif
+}
+
 static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
 {
 	kuap_lock();
-- 
2.53.0



^ permalink raw reply related

* [PATCH v5 5/8] powerpc: add exit_flags field in pt_regs
From: Mukesh Kumar Chaurasiya (IBM) @ 2026-04-27 12:27 UTC (permalink / raw)
  To: maddy, mpe, npiggin, chleroy, ryabinin.a.a, glider, andreyknvl,
	dvyukov, vincenzo.frascino, oleg, kees, luto, wad, mchauras,
	sshegde, thuth, ruanjinjie, akpm, macro, ldv, charlie, deller,
	kevin.brodsky, ritesh.list, yeoreum.yun, agordeev, segher,
	mark.rutland, ryan.roberts, pmladek, feng.tang, peterz, kan.liang,
	linuxppc-dev, linux-kernel, kasan-dev
  Cc: Samir M, David Gow, Venkat Rao Bagalkote
In-Reply-To: <20260427122742.210074-1-mkchauras@gmail.com>

From: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>

Add a new field `exit_flags` in the pt_regs structure. This field will hold
the flags set during interrupt or syscall execution that are required during
exit to user mode.

Specifically, the `TIF_RESTOREALL` flag, stored in this field, helps the
exit routine determine if any NVGPRs were modified and need to be restored
before returning to userspace.

This addition ensures a clean and architecture-specific mechanism to track
per-syscall or per-interrupt state transitions related to register restore.

Changes:
 - Add `exit_flags` and `__pt_regs_pad` to maintain 16-byte stack alignment
 - Update asm-offsets.c and ptrace.c for offset and validation
 - Update PT_* constants in uapi header to reflect the new layout

Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
Tested-by: Samir M <samir@linux.ibm.com>
Tested-by: David Gow <davidgow@google.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
---
 arch/powerpc/include/asm/ptrace.h      |  3 +++
 arch/powerpc/include/uapi/asm/ptrace.h | 14 +++++++++-----
 arch/powerpc/kernel/ptrace/ptrace.c    |  1 +
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 94aa1de2b06e..2e741ea57b80 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -53,6 +53,9 @@ struct pt_regs
 				unsigned long esr;
 			};
 			unsigned long result;
+			unsigned long exit_flags;
+			/* Maintain 16 byte interrupt stack alignment */
+			unsigned long __pt_regs_pad[3];
 		};
 	};
 #if defined(CONFIG_PPC64) || defined(CONFIG_PPC_KUAP)
diff --git a/arch/powerpc/include/uapi/asm/ptrace.h b/arch/powerpc/include/uapi/asm/ptrace.h
index 01e630149d48..a393b7f2760a 100644
--- a/arch/powerpc/include/uapi/asm/ptrace.h
+++ b/arch/powerpc/include/uapi/asm/ptrace.h
@@ -55,6 +55,8 @@ struct pt_regs
 	unsigned long dar;		/* Fault registers */
 	unsigned long dsisr;		/* on 4xx/Book-E used for ESR */
 	unsigned long result;		/* Result of a system call */
+	unsigned long exit_flags;	/* System call exit flags */
+	unsigned long __pt_regs_pad[3];	/* Maintain 16 byte interrupt stack alignment */
 };
 
 #endif /* __ASSEMBLER__ */
@@ -114,10 +116,12 @@ struct pt_regs
 #define PT_DAR	41
 #define PT_DSISR 42
 #define PT_RESULT 43
-#define PT_DSCR 44
-#define PT_REGS_COUNT 44
+#define PT_EXIT_FLAGS 44
+#define PT_PAD 47 /* 3 times */
+#define PT_DSCR 48
+#define PT_REGS_COUNT 48
 
-#define PT_FPR0	48	/* each FP reg occupies 2 slots in this space */
+#define PT_FPR0	(PT_REGS_COUNT + 4)	/* each FP reg occupies 2 slots in this space */
 
 #ifndef __powerpc64__
 
@@ -129,7 +133,7 @@ struct pt_regs
 #define PT_FPSCR (PT_FPR0 + 32)	/* each FP reg occupies 1 slot in 64-bit space */
 
 
-#define PT_VR0 82	/* each Vector reg occupies 2 slots in 64-bit */
+#define PT_VR0	(PT_FPSCR + 2)	/* <82> each Vector reg occupies 2 slots in 64-bit */
 #define PT_VSCR (PT_VR0 + 32*2 + 1)
 #define PT_VRSAVE (PT_VR0 + 33*2)
 
@@ -137,7 +141,7 @@ struct pt_regs
 /*
  * Only store first 32 VSRs here. The second 32 VSRs in VR0-31
  */
-#define PT_VSR0 150	/* each VSR reg occupies 2 slots in 64-bit */
+#define PT_VSR0	(PT_VRSAVE + 2)	/* each VSR reg occupies 2 slots in 64-bit */
 #define PT_VSR31 (PT_VSR0 + 2*31)
 #endif /* __powerpc64__ */
 
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
index c6997df63287..2134b6d155ff 100644
--- a/arch/powerpc/kernel/ptrace/ptrace.c
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -432,6 +432,7 @@ void __init pt_regs_check(void)
 	CHECK_REG(PT_DAR, dar);
 	CHECK_REG(PT_DSISR, dsisr);
 	CHECK_REG(PT_RESULT, result);
+	CHECK_REG(PT_EXIT_FLAGS, exit_flags);
 	#undef CHECK_REG
 
 	BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long));
-- 
2.53.0



^ permalink raw reply related

* [PATCH v5 4/8] powerpc: Introduce syscall exit arch functions
From: Mukesh Kumar Chaurasiya (IBM) @ 2026-04-27 12:27 UTC (permalink / raw)
  To: maddy, mpe, npiggin, chleroy, ryabinin.a.a, glider, andreyknvl,
	dvyukov, vincenzo.frascino, oleg, kees, luto, wad, mchauras,
	sshegde, thuth, ruanjinjie, akpm, macro, ldv, charlie, deller,
	kevin.brodsky, ritesh.list, yeoreum.yun, agordeev, segher,
	mark.rutland, ryan.roberts, pmladek, feng.tang, peterz, kan.liang,
	linuxppc-dev, linux-kernel, kasan-dev
  Cc: Samir M, David Gow, Venkat Rao Bagalkote
In-Reply-To: <20260427122742.210074-1-mkchauras@gmail.com>

From: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>

Add PowerPC-specific implementations of the generic syscall exit hooks
used by the generic entry/exit framework:

 - arch_exit_to_user_mode_prepare()
 - arch_exit_to_user_mode()

These helpers handle user state restoration when returning from the
kernel to userspace, including FPU/VMX/VSX state, transactional memory,
KUAP restore, and per-CPU accounting.

Additionally, move check_return_regs_valid() from interrupt.c to
interrupt.h so it can be shared by the new entry/exit logic.

No functional change is intended with this patch.

Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
Tested-by: Samir M <samir@linux.ibm.com>
Tested-by: David Gow <davidgow@google.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
---
 arch/powerpc/include/asm/entry-common.h | 49 +++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/arch/powerpc/include/asm/entry-common.h b/arch/powerpc/include/asm/entry-common.h
index 837a7e020e82..ff0625e04778 100644
--- a/arch/powerpc/include/asm/entry-common.h
+++ b/arch/powerpc/include/asm/entry-common.h
@@ -6,6 +6,7 @@
 #include <asm/cputime.h>
 #include <asm/interrupt.h>
 #include <asm/stacktrace.h>
+#include <asm/switch_to.h>
 #include <asm/tm.h>
 
 static __always_inline void booke_load_dbcr0(void)
@@ -123,4 +124,52 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
 
 #define arch_enter_from_user_mode arch_enter_from_user_mode
 
+static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+						  unsigned long ti_work)
+{
+	unsigned long mathflags;
+
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
+		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+		    unlikely((ti_work & _TIF_RESTORE_TM))) {
+			restore_tm_state(regs);
+		} else {
+			mathflags = MSR_FP;
+
+			if (cpu_has_feature(CPU_FTR_VSX))
+				mathflags |= MSR_VEC | MSR_VSX;
+			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
+				mathflags |= MSR_VEC;
+
+			/*
+			 * If userspace MSR has all available FP bits set,
+			 * then they are live and no need to restore. If not,
+			 * it means the regs were given up and restore_math
+			 * may decide to restore them (to avoid taking an FP
+			 * fault).
+			 */
+			if ((regs->msr & mathflags) != mathflags)
+				restore_math(regs);
+		}
+	}
+
+	check_return_regs_valid(regs);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	local_paca->tm_scratch = regs->msr;
+#endif
+	/* Restore user access locks last */
+	kuap_user_restore(regs);
+}
+
+#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
+
+static __always_inline void arch_exit_to_user_mode(void)
+{
+	booke_load_dbcr0();
+
+	account_cpu_user_exit();
+}
+
+#define arch_exit_to_user_mode arch_exit_to_user_mode
+
 #endif /* _ASM_PPC_ENTRY_COMMON_H */
-- 
2.53.0



^ permalink raw reply related

* [PATCH v5 3/8] powerpc: introduce arch_enter_from_user_mode
From: Mukesh Kumar Chaurasiya (IBM) @ 2026-04-27 12:27 UTC (permalink / raw)
  To: maddy, mpe, npiggin, chleroy, ryabinin.a.a, glider, andreyknvl,
	dvyukov, vincenzo.frascino, oleg, kees, luto, wad, mchauras,
	sshegde, thuth, ruanjinjie, akpm, macro, ldv, charlie, deller,
	kevin.brodsky, ritesh.list, yeoreum.yun, agordeev, segher,
	mark.rutland, ryan.roberts, pmladek, feng.tang, peterz, kan.liang,
	linuxppc-dev, linux-kernel, kasan-dev
  Cc: Samir M, David Gow, Venkat Rao Bagalkote
In-Reply-To: <20260427122742.210074-1-mkchauras@gmail.com>

From: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>

Implement the arch_enter_from_user_mode() hook required by the generic
entry/exit framework. This helper prepares the CPU state when entering
the kernel from userspace, ensuring correct handling of KUAP/KUEP,
transactional memory, and debug register state.

This patch contains no functional changes; it is purely preparatory for
enabling the generic syscall and interrupt entry path on PowerPC.

Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
Tested-by: Samir M <samir@linux.ibm.com>
Tested-by: David Gow <davidgow@google.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
---
 arch/powerpc/include/asm/entry-common.h | 118 ++++++++++++++++++++++++
 1 file changed, 118 insertions(+)

diff --git a/arch/powerpc/include/asm/entry-common.h b/arch/powerpc/include/asm/entry-common.h
index 05ce0583b600..837a7e020e82 100644
--- a/arch/powerpc/include/asm/entry-common.h
+++ b/arch/powerpc/include/asm/entry-common.h
@@ -3,6 +3,124 @@
 #ifndef _ASM_PPC_ENTRY_COMMON_H
 #define _ASM_PPC_ENTRY_COMMON_H
 
+#include <asm/cputime.h>
+#include <asm/interrupt.h>
 #include <asm/stacktrace.h>
+#include <asm/tm.h>
+
+static __always_inline void booke_load_dbcr0(void)
+{
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+	unsigned long dbcr0 = current->thread.debug.dbcr0;
+
+	if (likely(!(dbcr0 & DBCR0_IDM)))
+		return;
+
+	/*
+	 * Check to see if the dbcr0 register is set up to debug.
+	 * Use the internal debug mode bit to do this.
+	 */
+	mtmsr(mfmsr() & ~MSR_DE);
+	if (IS_ENABLED(CONFIG_PPC32)) {
+		isync();
+		global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0);
+	}
+	mtspr(SPRN_DBCR0, dbcr0);
+	mtspr(SPRN_DBSR, -1);
+#endif
+}
+
+static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
+{
+	kuap_lock();
+
+	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+
+	BUG_ON(regs_is_unrecoverable(regs));
+	BUG_ON(!user_mode(regs));
+	BUG_ON(regs_irqs_disabled(regs));
+
+#ifdef CONFIG_PPC_PKEY
+	if (mmu_has_feature(MMU_FTR_PKEY) && trap_is_syscall(regs)) {
+		unsigned long amr, iamr;
+		bool flush_needed = false;
+		/*
+		 * When entering from userspace we mostly have the AMR/IAMR
+		 * different from kernel default values. Hence don't compare.
+		 */
+		amr = mfspr(SPRN_AMR);
+		iamr = mfspr(SPRN_IAMR);
+		regs->amr  = amr;
+		regs->iamr = iamr;
+		if (mmu_has_feature(MMU_FTR_KUAP)) {
+			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
+			flush_needed = true;
+		}
+		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
+			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
+			flush_needed = true;
+		}
+		if (flush_needed)
+			isync();
+	}
+#endif
+	kuap_assert_locked();
+	booke_restore_dbcr0();
+	account_cpu_user_entry();
+	account_stolen_time();
+
+	/*
+	 * This is not required for the syscall exit path, but makes the
+	 * stack frame look nicer. If this was initialised in the first stack
+	 * frame, or if the unwinder was taught the first stack frame always
+	 * returns to user with IRQS_ENABLED, this store could be avoided!
+	 */
+	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
+
+	/*
+	 * If system call is called with TM active, set _TIF_RESTOREALL to
+	 * prevent RFSCV being used to return to userspace, because POWER9
+	 * TM implementation has problems with this instruction returning to
+	 * transactional state. Final register values are not relevant because
+	 * the transaction will be aborted upon return anyway. Or in the case
+	 * of unsupported_scv SIGILL fault, the return state does not much
+	 * matter because it's an edge case.
+	 */
+	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+	    unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
+		set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
+
+	/*
+	 * If the system call was made with a transaction active, doom it and
+	 * return without performing the system call. Unless it was an
+	 * unsupported scv vector, in which case it's treated like an illegal
+	 * instruction.
+	 */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
+	    !trap_is_unsupported_scv(regs)) {
+		/* Enable TM in the kernel, and disable EE (for scv) */
+		hard_irq_disable();
+		mtmsr(mfmsr() | MSR_TM);
+
+		/* tabort, this dooms the transaction, nothing else */
+		asm volatile(".long 0x7c00071d | ((%0) << 16)"
+			     :: "r"(TM_CAUSE_SYSCALL | TM_CAUSE_PERSISTENT));
+
+		/*
+		 * Userspace will never see the return value. Execution will
+		 * resume after the tbegin. of the aborted transaction with the
+		 * checkpointed register state. A context switch could occur
+		 * or signal delivered to the process before resuming the
+		 * doomed transaction context, but that should all be handled
+		 * as expected.
+		 */
+		return;
+	}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+}
+
+#define arch_enter_from_user_mode arch_enter_from_user_mode
 
 #endif /* _ASM_PPC_ENTRY_COMMON_H */
-- 
2.53.0



^ permalink raw reply related

* [PATCH v5 2/8] powerpc: Prepare to build with generic entry/exit framework
From: Mukesh Kumar Chaurasiya (IBM) @ 2026-04-27 12:27 UTC (permalink / raw)
  To: maddy, mpe, npiggin, chleroy, ryabinin.a.a, glider, andreyknvl,
	dvyukov, vincenzo.frascino, oleg, kees, luto, wad, mchauras,
	sshegde, thuth, ruanjinjie, akpm, macro, ldv, charlie, deller,
	kevin.brodsky, ritesh.list, yeoreum.yun, agordeev, segher,
	mark.rutland, ryan.roberts, pmladek, feng.tang, peterz, kan.liang,
	linuxppc-dev, linux-kernel, kasan-dev
  Cc: Samir M, David Gow, Venkat Rao Bagalkote
In-Reply-To: <20260427122742.210074-1-mkchauras@gmail.com>

From: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>

This patch introduces preparatory changes needed to support building
PowerPC with the generic entry/exit (irqentry) framework.

The following infrastructure updates are added:
 - Add a syscall_work field to struct thread_info to hold SYSCALL_WORK_* flags.
 - Provide a stub implementation of arch_syscall_is_vdso_sigreturn(),
   returning false for now.
 - Introduce on_thread_stack() helper to detect if the current stack pointer
   lies within the task’s kernel stack.

These additions enable later integration with the generic entry/exit
infrastructure while keeping existing PowerPC behavior unchanged.

No functional change is intended in this patch.

Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
Tested-by: Samir M <samir@linux.ibm.com>
Tested-by: David Gow <davidgow@google.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
---
 arch/powerpc/include/asm/entry-common.h | 8 ++++++++
 arch/powerpc/include/asm/stacktrace.h   | 6 ++++++
 arch/powerpc/include/asm/syscall.h      | 5 +++++
 arch/powerpc/include/asm/thread_info.h  | 1 +
 4 files changed, 20 insertions(+)
 create mode 100644 arch/powerpc/include/asm/entry-common.h

diff --git a/arch/powerpc/include/asm/entry-common.h b/arch/powerpc/include/asm/entry-common.h
new file mode 100644
index 000000000000..05ce0583b600
--- /dev/null
+++ b/arch/powerpc/include/asm/entry-common.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_PPC_ENTRY_COMMON_H
+#define _ASM_PPC_ENTRY_COMMON_H
+
+#include <asm/stacktrace.h>
+
+#endif /* _ASM_PPC_ENTRY_COMMON_H */
diff --git a/arch/powerpc/include/asm/stacktrace.h b/arch/powerpc/include/asm/stacktrace.h
index 6149b53b3bc8..987f2e996262 100644
--- a/arch/powerpc/include/asm/stacktrace.h
+++ b/arch/powerpc/include/asm/stacktrace.h
@@ -10,4 +10,10 @@
 
 void show_user_instructions(struct pt_regs *regs);
 
+static __always_inline bool on_thread_stack(void)
+{
+	return !(((unsigned long)(current->stack) ^ current_stack_pointer)
+			& ~(THREAD_SIZE - 1));
+}
+
 #endif /* _ASM_POWERPC_STACKTRACE_H */
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 4b3c52ed6e9d..834fcc4f7b54 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -139,4 +139,9 @@ static inline int syscall_get_arch(struct task_struct *task)
 	else
 		return AUDIT_ARCH_PPC64;
 }
+
+static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
+{
+	return false;
+}
 #endif	/* _ASM_SYSCALL_H */
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 97f35f9b1a96..ee3b9adb5b67 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -57,6 +57,7 @@ struct thread_info {
 #ifdef CONFIG_SMP
 	unsigned int	cpu;
 #endif
+	unsigned long	syscall_work;		/* SYSCALL_WORK_ flags */
 	unsigned long	local_flags;		/* private flags for thread */
 #ifdef CONFIG_LIVEPATCH_64
 	unsigned long *livepatch_sp;
-- 
2.53.0



^ permalink raw reply related

* [PATCH v5 1/8] powerpc: rename arch_irq_disabled_regs
From: Mukesh Kumar Chaurasiya (IBM) @ 2026-04-27 12:27 UTC (permalink / raw)
  To: maddy, mpe, npiggin, chleroy, ryabinin.a.a, glider, andreyknvl,
	dvyukov, vincenzo.frascino, oleg, kees, luto, wad, mchauras,
	sshegde, thuth, ruanjinjie, akpm, macro, ldv, charlie, deller,
	kevin.brodsky, ritesh.list, yeoreum.yun, agordeev, segher,
	mark.rutland, ryan.roberts, pmladek, feng.tang, peterz, kan.liang,
	linuxppc-dev, linux-kernel, kasan-dev
  Cc: Samir M, David Gow, Venkat Rao Bagalkote
In-Reply-To: <20260427122742.210074-1-mkchauras@gmail.com>

From: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>

Rename arch_irq_disabled_regs() to regs_irqs_disabled() to align with the
naming used in the generic irqentry framework. This makes the function
available for use both in the PowerPC architecture code and in the
common entry/exit paths shared with other architectures.

This is a preparatory change for enabling the generic irqentry framework
on PowerPC.

Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Reviewed-by: Jinjie Ruan <ruanjinjie@huawei.com>
Tested-by: Samir M <samir@linux.ibm.com>
Tested-by: David Gow <davidgow@google.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
---
 arch/powerpc/include/asm/hw_irq.h    |  4 ++--
 arch/powerpc/include/asm/interrupt.h | 16 ++++++++--------
 arch/powerpc/kernel/interrupt.c      |  4 ++--
 arch/powerpc/kernel/syscall.c        |  2 +-
 arch/powerpc/kernel/traps.c          |  2 +-
 arch/powerpc/kernel/watchdog.c       |  2 +-
 arch/powerpc/perf/core-book3s.c      |  2 +-
 7 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 9cd945f2acaf..b7eee6385ae5 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -393,7 +393,7 @@ static inline void do_hard_irq_enable(void)
 	__hard_irq_enable();
 }
 
-static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
+static inline bool regs_irqs_disabled(struct pt_regs *regs)
 {
 	return (regs->softe & IRQS_DISABLED);
 }
@@ -466,7 +466,7 @@ static inline bool arch_irqs_disabled(void)
 
 #define hard_irq_disable()		arch_local_irq_disable()
 
-static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
+static inline bool regs_irqs_disabled(struct pt_regs *regs)
 {
 	return !(regs->msr & MSR_EE);
 }
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index eb0e4a20b818..0e2cddf8bd21 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -172,7 +172,7 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs)
 	/* Enable MSR[RI] early, to support kernel SLB and hash faults */
 #endif
 
-	if (!arch_irq_disabled_regs(regs))
+	if (!regs_irqs_disabled(regs))
 		trace_hardirqs_off();
 
 	if (user_mode(regs)) {
@@ -192,11 +192,11 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs)
 			CT_WARN_ON(ct_state() != CT_STATE_KERNEL &&
 				   ct_state() != CT_STATE_IDLE);
 		INT_SOFT_MASK_BUG_ON(regs, is_implicit_soft_masked(regs));
-		INT_SOFT_MASK_BUG_ON(regs, arch_irq_disabled_regs(regs) &&
-					   search_kernel_restart_table(regs->nip));
+		INT_SOFT_MASK_BUG_ON(regs, regs_irqs_disabled(regs) &&
+				     search_kernel_restart_table(regs->nip));
 	}
-	INT_SOFT_MASK_BUG_ON(regs, !arch_irq_disabled_regs(regs) &&
-				   !(regs->msr & MSR_EE));
+	INT_SOFT_MASK_BUG_ON(regs, !regs_irqs_disabled(regs) &&
+			     !(regs->msr & MSR_EE));
 
 	booke_restore_dbcr0();
 }
@@ -298,7 +298,7 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
 		 * Adjust regs->softe to be soft-masked if it had not been
 		 * reconcied (e.g., interrupt entry with MSR[EE]=0 but softe
 		 * not yet set disabled), or if it was in an implicit soft
-		 * masked state. This makes arch_irq_disabled_regs(regs)
+		 * masked state. This makes regs_irqs_disabled(regs)
 		 * behave as expected.
 		 */
 		regs->softe = IRQS_ALL_DISABLED;
@@ -372,7 +372,7 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter
 
 #ifdef CONFIG_PPC64
 #ifdef CONFIG_PPC_BOOK3S
-	if (arch_irq_disabled_regs(regs)) {
+	if (regs_irqs_disabled(regs)) {
 		unsigned long rst = search_kernel_restart_table(regs->nip);
 		if (rst)
 			regs_set_return_ip(regs, rst);
@@ -661,7 +661,7 @@ void replay_soft_interrupts(void);
 
 static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs)
 {
-	if (!arch_irq_disabled_regs(regs))
+	if (!regs_irqs_disabled(regs))
 		local_irq_enable();
 }
 
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index e63bfde13e03..666eadb589a5 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -347,7 +347,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
 	unsigned long ret;
 
 	BUG_ON(regs_is_unrecoverable(regs));
-	BUG_ON(arch_irq_disabled_regs(regs));
+	BUG_ON(regs_irqs_disabled(regs));
 	CT_WARN_ON(ct_state() == CT_STATE_USER);
 
 	/*
@@ -396,7 +396,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
 
 	local_irq_disable();
 
-	if (!arch_irq_disabled_regs(regs)) {
+	if (!regs_irqs_disabled(regs)) {
 		/* Returning to a kernel context with local irqs enabled. */
 		WARN_ON_ONCE(!(regs->msr & MSR_EE));
 again:
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index b762677f8737..52d6e10eab22 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -32,7 +32,7 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
 
 	BUG_ON(regs_is_unrecoverable(regs));
 	BUG_ON(!user_mode(regs));
-	BUG_ON(arch_irq_disabled_regs(regs));
+	BUG_ON(regs_irqs_disabled(regs));
 
 #ifdef CONFIG_PPC_PKEY
 	if (mmu_has_feature(MMU_FTR_PKEY)) {
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index cb8e9357383e..629f2a2d4780 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1956,7 +1956,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception)
 	 * prevent hash faults on user addresses when reading callchains (and
 	 * looks better from an irq tracing perspective).
 	 */
-	if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs)))
+	if (IS_ENABLED(CONFIG_PPC64) && unlikely(regs_irqs_disabled(regs)))
 		performance_monitor_exception_nmi(regs);
 	else
 		performance_monitor_exception_async(regs);
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 764001deb060..c40c69368476 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -376,7 +376,7 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
 	u64 tb;
 
 	/* should only arrive from kernel, with irqs disabled */
-	WARN_ON_ONCE(!arch_irq_disabled_regs(regs));
+	WARN_ON_ONCE(!regs_irqs_disabled(regs));
 
 	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
 		return 0;
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 8b0081441f85..f7518b7e3055 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2482,7 +2482,7 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 	 * will trigger a PMI after waking up from idle. Since counter values are _not_
 	 * saved/restored in idle path, can lead to below "Can't find PMC" message.
 	 */
-	if (unlikely(!found) && !arch_irq_disabled_regs(regs))
+	if (unlikely(!found) && !regs_irqs_disabled(regs))
 		printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n");
 
 	/*
-- 
2.53.0



^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox