* [PATCH 2/2] powerpc/selftests/copyloops: extend selftest to exercise __copy_tofrom_user_power7_vmx
2026-02-17 12:44 [PATCH 1/2] powerpc: fix KUAP warning in VMX usercopy path Sayali Patil
@ 2026-02-17 12:44 ` Sayali Patil
2026-02-22 13:04 ` Venkat
2026-02-22 12:59 ` [PATCH 1/2] powerpc: fix KUAP warning in VMX usercopy path Venkat
2026-02-27 9:22 ` Christophe Leroy (CS GROUP)
2 siblings, 1 reply; 6+ messages in thread
From: Sayali Patil @ 2026-02-17 12:44 UTC (permalink / raw)
To: linuxppc-dev, maddy; +Cc: aboorvad, sshegde, chleroy, riteshh, sayalip
The new PowerPC VMX fast path (__copy_tofrom_user_power7_vmx) is not
exercised by existing copyloops selftests. This patch updates
the selftest to exercise the VMX variant, ensuring the VMX copy path
is validated.
Changes include:
- COPY_LOOP=test___copy_tofrom_user_power7_vmx with -D VMX_TEST is used
in existing selftest build targets.
- Inclusion of ../utils.c to provide get_auxv_entry() for hardware
feature detection.
- At runtime, the test skips execution if Altivec is not available.
- Copy sizes above VMX_COPY_THRESHOLD are used to ensure the VMX
path is taken.
This enables validation of the VMX fast path without affecting systems
that do not support Altivec.
Signed-off-by: Sayali Patil <sayalip@linux.ibm.com>
---
.../selftests/powerpc/copyloops/.gitignore | 4 ++--
.../testing/selftests/powerpc/copyloops/Makefile | 11 ++++++++---
tools/testing/selftests/powerpc/copyloops/stubs.S | 8 --------
.../selftests/powerpc/copyloops/validate.c | 15 ++++++++++++++-
4 files changed, 24 insertions(+), 14 deletions(-)
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
index 7283e8b07b75..80d4270a71ac 100644
--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -2,8 +2,8 @@
copyuser_64_t0
copyuser_64_t1
copyuser_64_t2
-copyuser_p7_t0
-copyuser_p7_t1
+copyuser_p7
+copyuser_p7_vmx
memcpy_64_t0
memcpy_64_t1
memcpy_64_t2
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 42940f92d832..0c8efb0bddeb 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
- copyuser_p7_t0 copyuser_p7_t1 \
+ copyuser_p7 copyuser_p7_vmx \
memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 \
@@ -28,10 +28,15 @@ $(OUTPUT)/copyuser_64_t%: copyuser_64.S $(EXTRA_SOURCES)
-D SELFTEST_CASE=$(subst copyuser_64_t,,$(notdir $@)) \
-o $@ $^
-$(OUTPUT)/copyuser_p7_t%: copyuser_power7.S $(EXTRA_SOURCES)
+$(OUTPUT)/copyuser_p7: copyuser_power7.S $(EXTRA_SOURCES)
$(CC) $(CPPFLAGS) $(CFLAGS) \
-D COPY_LOOP=test___copy_tofrom_user_power7 \
- -D SELFTEST_CASE=$(subst copyuser_p7_t,,$(notdir $@)) \
+ -o $@ $^
+
+$(OUTPUT)/copyuser_p7_vmx: copyuser_power7.S $(EXTRA_SOURCES) ../utils.c
+ $(CC) $(CPPFLAGS) $(CFLAGS) \
+ -D COPY_LOOP=test___copy_tofrom_user_power7_vmx \
+ -D VMX_TEST \
-o $@ $^
# Strictly speaking, we only need the memcpy_64 test cases for big-endian
diff --git a/tools/testing/selftests/powerpc/copyloops/stubs.S b/tools/testing/selftests/powerpc/copyloops/stubs.S
index ec8bcf2bf1c2..3a9cb8c9a3ee 100644
--- a/tools/testing/selftests/powerpc/copyloops/stubs.S
+++ b/tools/testing/selftests/powerpc/copyloops/stubs.S
@@ -1,13 +1,5 @@
#include <asm/ppc_asm.h>
-FUNC_START(enter_vmx_usercopy)
- li r3,1
- blr
-
-FUNC_START(exit_vmx_usercopy)
- li r3,0
- blr
-
FUNC_START(enter_vmx_ops)
li r3,1
blr
diff --git a/tools/testing/selftests/powerpc/copyloops/validate.c b/tools/testing/selftests/powerpc/copyloops/validate.c
index 0f6873618552..fb822534fbe9 100644
--- a/tools/testing/selftests/powerpc/copyloops/validate.c
+++ b/tools/testing/selftests/powerpc/copyloops/validate.c
@@ -12,6 +12,10 @@
#define BUFLEN (MAX_LEN+MAX_OFFSET+2*MIN_REDZONE)
#define POISON 0xa5
+#ifdef VMX_TEST
+#define VMX_COPY_THRESHOLD 3328
+#endif
+
unsigned long COPY_LOOP(void *to, const void *from, unsigned long size);
static void do_one(char *src, char *dst, unsigned long src_off,
@@ -81,8 +85,12 @@ int test_copy_loop(void)
/* Fill with sequential bytes */
for (i = 0; i < BUFLEN; i++)
fill[i] = i & 0xff;
-
+#ifdef VMX_TEST
+ /* Force sizes above kernel VMX threshold (3328) */
+ for (len = VMX_COPY_THRESHOLD + 1; len < MAX_LEN; len++) {
+#else
for (len = 1; len < MAX_LEN; len++) {
+#endif
for (src_off = 0; src_off < MAX_OFFSET; src_off++) {
for (dst_off = 0; dst_off < MAX_OFFSET; dst_off++) {
do_one(src, dst, src_off, dst_off, len,
@@ -96,5 +104,10 @@ int test_copy_loop(void)
int main(void)
{
+#ifdef VMX_TEST
+ /* Skip if Altivec not present */
+ SKIP_IF_MSG(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC), "ALTIVEC not supported");
+#endif
+
return test_harness(test_copy_loop, str(COPY_LOOP));
}
--
2.52.0
^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] powerpc/selftests/copyloops: extend selftest to exercise __copy_tofrom_user_power7_vmx
2026-02-17 12:44 ` [PATCH 2/2] powerpc/selftests/copyloops: extend selftest to exercise __copy_tofrom_user_power7_vmx Sayali Patil
@ 2026-02-22 13:04 ` Venkat
0 siblings, 0 replies; 6+ messages in thread
From: Venkat @ 2026-02-22 13:04 UTC (permalink / raw)
To: Sayali Patil; +Cc: linuxppc-dev, maddy, aboorvad, sshegde, chleroy, riteshh
> On 17 Feb 2026, at 6:14 PM, Sayali Patil <sayalip@linux.ibm.com> wrote:
>
> The new PowerPC VMX fast path (__copy_tofrom_user_power7_vmx) is not
> exercised by existing copyloops selftests. This patch updates
> the selftest to exercise the VMX variant, ensuring the VMX copy path
> is validated.
>
> Changes include:
> - COPY_LOOP=test___copy_tofrom_user_power7_vmx with -D VMX_TEST is used
> in existing selftest build targets.
> - Inclusion of ../utils.c to provide get_auxv_entry() for hardware
> feature detection.
> - At runtime, the test skips execution if Altivec is not available.
> - Copy sizes above VMX_COPY_THRESHOLD are used to ensure the VMX
> path is taken.
>
> This enables validation of the VMX fast path without affecting systems
> that do not support Altivec.
>
> Signed-off-by: Sayali Patil <sayalip@linux.ibm.com>
> ---
> .../selftests/powerpc/copyloops/.gitignore | 4 ++--
> .../testing/selftests/powerpc/copyloops/Makefile | 11 ++++++++---
> tools/testing/selftests/powerpc/copyloops/stubs.S | 8 --------
> .../selftests/powerpc/copyloops/validate.c | 15 ++++++++++++++-
> 4 files changed, 24 insertions(+), 14 deletions(-)
>
> diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
> index 7283e8b07b75..80d4270a71ac 100644
> --- a/tools/testing/selftests/powerpc/copyloops/.gitignore
> +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
> @@ -2,8 +2,8 @@
> copyuser_64_t0
> copyuser_64_t1
> copyuser_64_t2
> -copyuser_p7_t0
> -copyuser_p7_t1
> +copyuser_p7
> +copyuser_p7_vmx
> memcpy_64_t0
> memcpy_64_t1
> memcpy_64_t2
> diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
> index 42940f92d832..0c8efb0bddeb 100644
> --- a/tools/testing/selftests/powerpc/copyloops/Makefile
> +++ b/tools/testing/selftests/powerpc/copyloops/Makefile
> @@ -1,6 +1,6 @@
> # SPDX-License-Identifier: GPL-2.0
> TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
> - copyuser_p7_t0 copyuser_p7_t1 \
> + copyuser_p7 copyuser_p7_vmx \
> memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
> memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
> copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 \
> @@ -28,10 +28,15 @@ $(OUTPUT)/copyuser_64_t%: copyuser_64.S $(EXTRA_SOURCES)
> -D SELFTEST_CASE=$(subst copyuser_64_t,,$(notdir $@)) \
> -o $@ $^
>
> -$(OUTPUT)/copyuser_p7_t%: copyuser_power7.S $(EXTRA_SOURCES)
> +$(OUTPUT)/copyuser_p7: copyuser_power7.S $(EXTRA_SOURCES)
> $(CC) $(CPPFLAGS) $(CFLAGS) \
> -D COPY_LOOP=test___copy_tofrom_user_power7 \
> - -D SELFTEST_CASE=$(subst copyuser_p7_t,,$(notdir $@)) \
> + -o $@ $^
> +
> +$(OUTPUT)/copyuser_p7_vmx: copyuser_power7.S $(EXTRA_SOURCES) ../utils.c
> + $(CC) $(CPPFLAGS) $(CFLAGS) \
> + -D COPY_LOOP=test___copy_tofrom_user_power7_vmx \
> + -D VMX_TEST \
> -o $@ $^
>
> # Strictly speaking, we only need the memcpy_64 test cases for big-endian
> diff --git a/tools/testing/selftests/powerpc/copyloops/stubs.S b/tools/testing/selftests/powerpc/copyloops/stubs.S
> index ec8bcf2bf1c2..3a9cb8c9a3ee 100644
> --- a/tools/testing/selftests/powerpc/copyloops/stubs.S
> +++ b/tools/testing/selftests/powerpc/copyloops/stubs.S
> @@ -1,13 +1,5 @@
> #include <asm/ppc_asm.h>
>
> -FUNC_START(enter_vmx_usercopy)
> - li r3,1
> - blr
> -
> -FUNC_START(exit_vmx_usercopy)
> - li r3,0
> - blr
> -
> FUNC_START(enter_vmx_ops)
> li r3,1
> blr
> diff --git a/tools/testing/selftests/powerpc/copyloops/validate.c b/tools/testing/selftests/powerpc/copyloops/validate.c
> index 0f6873618552..fb822534fbe9 100644
> --- a/tools/testing/selftests/powerpc/copyloops/validate.c
> +++ b/tools/testing/selftests/powerpc/copyloops/validate.c
> @@ -12,6 +12,10 @@
> #define BUFLEN (MAX_LEN+MAX_OFFSET+2*MIN_REDZONE)
> #define POISON 0xa5
>
> +#ifdef VMX_TEST
> +#define VMX_COPY_THRESHOLD 3328
> +#endif
> +
> unsigned long COPY_LOOP(void *to, const void *from, unsigned long size);
>
> static void do_one(char *src, char *dst, unsigned long src_off,
> @@ -81,8 +85,12 @@ int test_copy_loop(void)
> /* Fill with sequential bytes */
> for (i = 0; i < BUFLEN; i++)
> fill[i] = i & 0xff;
> -
> +#ifdef VMX_TEST
> + /* Force sizes above kernel VMX threshold (3328) */
> + for (len = VMX_COPY_THRESHOLD + 1; len < MAX_LEN; len++) {
> +#else
> for (len = 1; len < MAX_LEN; len++) {
> +#endif
> for (src_off = 0; src_off < MAX_OFFSET; src_off++) {
> for (dst_off = 0; dst_off < MAX_OFFSET; dst_off++) {
> do_one(src, dst, src_off, dst_off, len,
> @@ -96,5 +104,10 @@ int test_copy_loop(void)
>
> int main(void)
> {
> +#ifdef VMX_TEST
> + /* Skip if Altivec not present */
> + SKIP_IF_MSG(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC), "ALTIVEC not supported");
> +#endif
> +
> return test_harness(test_copy_loop, str(COPY_LOOP));
> }
> --
> 2.52.0
>
>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
# timeout set to 130
# selftests: powerpc/copyloops: copyuser_64_t2
# test: test___copy_tofrom_user_base
# tags: git_version:v6.19-2410-g27ede8cb60bb
# success: test___copy_tofrom_user_base
ok 3 selftests: powerpc/copyloops: copyuser_64_t2
# timeout set to 130
# selftests: powerpc/copyloops: copyuser_p7
# test: test___copy_tofrom_user_power7
# tags: git_version:v6.19-2410-g27ede8cb60bb
# success: test___copy_tofrom_user_power7
ok 4 selftests: powerpc/copyloops: copyuser_p7
# timeout set to 130
# selftests: powerpc/copyloops: copyuser_p7_vmx
# test: test___copy_tofrom_user_power7_vmx
# tags: git_version:v6.19-2410-g27ede8cb60bb
# success: test___copy_tofrom_user_power7_vmx
ok 5 selftests: powerpc/copyloops: copyuser_p7_vmx
Regards,
Venkat.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 1/2] powerpc: fix KUAP warning in VMX usercopy path
2026-02-17 12:44 [PATCH 1/2] powerpc: fix KUAP warning in VMX usercopy path Sayali Patil
2026-02-17 12:44 ` [PATCH 2/2] powerpc/selftests/copyloops: extend selftest to exercise __copy_tofrom_user_power7_vmx Sayali Patil
@ 2026-02-22 12:59 ` Venkat
2026-02-27 9:22 ` Christophe Leroy (CS GROUP)
2 siblings, 0 replies; 6+ messages in thread
From: Venkat @ 2026-02-22 12:59 UTC (permalink / raw)
To: Sayali Patil; +Cc: linuxppc-dev, maddy, aboorvad, sshegde, chleroy, riteshh
> On 17 Feb 2026, at 6:14 PM, Sayali Patil <sayalip@linux.ibm.com> wrote:
>
> On powerpc with PREEMPT_FULL or PREEMPT_LAZY and function tracing enabled,
> KUAP warnings can be triggered from the VMX usercopy path under memory
> stress workloads.
>
> KUAP requires that no subfunctions are called once userspace access has
> been enabled. The existing VMX copy implementation violates this
> requirement by invoking enter_vmx_usercopy() from the assembly path after
> userspace access has already been enabled. If preemption occurs
> in this window, the AMR state may not be preserved correctly,
> leading to unexpected userspace access state and resulting in
> KUAP warnings.
>
> Fix this by moving VMX selection and enter_vmx_usercopy()/
> exit_vmx_usercopy() handling into the raw_copy_{to,from,in}_user()
> wrappers in uaccess.h. The new flow is:
>
> - Decide whether to use the VMX path based on size and CPU capability
> - Call enter_vmx_usercopy() before enabling userspace access
> - Enable userspace access and perform the VMX copy
> - Disable userspace access
> - Call exit_vmx_usercopy()
> - Fall back to the base copy routine if the VMX copy faults
>
> With this change, the VMX assembly routines no longer perform VMX state
> management or call helper functions; they only implement the
> copy operations.
> The previous feature-section based VMX selection inside
> __copy_tofrom_user_power7() is removed, and a dedicated
> __copy_tofrom_user_power7_vmx() entry point is introduced.
>
> This ensures correct KUAP ordering, avoids subfunction calls
> while KUAP is unlocked, and eliminates the warnings while preserving
> the VMX fast path.
>
> Fixes: de78a9c42a79 ("powerpc: Add a framework for Kernel Userspace Access Protection")
> Reported-by: Shrikanth Hegde <sshegde@linux.ibm.com>
> Closes: https://lore.kernel.org/all/20260109064917.777587-2-sshegde@linux.ibm.com/
> Suggested-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
> Co-developed-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
> Signed-off-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
> Signed-off-by: Sayali Patil <sayalip@linux.ibm.com>
> ---
> arch/powerpc/include/asm/uaccess.h | 67 ++++++++++++++++++++++++++++++
> arch/powerpc/lib/copyuser_64.S | 1 +
> arch/powerpc/lib/copyuser_power7.S | 45 +++++++-------------
> arch/powerpc/lib/vmx-helper.c | 2 +
> 4 files changed, 85 insertions(+), 30 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> index 784a00e681fa..52e4a784d148 100644
> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -13,6 +13,11 @@
> #define TASK_SIZE_MAX TASK_SIZE_USER64
> #endif
>
> +#ifdef CONFIG_ALTIVEC
> +/* Threshold above which VMX copy path is used */
> +#define VMX_COPY_THRESHOLD 3328
> +#endif
> +
> #include <asm-generic/access_ok.h>
>
> /*
> @@ -323,12 +328,42 @@ do { \
> extern unsigned long __copy_tofrom_user(void __user *to,
> const void __user *from, unsigned long size);
>
> +extern unsigned long __copy_tofrom_user_base(void __user *to,
> + const void __user *from, unsigned long size);
> +
> +#ifdef CONFIG_ALTIVEC
> +extern unsigned long __copy_tofrom_user_power7_vmx(void __user *to,
> + const void __user *from, unsigned long size);
> +
> +static inline bool will_use_vmx(unsigned long n)
> +{
> + return cpu_has_feature(CPU_FTR_VMX_COPY) &&
> + n > VMX_COPY_THRESHOLD;
> +}
> +#endif
> +
> #ifdef __powerpc64__
> static inline unsigned long
> raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
> {
> unsigned long ret;
>
> +#ifdef CONFIG_ALTIVEC
> + if (will_use_vmx(n) && enter_vmx_usercopy()) {
> + allow_read_write_user(to, from, n);
> + ret = __copy_tofrom_user_power7_vmx(to, from, n);
> + prevent_read_write_user(to, from, n);
> + exit_vmx_usercopy();
> + if (unlikely(ret)) {
> + allow_read_write_user(to, from, n);
> + ret = __copy_tofrom_user_base(to, from, n);
> + prevent_read_write_user(to, from, n);
> + }
> +
> + return ret;
> + }
> +#endif
> +
> allow_read_write_user(to, from, n);
> ret = __copy_tofrom_user(to, from, n);
> prevent_read_write_user(to, from, n);
> @@ -341,6 +376,22 @@ static inline unsigned long raw_copy_from_user(void *to,
> {
> unsigned long ret;
>
> +#ifdef CONFIG_ALTIVEC
> + if (will_use_vmx(n) && enter_vmx_usercopy()) {
> + allow_read_from_user(from, n);
> + ret = __copy_tofrom_user_power7_vmx((__force void __user *)to, from, n);
> + prevent_read_from_user(from, n);
> + exit_vmx_usercopy();
> + if (unlikely(ret)) {
> + allow_read_from_user(from, n);
> + ret = __copy_tofrom_user_base((__force void __user *)to, from, n);
> + prevent_read_from_user(from, n);
> + }
> +
> + return ret;
> + }
> +#endif
> +
> allow_read_from_user(from, n);
> ret = __copy_tofrom_user((__force void __user *)to, from, n);
> prevent_read_from_user(from, n);
> @@ -352,6 +403,22 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
> {
> unsigned long ret;
>
> +#ifdef CONFIG_ALTIVEC
> + if (will_use_vmx(n) && enter_vmx_usercopy()) {
> + allow_write_to_user(to, n);
> + ret = __copy_tofrom_user_power7_vmx(to, (__force const void __user *)from, n);
> + prevent_write_to_user(to, n);
> + exit_vmx_usercopy();
> + if (unlikely(ret)) {
> + allow_write_to_user(to, n);
> + ret = __copy_tofrom_user_base(to, (__force const void __user *)from, n);
> + prevent_write_to_user(to, n);
> + }
> +
> + return ret;
> + }
> +#endif
> +
> allow_write_to_user(to, n);
> ret = __copy_tofrom_user(to, (__force const void __user *)from, n);
> prevent_write_to_user(to, n);
> diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
> index 9af969d2cc0c..25a99108caff 100644
> --- a/arch/powerpc/lib/copyuser_64.S
> +++ b/arch/powerpc/lib/copyuser_64.S
> @@ -562,3 +562,4 @@ exc; std r10,32(3)
> li r5,4096
> b .Ldst_aligned
> EXPORT_SYMBOL(__copy_tofrom_user)
> +EXPORT_SYMBOL(__copy_tofrom_user_base)
> diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
> index 8474c682a178..17dbcfbae25f 100644
> --- a/arch/powerpc/lib/copyuser_power7.S
> +++ b/arch/powerpc/lib/copyuser_power7.S
> @@ -5,13 +5,9 @@
> *
> * Author: Anton Blanchard <anton@au.ibm.com>
> */
> +#include <linux/export.h>
> #include <asm/ppc_asm.h>
>
> -#ifndef SELFTEST_CASE
> -/* 0 == don't use VMX, 1 == use VMX */
> -#define SELFTEST_CASE 0
> -#endif
> -
> #ifdef __BIG_ENDIAN__
> #define LVS(VRT,RA,RB) lvsl VRT,RA,RB
> #define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
> @@ -47,10 +43,14 @@
> ld r15,STK_REG(R15)(r1)
> ld r14,STK_REG(R14)(r1)
> .Ldo_err3:
> - bl CFUNC(exit_vmx_usercopy)
> + ld r6,STK_REG(R31)(r1) /* original destination pointer */
> + ld r5,STK_REG(R29)(r1) /* original number of bytes */
> + subf r7,r6,r3 /* #bytes copied */
> + subf r3,r7,r5 /* #bytes not copied in r3 */
> ld r0,STACKFRAMESIZE+16(r1)
> mtlr r0
> - b .Lexit
> + addi r1,r1,STACKFRAMESIZE
> + blr
> #endif /* CONFIG_ALTIVEC */
>
> .Ldo_err2:
> @@ -74,7 +74,6 @@
>
> _GLOBAL(__copy_tofrom_user_power7)
> cmpldi r5,16
> - cmpldi cr1,r5,3328
>
> std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
> std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
> @@ -82,12 +81,6 @@ _GLOBAL(__copy_tofrom_user_power7)
>
> blt .Lshort_copy
>
> -#ifdef CONFIG_ALTIVEC
> -test_feature = SELFTEST_CASE
> -BEGIN_FTR_SECTION
> - bgt cr1,.Lvmx_copy
> -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
> -#endif
>
> .Lnonvmx_copy:
> /* Get the source 8B aligned */
> @@ -263,23 +256,14 @@ err1; stb r0,0(r3)
> 15: li r3,0
> blr
>
> -.Lunwind_stack_nonvmx_copy:
> - addi r1,r1,STACKFRAMESIZE
> - b .Lnonvmx_copy
> -
> -.Lvmx_copy:
> #ifdef CONFIG_ALTIVEC
> +_GLOBAL(__copy_tofrom_user_power7_vmx)
> mflr r0
> std r0,16(r1)
> stdu r1,-STACKFRAMESIZE(r1)
> - bl CFUNC(enter_vmx_usercopy)
> - cmpwi cr1,r3,0
> - ld r0,STACKFRAMESIZE+16(r1)
> - ld r3,STK_REG(R31)(r1)
> - ld r4,STK_REG(R30)(r1)
> - ld r5,STK_REG(R29)(r1)
> - mtlr r0
>
> + std r3,STK_REG(R31)(r1)
> + std r5,STK_REG(R29)(r1)
> /*
> * We prefetch both the source and destination using enhanced touch
> * instructions. We use a stream ID of 0 for the load side and
> @@ -300,8 +284,6 @@ err1; stb r0,0(r3)
>
> DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8)
>
> - beq cr1,.Lunwind_stack_nonvmx_copy
> -
> /*
> * If source and destination are not relatively aligned we use a
> * slower permute loop.
> @@ -478,7 +460,8 @@ err3; lbz r0,0(r4)
> err3; stb r0,0(r3)
>
> 15: addi r1,r1,STACKFRAMESIZE
> - b CFUNC(exit_vmx_usercopy) /* tail call optimise */
> + li r3,0
> + blr
>
> .Lvmx_unaligned_copy:
> /* Get the destination 16B aligned */
> @@ -681,5 +664,7 @@ err3; lbz r0,0(r4)
> err3; stb r0,0(r3)
>
> 15: addi r1,r1,STACKFRAMESIZE
> - b CFUNC(exit_vmx_usercopy) /* tail call optimise */
> + li r3,0
> + blr
> +EXPORT_SYMBOL(__copy_tofrom_user_power7_vmx)
> #endif /* CONFIG_ALTIVEC */
> diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c
> index 54340912398f..554b248002b4 100644
> --- a/arch/powerpc/lib/vmx-helper.c
> +++ b/arch/powerpc/lib/vmx-helper.c
> @@ -27,6 +27,7 @@ int enter_vmx_usercopy(void)
>
> return 1;
> }
> +EXPORT_SYMBOL(enter_vmx_usercopy);
>
> /*
> * This function must return 0 because we tail call optimise when calling
> @@ -49,6 +50,7 @@ int exit_vmx_usercopy(void)
> set_dec(1);
> return 0;
> }
> +EXPORT_SYMBOL(exit_vmx_usercopy);
>
> int enter_vmx_ops(void)
> {
> --
> 2.52.0
>
>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Confirmed fix using the repro steps in the Closes: tag.
With this patch:
No Warnings seen.
Without this patch:
[ 647.366772] Kernel attempted to write user page (3fff86135000) - exploit attempt? (uid: 0)
[ 647.366818] ------------[ cut here ]------------
[ 647.366844] Bug: Write fault blocked by KUAP!
[ 647.366864] WARNING: arch/powerpc/mm/fault.c:231 at bad_kernel_fault.constprop.0+0x1a8/0x2c8, CPU#15: stress-ng-vm-rw/5083
[ 647.366912] Modules linked in: nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 bonding tls rfkill ip_set nf_tables nfnetlink sunrpc pseries_rng vmx_crypto fuse ext4 crc16 mbcache jbd2 sd_mod sg ibmvscsi ibmveth scsi_transport_srp pseries_wdt
[ 647.367436] CPU: 15 UID: 0 PID: 5083 Comm: stress-ng-vm-rw Kdump: loaded Tainted: G W 6.19.0+ #2 PREEMPT(lazy)
[ 647.367470] Tainted: [W]=WARN
[ 647.367493] Hardware name: IBM,9080-HEX Power11 (architected) 0x820200 0xf000007 of:IBM,FW1110.01 (NH1110_069) hv:phyp pSeries
[ 647.367523] NIP: c00000000008cf9c LR: c00000000008cf98 CTR: c0000000002293ac
[ 647.367552] REGS: c0000000e0753160 TRAP: 0700 Tainted: G W (6.19.0+)
[ 647.367577] MSR: 8000000000029033 <SF,EE,ME,IR,DR,RI,LE> CR: 28024824 XER: 20040005
[ 647.367732] CFAR: c000000000166418 IRQMASK: 3
[ 647.367732] GPR00: c00000000008cf98 c0000000e0753400 c00000000189c100 0000000000000021
[ 647.367732] GPR04: 0000000000000000 c0000000e0753200 c0000000e07531f8 0000000efa437000
[ 647.367732] GPR08: 0000000000000027 0000000000000000 c000000078ca8000 0000000000004000
[ 647.367732] GPR12: c00000000033ca90 c000000effff4300 0000000000000000 0000000000000000
[ 647.367732] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 647.367732] GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000002000000
[ 647.367732] GPR24: 0000000000000001 0000000000000001 0000000000000000 c000000078c9d800
[ 647.367732] GPR28: 00003fff86135000 0000000000000300 00003fff86135000 c0000000e07535a0
[ 647.368347] NIP [c00000000008cf9c] bad_kernel_fault.constprop.0+0x1a8/0x2c8
[ 647.368375] LR [c00000000008cf98] bad_kernel_fault.constprop.0+0x1a4/0x2c8
[ 647.368405] Call Trace:
[ 647.368426] [c0000000e0753400] [c00000000008cf98] bad_kernel_fault.constprop.0+0x1a4/0x2c8 (unreliable)
[ 647.368482] [c0000000e0753480] [c00000000008dc34] ___do_page_fault+0x688/0xa54
[ 647.368526] [c0000000e0753540] [c00000000008e2ac] do_page_fault+0x30/0x70
[ 647.368564] [c0000000e0753570] [c000000000008be0] data_access_common_virt+0x210/0x220
[ 647.368608] ---- interrupt: 300 at __copy_tofrom_user_base+0x9c/0x5a4
[ 647.368638] NIP: c0000000000be2a8 LR: c0000000009bcba8 CTR: 0000000000000080
[ 647.368664] REGS: c0000000e07535a0 TRAP: 0300 Tainted: G W (6.19.0+)
[ 647.368692] MSR: 800000000280b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE> CR: 24022824 XER: 20040000
[ 647.368892] CFAR: c0000000000be274 DAR: 00003fff86135000 DSISR: 0a000000 IRQMASK: 0
[ 647.368892] GPR00: 0000000000000000 c0000000e0753840 c00000000189c100 00003fff86134ff0
[ 647.368892] GPR04: c00000009fee7010 0000000000001000 0000000000000000 0000000000000000
[ 647.368892] GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 647.368892] GPR12: c00000000033ca90 c000000effff4300 c0000001122bc9a8 c0000000e0753b50
[ 647.368892] GPR16: 00000000002cb000 0000000000000000 0000000000000000 0000000000000400
[ 647.368892] GPR20: 0000000000000001 0000000000000000 c000000078c9da10 00003fff85000000
[ 647.368892] GPR24: 0000000000400000 c00000009fee7000 c000000002de75f0 c0000000e0753b50
[ 647.368892] GPR28: c00c0000027fb9c0 c0000000e0753b50 0000000000000000 0000000000001000
[ 647.369529] NIP [c0000000000be2a8] __copy_tofrom_user_base+0x9c/0x5a4
[ 647.369560] LR [c0000000009bcba8] _copy_to_iter+0x128/0xaa4
[ 647.369593] ---- interrupt: 300
[ 647.369620] [c0000000e0753910] [c0000000009bd5fc] copy_page_to_iter+0xd8/0x1b8
[ 647.369661] [c0000000e0753960] [c0000000005b340c] process_vm_rw_single_vec.constprop.0+0x1cc/0x3b0
[ 647.369703] [c0000000e0753a20] [c0000000005b375c] process_vm_rw_core.constprop.0+0x16c/0x310
[ 647.369746] [c0000000e0753b20] [c0000000005b3a28] process_vm_rw+0x128/0x184
[ 647.369790] [c0000000e0753cb0] [c00000000003186c] system_call_exception+0x14c/0x340
[ 647.369834] [c0000000e0753e50] [c00000000000d05c] system_call_vectored_common+0x15c/0x2ec
[ 647.369877] ---- interrupt: 3000 at 0x3fff86d4dc1c
[ 647.369906] NIP: 00003fff86d4dc1c LR: 00003fff86d4dc1c CTR: 0000000000000000
[ 647.369932] REGS: c0000000e0753e80 TRAP: 3000 Tainted: G W (6.19.0+)
[ 647.369958] MSR: 800000000280f033 <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE> CR: 44022402 XER: 00000000
[ 647.370155] IRQMASK: 0
[ 647.370155] GPR00: 000000000000015f 00003fffcffb0040 000000012ddd7800 00000000000013f2
[ 647.370155] GPR04: 00003fffcffb00c0 0000000000000001 00003fffcffb0100 0000000000000001
[ 647.370155] GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 647.370155] GPR12: 0000000000000000 00003fff871fb760 0000000000000000 0000000000000000
[ 647.370155] GPR16: 0000000000000001 00000000ffffffff 0000000000000019 0000000001000000
[ 647.370155] GPR20: 00003fffcffb00a0 00003fffcffb00b0 000000012ddd0008 00003fff86f39e28
[ 647.370155] GPR24: 00003fff85800000 00003fffcffb0100 0000000040000000 00003fff86800000
[ 647.370155] GPR28: 00003fff85800000 0000000000000000 00003fffcffb0248 00003fffcffb00c0
[ 647.370710] NIP [00003fff86d4dc1c] 0x3fff86d4dc1c
[ 647.370741] LR [00003fff86d4dc1c] 0x3fff86d4dc1c
[ 647.370767] ---- interrupt: 3000
[ 647.370793] Code: e87f0100 48115ebd 60000000 2c230000 4182ff34 40920120 3c82ffd9 38844608 3c62ffd9 386346a0 480d93c1 60000000 <0fe00000> e8010090 ebe10078 38210080
[ 647.371060] ---[ end trace 0000000000000000 ]---
Regards,
Venkat.
^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] powerpc: fix KUAP warning in VMX usercopy path
2026-02-17 12:44 [PATCH 1/2] powerpc: fix KUAP warning in VMX usercopy path Sayali Patil
2026-02-17 12:44 ` [PATCH 2/2] powerpc/selftests/copyloops: extend selftest to exercise __copy_tofrom_user_power7_vmx Sayali Patil
2026-02-22 12:59 ` [PATCH 1/2] powerpc: fix KUAP warning in VMX usercopy path Venkat
@ 2026-02-27 9:22 ` Christophe Leroy (CS GROUP)
2026-02-28 14:00 ` Sayali Patil
2 siblings, 1 reply; 6+ messages in thread
From: Christophe Leroy (CS GROUP) @ 2026-02-27 9:22 UTC (permalink / raw)
To: Sayali Patil, linuxppc-dev, maddy; +Cc: aboorvad, sshegde, riteshh
Hi,
Le 17/02/2026 à 13:44, Sayali Patil a écrit :
> [Vous ne recevez pas souvent de courriers de sayalip@linux.ibm.com. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
>
> On powerpc with PREEMPT_FULL or PREEMPT_LAZY and function tracing enabled,
> KUAP warnings can be triggered from the VMX usercopy path under memory
> stress workloads.
>
> KUAP requires that no subfunctions are called once userspace access has
> been enabled. The existing VMX copy implementation violates this
> requirement by invoking enter_vmx_usercopy() from the assembly path after
> userspace access has already been enabled. If preemption occurs
> in this window, the AMR state may not be preserved correctly,
> leading to unexpected userspace access state and resulting in
> KUAP warnings.
>
> Fix this by moving VMX selection and enter_vmx_usercopy()/
> exit_vmx_usercopy() handling into the raw_copy_{to,from,in}_user()
> wrappers in uaccess.h. The new flow is:
>
> - Decide whether to use the VMX path based on size and CPU capability
> - Call enter_vmx_usercopy() before enabling userspace access
> - Enable userspace access and perform the VMX copy
> - Disable userspace access
> - Call exit_vmx_usercopy()
> - Fall back to the base copy routine if the VMX copy faults
>
> With this change, the VMX assembly routines no longer perform VMX state
> management or call helper functions; they only implement the
> copy operations.
> The previous feature-section based VMX selection inside
> __copy_tofrom_user_power7() is removed, and a dedicated
> __copy_tofrom_user_power7_vmx() entry point is introduced.
>
> This ensures correct KUAP ordering, avoids subfunction calls
> while KUAP is unlocked, and eliminates the warnings while preserving
> the VMX fast path.
Your patch conflicts with the changes for adding masked user access.
Can you rebase on top of v7.0-rc1 ?
Comments below
>
> Fixes: de78a9c42a79 ("powerpc: Add a framework for Kernel Userspace Access Protection")
> Reported-by: Shrikanth Hegde <sshegde@linux.ibm.com>
> Closes: https://lore.kernel.org/all/20260109064917.777587-2-sshegde@linux.ibm.com/
> Suggested-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
> Co-developed-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
> Signed-off-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
> Signed-off-by: Sayali Patil <sayalip@linux.ibm.com>
> ---
> arch/powerpc/include/asm/uaccess.h | 67 ++++++++++++++++++++++++++++++
> arch/powerpc/lib/copyuser_64.S | 1 +
> arch/powerpc/lib/copyuser_power7.S | 45 +++++++-------------
> arch/powerpc/lib/vmx-helper.c | 2 +
> 4 files changed, 85 insertions(+), 30 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> index 784a00e681fa..52e4a784d148 100644
> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -13,6 +13,11 @@
> #define TASK_SIZE_MAX TASK_SIZE_USER64
> #endif
>
> +#ifdef CONFIG_ALTIVEC
remove the ifdef to avoid matching ifdef later
> +/* Threshold above which VMX copy path is used */
> +#define VMX_COPY_THRESHOLD 3328
> +#endif
> +
> #include <asm-generic/access_ok.h>
>
> /*
> @@ -323,12 +328,42 @@ do { \
> extern unsigned long __copy_tofrom_user(void __user *to,
> const void __user *from, unsigned long size);
>
> +extern unsigned long __copy_tofrom_user_base(void __user *to,
> + const void __user *from, unsigned long size);
> +
extern keyword is pointless for function prototypes, don't add new ones.
> +#ifdef CONFIG_ALTIVEC
Remove the ifdef
> +extern unsigned long __copy_tofrom_user_power7_vmx(void __user *to,
> + const void __user *from, unsigned long size);
> +
> +static inline bool will_use_vmx(unsigned long n)
> +{
> + return cpu_has_feature(CPU_FTR_VMX_COPY) &&
> + n > VMX_COPY_THRESHOLD;
Change to
return IS_ENABLED(CONFIG_ALTIVEC) && cpu_has_feature(CPU_FTR_VMX_COPY)
&& n > VMX_COPY_THRESHOLD;
Then will_use_vmx() will return false when CONFIG_ALTIVEC is not set
> +}
> +#endif
> +
> #ifdef __powerpc64__
> static inline unsigned long
> raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
> {
> unsigned long ret;
>
> +#ifdef CONFIG_ALTIVEC
Remove the ifdef, will_use_vmx() will return false with the above change
when CONFIG_ALTIVEC is not set
> + if (will_use_vmx(n) && enter_vmx_usercopy()) {
> + allow_read_write_user(to, from, n);
> + ret = __copy_tofrom_user_power7_vmx(to, from, n);
> + prevent_read_write_user(to, from, n);
> + exit_vmx_usercopy();
> + if (unlikely(ret)) {
> + allow_read_write_user(to, from, n);
> + ret = __copy_tofrom_user_base(to, from, n);
> + prevent_read_write_user(to, from, n);
> + }
> +
> + return ret;
> + }
This block is starting to be a bit big for an inline function.
I think we should just have:
if (will_use_vmx(n))
return __copy_tofrom_user_vmx()
and then define a __copy_tofrom_user_vmx() in for instance
arch/powerpc/lib/vmx-helper.c
This would also avoid having to export enter_vmx_usercopy() and
exit_vmx_usercopy()
Christophe
> +#endif
> +
> allow_read_write_user(to, from, n);
> ret = __copy_tofrom_user(to, from, n);
> prevent_read_write_user(to, from, n);
> @@ -341,6 +376,22 @@ static inline unsigned long raw_copy_from_user(void *to,
> {
> unsigned long ret;
>
> +#ifdef CONFIG_ALTIVEC
> + if (will_use_vmx(n) && enter_vmx_usercopy()) {
> + allow_read_from_user(from, n);
> + ret = __copy_tofrom_user_power7_vmx((__force void __user *)to, from, n);
> + prevent_read_from_user(from, n);
> + exit_vmx_usercopy();
> + if (unlikely(ret)) {
> + allow_read_from_user(from, n);
> + ret = __copy_tofrom_user_base((__force void __user *)to, from, n);
> + prevent_read_from_user(from, n);
> + }
> +
> + return ret;
> + }
> +#endif
> +
> allow_read_from_user(from, n);
> ret = __copy_tofrom_user((__force void __user *)to, from, n);
> prevent_read_from_user(from, n);
> @@ -352,6 +403,22 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
> {
> unsigned long ret;
>
> +#ifdef CONFIG_ALTIVEC
> + if (will_use_vmx(n) && enter_vmx_usercopy()) {
> + allow_write_to_user(to, n);
> + ret = __copy_tofrom_user_power7_vmx(to, (__force const void __user *)from, n);
> + prevent_write_to_user(to, n);
> + exit_vmx_usercopy();
> + if (unlikely(ret)) {
> + allow_write_to_user(to, n);
> + ret = __copy_tofrom_user_base(to, (__force const void __user *)from, n);
> + prevent_write_to_user(to, n);
> + }
> +
> + return ret;
> + }
> +#endif
> +
> allow_write_to_user(to, n);
> ret = __copy_tofrom_user(to, (__force const void __user *)from, n);
> prevent_write_to_user(to, n);
> diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
> index 9af969d2cc0c..25a99108caff 100644
> --- a/arch/powerpc/lib/copyuser_64.S
> +++ b/arch/powerpc/lib/copyuser_64.S
> @@ -562,3 +562,4 @@ exc; std r10,32(3)
> li r5,4096
> b .Ldst_aligned
> EXPORT_SYMBOL(__copy_tofrom_user)
> +EXPORT_SYMBOL(__copy_tofrom_user_base)
> diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
> index 8474c682a178..17dbcfbae25f 100644
> --- a/arch/powerpc/lib/copyuser_power7.S
> +++ b/arch/powerpc/lib/copyuser_power7.S
> @@ -5,13 +5,9 @@
> *
> * Author: Anton Blanchard <anton@au.ibm.com>
> */
> +#include <linux/export.h>
> #include <asm/ppc_asm.h>
>
> -#ifndef SELFTEST_CASE
> -/* 0 == don't use VMX, 1 == use VMX */
> -#define SELFTEST_CASE 0
> -#endif
> -
> #ifdef __BIG_ENDIAN__
> #define LVS(VRT,RA,RB) lvsl VRT,RA,RB
> #define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
> @@ -47,10 +43,14 @@
> ld r15,STK_REG(R15)(r1)
> ld r14,STK_REG(R14)(r1)
> .Ldo_err3:
> - bl CFUNC(exit_vmx_usercopy)
> + ld r6,STK_REG(R31)(r1) /* original destination pointer */
> + ld r5,STK_REG(R29)(r1) /* original number of bytes */
> + subf r7,r6,r3 /* #bytes copied */
> + subf r3,r7,r5 /* #bytes not copied in r3 */
> ld r0,STACKFRAMESIZE+16(r1)
> mtlr r0
> - b .Lexit
> + addi r1,r1,STACKFRAMESIZE
> + blr
> #endif /* CONFIG_ALTIVEC */
>
> .Ldo_err2:
> @@ -74,7 +74,6 @@
>
> _GLOBAL(__copy_tofrom_user_power7)
> cmpldi r5,16
> - cmpldi cr1,r5,3328
>
> std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
> std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
> @@ -82,12 +81,6 @@ _GLOBAL(__copy_tofrom_user_power7)
>
> blt .Lshort_copy
>
> -#ifdef CONFIG_ALTIVEC
> -test_feature = SELFTEST_CASE
> -BEGIN_FTR_SECTION
> - bgt cr1,.Lvmx_copy
> -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
> -#endif
>
> .Lnonvmx_copy:
> /* Get the source 8B aligned */
> @@ -263,23 +256,14 @@ err1; stb r0,0(r3)
> 15: li r3,0
> blr
>
> -.Lunwind_stack_nonvmx_copy:
> - addi r1,r1,STACKFRAMESIZE
> - b .Lnonvmx_copy
> -
> -.Lvmx_copy:
> #ifdef CONFIG_ALTIVEC
> +_GLOBAL(__copy_tofrom_user_power7_vmx)
> mflr r0
> std r0,16(r1)
> stdu r1,-STACKFRAMESIZE(r1)
> - bl CFUNC(enter_vmx_usercopy)
> - cmpwi cr1,r3,0
> - ld r0,STACKFRAMESIZE+16(r1)
> - ld r3,STK_REG(R31)(r1)
> - ld r4,STK_REG(R30)(r1)
> - ld r5,STK_REG(R29)(r1)
> - mtlr r0
>
> + std r3,STK_REG(R31)(r1)
> + std r5,STK_REG(R29)(r1)
> /*
> * We prefetch both the source and destination using enhanced touch
> * instructions. We use a stream ID of 0 for the load side and
> @@ -300,8 +284,6 @@ err1; stb r0,0(r3)
>
> DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8)
>
> - beq cr1,.Lunwind_stack_nonvmx_copy
> -
> /*
> * If source and destination are not relatively aligned we use a
> * slower permute loop.
> @@ -478,7 +460,8 @@ err3; lbz r0,0(r4)
> err3; stb r0,0(r3)
>
> 15: addi r1,r1,STACKFRAMESIZE
> - b CFUNC(exit_vmx_usercopy) /* tail call optimise */
> + li r3,0
> + blr
>
> .Lvmx_unaligned_copy:
> /* Get the destination 16B aligned */
> @@ -681,5 +664,7 @@ err3; lbz r0,0(r4)
> err3; stb r0,0(r3)
>
> 15: addi r1,r1,STACKFRAMESIZE
> - b CFUNC(exit_vmx_usercopy) /* tail call optimise */
> + li r3,0
> + blr
> +EXPORT_SYMBOL(__copy_tofrom_user_power7_vmx)
> #endif /* CONFIG_ALTIVEC */
> diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c
> index 54340912398f..554b248002b4 100644
> --- a/arch/powerpc/lib/vmx-helper.c
> +++ b/arch/powerpc/lib/vmx-helper.c
> @@ -27,6 +27,7 @@ int enter_vmx_usercopy(void)
>
> return 1;
> }
> +EXPORT_SYMBOL(enter_vmx_usercopy);
>
> /*
> * This function must return 0 because we tail call optimise when calling
> @@ -49,6 +50,7 @@ int exit_vmx_usercopy(void)
> set_dec(1);
> return 0;
> }
> +EXPORT_SYMBOL(exit_vmx_usercopy);
>
> int enter_vmx_ops(void)
> {
> --
> 2.52.0
>
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH 1/2] powerpc: fix KUAP warning in VMX usercopy path
2026-02-27 9:22 ` Christophe Leroy (CS GROUP)
@ 2026-02-28 14:00 ` Sayali Patil
0 siblings, 0 replies; 6+ messages in thread
From: Sayali Patil @ 2026-02-28 14:00 UTC (permalink / raw)
To: Christophe Leroy (CS GROUP), linuxppc-dev, maddy
Cc: aboorvad, sshegde, riteshh
[-- Attachment #1: Type: text/plain, Size: 14211 bytes --]
On 27/02/26 14:52, Christophe Leroy (CS GROUP) wrote:
> Hi,
>
> Le 17/02/2026 à 13:44, Sayali Patil a écrit :
>> [Vous ne recevez pas souvent de courriers de sayalip@linux.ibm.com.
>> Découvrez pourquoi ceci est important à
>> https://aka.ms/LearnAboutSenderIdentification ]
>>
>> On powerpc with PREEMPT_FULL or PREEMPT_LAZY and function tracing
>> enabled,
>> KUAP warnings can be triggered from the VMX usercopy path under memory
>> stress workloads.
>>
>> KUAP requires that no subfunctions are called once userspace access has
>> been enabled. The existing VMX copy implementation violates this
>> requirement by invoking enter_vmx_usercopy() from the assembly path
>> after
>> userspace access has already been enabled. If preemption occurs
>> in this window, the AMR state may not be preserved correctly,
>> leading to unexpected userspace access state and resulting in
>> KUAP warnings.
>>
>> Fix this by moving VMX selection and enter_vmx_usercopy()/
>> exit_vmx_usercopy() handling into the raw_copy_{to,from,in}_user()
>> wrappers in uaccess.h. The new flow is:
>>
>> - Decide whether to use the VMX path based on size and CPU capability
>> - Call enter_vmx_usercopy() before enabling userspace access
>> - Enable userspace access and perform the VMX copy
>> - Disable userspace access
>> - Call exit_vmx_usercopy()
>> - Fall back to the base copy routine if the VMX copy faults
>>
>> With this change, the VMX assembly routines no longer perform VMX state
>> management or call helper functions; they only implement the
>> copy operations.
>> The previous feature-section based VMX selection inside
>> __copy_tofrom_user_power7() is removed, and a dedicated
>> __copy_tofrom_user_power7_vmx() entry point is introduced.
>>
>> This ensures correct KUAP ordering, avoids subfunction calls
>> while KUAP is unlocked, and eliminates the warnings while preserving
>> the VMX fast path.
>
> Your patch conflicts with the changes for adding masked user access.
>
> Can you rebase on top of v7.0-rc1 ?
>
> Comments below
>
>>
>> Fixes: de78a9c42a79 ("powerpc: Add a framework for Kernel Userspace
>> Access Protection")
>> Reported-by: Shrikanth Hegde <sshegde@linux.ibm.com>
>> Closes:
>> https://lore.kernel.org/all/20260109064917.777587-2-sshegde@linux.ibm.com/
>> Suggested-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
>> Co-developed-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
>> Signed-off-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
>> Signed-off-by: Sayali Patil <sayalip@linux.ibm.com>
>> ---
>> arch/powerpc/include/asm/uaccess.h | 67 ++++++++++++++++++++++++++++++
>> arch/powerpc/lib/copyuser_64.S | 1 +
>> arch/powerpc/lib/copyuser_power7.S | 45 +++++++-------------
>> arch/powerpc/lib/vmx-helper.c | 2 +
>> 4 files changed, 85 insertions(+), 30 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/uaccess.h
>> b/arch/powerpc/include/asm/uaccess.h
>> index 784a00e681fa..52e4a784d148 100644
>> --- a/arch/powerpc/include/asm/uaccess.h
>> +++ b/arch/powerpc/include/asm/uaccess.h
>> @@ -13,6 +13,11 @@
>> #define TASK_SIZE_MAX TASK_SIZE_USER64
>> #endif
>>
>> +#ifdef CONFIG_ALTIVEC
>
> remove the ifdef to avoid matching ifdef later
>
>> +/* Threshold above which VMX copy path is used */
>> +#define VMX_COPY_THRESHOLD 3328
>> +#endif
>> +
>> #include <asm-generic/access_ok.h>
>>
>> /*
>> @@ -323,12 +328,42 @@ do
>> { \
>> extern unsigned long __copy_tofrom_user(void __user *to,
>> const void __user *from, unsigned long size);
>>
>> +extern unsigned long __copy_tofrom_user_base(void __user *to,
>> + const void __user *from, unsigned long size);
>> +
>
> extern keyword is pointless for function prototypes, don't add new ones.
>
>> +#ifdef CONFIG_ALTIVEC
>
> Remove the ifdef
>
>> +extern unsigned long __copy_tofrom_user_power7_vmx(void __user *to,
>> + const void __user *from, unsigned long size);
>> +
>> +static inline bool will_use_vmx(unsigned long n)
>> +{
>> + return cpu_has_feature(CPU_FTR_VMX_COPY) &&
>> + n > VMX_COPY_THRESHOLD;
>
> Change to
>
> return IS_ENABLED(CONFIG_ALTIVEC) &&
> cpu_has_feature(CPU_FTR_VMX_COPY) && n > VMX_COPY_THRESHOLD;
>
> Then will_use_vmx() will return false when CONFIG_ALTIVEC is not set
>
>> +}
>> +#endif
>> +
>> #ifdef __powerpc64__
>> static inline unsigned long
>> raw_copy_in_user(void __user *to, const void __user *from, unsigned
>> long n)
>> {
>> unsigned long ret;
>>
>> +#ifdef CONFIG_ALTIVEC
>
> Remove the ifdef, will_use_vmx() will return false with the above
> change when CONFIG_ALTIVEC is not set
>
>> + if (will_use_vmx(n) && enter_vmx_usercopy()) {
>> + allow_read_write_user(to, from, n);
>> + ret = __copy_tofrom_user_power7_vmx(to, from, n);
>> + prevent_read_write_user(to, from, n);
>> + exit_vmx_usercopy();
>> + if (unlikely(ret)) {
>> + allow_read_write_user(to, from, n);
>> + ret = __copy_tofrom_user_base(to, from, n);
>> + prevent_read_write_user(to, from, n);
>> + }
>> +
>> + return ret;
>> + }
>
> This block is starting to be a bit big for an inline function.
> I think we should just have:
>
> if (will_use_vmx(n))
> return __copy_tofrom_user_vmx()
>
> and then define a __copy_tofrom_user_vmx() in for instance
> arch/powerpc/lib/vmx-helper.c
>
> This would also avoid having to export enter_vmx_usercopy() and
> exit_vmx_usercopy()
>
> Christophe
>
Thanks Christophe for the review and suggestions. We have incorporated
these changes in v2.
v2:
https://lore.kernel.org/all/20260228135319.238985-1-sayalip@linux.ibm.com/
Regards,
Sayali
>> +#endif
>> +
>> allow_read_write_user(to, from, n);
>> ret = __copy_tofrom_user(to, from, n);
>> prevent_read_write_user(to, from, n);
>> @@ -341,6 +376,22 @@ static inline unsigned long
>> raw_copy_from_user(void *to,
>> {
>> unsigned long ret;
>>
>> +#ifdef CONFIG_ALTIVEC
>> + if (will_use_vmx(n) && enter_vmx_usercopy()) {
>> + allow_read_from_user(from, n);
>> + ret = __copy_tofrom_user_power7_vmx((__force void
>> __user *)to, from, n);
>> + prevent_read_from_user(from, n);
>> + exit_vmx_usercopy();
>> + if (unlikely(ret)) {
>> + allow_read_from_user(from, n);
>> + ret = __copy_tofrom_user_base((__force void
>> __user *)to, from, n);
>> + prevent_read_from_user(from, n);
>> + }
>> +
>> + return ret;
>> + }
>> +#endif
>> +
>> allow_read_from_user(from, n);
>> ret = __copy_tofrom_user((__force void __user *)to, from, n);
>> prevent_read_from_user(from, n);
>> @@ -352,6 +403,22 @@ raw_copy_to_user(void __user *to, const void
>> *from, unsigned long n)
>> {
>> unsigned long ret;
>>
>> +#ifdef CONFIG_ALTIVEC
>> + if (will_use_vmx(n) && enter_vmx_usercopy()) {
>> + allow_write_to_user(to, n);
>> + ret = __copy_tofrom_user_power7_vmx(to, (__force
>> const void __user *)from, n);
>> + prevent_write_to_user(to, n);
>> + exit_vmx_usercopy();
>> + if (unlikely(ret)) {
>> + allow_write_to_user(to, n);
>> + ret = __copy_tofrom_user_base(to, (__force
>> const void __user *)from, n);
>> + prevent_write_to_user(to, n);
>> + }
>> +
>> + return ret;
>> + }
>> +#endif
>> +
>> allow_write_to_user(to, n);
>> ret = __copy_tofrom_user(to, (__force const void __user
>> *)from, n);
>> prevent_write_to_user(to, n);
>> diff --git a/arch/powerpc/lib/copyuser_64.S
>> b/arch/powerpc/lib/copyuser_64.S
>> index 9af969d2cc0c..25a99108caff 100644
>> --- a/arch/powerpc/lib/copyuser_64.S
>> +++ b/arch/powerpc/lib/copyuser_64.S
>> @@ -562,3 +562,4 @@ exc; std r10,32(3)
>> li r5,4096
>> b .Ldst_aligned
>> EXPORT_SYMBOL(__copy_tofrom_user)
>> +EXPORT_SYMBOL(__copy_tofrom_user_base)
>> diff --git a/arch/powerpc/lib/copyuser_power7.S
>> b/arch/powerpc/lib/copyuser_power7.S
>> index 8474c682a178..17dbcfbae25f 100644
>> --- a/arch/powerpc/lib/copyuser_power7.S
>> +++ b/arch/powerpc/lib/copyuser_power7.S
>> @@ -5,13 +5,9 @@
>> *
>> * Author: Anton Blanchard <anton@au.ibm.com>
>> */
>> +#include <linux/export.h>
>> #include <asm/ppc_asm.h>
>>
>> -#ifndef SELFTEST_CASE
>> -/* 0 == don't use VMX, 1 == use VMX */
>> -#define SELFTEST_CASE 0
>> -#endif
>> -
>> #ifdef __BIG_ENDIAN__
>> #define LVS(VRT,RA,RB) lvsl VRT,RA,RB
>> #define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
>> @@ -47,10 +43,14 @@
>> ld r15,STK_REG(R15)(r1)
>> ld r14,STK_REG(R14)(r1)
>> .Ldo_err3:
>> - bl CFUNC(exit_vmx_usercopy)
>> + ld r6,STK_REG(R31)(r1) /* original destination
>> pointer */
>> + ld r5,STK_REG(R29)(r1) /* original number of bytes */
>> + subf r7,r6,r3 /* #bytes copied */
>> + subf r3,r7,r5 /* #bytes not copied in r3 */
>> ld r0,STACKFRAMESIZE+16(r1)
>> mtlr r0
>> - b .Lexit
>> + addi r1,r1,STACKFRAMESIZE
>> + blr
>> #endif /* CONFIG_ALTIVEC */
>>
>> .Ldo_err2:
>> @@ -74,7 +74,6 @@
>>
>> _GLOBAL(__copy_tofrom_user_power7)
>> cmpldi r5,16
>> - cmpldi cr1,r5,3328
>>
>> std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
>> std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
>> @@ -82,12 +81,6 @@ _GLOBAL(__copy_tofrom_user_power7)
>>
>> blt .Lshort_copy
>>
>> -#ifdef CONFIG_ALTIVEC
>> -test_feature = SELFTEST_CASE
>> -BEGIN_FTR_SECTION
>> - bgt cr1,.Lvmx_copy
>> -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
>> -#endif
>>
>> .Lnonvmx_copy:
>> /* Get the source 8B aligned */
>> @@ -263,23 +256,14 @@ err1; stb r0,0(r3)
>> 15: li r3,0
>> blr
>>
>> -.Lunwind_stack_nonvmx_copy:
>> - addi r1,r1,STACKFRAMESIZE
>> - b .Lnonvmx_copy
>> -
>> -.Lvmx_copy:
>> #ifdef CONFIG_ALTIVEC
>> +_GLOBAL(__copy_tofrom_user_power7_vmx)
>> mflr r0
>> std r0,16(r1)
>> stdu r1,-STACKFRAMESIZE(r1)
>> - bl CFUNC(enter_vmx_usercopy)
>> - cmpwi cr1,r3,0
>> - ld r0,STACKFRAMESIZE+16(r1)
>> - ld r3,STK_REG(R31)(r1)
>> - ld r4,STK_REG(R30)(r1)
>> - ld r5,STK_REG(R29)(r1)
>> - mtlr r0
>>
>> + std r3,STK_REG(R31)(r1)
>> + std r5,STK_REG(R29)(r1)
>> /*
>> * We prefetch both the source and destination using
>> enhanced touch
>> * instructions. We use a stream ID of 0 for the load side and
>> @@ -300,8 +284,6 @@ err1; stb r0,0(r3)
>>
>> DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8)
>>
>> - beq cr1,.Lunwind_stack_nonvmx_copy
>> -
>> /*
>> * If source and destination are not relatively aligned we
>> use a
>> * slower permute loop.
>> @@ -478,7 +460,8 @@ err3; lbz r0,0(r4)
>> err3; stb r0,0(r3)
>>
>> 15: addi r1,r1,STACKFRAMESIZE
>> - b CFUNC(exit_vmx_usercopy) /* tail call optimise */
>> + li r3,0
>> + blr
>>
>> .Lvmx_unaligned_copy:
>> /* Get the destination 16B aligned */
>> @@ -681,5 +664,7 @@ err3; lbz r0,0(r4)
>> err3; stb r0,0(r3)
>>
>> 15: addi r1,r1,STACKFRAMESIZE
>> - b CFUNC(exit_vmx_usercopy) /* tail call optimise */
>> + li r3,0
>> + blr
>> +EXPORT_SYMBOL(__copy_tofrom_user_power7_vmx)
>> #endif /* CONFIG_ALTIVEC */
>> diff --git a/arch/powerpc/lib/vmx-helper.c
>> b/arch/powerpc/lib/vmx-helper.c
>> index 54340912398f..554b248002b4 100644
>> --- a/arch/powerpc/lib/vmx-helper.c
>> +++ b/arch/powerpc/lib/vmx-helper.c
>> @@ -27,6 +27,7 @@ int enter_vmx_usercopy(void)
>>
>> return 1;
>> }
>> +EXPORT_SYMBOL(enter_vmx_usercopy);
>>
>> /*
>> * This function must return 0 because we tail call optimise when
>> calling
>> @@ -49,6 +50,7 @@ int exit_vmx_usercopy(void)
>> set_dec(1);
>> return 0;
>> }
>> +EXPORT_SYMBOL(exit_vmx_usercopy);
>>
>> int enter_vmx_ops(void)
>> {
>> --
>> 2.52.0
>>
>
>
[-- Attachment #2: Type: text/html, Size: 21979 bytes --]
^ permalink raw reply [flat|nested] 6+ messages in thread