On 27/02/26 14:52, Christophe Leroy (CS GROUP) wrote: > Hi, > > Le 17/02/2026 à 13:44, Sayali Patil a écrit : >> [Vous ne recevez pas souvent de courriers de sayalip@linux.ibm.com. >> Découvrez pourquoi ceci est important à >> https://aka.ms/LearnAboutSenderIdentification ] >> >> On powerpc with PREEMPT_FULL or PREEMPT_LAZY and function tracing >> enabled, >> KUAP warnings can be triggered from the VMX usercopy path under memory >> stress workloads. >> >> KUAP requires that no subfunctions are called once userspace access has >> been enabled. The existing VMX copy implementation violates this >> requirement by invoking enter_vmx_usercopy() from the assembly path >> after >> userspace access has already been enabled. If preemption occurs >> in this window, the AMR state may not be preserved correctly, >> leading to unexpected userspace access state and resulting in >> KUAP warnings. >> >> Fix this by moving VMX selection and enter_vmx_usercopy()/ >> exit_vmx_usercopy() handling into the raw_copy_{to,from,in}_user() >> wrappers in uaccess.h. The new flow is: >> >>    - Decide whether to use the VMX path based on size and CPU capability >>    - Call enter_vmx_usercopy() before enabling userspace access >>    - Enable userspace access and perform the VMX copy >>    - Disable userspace access >>    - Call exit_vmx_usercopy() >>    - Fall back to the base copy routine if the VMX copy faults >> >> With this change, the VMX assembly routines no longer perform VMX state >> management or call helper functions; they only implement the >> copy operations. >> The previous feature-section based VMX selection inside >> __copy_tofrom_user_power7() is removed, and a dedicated >> __copy_tofrom_user_power7_vmx() entry point is introduced. >> >> This ensures correct KUAP ordering, avoids subfunction calls >> while KUAP is unlocked, and eliminates the warnings while preserving >> the VMX fast path. > > You patch conflicts with the changes for adding masked user access. > > Can you rebase on top of v7.0-rc1 ? > > Comments below > >> >> Fixes: de78a9c42a79 ("powerpc: Add a framework for Kernel Userspace >> Access Protection") >> Reported-by: Shrikanth Hegde >> Closes: >> https://lore.kernel.org/all/20260109064917.777587-2-sshegde@linux.ibm.com/ >> Suggested-by: Christophe Leroy (CS GROUP) >> Co-developed-by: Aboorva Devarajan >> Signed-off-by: Aboorva Devarajan >> Signed-off-by: Sayali Patil >> --- >>   arch/powerpc/include/asm/uaccess.h | 67 ++++++++++++++++++++++++++++++ >>   arch/powerpc/lib/copyuser_64.S     |  1 + >>   arch/powerpc/lib/copyuser_power7.S | 45 +++++++------------- >>   arch/powerpc/lib/vmx-helper.c      |  2 + >>   4 files changed, 85 insertions(+), 30 deletions(-) >> >> diff --git a/arch/powerpc/include/asm/uaccess.h >> b/arch/powerpc/include/asm/uaccess.h >> index 784a00e681fa..52e4a784d148 100644 >> --- a/arch/powerpc/include/asm/uaccess.h >> +++ b/arch/powerpc/include/asm/uaccess.h >> @@ -13,6 +13,11 @@ >>   #define TASK_SIZE_MAX          TASK_SIZE_USER64 >>   #endif >> >> +#ifdef CONFIG_ALTIVEC > > remove the ifdef to avoid matching ifdef later > >> +/* Threshold above which VMX copy path is used */ >> +#define VMX_COPY_THRESHOLD 3328 >> +#endif >> + >>   #include >> >>   /* >> @@ -323,12 +328,42 @@ do >> {                                                              \ >>   extern unsigned long __copy_tofrom_user(void __user *to, >>                  const void __user *from, unsigned long size); >> >> +extern unsigned long __copy_tofrom_user_base(void __user *to, >> +               const void __user *from, unsigned long size); >> + > > extern keywork is pointless for function prototypes, don't add new ones. > >> +#ifdef CONFIG_ALTIVEC > > Remove the ifdef > >> +extern unsigned long __copy_tofrom_user_power7_vmx(void __user *to, >> +               const void __user *from, unsigned long size); >> + >> +static inline bool will_use_vmx(unsigned long n) >> +{ >> +       return cpu_has_feature(CPU_FTR_VMX_COPY) && >> +               n > VMX_COPY_THRESHOLD; > > Change to > >     return IS_ENABLED(CONFIG_ALTIVEC) && > cpu_has_feature(CPU_FTR_VMX_COPY) &&  n > VMX_COPY_THRESHOLD; > > Then will_use_vmx() will return false when CONFIG_ALTIVEC is not set > >> +} >> +#endif >> + >>   #ifdef __powerpc64__ >>   static inline unsigned long >>   raw_copy_in_user(void __user *to, const void __user *from, unsigned >> long n) >>   { >>          unsigned long ret; >> >> +#ifdef CONFIG_ALTIVEC > > Remove the ifdef, will_use_vmx() will return false with the above > change when CONFIG_ALTIVEC is not set > >> +       if (will_use_vmx(n) && enter_vmx_usercopy()) { >> +               allow_read_write_user(to, from, n); >> +               ret = __copy_tofrom_user_power7_vmx(to, from, n); >> +               prevent_read_write_user(to, from, n); >> +               exit_vmx_usercopy(); >> +               if (unlikely(ret)) { >> +                       allow_read_write_user(to, from, n); >> +                       ret = __copy_tofrom_user_base(to, from, n); >> +                       prevent_read_write_user(to, from, n); >> +               } >> + >> +               return ret; >> +       } > > This block is starting to be a bit big for an inline function. > I think we should just have: > >     if (will_use_vmx(n)) >         return __copy_tofrom_user_vmx() > > and then define a __copy_tofrom_user_vmx() in for instance > arch/powerpc/lib/vmx-helper.c > > This would also avoid having to export enter_vmx_usercopy() and > exit_vmx_usercopy() > > Christophe > Thanks Christophe for the review and suggestions. We have incorporated  these changes in v2. v2: https://lore.kernel.org/all/20260228135319.238985-1-sayalip@linux.ibm.com/ Regards, Sayali >> +#endif >> + >>          allow_read_write_user(to, from, n); >>          ret = __copy_tofrom_user(to, from, n); >>          prevent_read_write_user(to, from, n); >> @@ -341,6 +376,22 @@ static inline unsigned long >> raw_copy_from_user(void *to, >>   { >>          unsigned long ret; >> >> +#ifdef CONFIG_ALTIVEC >> +       if (will_use_vmx(n) && enter_vmx_usercopy()) { >> +               allow_read_from_user(from, n); >> +               ret = __copy_tofrom_user_power7_vmx((__force void >> __user *)to, from, n); >> +               prevent_read_from_user(from, n); >> +               exit_vmx_usercopy(); >> +               if (unlikely(ret)) { >> +                       allow_read_from_user(from, n); >> +                       ret = __copy_tofrom_user_base((__force void >> __user *)to, from, n); >> +                       prevent_read_from_user(from, n); >> +               } >> + >> +               return ret; >> +       } >> +#endif >> + >>          allow_read_from_user(from, n); >>          ret = __copy_tofrom_user((__force void __user *)to, from, n); >>          prevent_read_from_user(from, n); >> @@ -352,6 +403,22 @@ raw_copy_to_user(void __user *to, const void >> *from, unsigned long n) >>   { >>          unsigned long ret; >> >> +#ifdef CONFIG_ALTIVEC >> +       if (will_use_vmx(n) && enter_vmx_usercopy()) { >> +               allow_write_to_user(to, n); >> +               ret = __copy_tofrom_user_power7_vmx(to, (__force >> const void __user *)from, n); >> +               prevent_write_to_user(to, n); >> +               exit_vmx_usercopy(); >> +               if (unlikely(ret)) { >> +                       allow_write_to_user(to, n); >> +                       ret = __copy_tofrom_user_base(to, (__force >> const void __user *)from, n); >> +                       prevent_write_to_user(to, n); >> +               } >> + >> +               return ret; >> +       } >> +#endif >> + >>          allow_write_to_user(to, n); >>          ret = __copy_tofrom_user(to, (__force const void __user >> *)from, n); >>          prevent_write_to_user(to, n); >> diff --git a/arch/powerpc/lib/copyuser_64.S >> b/arch/powerpc/lib/copyuser_64.S >> index 9af969d2cc0c..25a99108caff 100644 >> --- a/arch/powerpc/lib/copyuser_64.S >> +++ b/arch/powerpc/lib/copyuser_64.S >> @@ -562,3 +562,4 @@ exc;        std     r10,32(3) >>          li      r5,4096 >>          b       .Ldst_aligned >>   EXPORT_SYMBOL(__copy_tofrom_user) >> +EXPORT_SYMBOL(__copy_tofrom_user_base) >> diff --git a/arch/powerpc/lib/copyuser_power7.S >> b/arch/powerpc/lib/copyuser_power7.S >> index 8474c682a178..17dbcfbae25f 100644 >> --- a/arch/powerpc/lib/copyuser_power7.S >> +++ b/arch/powerpc/lib/copyuser_power7.S >> @@ -5,13 +5,9 @@ >>    * >>    * Author: Anton Blanchard >>    */ >> +#include >>   #include >> >> -#ifndef SELFTEST_CASE >> -/* 0 == don't use VMX, 1 == use VMX */ >> -#define SELFTEST_CASE  0 >> -#endif >> - >>   #ifdef __BIG_ENDIAN__ >>   #define LVS(VRT,RA,RB)         lvsl    VRT,RA,RB >>   #define VPERM(VRT,VRA,VRB,VRC) vperm   VRT,VRA,VRB,VRC >> @@ -47,10 +43,14 @@ >>          ld      r15,STK_REG(R15)(r1) >>          ld      r14,STK_REG(R14)(r1) >>   .Ldo_err3: >> -       bl      CFUNC(exit_vmx_usercopy) >> +       ld      r6,STK_REG(R31)(r1)     /* original destination >> pointer */ >> +       ld      r5,STK_REG(R29)(r1)     /* original number of bytes */ >> +       subf    r7,r6,r3                /* #bytes copied */ >> +       subf    r3,r7,r5                /* #bytes not copied in r3 */ >>          ld      r0,STACKFRAMESIZE+16(r1) >>          mtlr    r0 >> -       b       .Lexit >> +       addi    r1,r1,STACKFRAMESIZE >> +       blr >>   #endif /* CONFIG_ALTIVEC */ >> >>   .Ldo_err2: >> @@ -74,7 +74,6 @@ >> >>   _GLOBAL(__copy_tofrom_user_power7) >>          cmpldi  r5,16 >> -       cmpldi  cr1,r5,3328 >> >>          std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1) >>          std     r4,-STACKFRAMESIZE+STK_REG(R30)(r1) >> @@ -82,12 +81,6 @@ _GLOBAL(__copy_tofrom_user_power7) >> >>          blt     .Lshort_copy >> >> -#ifdef CONFIG_ALTIVEC >> -test_feature = SELFTEST_CASE >> -BEGIN_FTR_SECTION >> -       bgt     cr1,.Lvmx_copy >> -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) >> -#endif >> >>   .Lnonvmx_copy: >>          /* Get the source 8B aligned */ >> @@ -263,23 +256,14 @@ err1;     stb     r0,0(r3) >>   15:    li      r3,0 >>          blr >> >> -.Lunwind_stack_nonvmx_copy: >> -       addi    r1,r1,STACKFRAMESIZE >> -       b       .Lnonvmx_copy >> - >> -.Lvmx_copy: >>   #ifdef CONFIG_ALTIVEC >> +_GLOBAL(__copy_tofrom_user_power7_vmx) >>          mflr    r0 >>          std     r0,16(r1) >>          stdu    r1,-STACKFRAMESIZE(r1) >> -       bl      CFUNC(enter_vmx_usercopy) >> -       cmpwi   cr1,r3,0 >> -       ld      r0,STACKFRAMESIZE+16(r1) >> -       ld      r3,STK_REG(R31)(r1) >> -       ld      r4,STK_REG(R30)(r1) >> -       ld      r5,STK_REG(R29)(r1) >> -       mtlr    r0 >> >> +       std     r3,STK_REG(R31)(r1) >> +       std     r5,STK_REG(R29)(r1) >>          /* >>           * We prefetch both the source and destination using >> enhanced touch >>           * instructions. We use a stream ID of 0 for the load side and >> @@ -300,8 +284,6 @@ err1;       stb     r0,0(r3) >> >>          DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8) >> >> -       beq     cr1,.Lunwind_stack_nonvmx_copy >> - >>          /* >>           * If source and destination are not relatively aligned we >> use a >>           * slower permute loop. >> @@ -478,7 +460,8 @@ err3;       lbz     r0,0(r4) >>   err3;  stb     r0,0(r3) >> >>   15:    addi    r1,r1,STACKFRAMESIZE >> -       b       CFUNC(exit_vmx_usercopy)        /* tail call optimise */ >> +       li r3,0 >> +       blr >> >>   .Lvmx_unaligned_copy: >>          /* Get the destination 16B aligned */ >> @@ -681,5 +664,7 @@ err3;       lbz     r0,0(r4) >>   err3;  stb     r0,0(r3) >> >>   15:    addi    r1,r1,STACKFRAMESIZE >> -       b       CFUNC(exit_vmx_usercopy)        /* tail call optimise */ >> +       li r3,0 >> +       blr >> +EXPORT_SYMBOL(__copy_tofrom_user_power7_vmx) >>   #endif /* CONFIG_ALTIVEC */ >> diff --git a/arch/powerpc/lib/vmx-helper.c >> b/arch/powerpc/lib/vmx-helper.c >> index 54340912398f..554b248002b4 100644 >> --- a/arch/powerpc/lib/vmx-helper.c >> +++ b/arch/powerpc/lib/vmx-helper.c >> @@ -27,6 +27,7 @@ int enter_vmx_usercopy(void) >> >>          return 1; >>   } >> +EXPORT_SYMBOL(enter_vmx_usercopy); >> >>   /* >>    * This function must return 0 because we tail call optimise when >> calling >> @@ -49,6 +50,7 @@ int exit_vmx_usercopy(void) >>                  set_dec(1); >>          return 0; >>   } >> +EXPORT_SYMBOL(exit_vmx_usercopy); >> >>   int enter_vmx_ops(void) >>   { >> -- >> 2.52.0 >> > >