linux-crypto.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ard Biesheuvel <ardb@kernel.org>
To: Ard Biesheuvel <ardb+git@google.com>
Cc: linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org,  linux-crypto@vger.kernel.org,
	herbert@gondor.apana.org.au,  ebiggers@kernel.org
Subject: Re: [PATCH v4 21/21] arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack
Date: Fri, 31 Oct 2025 15:16:03 +0100	[thread overview]
Message-ID: <CAMj1kXGV1xeZXF7adHwbUsg6+JpLyueWaiS89pS7XFm3fKuw6A@mail.gmail.com> (raw)
In-Reply-To: <20251031103858.529530-44-ardb+git@google.com>

On Fri, 31 Oct 2025 at 11:40, Ard Biesheuvel <ardb+git@google.com> wrote:
>
> From: Ard Biesheuvel <ardb@kernel.org>
>
> Commit aefbab8e77eb16b5
>
>   ("arm64: fpsimd: Preserve/restore kernel mode NEON at context switch")
>
> added a 'kernel_fpsimd_state' field to struct thread_struct, which is
> the arch-specific portion of struct task_struct, and is allocated for
> each task in the system. The size of this field is 528 bytes, resulting
> in non-negligible bloat of task_struct, and the resulting memory
> overhead may impact performance on systems with many processes.
>
> This allocation is only used if the task is scheduled out or interrupted
> by a softirq while using the FP/SIMD unit in kernel mode, and so it is
> possible to transparently allocate this buffer on the caller's stack
> instead.
>
> So tweak the 'ksimd' scoped guard implementation so that a stack buffer
> is allocated and passed to both kernel_neon_begin() and
> kernel_neon_end(), and either record it in the task struct, or use it
> directly to preserve the task mode kernel FP/SIMD when running in
> softirq context. Passing the address to both functions, and checking the
> addresses for consistency ensures that callers of the updated bare
> begin/end API use it in a manner that is consistent with the new context
> switch semantics.
>
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
>  arch/arm64/include/asm/fpu.h       |  4 +-
>  arch/arm64/include/asm/neon.h      |  4 +-
>  arch/arm64/include/asm/processor.h |  7 ++-
>  arch/arm64/include/asm/simd.h      |  7 ++-
>  arch/arm64/kernel/fpsimd.c         | 53 ++++++++++++++------
>  5 files changed, 54 insertions(+), 21 deletions(-)
>
> diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h
> index bdc4c6304c6a..751e88a96734 100644
> --- a/arch/arm64/include/asm/fpu.h
> +++ b/arch/arm64/include/asm/fpu.h
> @@ -15,12 +15,12 @@ static inline void kernel_fpu_begin(void)
>  {
>         BUG_ON(!in_task());
>         preempt_disable();
> -       kernel_neon_begin();
> +       kernel_neon_begin(NULL);
>  }
>
>  static inline void kernel_fpu_end(void)
>  {
> -       kernel_neon_end();
> +       kernel_neon_end(NULL);
>         preempt_enable();
>  }
>
> diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
> index d4b1d172a79b..acebee4605b5 100644
> --- a/arch/arm64/include/asm/neon.h
> +++ b/arch/arm64/include/asm/neon.h
> @@ -13,7 +13,7 @@
>
>  #define cpu_has_neon()         system_supports_fpsimd()
>
> -void kernel_neon_begin(void);
> -void kernel_neon_end(void);
> +void kernel_neon_begin(struct user_fpsimd_state *);
> +void kernel_neon_end(struct user_fpsimd_state *);
>
>  #endif /* ! __ASM_NEON_H */
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 61d62bfd5a7b..de3c3b65461d 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -172,7 +172,12 @@ struct thread_struct {
>         unsigned long           fault_code;     /* ESR_EL1 value */
>         struct debug_info       debug;          /* debugging */
>
> -       struct user_fpsimd_state        kernel_fpsimd_state;
> +       /*
> +        * Set [cleared] by kernel_neon_begin() [kernel_neon_end()] to the
> +        * address of a caller provided buffer that will be used to preserve a
> +        * task's kernel mode FPSIMD state while it is scheduled out.
> +        */
> +       struct user_fpsimd_state        *kernel_fpsimd_state;
>         unsigned int                    kernel_fpsimd_cpu;
>  #ifdef CONFIG_ARM64_PTR_AUTH
>         struct ptrauth_keys_user        keys_user;
> diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
> index d9f83c478736..7ddb25df5c98 100644
> --- a/arch/arm64/include/asm/simd.h
> +++ b/arch/arm64/include/asm/simd.h
> @@ -43,8 +43,11 @@ static __must_check inline bool may_use_simd(void) {
>
>  #endif /* ! CONFIG_KERNEL_MODE_NEON */
>
> -DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())
> +DEFINE_LOCK_GUARD_1(ksimd,
> +                   struct user_fpsimd_state,
> +                   kernel_neon_begin(_T->lock),
> +                   kernel_neon_end(_T->lock))
>
> -#define scoped_ksimd() scoped_guard(ksimd)
> +#define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){})
>
>  #endif
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index e3f8f51748bc..1c652ce4d40d 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -1489,21 +1489,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task)
>          * Elide the load if this CPU holds the most recent kernel mode
>          * FPSIMD context of the current task.
>          */
> -       if (last->st == &task->thread.kernel_fpsimd_state &&
> +       if (last->st == task->thread.kernel_fpsimd_state &&
>             task->thread.kernel_fpsimd_cpu == smp_processor_id())
>                 return;
>
> -       fpsimd_load_state(&task->thread.kernel_fpsimd_state);
> +       fpsimd_load_state(task->thread.kernel_fpsimd_state);
>  }
>
>  static void fpsimd_save_kernel_state(struct task_struct *task)
>  {
>         struct cpu_fp_state cpu_fp_state = {
> -               .st             = &task->thread.kernel_fpsimd_state,
> +               .st             = task->thread.kernel_fpsimd_state,
>                 .to_save        = FP_STATE_FPSIMD,
>         };
>
> -       fpsimd_save_state(&task->thread.kernel_fpsimd_state);
> +       BUG_ON(!cpu_fp_state.st);
> +
> +       fpsimd_save_state(task->thread.kernel_fpsimd_state);
>         fpsimd_bind_state_to_cpu(&cpu_fp_state);
>
>         task->thread.kernel_fpsimd_cpu = smp_processor_id();
> @@ -1774,6 +1776,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
>  void fpsimd_flush_task_state(struct task_struct *t)
>  {
>         t->thread.fpsimd_cpu = NR_CPUS;
> +       t->thread.kernel_fpsimd_state = NULL;
>         /*
>          * If we don't support fpsimd, bail out after we have
>          * reset the fpsimd_cpu for this task and clear the
> @@ -1833,8 +1836,13 @@ void fpsimd_save_and_flush_cpu_state(void)
>   *
>   * The caller may freely use the FPSIMD registers until kernel_neon_end() is
>   * called.
> + *
> + * Unless called from non-preemptible task context, @state must point to a
> + * caller provided buffer that will be used to preserve the task's kernel mode
> + * FPSIMD context when it is scheduled out, or if it is interrupted by kernel
> + * mode FPSIMD occurring in softirq context. May be %NULL otherwise.
>   */
> -void kernel_neon_begin(void)
> +void kernel_neon_begin(struct user_fpsimd_state *state)
>  {
>         if (WARN_ON(!system_supports_fpsimd()))
>                 return;
> @@ -1846,7 +1854,7 @@ void kernel_neon_begin(void)
>         /* Save unsaved fpsimd state, if any: */
>         if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
>                 BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
> -               fpsimd_save_kernel_state(current);
> +               fpsimd_save_state(state);
>         } else {
>                 fpsimd_save_user_state();
>
> @@ -1867,8 +1875,17 @@ void kernel_neon_begin(void)
>                  * mode in task context. So in this case, setting the flag here
>                  * is always appropriate.
>                  */
> -               if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
> +               if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
> +                       /*
> +                        * Record the caller provided buffer as the kernel mode
> +                        * FP/SIMD buffer for this task, so that the state can
> +                        * be preserved and restored on a context switch.
> +                        */
> +                       WARN_ON(current->thread.kernel_fpsimd_state != NULL);
> +                       WARN_ON(preemptible() && !state);

This is in the wrong place: we are never preemptible here, even when
called from preemptible context.

Will fix for the next rev.

  reply	other threads:[~2025-10-31 14:16 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-31 10:38 [PATCH v4 00/21] arm64: Move kernel mode FPSIMD buffer to the stack Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 01/21] crypto/arm64: aes-ce-ccm - Avoid pointless yield of the NEON unit Ard Biesheuvel
2025-11-06  7:08   ` Herbert Xu
2025-10-31 10:39 ` [PATCH v4 02/21] crypto/arm64: sm4-ce-ccm " Ard Biesheuvel
2025-11-06  7:11   ` Herbert Xu
2025-10-31 10:39 ` [PATCH v4 03/21] crypto/arm64: sm4-ce-gcm " Ard Biesheuvel
2025-11-06  7:15   ` Herbert Xu
2025-10-31 10:39 ` [PATCH v4 04/21] arm64/simd: Add scoped guard API for kernel mode SIMD Ard Biesheuvel
2025-10-31 13:55   ` Jonathan Cameron
2025-10-31 14:05     ` Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 05/21] ARM/simd: " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 06/21] crypto: aegis128-neon - Move to more abstract 'ksimd' guard API Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 07/21] raid6: " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 08/21] lib/crc: Switch ARM and arm64 to 'ksimd' scoped " Ard Biesheuvel
2025-10-31 13:49   ` Jonathan Cameron
2025-10-31 13:52     ` Ard Biesheuvel
2025-10-31 14:05       ` Ard Biesheuvel
2025-11-03 11:28         ` Jonathan Cameron
2025-11-04 15:32           ` Ard Biesheuvel
2025-11-12 20:58             ` Eric Biggers
2025-10-31 10:39 ` [PATCH v4 09/21] lib/crypto: " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 10/21] crypto/arm64: aes-ccm - Switch " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 11/21] crypto/arm64: aes-blk " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 12/21] crypto/arm64: aes-gcm " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 13/21] crypto/arm64: nhpoly1305 " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 14/21] crypto/arm64: polyval " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 15/21] crypto/arm64: sha3 " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 16/21] crypto/arm64: sm3 " Ard Biesheuvel
2025-10-31 13:52   ` Jonathan Cameron
2025-10-31 13:55     ` Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 17/21] crypto/arm64: sm4 " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 18/21] arm64/xorblocks: " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 19/21] net/mlx5: Switch to more abstract scoped ksimd guard API on arm64 Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 20/21] arm64/fpu: Enforce task-context only for generic kernel mode FPU Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 21/21] arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack Ard Biesheuvel
2025-10-31 14:16   ` Ard Biesheuvel [this message]
2025-11-11 17:12 ` [PATCH v4 00/21] arm64: Move kernel mode FPSIMD buffer to " Catalin Marinas

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAMj1kXGV1xeZXF7adHwbUsg6+JpLyueWaiS89pS7XFm3fKuw6A@mail.gmail.com \
    --to=ardb@kernel.org \
    --cc=ardb+git@google.com \
    --cc=ebiggers@kernel.org \
    --cc=herbert@gondor.apana.org.au \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).