From: Ard Biesheuvel <ardb@kernel.org>
To: Ard Biesheuvel <ardb+git@google.com>
Cc: linux-arm-kernel@lists.infradead.org,
linux-kernel@vger.kernel.org, linux-crypto@vger.kernel.org,
herbert@gondor.apana.org.au, ebiggers@kernel.org
Subject: Re: [PATCH v4 21/21] arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack
Date: Fri, 31 Oct 2025 15:16:03 +0100 [thread overview]
Message-ID: <CAMj1kXGV1xeZXF7adHwbUsg6+JpLyueWaiS89pS7XFm3fKuw6A@mail.gmail.com> (raw)
In-Reply-To: <20251031103858.529530-44-ardb+git@google.com>
On Fri, 31 Oct 2025 at 11:40, Ard Biesheuvel <ardb+git@google.com> wrote:
>
> From: Ard Biesheuvel <ardb@kernel.org>
>
> Commit aefbab8e77eb16b5
>
> ("arm64: fpsimd: Preserve/restore kernel mode NEON at context switch")
>
> added a 'kernel_fpsimd_state' field to struct thread_struct, which is
> the arch-specific portion of struct task_struct, and is allocated for
> each task in the system. The size of this field is 528 bytes, resulting
> in non-negligible bloat of task_struct, and the resulting memory
> overhead may impact performance on systems with many processes.
>
> This allocation is only used if the task is scheduled out or interrupted
> by a softirq while using the FP/SIMD unit in kernel mode, and so it is
> possible to transparently allocate this buffer on the caller's stack
> instead.
>
> So tweak the 'ksimd' scoped guard implementation so that a stack buffer
> is allocated and passed to both kernel_neon_begin() and
> kernel_neon_end(), and either record it in the task struct, or use it
> directly to preserve the task mode kernel FP/SIMD when running in
> softirq context. Passing the address to both functions, and checking the
> addresses for consistency ensures that callers of the updated bare
> begin/end API use it in a manner that is consistent with the new context
> switch semantics.
>
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
> arch/arm64/include/asm/fpu.h | 4 +-
> arch/arm64/include/asm/neon.h | 4 +-
> arch/arm64/include/asm/processor.h | 7 ++-
> arch/arm64/include/asm/simd.h | 7 ++-
> arch/arm64/kernel/fpsimd.c | 53 ++++++++++++++------
> 5 files changed, 54 insertions(+), 21 deletions(-)
>
> diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h
> index bdc4c6304c6a..751e88a96734 100644
> --- a/arch/arm64/include/asm/fpu.h
> +++ b/arch/arm64/include/asm/fpu.h
> @@ -15,12 +15,12 @@ static inline void kernel_fpu_begin(void)
> {
> BUG_ON(!in_task());
> preempt_disable();
> - kernel_neon_begin();
> + kernel_neon_begin(NULL);
> }
>
> static inline void kernel_fpu_end(void)
> {
> - kernel_neon_end();
> + kernel_neon_end(NULL);
> preempt_enable();
> }
>
> diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
> index d4b1d172a79b..acebee4605b5 100644
> --- a/arch/arm64/include/asm/neon.h
> +++ b/arch/arm64/include/asm/neon.h
> @@ -13,7 +13,7 @@
>
> #define cpu_has_neon() system_supports_fpsimd()
>
> -void kernel_neon_begin(void);
> -void kernel_neon_end(void);
> +void kernel_neon_begin(struct user_fpsimd_state *);
> +void kernel_neon_end(struct user_fpsimd_state *);
>
> #endif /* ! __ASM_NEON_H */
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 61d62bfd5a7b..de3c3b65461d 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -172,7 +172,12 @@ struct thread_struct {
> unsigned long fault_code; /* ESR_EL1 value */
> struct debug_info debug; /* debugging */
>
> - struct user_fpsimd_state kernel_fpsimd_state;
> + /*
> + * Set [cleared] by kernel_neon_begin() [kernel_neon_end()] to the
> + * address of a caller provided buffer that will be used to preserve a
> + * task's kernel mode FPSIMD state while it is scheduled out.
> + */
> + struct user_fpsimd_state *kernel_fpsimd_state;
> unsigned int kernel_fpsimd_cpu;
> #ifdef CONFIG_ARM64_PTR_AUTH
> struct ptrauth_keys_user keys_user;
> diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
> index d9f83c478736..7ddb25df5c98 100644
> --- a/arch/arm64/include/asm/simd.h
> +++ b/arch/arm64/include/asm/simd.h
> @@ -43,8 +43,11 @@ static __must_check inline bool may_use_simd(void) {
>
> #endif /* ! CONFIG_KERNEL_MODE_NEON */
>
> -DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())
> +DEFINE_LOCK_GUARD_1(ksimd,
> + struct user_fpsimd_state,
> + kernel_neon_begin(_T->lock),
> + kernel_neon_end(_T->lock))
>
> -#define scoped_ksimd() scoped_guard(ksimd)
> +#define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){})
>
> #endif
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index e3f8f51748bc..1c652ce4d40d 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -1489,21 +1489,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task)
> * Elide the load if this CPU holds the most recent kernel mode
> * FPSIMD context of the current task.
> */
> - if (last->st == &task->thread.kernel_fpsimd_state &&
> + if (last->st == task->thread.kernel_fpsimd_state &&
> task->thread.kernel_fpsimd_cpu == smp_processor_id())
> return;
>
> - fpsimd_load_state(&task->thread.kernel_fpsimd_state);
> + fpsimd_load_state(task->thread.kernel_fpsimd_state);
> }
>
> static void fpsimd_save_kernel_state(struct task_struct *task)
> {
> struct cpu_fp_state cpu_fp_state = {
> - .st = &task->thread.kernel_fpsimd_state,
> + .st = task->thread.kernel_fpsimd_state,
> .to_save = FP_STATE_FPSIMD,
> };
>
> - fpsimd_save_state(&task->thread.kernel_fpsimd_state);
> + BUG_ON(!cpu_fp_state.st);
> +
> + fpsimd_save_state(task->thread.kernel_fpsimd_state);
> fpsimd_bind_state_to_cpu(&cpu_fp_state);
>
> task->thread.kernel_fpsimd_cpu = smp_processor_id();
> @@ -1774,6 +1776,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
> void fpsimd_flush_task_state(struct task_struct *t)
> {
> t->thread.fpsimd_cpu = NR_CPUS;
> + t->thread.kernel_fpsimd_state = NULL;
> /*
> * If we don't support fpsimd, bail out after we have
> * reset the fpsimd_cpu for this task and clear the
> @@ -1833,8 +1836,13 @@ void fpsimd_save_and_flush_cpu_state(void)
> *
> * The caller may freely use the FPSIMD registers until kernel_neon_end() is
> * called.
> + *
> + * Unless called from non-preemptible task context, @state must point to a
> + * caller provided buffer that will be used to preserve the task's kernel mode
> + * FPSIMD context when it is scheduled out, or if it is interrupted by kernel
> + * mode FPSIMD occurring in softirq context. May be %NULL otherwise.
> */
> -void kernel_neon_begin(void)
> +void kernel_neon_begin(struct user_fpsimd_state *state)
> {
> if (WARN_ON(!system_supports_fpsimd()))
> return;
> @@ -1846,7 +1854,7 @@ void kernel_neon_begin(void)
> /* Save unsaved fpsimd state, if any: */
> if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
> BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
> - fpsimd_save_kernel_state(current);
> + fpsimd_save_state(state);
> } else {
> fpsimd_save_user_state();
>
> @@ -1867,8 +1875,17 @@ void kernel_neon_begin(void)
> * mode in task context. So in this case, setting the flag here
> * is always appropriate.
> */
> - if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
> + if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
> + /*
> + * Record the caller provided buffer as the kernel mode
> + * FP/SIMD buffer for this task, so that the state can
> + * be preserved and restored on a context switch.
> + */
> + WARN_ON(current->thread.kernel_fpsimd_state != NULL);
> + WARN_ON(preemptible() && !state);
This is in the wrong place: we are never preemptible here, even when
called from preemptible context.
Will fix for the next rev.
next prev parent reply other threads:[~2025-10-31 14:16 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-31 10:38 [PATCH v4 00/21] arm64: Move kernel mode FPSIMD buffer to the stack Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 01/21] crypto/arm64: aes-ce-ccm - Avoid pointless yield of the NEON unit Ard Biesheuvel
2025-11-06 7:08 ` Herbert Xu
2025-10-31 10:39 ` [PATCH v4 02/21] crypto/arm64: sm4-ce-ccm " Ard Biesheuvel
2025-11-06 7:11 ` Herbert Xu
2025-10-31 10:39 ` [PATCH v4 03/21] crypto/arm64: sm4-ce-gcm " Ard Biesheuvel
2025-11-06 7:15 ` Herbert Xu
2025-10-31 10:39 ` [PATCH v4 04/21] arm64/simd: Add scoped guard API for kernel mode SIMD Ard Biesheuvel
2025-10-31 13:55 ` Jonathan Cameron
2025-10-31 14:05 ` Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 05/21] ARM/simd: " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 06/21] crypto: aegis128-neon - Move to more abstract 'ksimd' guard API Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 07/21] raid6: " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 08/21] lib/crc: Switch ARM and arm64 to 'ksimd' scoped " Ard Biesheuvel
2025-10-31 13:49 ` Jonathan Cameron
2025-10-31 13:52 ` Ard Biesheuvel
2025-10-31 14:05 ` Ard Biesheuvel
2025-11-03 11:28 ` Jonathan Cameron
2025-11-04 15:32 ` Ard Biesheuvel
2025-11-12 20:58 ` Eric Biggers
2025-10-31 10:39 ` [PATCH v4 09/21] lib/crypto: " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 10/21] crypto/arm64: aes-ccm - Switch " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 11/21] crypto/arm64: aes-blk " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 12/21] crypto/arm64: aes-gcm " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 13/21] crypto/arm64: nhpoly1305 " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 14/21] crypto/arm64: polyval " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 15/21] crypto/arm64: sha3 " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 16/21] crypto/arm64: sm3 " Ard Biesheuvel
2025-10-31 13:52 ` Jonathan Cameron
2025-10-31 13:55 ` Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 17/21] crypto/arm64: sm4 " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 18/21] arm64/xorblocks: " Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 19/21] net/mlx5: Switch to more abstract scoped ksimd guard API on arm64 Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 20/21] arm64/fpu: Enforce task-context only for generic kernel mode FPU Ard Biesheuvel
2025-10-31 10:39 ` [PATCH v4 21/21] arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack Ard Biesheuvel
2025-10-31 14:16 ` Ard Biesheuvel [this message]
2025-11-11 17:12 ` [PATCH v4 00/21] arm64: Move kernel mode FPSIMD buffer to " Catalin Marinas
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAMj1kXGV1xeZXF7adHwbUsg6+JpLyueWaiS89pS7XFm3fKuw6A@mail.gmail.com \
--to=ardb@kernel.org \
--cc=ardb+git@google.com \
--cc=ebiggers@kernel.org \
--cc=herbert@gondor.apana.org.au \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).