From: Ard Biesheuvel <ardb+git@google.com>
To: linux-arm-kernel@lists.infradead.org
Cc: linux-crypto@vger.kernel.org, linux-kernel@vger.kernel.org,
herbert@gondor.apana.org.au, linux@armlinux.org.uk,
Ard Biesheuvel <ardb@kernel.org>, Marc Zyngier <maz@kernel.org>,
Will Deacon <will@kernel.org>,
Mark Rutland <mark.rutland@arm.com>,
Kees Cook <keescook@chromium.org>,
Catalin Marinas <catalin.marinas@arm.com>,
Mark Brown <broonie@kernel.org>,
Eric Biggers <ebiggers@kernel.org>
Subject: [PATCH v2 20/20] arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack
Date: Wed, 1 Oct 2025 23:02:22 +0200 [thread overview]
Message-ID: <20251001210201.838686-42-ardb+git@google.com> (raw)
In-Reply-To: <20251001210201.838686-22-ardb+git@google.com>
From: Ard Biesheuvel <ardb@kernel.org>
Commit aefbab8e77eb16b5
("arm64: fpsimd: Preserve/restore kernel mode NEON at context switch")
added a 'kernel_fpsimd_state' field to struct thread_struct, which is
the arch-specific portion of struct task_struct, and is allocated for
each task in the system. The size of this field is 528 bytes, resulting
in non-trivial bloat of task_struct, and the resulting memory overhead
may impact performance on systems with many processes.
This allocation is only used if the task is scheduled out or interrupted
by a softirq while using the FP/SIMD unit in kernel mode, and so it is
possible to transparently allocate this buffer on the caller's stack
instead.
So tweak the 'ksimd' scoped guard implementation so that a stack buffer
is allocated and passed to both kernel_neon_begin() and
kernel_neon_end(), and record it in the task struct. Passing the address
to both functions, and checking the addresses for consistency ensures
that callers of the updated bare begin/end API use it in a manner that
is consistent with the new context switch semantics.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/arm64/include/asm/neon.h | 4 +--
arch/arm64/include/asm/processor.h | 2 +-
arch/arm64/include/asm/simd.h | 7 ++--
arch/arm64/kernel/fpsimd.c | 34 +++++++++++++-------
4 files changed, 31 insertions(+), 16 deletions(-)
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index d4b1d172a79b..acebee4605b5 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -13,7 +13,7 @@
#define cpu_has_neon() system_supports_fpsimd()
-void kernel_neon_begin(void);
-void kernel_neon_end(void);
+void kernel_neon_begin(struct user_fpsimd_state *);
+void kernel_neon_end(struct user_fpsimd_state *);
#endif /* ! __ASM_NEON_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 4f8d677b73ee..93bca4d454d7 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -172,7 +172,7 @@ struct thread_struct {
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
- struct user_fpsimd_state kernel_fpsimd_state;
+ struct user_fpsimd_state *kernel_fpsimd_state;
unsigned int kernel_fpsimd_cpu;
#ifdef CONFIG_ARM64_PTR_AUTH
struct ptrauth_keys_user keys_user;
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index d9f83c478736..7ddb25df5c98 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -43,8 +43,11 @@ static __must_check inline bool may_use_simd(void) {
#endif /* ! CONFIG_KERNEL_MODE_NEON */
-DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())
+DEFINE_LOCK_GUARD_1(ksimd,
+ struct user_fpsimd_state,
+ kernel_neon_begin(_T->lock),
+ kernel_neon_end(_T->lock))
-#define scoped_ksimd() scoped_guard(ksimd)
+#define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){})
#endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index c37f02d7194e..ea9192a180aa 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1488,21 +1488,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task)
* Elide the load if this CPU holds the most recent kernel mode
* FPSIMD context of the current task.
*/
- if (last->st == &task->thread.kernel_fpsimd_state &&
+ if (last->st == task->thread.kernel_fpsimd_state &&
task->thread.kernel_fpsimd_cpu == smp_processor_id())
return;
- fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+ fpsimd_load_state(task->thread.kernel_fpsimd_state);
}
static void fpsimd_save_kernel_state(struct task_struct *task)
{
struct cpu_fp_state cpu_fp_state = {
- .st = &task->thread.kernel_fpsimd_state,
+ .st = task->thread.kernel_fpsimd_state,
.to_save = FP_STATE_FPSIMD,
};
- fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+ BUG_ON(!cpu_fp_state.st);
+
+ fpsimd_save_state(task->thread.kernel_fpsimd_state);
fpsimd_bind_state_to_cpu(&cpu_fp_state);
task->thread.kernel_fpsimd_cpu = smp_processor_id();
@@ -1773,6 +1775,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
void fpsimd_flush_task_state(struct task_struct *t)
{
t->thread.fpsimd_cpu = NR_CPUS;
+ t->thread.kernel_fpsimd_state = NULL;
/*
* If we don't support fpsimd, bail out after we have
* reset the fpsimd_cpu for this task and clear the
@@ -1833,7 +1836,7 @@ void fpsimd_save_and_flush_cpu_state(void)
* The caller may freely use the FPSIMD registers until kernel_neon_end() is
* called.
*/
-void kernel_neon_begin(void)
+void kernel_neon_begin(struct user_fpsimd_state *s)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
@@ -1866,8 +1869,16 @@ void kernel_neon_begin(void)
* mode in task context. So in this case, setting the flag here
* is always appropriate.
*/
- if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
+ /*
+ * Record the caller provided buffer as the kernel mode
+ * FP/SIMD buffer for this task, so that the state can
+ * be preserved and restored on a context switch.
+ */
+ if (cmpxchg(&current->thread.kernel_fpsimd_state, NULL, s))
+ BUG();
set_thread_flag(TIF_KERNEL_FPSTATE);
+ }
}
/* Invalidate any task state remaining in the fpsimd regs: */
@@ -1886,7 +1897,7 @@ EXPORT_SYMBOL_GPL(kernel_neon_begin);
* The caller must not use the FPSIMD registers after this function is called,
* unless kernel_neon_begin() is called again in the meantime.
*/
-void kernel_neon_end(void)
+void kernel_neon_end(struct user_fpsimd_state *s)
{
if (!system_supports_fpsimd())
return;
@@ -1899,8 +1910,9 @@ void kernel_neon_end(void)
if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
test_thread_flag(TIF_KERNEL_FPSTATE))
fpsimd_load_kernel_state(current);
- else
- clear_thread_flag(TIF_KERNEL_FPSTATE);
+ else if (test_and_clear_thread_flag(TIF_KERNEL_FPSTATE))
+ if (cmpxchg(&current->thread.kernel_fpsimd_state, s, NULL) != s)
+ BUG();
}
EXPORT_SYMBOL_GPL(kernel_neon_end);
@@ -1936,7 +1948,7 @@ void __efi_fpsimd_begin(void)
WARN_ON(preemptible());
if (may_use_simd()) {
- kernel_neon_begin();
+ kernel_neon_begin(&efi_fpsimd_state);
} else {
/*
* If !efi_sve_state, SVE can't be in use yet and doesn't need
@@ -1985,7 +1997,7 @@ void __efi_fpsimd_end(void)
return;
if (!efi_fpsimd_state_used) {
- kernel_neon_end();
+ kernel_neon_end(&efi_fpsimd_state);
} else {
if (system_supports_sve() && efi_sve_state_used) {
bool ffr = true;
--
2.51.0.618.g983fd99d29-goog
next prev parent reply other threads:[~2025-10-01 21:04 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-01 21:02 [PATCH v2 00/20] arm64: Move kernel mode FPSIMD buffer to the stack Ard Biesheuvel
2025-10-01 21:02 ` [PATCH v2 01/20] arm64: Revert support for generic kernel mode FPU Ard Biesheuvel
2025-10-02 16:23 ` Mark Brown
2025-10-08 12:44 ` Ard Biesheuvel
2025-10-01 21:02 ` [PATCH v2 02/20] arm64/simd: Add scoped guard API for kernel mode SIMD Ard Biesheuvel
2025-10-02 16:17 ` Kees Cook
2025-10-14 14:34 ` Mark Brown
2025-10-01 21:02 ` [PATCH v2 03/20] ARM/simd: " Ard Biesheuvel
2025-10-02 16:18 ` Kees Cook
2025-10-01 21:02 ` [PATCH v2 04/20] crypto: aegis128-neon - Move to more abstract 'ksimd' guard API Ard Biesheuvel
2025-10-02 16:20 ` Kees Cook
2025-10-02 16:48 ` Ard Biesheuvel
2025-10-01 21:02 ` [PATCH v2 05/20] raid6: " Ard Biesheuvel
2025-10-01 21:02 ` [PATCH v2 06/20] crypto/arm64: aes-ce-ccm - Avoid pointless yield of the NEON unit Ard Biesheuvel
2025-10-01 21:02 ` [PATCH v2 07/20] crypto/arm64: sm4-ce-ccm " Ard Biesheuvel
2025-10-01 21:02 ` [PATCH v2 08/20] crypto/arm64: sm4-ce-gcm " Ard Biesheuvel
2025-10-01 21:02 ` [PATCH v2 09/20] lib/crc: Switch ARM and arm64 to 'ksimd' scoped guard API Ard Biesheuvel
2025-10-01 21:02 ` [PATCH v2 10/20] lib/crypto: " Ard Biesheuvel
2025-10-01 21:02 ` [PATCH v2 11/20] crypto/arm64: aes-ccm - Switch " Ard Biesheuvel
2025-10-01 21:02 ` [PATCH v2 12/20] crypto/arm64: aes-blk " Ard Biesheuvel
2025-10-01 21:02 ` [PATCH v2 13/20] crypto/arm64: aes-gcm " Ard Biesheuvel
2025-10-01 21:02 ` [PATCH v2 14/20] crypto/arm64: nhpoly1305 " Ard Biesheuvel
2025-10-01 21:02 ` [PATCH v2 15/20] crypto/arm64: polyval " Ard Biesheuvel
2025-10-01 21:02 ` [PATCH v2 16/20] crypto/arm64: sha3 " Ard Biesheuvel
2025-10-01 21:02 ` [PATCH v2 17/20] crypto/arm64: sm3 " Ard Biesheuvel
2025-10-01 21:02 ` [PATCH v2 18/20] crypto/arm64: sm4 " Ard Biesheuvel
2025-10-01 21:02 ` [PATCH v2 19/20] arm64/xorblocks: " Ard Biesheuvel
2025-10-01 21:02 ` Ard Biesheuvel [this message]
2025-10-02 16:22 ` [PATCH v2 20/20] arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack Kees Cook
2025-10-02 16:51 ` Ard Biesheuvel
2025-10-03 20:18 ` Eric Biggers
2025-10-05 14:54 ` Ard Biesheuvel
2025-10-03 20:28 ` [PATCH v2 00/20] arm64: Move kernel mode FPSIMD buffer to " Eric Biggers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251001210201.838686-42-ardb+git@google.com \
--to=ardb+git@google.com \
--cc=ardb@kernel.org \
--cc=broonie@kernel.org \
--cc=catalin.marinas@arm.com \
--cc=ebiggers@kernel.org \
--cc=herbert@gondor.apana.org.au \
--cc=keescook@chromium.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux@armlinux.org.uk \
--cc=mark.rutland@arm.com \
--cc=maz@kernel.org \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.