From: Ard Biesheuvel <ardb+git@google.com>
To: linux-arm-kernel@lists.infradead.org
Cc: linux-crypto@vger.kernel.org, linux-kernel@vger.kernel.org,
herbert@gondor.apana.org.au, ebiggers@kernel.org,
Ard Biesheuvel <ardb@kernel.org>, Marc Zyngier <maz@kernel.org>,
Will Deacon <will@kernel.org>,
Mark Rutland <mark.rutland@arm.com>,
Kees Cook <keescook@chromium.org>,
Catalin Marinas <catalin.marinas@arm.com>,
Mark Brown <broonie@kernel.org>
Subject: [PATCH 5/5] arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack
Date: Thu, 18 Sep 2025 08:35:45 +0200 [thread overview]
Message-ID: <20250918063539.2640512-12-ardb+git@google.com> (raw)
In-Reply-To: <20250918063539.2640512-7-ardb+git@google.com>
From: Ard Biesheuvel <ardb@kernel.org>
Commit aefbab8e77eb16b5
("arm64: fpsimd: Preserve/restore kernel mode NEON at context switch")
added a 'kernel_fpsimd_state' field to struct thread_struct, which is
the arch-specific portion of struct task_struct, and is allocated for
each task in the system. The size of this field is 528 bytes, which
bloats task_struct non-trivially, and the added memory overhead may
impact performance on systems with many processes.
This allocation is only used if the task is scheduled out or interrupted
by a softirq while using the FP/SIMD unit in kernel mode. Given that
calls to kernel_neon_begin() and kernel_neon_end() are now guaranteed to
originate from the same lexical scope, it is possible to transparently
allocate this buffer on the caller's stack instead.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/arm64/include/asm/neon.h | 4 +--
arch/arm64/include/asm/processor.h | 2 +-
arch/arm64/kernel/fpsimd.c | 26 ++++++++++++++------
3 files changed, 21 insertions(+), 11 deletions(-)
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index 4e24f1058b55..acaac98ff449 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -13,10 +13,10 @@
#define cpu_has_neon() system_supports_fpsimd()
-void __kernel_neon_begin(void);
+void __kernel_neon_begin(struct user_fpsimd_state *);
void __kernel_neon_end(void);
-#define kernel_neon_begin() do { __kernel_neon_begin()
+#define kernel_neon_begin() do { __kernel_neon_begin(&(struct user_fpsimd_state){})
#define kernel_neon_end() __kernel_neon_end(); } while (0)
#endif /* ! __ASM_NEON_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 61d62bfd5a7b..226e635c53d9 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -172,7 +172,7 @@ struct thread_struct {
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
- struct user_fpsimd_state kernel_fpsimd_state;
+ struct user_fpsimd_state *kernel_fpsimd_state;
unsigned int kernel_fpsimd_cpu;
#ifdef CONFIG_ARM64_PTR_AUTH
struct ptrauth_keys_user keys_user;
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index d7eb073d1366..919c53a26484 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1488,21 +1488,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task)
* Elide the load if this CPU holds the most recent kernel mode
* FPSIMD context of the current task.
*/
- if (last->st == &task->thread.kernel_fpsimd_state &&
+ if (last->st == task->thread.kernel_fpsimd_state &&
task->thread.kernel_fpsimd_cpu == smp_processor_id())
return;
- fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+ fpsimd_load_state(task->thread.kernel_fpsimd_state);
}
static void fpsimd_save_kernel_state(struct task_struct *task)
{
struct cpu_fp_state cpu_fp_state = {
- .st = &task->thread.kernel_fpsimd_state,
+ .st = task->thread.kernel_fpsimd_state,
.to_save = FP_STATE_FPSIMD,
};
- fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+ BUG_ON(!cpu_fp_state.st);
+
+ fpsimd_save_state(task->thread.kernel_fpsimd_state);
fpsimd_bind_state_to_cpu(&cpu_fp_state);
task->thread.kernel_fpsimd_cpu = smp_processor_id();
@@ -1773,6 +1775,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
void fpsimd_flush_task_state(struct task_struct *t)
{
t->thread.fpsimd_cpu = NR_CPUS;
+ t->thread.kernel_fpsimd_state = NULL;
/*
* If we don't support fpsimd, bail out after we have
* reset the fpsimd_cpu for this task and clear the
@@ -1833,7 +1836,7 @@ void fpsimd_save_and_flush_cpu_state(void)
* The caller may freely use the FPSIMD registers until kernel_neon_end() is
* called.
*/
-void __kernel_neon_begin(void)
+void __kernel_neon_begin(struct user_fpsimd_state *s)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
@@ -1849,6 +1852,13 @@ void __kernel_neon_begin(void)
} else {
fpsimd_save_user_state();
+ /*
+ * Record the caller provided buffer as the kernel mode FP/SIMD
+ * buffer for this task, so that the state can be preserved and
+ * restored on a context switch.
+ */
+ current->thread.kernel_fpsimd_state = s;
+
/*
* Set the thread flag so that the kernel mode FPSIMD state
* will be context switched along with the rest of the task
@@ -1899,8 +1909,8 @@ void __kernel_neon_end(void)
if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
test_thread_flag(TIF_KERNEL_FPSTATE))
fpsimd_load_kernel_state(current);
- else
- clear_thread_flag(TIF_KERNEL_FPSTATE);
+ else if (test_and_clear_thread_flag(TIF_KERNEL_FPSTATE))
+ current->thread.kernel_fpsimd_state = NULL;
}
EXPORT_SYMBOL_GPL(__kernel_neon_end);
@@ -1936,7 +1946,7 @@ void __efi_fpsimd_begin(void)
WARN_ON(preemptible());
if (may_use_simd()) {
- __kernel_neon_begin();
+ __kernel_neon_begin(&efi_fpsimd_state);
} else {
/*
* If !efi_sve_state, SVE can't be in use yet and doesn't need
--
2.51.0.384.g4c02a37b29-goog
next prev parent reply other threads:[~2025-09-18 6:36 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-18 6:35 [PATCH 0/5] arm64: Move kernel mode FPSIMD buffer to the stack Ard Biesheuvel
2025-09-18 6:35 ` [PATCH 1/5] crypto/arm64: aes-ce-ccm - Avoid pointless yield of the NEON unit Ard Biesheuvel
2025-09-19 16:01 ` Mark Brown
2025-09-18 6:35 ` [PATCH 2/5] crypto/arm64: sm4-ce-ccm " Ard Biesheuvel
2025-09-19 16:03 ` Mark Brown
2025-09-18 6:35 ` [PATCH 3/5] crypto/arm64: sm4-ce-gcm " Ard Biesheuvel
2025-09-19 16:03 ` Mark Brown
2025-09-18 6:35 ` [PATCH 4/5] arm64/fpsimd: Require kernel NEON begin/end calls from the same scope Ard Biesheuvel
2025-09-21 21:58 ` kernel test robot
2025-09-18 6:35 ` Ard Biesheuvel [this message]
2025-09-19 19:32 ` [PATCH 0/5] arm64: Move kernel mode FPSIMD buffer to the stack Eric Biggers
2025-09-19 22:41 ` Ard Biesheuvel
2025-09-20 6:42 ` Kees Cook
2025-09-20 13:20 ` Ard Biesheuvel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250918063539.2640512-12-ardb+git@google.com \
--to=ardb+git@google.com \
--cc=ardb@kernel.org \
--cc=broonie@kernel.org \
--cc=catalin.marinas@arm.com \
--cc=ebiggers@kernel.org \
--cc=herbert@gondor.apana.org.au \
--cc=keescook@chromium.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=maz@kernel.org \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).