linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
From: ard.biesheuvel@linaro.org (Ard Biesheuvel)
To: linux-arm-kernel@lists.infradead.org
Subject: [RFC v2 PATCH 2/4] ARM64: add support for kernel mode NEON in atomic context
Date: Wed,  9 Oct 2013 20:50:32 +0200	[thread overview]
Message-ID: <1381344634-14917-3-git-send-email-ard.biesheuvel@linaro.org> (raw)
In-Reply-To: <1381344634-14917-1-git-send-email-ard.biesheuvel@linaro.org>

This patch adds kernel_neon_begin_atomic() and kernel_neon_end_atomic(),
which may be called from any context. In the !in_interrupt() case, they
just call their non-_atomic counterparts. In atomic context, they stack
and unstack, respectively, the number of NEON registers declared when
setting up the stack area using DEFINE_NEON_STACK_REGS().

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/include/asm/fpsimd.h       | 16 +++++++++++++++
 arch/arm64/include/asm/fpsimdmacros.h | 37 +++++++++++++++++++++++++++++++++++
 arch/arm64/include/asm/neon.h         | 31 +++++++++++++++++++++++++++++
 arch/arm64/kernel/entry-fpsimd.S      | 24 +++++++++++++++++++++++
 arch/arm64/kernel/fpsimd.c            |  3 +++
 5 files changed, 111 insertions(+)

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index c43b4ac..3a741b0 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -39,6 +39,19 @@ struct fpsimd_state {
 	};
 };
 
+/*
+ * Variable sized struct for stacking the bottom n FP/SIMD registers.
+ * Mainly intended for kernel use of v8 Crypto Extensions which only
+ * needs a few registers and may need to execute in atomic context.
+ */
+struct fpsimd_partial_state {
+	const u32	num_regs;	/* count of Q registers to stack */
+	u32		fpsr;		/* saved FPSR */
+	u32		fpcr;		/* saved FPCR */
+	__uint128_t	vregs[] __aligned(16);	/* Q register save area */
+} __aligned(16);
+
+
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /* Masks for extracting the FPSR and FPCR from the FPSCR */
 #define VFP_FPSCR_STAT_MASK	0xf800009f
@@ -55,6 +68,9 @@ struct task_struct;
 extern void fpsimd_save_state(struct fpsimd_state *state);
 extern void fpsimd_load_state(struct fpsimd_state *state);
 
+extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state);
+extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
+
 extern void fpsimd_thread_switch(struct task_struct *next);
 extern void fpsimd_flush_thread(void);
 
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index bbec599..1b47587 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -62,3 +62,40 @@
 	ldr	w\tmpnr, [\state, #16 * 2 + 4]
 	msr	fpcr, x\tmpnr
 .endm
+
+.altmacro				// lets macro arguments be evaluated as expressions
+.macro	q2op, op, q1, q2, state		// emit one paired Q register load or store
+	\op	q\q1, q\q2, [\state, #-(16 * \q1) - 16]
+.endm
+
+.macro fpsimd_save_partial state, tmpnr1, tmpnr2
+	mrs	x\tmpnr1, fpsr
+	mrs	x\tmpnr2, fpcr
+	stp	w\tmpnr1, w\tmpnr2, [\state, #4]	// FPSR to offset 4, FPCR to offset 8
+	adr	x\tmpnr1, 0f				// address just past the stp list
+	ldr	w\tmpnr2, [\state]			// register count from offset 0
+	add	\state, \state, x\tmpnr2, lsl #4	// state += 16 * count (q2op offsets are relative to this)
+	sub	x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1	// back up 2 bytes per register, i.e. one 4-byte stp per pair
+	br	x\tmpnr1				// enter the stp list part-way, skipping unused pairs
+	.irp	qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
+		qb = \qa + 1
+	q2op	stp, \qa, %qb, \state
+	.endr
+0:							// branch target when the count is zero
+.endm
+
+.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
+	ldp	w\tmpnr1, w\tmpnr2, [\state, #4]	// saved FPSR from offset 4, FPCR from offset 8
+	msr	fpsr, x\tmpnr1
+	msr	fpcr, x\tmpnr2
+	adr	x\tmpnr1, 0f				// address just past the ldp list
+	ldr	w\tmpnr2, [\state]			// register count from offset 0
+	add	\state, \state, x\tmpnr2, lsl #4	// state += 16 * count (q2op offsets are relative to this)
+	sub	x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1	// back up 2 bytes per register, i.e. one 4-byte ldp per pair
+	br	x\tmpnr1				// enter the ldp list part-way, skipping unused pairs
+	.irp	qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
+		qb = \qa + 1
+	q2op	ldp, \qa, %qb, \state
+	.endr
+0:							// branch target when the count is zero
+.endm
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index b0cc58a9..1c8600a 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -8,7 +8,38 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/hardirq.h>
+#include <linux/types.h>
+#include <asm/fpsimd.h>
+
 #define cpu_has_neon()		(1)
 
+#define DEFINE_NEON_STACK_REGS(a, num)	/* declare save area 'a' for 'num' regs */ \
+	struct {							\
+		struct fpsimd_partial_state regs;			\
+		__uint128_t vregs[(num) > 32 ? 32 : ((num) + 1) & ~1U];	/* even count, capped at 32 */ \
+	} a = { .regs.num_regs = sizeof(a.vregs) / sizeof(__uint128_t) }
+
+#define DEFINE_NEON_STACK_REGS_ALL(name)	DEFINE_NEON_STACK_REGS(name, 32)
+
 void kernel_neon_begin(void);
 void kernel_neon_end(void);
+
+static inline void __kernel_neon_begin_atomic(struct fpsimd_partial_state *regs)
+{
+	if (in_interrupt())	/* interrupt context: stack the partial state */
+		fpsimd_save_partial_state(regs);
+	else			/* otherwise use the regular begin routine */
+		kernel_neon_begin();
+}
+
+static inline void __kernel_neon_end_atomic(struct fpsimd_partial_state *regs)
+{
+	if (in_interrupt())	/* interrupt context: unstack the partial state */
+		fpsimd_load_partial_state(regs);
+	else			/* otherwise use the regular end routine */
+		kernel_neon_end();
+}
+
+#define kernel_neon_begin_atomic(a)	__kernel_neon_begin_atomic(&(a).regs)
+#define kernel_neon_end_atomic(a)	__kernel_neon_end_atomic(&(a).regs)
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 6a27cd6..82cf648 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state)
 	fpsimd_restore x0, 8
 	ret
 ENDPROC(fpsimd_load_state)
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+/*
+ * Save FPSR, FPCR and the bottom n FP/SIMD registers, n = state->num_regs.
+ *
+ * x0 - pointer to struct fpsimd_partial_state (modified; x8/x9 are scratch)
+ */
+ENTRY(fpsimd_save_partial_state)
+	fpsimd_save_partial x0, 8, 9
+	ret
+ENDPROC(fpsimd_save_partial_state)
+
+/*
+ * Load FPSR, FPCR and the bottom n FP/SIMD registers, n = state->num_regs.
+ *
+ * x0 - pointer to struct fpsimd_partial_state (modified; x8/x9 are scratch)
+ */
+ENTRY(fpsimd_load_partial_state)
+	fpsimd_restore_partial x0, 8, 9
+	ret
+ENDPROC(fpsimd_load_partial_state)
+
+#endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 1f2e4d5..69c7962 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -109,6 +109,9 @@ void kernel_neon_end(void)
 }
 EXPORT_SYMBOL(kernel_neon_end);
 
+EXPORT_SYMBOL(fpsimd_load_partial_state);
+EXPORT_SYMBOL(fpsimd_save_partial_state);
+
 #endif /* CONFIG_KERNEL_MODE_NEON */
 
 /*
-- 
1.8.1.2

  parent reply	other threads:[~2013-10-09 18:50 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-10-09 18:50 [RFC v2 PATCH 0/4] ARM[64]: kernel mode NEON in atomic contexts Ard Biesheuvel
2013-10-09 18:50 ` [RFC v2 PATCH 1/4] ARM: add support for kernel mode NEON in atomic context Ard Biesheuvel
2013-10-09 19:24   ` Nicolas Pitre
2013-10-09 19:32     ` Ard Biesheuvel
2013-10-10  3:45       ` Nicolas Pitre
2013-10-09 18:50 ` Ard Biesheuvel [this message]
2013-10-11 17:14   ` [RFC v2 PATCH 2/4] ARM64: " Catalin Marinas
2013-10-11 17:30     ` Ard Biesheuvel
2013-10-11 19:35       ` Catalin Marinas
2013-10-11 20:09         ` Nicolas Pitre
2013-10-13 22:48           ` Catalin Marinas
2013-10-14  8:12             ` Ard Biesheuvel
2013-10-09 18:50 ` [RFC v2 PATCH 3/4] ARM64: add Crypto Extensions based synchronous core AES cipher Ard Biesheuvel
2013-10-09 18:50 ` [RFC v2 PATCH 4/4] ARM64: add Crypto Extensions based synchronous AES in CCM mode Ard Biesheuvel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1381344634-14917-3-git-send-email-ard.biesheuvel@linaro.org \
    --to=ard.biesheuvel@linaro.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).