linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
From: Ard Biesheuvel <ardb@kernel.org>
To: linux-arm-kernel@lists.infradead.org, linux@armlinux.org.uk
Cc: Ard Biesheuvel <ardb@kernel.org>,
	Frederic Weisbecker <frederic@kernel.org>,
	Guenter Roeck <linux@roeck-us.net>,
	Peter Zijlstra <peterz@infradead.org>,
	Linus Walleij <linus.walleij@linaro.org>,
	Arnd Bergmann <arnd@arndb.de>
Subject: [PATCH v4 07/12] ARM: vfp: Reimplement VFP exception entry in C code
Date: Mon, 20 Mar 2023 14:18:40 +0100	[thread overview]
Message-ID: <20230320131845.3138015-8-ardb@kernel.org> (raw)
In-Reply-To: <20230320131845.3138015-1-ardb@kernel.org>

En/disabling softirqs from asm code turned out to be trickier than
expected, so vfp_support_entry now returns by tail calling
__local_bh_enable_ip() and passing the same arguments that a C call to
local_bh_enable() would pass. However, this is slightly hacky, as we
don't want to carry our own implementation of local_bh_enable().

So let's bite the bullet, and get rid of the asm logic in
vfp_support_entry that reasons about whether or not to save and/or
reload the VFP state, and about whether or not an FP exception is
pending, and only keep the VFP loading logic as a function that is
callable from C.

Replicate the removed logic in the C function vfp_entry() (renamed to
vfp_support_entry() by this patch), and use the exact same
reasoning as in the asm code. To emphasize the correspondence, retain
some of the asm comments in the C version as well.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/vfp/entry.S     |   9 +-
 arch/arm/vfp/vfp.h       |   1 +
 arch/arm/vfp/vfphw.S     | 202 ++------------------
 arch/arm/vfp/vfpmodule.c | 123 ++++++++++--
 4 files changed, 124 insertions(+), 211 deletions(-)

diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
index 7483ef8bccda394c..547c94c62cd3a66a 100644
--- a/arch/arm/vfp/entry.S
+++ b/arch/arm/vfp/entry.S
@@ -22,7 +22,10 @@
 @  IRQs enabled.
 @
 ENTRY(do_vfp)
-	mov	r1, r10
-	mov	r3, r9
-	b	vfp_entry
+	mov	r1, r0				@ pass trigger opcode via R1
+	mov	r0, sp				@ pass struct pt_regs via R0
+	bl	vfp_support_entry		@ dispatch the VFP exception
+	cmp	r0, #0				@ handled successfully?
+	reteq	r9				@ then use R9 as return address
+	ret	lr				@ pass to undef handler
 ENDPROC(do_vfp)
diff --git a/arch/arm/vfp/vfp.h b/arch/arm/vfp/vfp.h
index 5cd6d5053271760e..e43a630f8a164f9d 100644
--- a/arch/arm/vfp/vfp.h
+++ b/arch/arm/vfp/vfp.h
@@ -375,3 +375,4 @@ struct op {
 };
 
 asmlinkage void vfp_save_state(void *location, u32 fpexc);
+asmlinkage u32 vfp_load_state(const void *location);
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index 8049c6830eeb1380..d5a03f3c10c500f3 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -4,12 +4,6 @@
  *
  *  Copyright (C) 2004 ARM Limited.
  *  Written by Deep Blue Solutions Limited.
- *
- * This code is called from the kernel's undefined instruction trap.
- * r1 holds the thread_info pointer
- * r3 holds the return address for successful handling.
- * lr holds the return address for unrecognised instructions.
- * sp points to a struct pt_regs (as defined in include/asm/proc/ptrace.h)
  */
 #include <linux/init.h>
 #include <linux/linkage.h>
@@ -19,20 +13,6 @@
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 
-	.macro	DBGSTR, str
-#ifdef DEBUG
-	stmfd	sp!, {r0-r3, ip, lr}
-	ldr	r0, =1f
-	bl	_printk
-	ldmfd	sp!, {r0-r3, ip, lr}
-
-	.pushsection .rodata, "a"
-1:	.ascii	KERN_DEBUG "VFP: \str\n"
-	.byte	0
-	.previous
-#endif
-	.endm
-
 	.macro  DBGSTR1, str, arg
 #ifdef DEBUG
 	stmfd	sp!, {r0-r3, ip, lr}
@@ -48,175 +28,25 @@
 #endif
 	.endm
 
-	.macro  DBGSTR3, str, arg1, arg2, arg3
-#ifdef DEBUG
-	stmfd	sp!, {r0-r3, ip, lr}
-	mov	r3, \arg3
-	mov	r2, \arg2
-	mov	r1, \arg1
-	ldr	r0, =1f
-	bl	_printk
-	ldmfd	sp!, {r0-r3, ip, lr}
-
-	.pushsection .rodata, "a"
-1:	.ascii	KERN_DEBUG "VFP: \str\n"
-	.byte	0
-	.previous
-#endif
-	.endm
-
-
-@ VFP hardware support entry point.
-@
-@  r0  = instruction opcode (32-bit ARM or two 16-bit Thumb)
-@  r1  = thread_info pointer
-@  r2  = PC value to resume execution after successful emulation
-@  r3  = normal "successful" return address
-@  lr  = unrecognised instruction return address
-@  IRQs enabled.
-ENTRY(vfp_support_entry)
-	ldr	r11, [r1, #TI_CPU]	@ CPU number
-	add	r10, r1, #TI_VFPSTATE	@ r10 = workspace
-
-	DBGSTR3	"instr %08x pc %08x state %p", r0, r2, r10
-
-	.fpu	vfpv2
-	VFPFMRX	r1, FPEXC		@ Is the VFP enabled?
-	DBGSTR1	"fpexc %08x", r1
-	tst	r1, #FPEXC_EN
-	bne	look_for_VFP_exceptions	@ VFP is already enabled
-
-	DBGSTR1 "enable %x", r10
-	ldr	r9, vfp_current_hw_state_address
-	orr	r1, r1, #FPEXC_EN	@ user FPEXC has the enable bit set
-	ldr	r4, [r9, r11, lsl #2]	@ vfp_current_hw_state pointer
-	bic	r5, r1, #FPEXC_EX	@ make sure exceptions are disabled
-	cmp	r4, r10			@ this thread owns the hw context?
-#ifndef CONFIG_SMP
-	@ For UP, checking that this thread owns the hw context is
-	@ sufficient to determine that the hardware state is valid.
-	beq	vfp_hw_state_valid
-
-	@ On UP, we lazily save the VFP context.  As a different
-	@ thread wants ownership of the VFP hardware, save the old
-	@ state if there was a previous (valid) owner.
-
-	VFPFMXR	FPEXC, r5		@ enable VFP, disable any pending
-					@ exceptions, so we can get at the
-					@ rest of it
-
-	DBGSTR1	"save old state %p", r4
-	cmp	r4, #0			@ if the vfp_current_hw_state is NULL
-	beq	vfp_reload_hw		@ then the hw state needs reloading
-	VFPFSTMIA r4, r5		@ save the working registers
-	VFPFMRX	r5, FPSCR		@ current status
-	tst	r1, #FPEXC_EX		@ is there additional state to save?
-	beq	1f
-	VFPFMRX	r6, FPINST		@ FPINST (only if FPEXC.EX is set)
-	tst	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
-	beq	1f
-	VFPFMRX	r8, FPINST2		@ FPINST2 if needed (and present)
-1:
-	stmia	r4, {r1, r5, r6, r8}	@ save FPEXC, FPSCR, FPINST, FPINST2
-vfp_reload_hw:
-
-#else
-	@ For SMP, if this thread does not own the hw context, then we
-	@ need to reload it.  No need to save the old state as on SMP,
-	@ we always save the state when we switch away from a thread.
-	bne	vfp_reload_hw
-
-	@ This thread has ownership of the current hardware context.
-	@ However, it may have been migrated to another CPU, in which
-	@ case the saved state is newer than the hardware context.
-	@ Check this by looking at the CPU number which the state was
-	@ last loaded onto.
-	ldr	ip, [r10, #VFP_CPU]
-	teq	ip, r11
-	beq	vfp_hw_state_valid
-
-vfp_reload_hw:
-	@ We're loading this threads state into the VFP hardware. Update
-	@ the CPU number which contains the most up to date VFP context.
-	str	r11, [r10, #VFP_CPU]
-
-	VFPFMXR	FPEXC, r5		@ enable VFP, disable any pending
-					@ exceptions, so we can get at the
-					@ rest of it
-#endif
-
-	DBGSTR1	"load state %p", r10
-	str	r10, [r9, r11, lsl #2]	@ update the vfp_current_hw_state pointer
+ENTRY(vfp_load_state)
+	@ Load the current VFP state
+	@ r0 - load location
+	@ returns FPEXC
+	DBGSTR1	"load VFP state %p", r0
 					@ Load the saved state back into the VFP
-	VFPFLDMIA r10, r5		@ reload the working registers while
+	VFPFLDMIA r0, r1		@ reload the working registers while
 					@ FPEXC is in a safe state
-	ldmia	r10, {r1, r5, r6, r8}	@ load FPEXC, FPSCR, FPINST, FPINST2
-	tst	r1, #FPEXC_EX		@ is there additional state to restore?
+	ldmia	r0, {r0-r3}		@ load FPEXC, FPSCR, FPINST, FPINST2
+	tst	r0, #FPEXC_EX		@ is there additional state to restore?
 	beq	1f
-	VFPFMXR	FPINST, r6		@ restore FPINST (only if FPEXC.EX is set)
-	tst	r1, #FPEXC_FP2V		@ is there an FPINST2 to write?
+	VFPFMXR	FPINST, r2		@ restore FPINST (only if FPEXC.EX is set)
+	tst	r0, #FPEXC_FP2V		@ is there an FPINST2 to write?
 	beq	1f
-	VFPFMXR	FPINST2, r8		@ FPINST2 if needed (and present)
+	VFPFMXR	FPINST2, r3		@ FPINST2 if needed (and present)
 1:
-	VFPFMXR	FPSCR, r5		@ restore status
-
-@ The context stored in the VFP hardware is up to date with this thread
-vfp_hw_state_valid:
-	tst	r1, #FPEXC_EX
-	bne	process_exception	@ might as well handle the pending
-					@ exception before retrying branch
-					@ out before setting an FPEXC that
-					@ stops us reading stuff
-	VFPFMXR	FPEXC, r1		@ Restore FPEXC last
-	sub	r2, r2, #4		@ Retry current instruction - if Thumb
-	str	r2, [sp, #S_PC]		@ mode it's two 16-bit instructions,
-					@ else it's one 32-bit instruction, so
-					@ always subtract 4 from the following
-					@ instruction address.
-
-	mov	lr, r3			@ we think we have handled things
-local_bh_enable_and_ret:
-	adr	r0, .
-	mov	r1, #SOFTIRQ_DISABLE_OFFSET
-	b	__local_bh_enable_ip	@ tail call
-
-look_for_VFP_exceptions:
-	@ Check for synchronous or asynchronous exception
-	tst	r1, #FPEXC_EX | FPEXC_DEX
-	bne	process_exception
-	@ On some implementations of the VFP subarch 1, setting FPSCR.IXE
-	@ causes all the CDP instructions to be bounced synchronously without
-	@ setting the FPEXC.EX bit
-	VFPFMRX	r5, FPSCR
-	tst	r5, #FPSCR_IXE
-	bne	process_exception
-
-	tst	r5, #FPSCR_LENGTH_MASK
-	beq	skip
-	orr	r1, r1, #FPEXC_DEX
-	b	process_exception
-skip:
-
-	@ Fall into hand on to next handler - appropriate coproc instr
-	@ not recognised by VFP
-
-	DBGSTR	"not VFP"
-	b	local_bh_enable_and_ret
-
-process_exception:
-	DBGSTR	"bounce"
-	mov	r2, sp			@ nothing stacked - regdump is at TOS
-	mov	lr, r3			@ setup for a return to the user code.
-
-	@ Now call the C code to package up the bounce to the support code
-	@   r0 holds the trigger instruction
-	@   r1 holds the FPEXC value
-	@   r2 pointer to register dump
-	b	VFP_bounce		@ we have handled this - the support
-					@ code will raise an exception if
-					@ required. If not, the user code will
-					@ retry the faulted instruction
-ENDPROC(vfp_support_entry)
+	VFPFMXR	FPSCR, r1		@ restore status
+	ret	lr
+ENDPROC(vfp_load_state)
 
 ENTRY(vfp_save_state)
 	@ Save the current VFP state
@@ -236,10 +66,6 @@ ENTRY(vfp_save_state)
 	ret	lr
 ENDPROC(vfp_save_state)
 
-	.align
-vfp_current_hw_state_address:
-	.word	vfp_current_hw_state
-
 	.macro	tbl_branch, base, tmp, shift
 #ifdef CONFIG_THUMB2_KERNEL
 	adr	\tmp, 1f
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index dd31d13ca1d8fc8a..6db7d20c467ff843 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -30,11 +30,6 @@
 #include "vfpinstr.h"
 #include "vfp.h"
 
-/*
- * Our undef handlers (in entry.S)
- */
-asmlinkage void vfp_support_entry(u32, void *, u32, u32);
-
 static bool have_vfp __ro_after_init;
 
 /*
@@ -325,7 +320,7 @@ static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs)
 /*
  * Package up a bounce condition.
  */
-void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
+static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 {
 	u32 fpscr, orig_fpscr, fpsid, exceptions;
 
@@ -374,7 +369,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 		 * on VFP subarch 1.
 		 */
 		 vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs);
-		goto exit;
+		return;
 	}
 
 	/*
@@ -405,7 +400,7 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 	 * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1.
 	 */
 	if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V))
-		goto exit;
+		return;
 
 	/*
 	 * The barrier() here prevents fpinst2 being read
@@ -418,8 +413,6 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 	exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs);
 	if (exceptions)
 		vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
- exit:
-	local_bh_enable();
 }
 
 static void vfp_enable(void *unused)
@@ -673,22 +666,112 @@ static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr)
 }
 
 /*
- * Entered with:
+ * vfp_support_entry - Handle VFP exception from user mode
  *
- *  r0  = instruction opcode (32-bit ARM or two 16-bit Thumb)
- *  r1  = thread_info pointer
- *  r2  = PC value to resume execution after successful emulation
- *  r3  = normal "successful" return address
- *  lr  = unrecognised instruction return address
+ * @regs:	pt_regs structure holding the register state at exception entry
+ * @trigger:	The opcode of the instruction that triggered the exception
+ *
+ * Returns 0 if the exception was handled, or an error code otherwise.
  */
-asmlinkage void vfp_entry(u32 trigger, struct thread_info *ti, u32 resume_pc,
-			  u32 resume_return_address)
+asmlinkage int vfp_support_entry(struct pt_regs *regs, u32 trigger)
 {
+	struct thread_info *ti = current_thread_info();
+	u32 fpexc;
+
 	if (unlikely(!have_vfp))
-		return;
+		return -ENODEV;
 
 	local_bh_disable();
-	vfp_support_entry(trigger, ti, resume_pc, resume_return_address);
+	fpexc = fmrx(FPEXC);
+
+	/*
+	 * If the VFP unit was not enabled yet, we have to check whether the
+	 * VFP state in the CPU's registers is the most recent VFP state
+	 * associated with the process. On UP systems, we don't save the VFP
+	 * state eagerly on a context switch, so we may need to save the
+	 * VFP state to memory first, as it may belong to another process.
+	 */
+	if (!(fpexc & FPEXC_EN)) {
+		/*
+		 * Enable the VFP unit but mask the FP exception flag for the
+		 * time being, so we can access all the registers.
+		 */
+		fpexc |= FPEXC_EN;
+		fmxr(FPEXC, fpexc & ~FPEXC_EX);
+
+		/*
+		 * Check whether or not the VFP state in the CPU's registers is
+		 * the most recent VFP state associated with this task. On SMP,
+		 * migration may result in multiple CPUs holding VFP states
+		 * that belong to the same task, but only the most recent one
+		 * is valid.
+		 */
+		if (!vfp_state_in_hw(ti->cpu, ti)) {
+			if (!IS_ENABLED(CONFIG_SMP) &&
+			    vfp_current_hw_state[ti->cpu] != NULL) {
+				/*
+				 * This CPU is currently holding the most
+				 * recent VFP state associated with another
+				 * task, and we must save that to memory first.
+				 */
+				vfp_save_state(vfp_current_hw_state[ti->cpu],
+					       fpexc);
+			}
+
+			/*
+			 * We can now proceed with loading the task's VFP state
+			 * from memory into the CPU registers.
+			 */
+			fpexc = vfp_load_state(&ti->vfpstate);
+			vfp_current_hw_state[ti->cpu] = &ti->vfpstate;
+#ifdef CONFIG_SMP
+			/*
+			 * Record that this CPU is now the one holding the most
+			 * recent VFP state of the task.
+			 */
+			ti->vfpstate.hard.cpu = ti->cpu;
+#endif
+		}
+
+		if (fpexc & FPEXC_EX)
+			/*
+			 * Might as well handle the pending exception before
+			 * retrying branch out before setting an FPEXC that
+			 * stops us reading stuff.
+			 */
+			goto bounce;
+
+		/*
+		 * No FP exception is pending: just enable the VFP and
+		 * replay the instruction that trapped.
+		 */
+		fmxr(FPEXC, fpexc);
+		regs->ARM_pc -= 4;
+	} else {
+		/* Check for synchronous or asynchronous exceptions */
+		if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) {
+			u32 fpscr = fmrx(FPSCR);
+
+			/*
+			 * On some implementations of the VFP subarch 1,
+			 * setting FPSCR.IXE causes all the CDP instructions to
+			 * be bounced synchronously without setting the
+			 * FPEXC.EX bit
+			 */
+			if (!(fpscr & FPSCR_IXE)) {
+				if (!(fpscr & FPSCR_LENGTH_MASK)) {
+					pr_debug("not VFP\n");
+					local_bh_enable();
+					return -ENOEXEC;
+				}
+				fpexc |= FPEXC_DEX;
+			}
+		}
+bounce:		VFP_bounce(trigger, fpexc, regs);
+	}
+
+	local_bh_enable();
+	return 0;
 }
 
 static struct undef_hook vfp_kmode_exception_hook[] = {{
-- 
2.39.2


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  parent reply	other threads:[~2023-03-20 13:20 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-20 13:18 [PATCH v4 00/12] ARM: vfp: Switch to C API to en/disable softirqs Ard Biesheuvel
2023-03-20 13:18 ` [PATCH v4 01/12] ARM: vfp: Pass thread_info pointer to vfp_support_entry Ard Biesheuvel
2023-03-20 13:18 ` [PATCH v4 02/12] ARM: vfp: Pass successful return address via register R3 Ard Biesheuvel
2023-03-20 13:18 ` [PATCH v4 03/12] ARM: vfp: Fix broken softirq handling with instrumentation enabled Ard Biesheuvel
2023-04-09 14:29   ` Linux regression tracking (Thorsten Leemhuis)
2023-04-10 20:15     ` Guenter Roeck
2023-03-20 13:18 ` [PATCH v4 04/12] ARM: entry: Fix iWMMXT TIF flag handling Ard Biesheuvel
2023-03-21 14:32   ` Linus Walleij
2023-03-21 19:19     ` Nicolas Pitre
2023-03-21 19:32       ` Ard Biesheuvel
2023-03-20 13:18 ` [PATCH v4 05/12] ARM: vfp: Record VFP bounces as perf emulation faults Ard Biesheuvel
2023-03-21 14:33   ` Linus Walleij
2023-03-20 13:18 ` [PATCH v4 06/12] ARM: vfp: Remove workaround for Feroceon CPUs Ard Biesheuvel
2023-03-21 14:44   ` Linus Walleij
2023-03-21 15:42     ` Ard Biesheuvel
2023-03-21 20:40       ` Linus Walleij
2023-03-22  7:26       ` Arnd Bergmann
2023-03-21 20:00     ` Nicolas Pitre
2023-03-20 13:18 ` Ard Biesheuvel [this message]
2023-03-20 13:18 ` [PATCH v4 08/12] ARM: kernel: Get rid of thread_info::used_cp[] array Ard Biesheuvel
2023-03-21 14:58   ` Linus Walleij
2023-03-20 13:18 ` [PATCH v4 09/12] ARM: vfp: Use undef hook for handling VFP exceptions Ard Biesheuvel
2023-03-21 14:59   ` Linus Walleij
2023-03-21 15:41     ` Ard Biesheuvel
2023-03-20 13:18 ` [PATCH v4 10/12] ARM: entry: Disregard Thumb undef exception in coproc dispatch Ard Biesheuvel
2023-03-21 15:05   ` Linus Walleij
2023-03-20 13:18 ` [PATCH v4 11/12] ARM: iwmmxt: Use undef hook to enable coprocessor for task Ard Biesheuvel
2023-03-21 15:06   ` Linus Walleij
2023-03-20 13:18 ` [PATCH v4 12/12] ARM: entry: Make asm coproc dispatch code NWFPE only Ard Biesheuvel
2023-03-21 15:11   ` Linus Walleij
2023-03-23  2:44 ` [PATCH v4 00/12] ARM: vfp: Switch to C API to en/disable softirqs Guenter Roeck
2023-03-23  8:33   ` Ard Biesheuvel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230320131845.3138015-8-ardb@kernel.org \
    --to=ardb@kernel.org \
    --cc=arnd@arndb.de \
    --cc=frederic@kernel.org \
    --cc=linus.walleij@linaro.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux@armlinux.org.uk \
    --cc=linux@roeck-us.net \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).