All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ard Biesheuvel <ardb@kernel.org>
To: linux-arm-kernel@lists.infradead.org
Cc: Ard Biesheuvel <ardb@kernel.org>,
	Russell King <linux@armlinux.org.uk>,
	Arnd Bergmann <arnd@arndb.de>,
	Linus Walleij <linus.walleij@linaro.org>
Subject: [PATCH 2/2] ARM: entry: avoid explicit literal loads
Date: Tue,  1 Mar 2022 13:04:40 +0100	[thread overview]
Message-ID: <20220301120440.994447-3-ardb@kernel.org> (raw)
In-Reply-To: <20220301120440.994447-1-ardb@kernel.org>

ARMv7 has MOVW/MOVT instruction pairs to load symbol addresses into
registers without having to rely on literal loads that go via the
D-cache.  For older cores, we now support a similar arrangement, based
on PC-relative group relocations.

This means we can elide most literal loads entirely from the entry path,
by switching to the ldr_va macro to emit the appropriate sequence
depending on the target architecture revision.

While at it, switch to the bl_r macro for invoking the right PABT/DABT
helpers instead of setting the LR register explicitly: open-coding the
return address in LR does not play well with cores that speculate across
function returns.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/include/asm/assembler.h | 18 +++++-----
 arch/arm/kernel/entry-armv.S     | 37 ++++----------------
 arch/arm/kernel/entry-common.S   | 10 +-----
 arch/arm/kernel/entry-header.S   |  3 +-
 4 files changed, 18 insertions(+), 50 deletions(-)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 96f4028f7423..3a76241d880f 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -656,12 +656,11 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	__adldst_l	str, \src, \sym, \tmp, \cond
 	.endm
 
-	.macro		__ldst_va, op, reg, tmp, sym, cond
+	.macro		__ldst_va, op, reg, tmp, sym, cond, offset
 #if __LINUX_ARM_ARCH__ >= 7 || \
     !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
     (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
 	mov_l		\tmp, \sym, \cond
-	\op\cond	\reg, [\tmp]
 #else
 	/*
 	 * Avoid a literal load, by emitting a sequence of ADD/LDR instructions
@@ -673,20 +672,21 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	.reloc		.L0_\@, R_ARM_ALU_PC_G0_NC, \sym
 	.reloc		.L1_\@, R_ARM_ALU_PC_G1_NC, \sym
 	.reloc		.L2_\@, R_ARM_LDR_PC_G2, \sym
-.L0_\@: sub\cond	\tmp, pc, #8
-.L1_\@: sub\cond	\tmp, \tmp, #4
-.L2_\@: \op\cond	\reg, [\tmp, #0]
+.L0_\@: sub\cond	\tmp, pc, #8 - \offset
+.L1_\@: sub\cond	\tmp, \tmp, #4 - \offset
+.L2_\@:
 #endif
+	\op\cond	\reg, [\tmp, #\offset]
 	.endm
 
 	/*
 	 * ldr_va - load a 32-bit word from the virtual address of \sym
 	 */
-	.macro		ldr_va, rd:req, sym:req, cond, tmp
+	.macro		ldr_va, rd:req, sym:req, cond, tmp, offset=0
 	.ifnb		\tmp
-	__ldst_va	ldr, \rd, \tmp, \sym, \cond
+	__ldst_va	ldr, \rd, \tmp, \sym, \cond, \offset
 	.else
-	__ldst_va	ldr, \rd, \rd, \sym, \cond
+	__ldst_va	ldr, \rd, \rd, \sym, \cond, \offset
 	.endif
 	.endm
 
@@ -694,7 +694,7 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	 * str_va - store a 32-bit word to the virtual address of \sym
 	 */
 	.macro		str_va, rn:req, sym:req, tmp:req, cond
-	__ldst_va	str, \rn, \tmp, \sym, \cond
+	__ldst_va	str, \rn, \tmp, \sym, \cond, 0
 	.endm
 
 	/*
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 5609ca8ae46a..c88a1b5c0ca5 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -90,9 +90,8 @@ UNWIND(	.setfp	fpreg, sp		)
 	.macro	pabt_helper
 	@ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
 #ifdef MULTI_PABORT
-	ldr	ip, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [ip, #PROCESSOR_PABT_FUNC]
+	ldr_va	ip, processor, offset=PROCESSOR_PABT_FUNC
+	bl_r	ip
 #else
 	bl	CPU_PABORT_HANDLER
 #endif
@@ -111,9 +110,8 @@ UNWIND(	.setfp	fpreg, sp		)
 	@ the fault status register in r1.  r9 must be preserved.
 	@
 #ifdef MULTI_DABORT
-	ldr	ip, .LCprocfns
-	mov	lr, pc
-	ldr	pc, [ip, #PROCESSOR_DABT_FUNC]
+	ldr_va	ip, processor, offset=PROCESSOR_DABT_FUNC
+	bl_r	ip
 #else
 	bl	CPU_DABORT_HANDLER
 #endif
@@ -331,16 +329,6 @@ __fiq_svc:
  UNWIND(.fnend		)
 ENDPROC(__fiq_svc)
 
-	.align	5
-.LCcralign:
-	.word	cr_alignment
-#ifdef MULTI_DABORT
-.LCprocfns:
-	.word	processor
-#endif
-.LCfp:
-	.word	fp_enter
-
 /*
  * Abort mode handlers
  */
@@ -399,7 +387,7 @@ ENDPROC(__fiq_abt)
  THUMB(	stmia	sp, {r0 - r12}	)
 
  ATRAP(	mrc	p15, 0, r7, c1, c0, 0)
- ATRAP(	ldr	r8, .LCcralign)
+ ATRAP(	ldr_va	r8, cr_alignment)
 
 	ldmia	r0, {r3 - r5}
 	add	r0, sp, #S_PC		@ here for interlock avoidance
@@ -408,8 +396,6 @@ ENDPROC(__fiq_abt)
 	str	r3, [sp]		@ save the "real" r0 copied
 					@ from the exception stack
 
- ATRAP(	ldr	r8, [r8, #0])
-
 	@
 	@ We are now ready to fill in the remaining blanks on the stack:
 	@
@@ -534,9 +520,7 @@ __und_usr_thumb:
  */
 #if __LINUX_ARM_ARCH__ < 7
 /* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */
-#define NEED_CPU_ARCHITECTURE
-	ldr	r5, .LCcpu_architecture
-	ldr	r5, [r5]
+	ldr_va	r5, __cpu_architecture		@ r5 = value stored at __cpu_architecture
 	cmp	r5, #CPU_ARCH_ARMv7
 	blo	__und_usr_fault_16		@ 16bit undefined instruction
 /*
@@ -683,12 +667,6 @@ call_fpe:
 	ret.w	lr				@ CP#14 (Debug)
 	ret.w	lr				@ CP#15 (Control)
 
-#ifdef NEED_CPU_ARCHITECTURE
-	.align	2
-.LCcpu_architecture:
-	.word	__cpu_architecture
-#endif
-
 #ifdef CONFIG_NEON
 	.align	6
 
@@ -714,9 +692,8 @@ call_fpe:
 #endif
 
 do_fpe:
-	ldr	r4, .LCfp
 	add	r10, r10, #TI_FPSTATE		@ r10 = workspace
-	ldr	pc, [r4]			@ Call FP module USR entry point
+	ldr_va	pc, fp_enter, tmp=r4		@ Call FP module USR entry point
 
 /*
  * The FP module is called with these registers set:
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index c928d6b04cce..f48ef2378d9b 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -174,7 +174,7 @@ ENTRY(vector_swi)
 #endif
 	reload_current r10, ip
 	zero_fp
-	alignment_trap r10, ip, __cr_alignment
+	alignment_trap r10, ip, cr_alignment
 	asm_trace_hardirqs_on save=0
 	enable_irq_notrace
 	ct_user_exit save=0
@@ -304,14 +304,6 @@ __sys_trace_return:
 	bl	syscall_trace_exit
 	b	ret_slow_syscall
 
-	.align	5
-#ifdef CONFIG_ALIGNMENT_TRAP
-	.type	__cr_alignment, #object
-__cr_alignment:
-	.word	cr_alignment
-#endif
-	.ltorg
-
 	.macro	syscall_table_start, sym
 	.equ	__sys_nr, 0
 	.type	\sym, #object
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 9a1dc142f782..5865621bf691 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -48,8 +48,7 @@
 	.macro	alignment_trap, rtmp1, rtmp2, label
 #ifdef CONFIG_ALIGNMENT_TRAP
 	mrc	p15, 0, \rtmp2, c1, c0, 0
-	ldr	\rtmp1, \label
-	ldr	\rtmp1, [\rtmp1]
+	ldr_va	\rtmp1, \label
 	teq	\rtmp1, \rtmp2
 	mcrne	p15, 0, \rtmp1, c1, c0, 0
 #endif
-- 
2.30.2


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  parent reply	other threads:[~2022-03-01 12:06 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-01 12:04 [PATCH 0/2] ARM: optimize some variable accesses Ard Biesheuvel
2022-03-01 12:04 ` [PATCH 1/2] ARM: assembler: simplify ldr_this_cpu for !SMP builds Ard Biesheuvel
2022-03-02 11:33   ` Linus Walleij
2022-03-01 12:04 ` Ard Biesheuvel [this message]
2022-03-02 11:42   ` [PATCH 2/2] ARM: entry: avoid explicit literal loads Linus Walleij

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220301120440.994447-3-ardb@kernel.org \
    --to=ardb@kernel.org \
    --cc=arnd@arndb.de \
    --cc=linus.walleij@linaro.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux@armlinux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.