All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ard Biesheuvel <ardb@kernel.org>
To: linux-arm-kernel@lists.infradead.org
Cc: linux-efi@vger.kernel.org, Ard Biesheuvel <ardb@kernel.org>,
	Zhen Lei <thunder.leizhen@huawei.com>,
	Russell King <rmk+kernel@armlinux.org.uk>,
	Santosh Shilimkar <santosh.shilimkar@ti.com>,
	Linus Walleij <linus.walleij@linaro.org>,
	Nicolas Pitre <nico@fluxnic.net>
Subject: [RFC/RFT PATCH 5/6] ARM: p2v: switch to MOVW for Thumb2 and ARM/LPAE
Date: Fri, 18 Sep 2020 13:31:01 +0300	[thread overview]
Message-ID: <20200918103102.18107-6-ardb@kernel.org> (raw)
In-Reply-To: <20200918103102.18107-1-ardb@kernel.org>

In preparation for reducing the phys-to-virt minimum relative alignment
from 16 MiB to 2 MiB, switch to patchable sequences involving MOVW
instructions that can more easily be manipulated to carry a 12-bit
immediate. Note that the non-LPAE ARM sequence is not updated: MOVW
may not be supported on non-LPAE platforms, and the sequence itself
can be updated more easily to apply the 12 bits of displacement.

For Thumb2, which has many more opcode encodings, switch to a sequence
that can be patched by the same patching code for both the LPAE and
non-LPAE versions, and use asm constraints and S-suffixed opcodes to
force the narrow encodings to be selected.

Suggested-by: Russell King <linux@armlinux.org.uk>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/include/asm/memory.h | 43 +++++++++++----
 arch/arm/kernel/head.S        | 57 +++++++++++++-------
 2 files changed, 69 insertions(+), 31 deletions(-)

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 4121662dea5a..7184a2540816 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -183,6 +183,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
 #define PHYS_OFFSET	((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
 #define PHYS_PFN_OFFSET	(__pv_phys_pfn_offset)
 
+#ifndef CONFIG_THUMB2_KERNEL
 #define __pv_stub(from,to,instr)			\
 	__asm__("@ __pv_stub\n"				\
 	"1:	" instr "	%0, %1, %2\n"		\
@@ -192,25 +193,46 @@ extern const void *__pv_table_begin, *__pv_table_end;
 	: "=r" (to)					\
 	: "r" (from), "I" (__PV_BITS_31_24))
 
-#define __pv_stub_mov_hi(t)				\
-	__asm__ volatile("@ __pv_stub_mov\n"		\
-	"1:	mov	%R0, %1\n"			\
+#define __pv_add_carry_stub(x, y)			\
+	__asm__ volatile("@ __pv_add_carry_stub\n"	\
+	"0:	movw	%R0, %2\n"			\
+	"1:	adds	%Q0, %1, %R0, lsl #24\n"	\
+	"2:	mov	%R0, %3\n"			\
+	"	adc	%R0, %R0, #0\n"			\
 	"	.pushsection .pv_table,\"a\"\n"		\
-	"	.long	1b - .\n"			\
+	"	.long	0b - ., 1b - ., 2b - .\n"	\
 	"	.popsection\n"				\
-	: "=r" (t)					\
-	: "I" (__PV_BITS_7_0))
+	: "=&r" (y)					\
+	: "r" (x), "j" (0), "I" (__PV_BITS_7_0)		\
+	: "cc")
+
+#else
+#define __pv_stub(from,to,instr)			\
+	__asm__("@ __pv_stub\n"				\
+	"0:	movw	%0, %2\n"			\
+	"	lsls	%0, #24\n"			\
+	"	" instr "s %0, %1, %0\n"		\
+	"	.pushsection .pv_table,\"a\"\n"		\
+	"	.long	0b - .\n"			\
+	"	.popsection\n"				\
+	: "=&l" (to)					\
+	: "l" (from), "j" (0)				\
+	: "cc")
 
 #define __pv_add_carry_stub(x, y)			\
 	__asm__ volatile("@ __pv_add_carry_stub\n"	\
-	"1:	adds	%Q0, %1, %2\n"			\
+	"0:	movw	%R0, %2\n"			\
+	"	lsls	%R0, #24\n"			\
+	"	adds	%Q0, %1, %R0\n"			\
+	"1:	mvn	%R0, #0\n"			\
 	"	adc	%R0, %R0, #0\n"			\
 	"	.pushsection .pv_table,\"a\"\n"		\
-	"	.long	1b - .\n"			\
+	"	.long	0b - ., 1b - .\n"		\
 	"	.popsection\n"				\
-	: "+r" (y)					\
-	: "r" (x), "I" (__PV_BITS_31_24)		\
+	: "=&l" (y)					\
+	: "l" (x), "j" (0)				\
 	: "cc")
+#endif
 
 static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
 {
@@ -219,7 +241,6 @@ static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
 	if (sizeof(phys_addr_t) == 4) {
 		__pv_stub(x, t, "add");
 	} else {
-		__pv_stub_mov_hi(t);
 		__pv_add_carry_stub(x, t);
 	}
 	return t;
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index d2bd3b258386..86cea608a5ea 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -639,43 +639,45 @@ __fixup_a_pv_table:
 	mov	r6, r6, lsr #24
 	cmn	r0, #1
 #ifdef CONFIG_THUMB2_KERNEL
-	moveq	r0, #0x200000	@ set bit 21, mov to mvn instruction
-	lsls	r6, #24
-	beq	.Lnext
-	clz	r7, r6
-	lsr	r6, #24
-	lsl	r6, r7
-	bic	r6, #0x0080
-	lsrs	r7, #1
-	orrcs	r6, #0x0080
-	orr	r6, r6, r7, lsl #12
-	orr	r6, #0x4000
+	moveq	r0, #0x200	@ bit 9, ADD to SUB instruction (T1 encoding)
 	b	.Lnext
 .Lloop:	add	r7, r4
 	add	r4, #4
+#ifdef CONFIG_ARM_LPAE
+	ldrh	ip, [r7]
+ARM_BE8(rev16	ip, ip)
+	tst	ip, #0x200	@ MOVW has bit 9 set, MVN has it clear
+	bne	0f		@ skip if MOVW
+	tst	r0, #0x200	@ need to convert MVN to MOV ?
+	bne	.Lnext
+	eor	ip, ip, #0x20	@ flick bit #5
+ARM_BE8(rev16	ip, ip)
+	strh	ip, [r7]
+	b	.Lnext
+0:
+#endif
 	ldrh	ip, [r7, #2]
 ARM_BE8(rev16	ip, ip)
-	tst	ip, #0x4000
-	and	ip, #0x8f00
-	orrne	ip, r6	@ mask in offset bits 31-24
-	orreq	ip, r0	@ mask in offset bits 7-0
+	orr	ip, r6	@ mask in offset bits 31-24
 ARM_BE8(rev16	ip, ip)
 	strh	ip, [r7, #2]
-	bne	.Lnext
-	ldrh	ip, [r7]
+	ldrh	ip, [r7, #6]
 ARM_BE8(rev16	ip, ip)
-	bic	ip, #0x20
-	orr	ip, ip, r0, lsr #16
+	eor	ip, ip, r0
 ARM_BE8(rev16	ip, ip)
-	strh	ip, [r7]
+	strh	ip, [r7, #6]
 #else
 #ifdef CONFIG_CPU_ENDIAN_BE8
 @ in BE8, we load data in BE, but instructions still in LE
+#define PV_BIT20	0x00001000
 #define PV_BIT22	0x00004000
+#define PV_BIT23_22	0x0000c000
 #define PV_IMM8_MASK	0xff000000
 #define PV_ROT_MASK	0x000f0000
 #else
+#define PV_BIT20	0x00100000
 #define PV_BIT22	0x00400000
+#define PV_BIT23_22	0x00c00000
 #define PV_IMM8_MASK	0x000000ff
 #define PV_ROT_MASK	0xf00
 #endif
@@ -683,11 +685,26 @@ ARM_BE8(rev16	ip, ip)
 	moveq	r0, #PV_BIT22	@ set bit 22, mov to mvn instruction
 	b	.Lnext
 .Lloop:	ldr	ip, [r7, r4]
+#ifdef CONFIG_ARM_LPAE
+	tst	ip, #PV_BIT23_22	@ MOVW has bit 23:22 clear, MOV/ADD/SUB have it set
+ARM_BE8(rev	ip, ip)
+	orreq	ip, ip, r6
+ARM_BE8(rev	ip, ip)
+	beq	2f
+	tst	ip, #PV_BIT20		@ ADDS has bit 20 set
+	beq	1f
+	tst	r0, #PV_BIT22		@ check whether to invert bits 23:22 (ADD -> SUB)
+	beq	.Lnext
+	eor	ip, ip, #PV_BIT23_22
+	b	2f
+1:
+#endif
 	bic	ip, ip, #PV_IMM8_MASK
 	tst	ip, #PV_ROT_MASK		@ check the rotation field
 	orrne	ip, ip, r6 ARM_BE8(, lsl #24)	@ mask in offset bits 31-24
 	biceq	ip, ip, #PV_BIT22		@ clear bit 22
 	orreq	ip, ip, r0			@ mask in offset bits 7-0
+2:
 	str	ip, [r7, r4]
 	add	r4, r4, #4
 #endif
-- 
2.17.1


WARNING: multiple messages have this Message-ID (diff)
From: Ard Biesheuvel <ardb@kernel.org>
To: linux-arm-kernel@lists.infradead.org
Cc: linux-efi@vger.kernel.org, Nicolas Pitre <nico@fluxnic.net>,
	Linus Walleij <linus.walleij@linaro.org>,
	Russell King <rmk+kernel@armlinux.org.uk>,
	Santosh Shilimkar <santosh.shilimkar@ti.com>,
	Zhen Lei <thunder.leizhen@huawei.com>,
	Ard Biesheuvel <ardb@kernel.org>
Subject: [RFC/RFT PATCH 5/6] ARM: p2v: switch to MOVW for Thumb2 and ARM/LPAE
Date: Fri, 18 Sep 2020 13:31:01 +0300	[thread overview]
Message-ID: <20200918103102.18107-6-ardb@kernel.org> (raw)
In-Reply-To: <20200918103102.18107-1-ardb@kernel.org>

In preparation for reducing the phys-to-virt minimum relative alignment
from 16 MiB to 2 MiB, switch to patchable sequences involving MOVW
instructions that can more easily be manipulated to carry a 12-bit
immediate. Note that the non-LPAE ARM sequence is not updated: MOVW
may not be supported on non-LPAE platforms, and the sequence itself
can be updated more easily to apply the 12 bits of displacement.

For Thumb2, which has many more opcode encodings, switch to a sequence
that can be patched by the same patching code for both the LPAE and
non-LPAE versions, and use asm constraints and S-suffixed opcodes to
force the narrow encodings to be selected.

Suggested-by: Russell King <linux@armlinux.org.uk>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/include/asm/memory.h | 43 +++++++++++----
 arch/arm/kernel/head.S        | 57 +++++++++++++-------
 2 files changed, 69 insertions(+), 31 deletions(-)

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 4121662dea5a..7184a2540816 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -183,6 +183,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
 #define PHYS_OFFSET	((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
 #define PHYS_PFN_OFFSET	(__pv_phys_pfn_offset)
 
+#ifndef CONFIG_THUMB2_KERNEL
 #define __pv_stub(from,to,instr)			\
 	__asm__("@ __pv_stub\n"				\
 	"1:	" instr "	%0, %1, %2\n"		\
@@ -192,25 +193,46 @@ extern const void *__pv_table_begin, *__pv_table_end;
 	: "=r" (to)					\
 	: "r" (from), "I" (__PV_BITS_31_24))
 
-#define __pv_stub_mov_hi(t)				\
-	__asm__ volatile("@ __pv_stub_mov\n"		\
-	"1:	mov	%R0, %1\n"			\
+#define __pv_add_carry_stub(x, y)			\
+	__asm__ volatile("@ __pv_add_carry_stub\n"	\
+	"0:	movw	%R0, %2\n"			\
+	"1:	adds	%Q0, %1, %R0, lsl #24\n"	\
+	"2:	mov	%R0, %3\n"			\
+	"	adc	%R0, %R0, #0\n"			\
 	"	.pushsection .pv_table,\"a\"\n"		\
-	"	.long	1b - .\n"			\
+	"	.long	0b - ., 1b - ., 2b - .\n"	\
 	"	.popsection\n"				\
-	: "=r" (t)					\
-	: "I" (__PV_BITS_7_0))
+	: "=&r" (y)					\
+	: "r" (x), "j" (0), "I" (__PV_BITS_7_0)		\
+	: "cc")
+
+#else
+#define __pv_stub(from,to,instr)			\
+	__asm__("@ __pv_stub\n"				\
+	"0:	movw	%0, %2\n"			\
+	"	lsls	%0, #24\n"			\
+	"	" instr "s %0, %1, %0\n"		\
+	"	.pushsection .pv_table,\"a\"\n"		\
+	"	.long	0b - .\n"			\
+	"	.popsection\n"				\
+	: "=&l" (to)					\
+	: "l" (from), "j" (0)				\
+	: "cc")
 
 #define __pv_add_carry_stub(x, y)			\
 	__asm__ volatile("@ __pv_add_carry_stub\n"	\
-	"1:	adds	%Q0, %1, %2\n"			\
+	"0:	movw	%R0, %2\n"			\
+	"	lsls	%R0, #24\n"			\
+	"	adds	%Q0, %1, %R0\n"			\
+	"1:	mvn	%R0, #0\n"			\
 	"	adc	%R0, %R0, #0\n"			\
 	"	.pushsection .pv_table,\"a\"\n"		\
-	"	.long	1b - .\n"			\
+	"	.long	0b - ., 1b - .\n"		\
 	"	.popsection\n"				\
-	: "+r" (y)					\
-	: "r" (x), "I" (__PV_BITS_31_24)		\
+	: "=&l" (y)					\
+	: "l" (x), "j" (0)				\
 	: "cc")
+#endif
 
 static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
 {
@@ -219,7 +241,6 @@ static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
 	if (sizeof(phys_addr_t) == 4) {
 		__pv_stub(x, t, "add");
 	} else {
-		__pv_stub_mov_hi(t);
 		__pv_add_carry_stub(x, t);
 	}
 	return t;
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index d2bd3b258386..86cea608a5ea 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -639,43 +639,45 @@ __fixup_a_pv_table:
 	mov	r6, r6, lsr #24
 	cmn	r0, #1
 #ifdef CONFIG_THUMB2_KERNEL
-	moveq	r0, #0x200000	@ set bit 21, mov to mvn instruction
-	lsls	r6, #24
-	beq	.Lnext
-	clz	r7, r6
-	lsr	r6, #24
-	lsl	r6, r7
-	bic	r6, #0x0080
-	lsrs	r7, #1
-	orrcs	r6, #0x0080
-	orr	r6, r6, r7, lsl #12
-	orr	r6, #0x4000
+	moveq	r0, #0x200	@ bit 9, ADD to SUB instruction (T1 encoding)
 	b	.Lnext
 .Lloop:	add	r7, r4
 	add	r4, #4
+#ifdef CONFIG_ARM_LPAE
+	ldrh	ip, [r7]
+ARM_BE8(rev16	ip, ip)
+	tst	ip, #0x200	@ MOVW has bit 9 set, MVN has it clear
+	bne	0f		@ skip if MOVW
+	tst	r0, #0x200	@ need to convert MVN to MOV ?
+	bne	.Lnext
+	eor	ip, ip, #0x20	@ flick bit #5
+ARM_BE8(rev16	ip, ip)
+	strh	ip, [r7]
+	b	.Lnext
+0:
+#endif
 	ldrh	ip, [r7, #2]
 ARM_BE8(rev16	ip, ip)
-	tst	ip, #0x4000
-	and	ip, #0x8f00
-	orrne	ip, r6	@ mask in offset bits 31-24
-	orreq	ip, r0	@ mask in offset bits 7-0
+	orr	ip, r6	@ mask in offset bits 31-24
 ARM_BE8(rev16	ip, ip)
 	strh	ip, [r7, #2]
-	bne	.Lnext
-	ldrh	ip, [r7]
+	ldrh	ip, [r7, #6]
 ARM_BE8(rev16	ip, ip)
-	bic	ip, #0x20
-	orr	ip, ip, r0, lsr #16
+	eor	ip, ip, r0
 ARM_BE8(rev16	ip, ip)
-	strh	ip, [r7]
+	strh	ip, [r7, #6]
 #else
 #ifdef CONFIG_CPU_ENDIAN_BE8
 @ in BE8, we load data in BE, but instructions still in LE
+#define PV_BIT20	0x00001000
 #define PV_BIT22	0x00004000
+#define PV_BIT23_22	0x0000c000
 #define PV_IMM8_MASK	0xff000000
 #define PV_ROT_MASK	0x000f0000
 #else
+#define PV_BIT20	0x00100000
 #define PV_BIT22	0x00400000
+#define PV_BIT23_22	0x00c00000
 #define PV_IMM8_MASK	0x000000ff
 #define PV_ROT_MASK	0xf00
 #endif
@@ -683,11 +685,26 @@ ARM_BE8(rev16	ip, ip)
 	moveq	r0, #PV_BIT22	@ set bit 22, mov to mvn instruction
 	b	.Lnext
 .Lloop:	ldr	ip, [r7, r4]
+#ifdef CONFIG_ARM_LPAE
+	tst	ip, #PV_BIT23_22	@ MOVW has bit 23:22 clear, MOV/ADD/SUB have it set
+ARM_BE8(rev	ip, ip)
+	orreq	ip, ip, r6
+ARM_BE8(rev	ip, ip)
+	beq	2f
+	tst	ip, #PV_BIT20		@ ADDS has bit 20 set
+	beq	1f
+	tst	r0, #PV_BIT22		@ check whether to invert bits 23:22 (ADD -> SUB)
+	beq	.Lnext
+	eor	ip, ip, #PV_BIT23_22
+	b	2f
+1:
+#endif
 	bic	ip, ip, #PV_IMM8_MASK
 	tst	ip, #PV_ROT_MASK		@ check the rotation field
 	orrne	ip, ip, r6 ARM_BE8(, lsl #24)	@ mask in offset bits 31-24
 	biceq	ip, ip, #PV_BIT22		@ clear bit 22
 	orreq	ip, ip, r0			@ mask in offset bits 7-0
+2:
 	str	ip, [r7, r4]
 	add	r4, r4, #4
 #endif
-- 
2.17.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  parent reply	other threads:[~2020-09-18 10:31 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-18 10:30 [RFC/RFT PATCH 0/6] ARM: p2v: reduce min alignment to 2 MiB Ard Biesheuvel
2020-09-18 10:30 ` Ard Biesheuvel
2020-09-18 10:30 ` [RFC/RFT PATCH 1/6] ARM: p2v: factor out shared loop processing Ard Biesheuvel
2020-09-18 10:30   ` Ard Biesheuvel
2020-09-18 10:30 ` [RFC/RFT PATCH 2/6] ARM: p2v: factor out BE8 handling Ard Biesheuvel
2020-09-18 10:30   ` Ard Biesheuvel
2020-09-18 10:30 ` [RFC/RFT PATCH 3/6] ARM: p2v: drop redundant 'type' argument from __pv_stub Ard Biesheuvel
2020-09-18 10:30   ` Ard Biesheuvel
2020-09-18 10:31 ` [RFC/RFT PATCH 4/6] ARM: p2v: use relative references in patch site arrays Ard Biesheuvel
2020-09-18 10:31   ` Ard Biesheuvel
2020-09-18 10:31 ` Ard Biesheuvel [this message]
2020-09-18 10:31   ` [RFC/RFT PATCH 5/6] ARM: p2v: switch to MOVW for Thumb2 and ARM/LPAE Ard Biesheuvel
2020-09-18 10:31 ` [RFC/RFT PATCH 6/6] ARM: p2v: reduce p2v alignment requirement to 2 MiB Ard Biesheuvel
2020-09-18 10:31   ` Ard Biesheuvel
2020-09-18 17:25 ` [RFC/RFT PATCH 0/6] ARM: p2v: reduce min alignment " Ard Biesheuvel
2020-09-18 17:25   ` Ard Biesheuvel
2020-09-19 23:49 ` Nicolas Pitre
2020-09-19 23:49   ` Nicolas Pitre
2020-09-20  7:50   ` Ard Biesheuvel
2020-09-20  7:50     ` Ard Biesheuvel
2020-09-20  8:57     ` Russell King - ARM Linux admin
2020-09-20  8:57       ` Russell King - ARM Linux admin
2020-09-20 10:06       ` Ard Biesheuvel
2020-09-20 10:06         ` Ard Biesheuvel
2020-09-20 15:34         ` Nicolas Pitre
2020-09-20 15:34           ` Nicolas Pitre
2020-09-20  8:55   ` Russell King - ARM Linux admin
2020-09-20  8:55     ` Russell King - ARM Linux admin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200918103102.18107-6-ardb@kernel.org \
    --to=ardb@kernel.org \
    --cc=linus.walleij@linaro.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-efi@vger.kernel.org \
    --cc=nico@fluxnic.net \
    --cc=rmk+kernel@armlinux.org.uk \
    --cc=santosh.shilimkar@ti.com \
    --cc=thunder.leizhen@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.