From: Ard Biesheuvel <ardb@kernel.org>
To: linux-arm-kernel@lists.infradead.org
Cc: linux-efi@vger.kernel.org, Ard Biesheuvel <ardb@kernel.org>,
Zhen Lei <thunder.leizhen@huawei.com>,
Russell King <rmk+kernel@armlinux.org.uk>,
Santosh Shilimkar <santosh.shilimkar@ti.com>,
Linus Walleij <linus.walleij@linaro.org>,
Nicolas Pitre <nico@fluxnic.net>
Subject: [RFC/RFT PATCH 5/6] ARM: p2v: switch to MOVW for Thumb2 and ARM/LPAE
Date: Fri, 18 Sep 2020 13:31:01 +0300 [thread overview]
Message-ID: <20200918103102.18107-6-ardb@kernel.org> (raw)
In-Reply-To: <20200918103102.18107-1-ardb@kernel.org>
In preparation for reducing the phys-to-virt minimum relative alignment
from 16 MiB to 2 MiB, switch to patchable sequences involving MOVW
instructions that can more easily be manipulated to carry a 12-bit
immediate. Note that the non-LPAE ARM sequence is not updated: MOVW
may not be supported on non-LPAE platforms, and the sequence itself
can be updated more easily to apply the 12 bits of displacement.
For Thumb2, which has many more encodings of each opcode, switch to a sequence
that can be patched by the same patching code for both the LPAE and non-LPAE
versions, and use asm constraints and S-suffixed opcodes to force narrow
encodings to be selected.
Suggested-by: Russell King <linux@armlinux.org.uk>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/arm/include/asm/memory.h | 43 +++++++++++----
arch/arm/kernel/head.S | 57 +++++++++++++-------
2 files changed, 69 insertions(+), 31 deletions(-)
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 4121662dea5a..7184a2540816 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -183,6 +183,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
#define PHYS_OFFSET ((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
#define PHYS_PFN_OFFSET (__pv_phys_pfn_offset)
+#ifndef CONFIG_THUMB2_KERNEL
#define __pv_stub(from,to,instr) \
__asm__("@ __pv_stub\n" \
"1: " instr " %0, %1, %2\n" \
@@ -192,25 +193,46 @@ extern const void *__pv_table_begin, *__pv_table_end;
: "=r" (to) \
: "r" (from), "I" (__PV_BITS_31_24))
-#define __pv_stub_mov_hi(t) \
- __asm__ volatile("@ __pv_stub_mov\n" \
- "1: mov %R0, %1\n" \
+#define __pv_add_carry_stub(x, y) \
+ __asm__ volatile("@ __pv_add_carry_stub\n" \
+ "0: movw %R0, %2\n" \
+ "1: adds %Q0, %1, %R0, lsl #24\n" \
+ "2: mov %R0, %3\n" \
+ " adc %R0, %R0, #0\n" \
" .pushsection .pv_table,\"a\"\n" \
- " .long 1b - .\n" \
+ " .long 0b - ., 1b - ., 2b - .\n" \
" .popsection\n" \
- : "=r" (t) \
- : "I" (__PV_BITS_7_0))
+ : "=&r" (y) \
+ : "r" (x), "j" (0), "I" (__PV_BITS_7_0) \
+ : "cc")
+
+#else
+#define __pv_stub(from,to,instr) \
+ __asm__("@ __pv_stub\n" \
+ "0: movw %0, %2\n" \
+ " lsls %0, #24\n" \
+ " " instr "s %0, %1, %0\n" \
+ " .pushsection .pv_table,\"a\"\n" \
+ " .long 0b - .\n" \
+ " .popsection\n" \
+ : "=&l" (to) \
+ : "l" (from), "j" (0) \
+ : "cc")
#define __pv_add_carry_stub(x, y) \
__asm__ volatile("@ __pv_add_carry_stub\n" \
- "1: adds %Q0, %1, %2\n" \
+ "0: movw %R0, %2\n" \
+ " lsls %R0, #24\n" \
+ " adds %Q0, %1, %R0\n" \
+ "1: mvn %R0, #0\n" \
" adc %R0, %R0, #0\n" \
" .pushsection .pv_table,\"a\"\n" \
- " .long 1b - .\n" \
+ " .long 0b - ., 1b - .\n" \
" .popsection\n" \
- : "+r" (y) \
- : "r" (x), "I" (__PV_BITS_31_24) \
+ : "=&l" (y) \
+ : "l" (x), "j" (0) \
: "cc")
+#endif
static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
{
@@ -219,7 +241,6 @@ static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
if (sizeof(phys_addr_t) == 4) {
__pv_stub(x, t, "add");
} else {
- __pv_stub_mov_hi(t);
__pv_add_carry_stub(x, t);
}
return t;
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index d2bd3b258386..86cea608a5ea 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -639,43 +639,45 @@ __fixup_a_pv_table:
mov r6, r6, lsr #24
cmn r0, #1
#ifdef CONFIG_THUMB2_KERNEL
- moveq r0, #0x200000 @ set bit 21, mov to mvn instruction
- lsls r6, #24
- beq .Lnext
- clz r7, r6
- lsr r6, #24
- lsl r6, r7
- bic r6, #0x0080
- lsrs r7, #1
- orrcs r6, #0x0080
- orr r6, r6, r7, lsl #12
- orr r6, #0x4000
+ moveq r0, #0x200 @ bit 9, ADD to SUB instruction (T1 encoding)
b .Lnext
.Lloop: add r7, r4
add r4, #4
+#ifdef CONFIG_ARM_LPAE
+ ldrh ip, [r7]
+ARM_BE8(rev16 ip, ip)
+ tst ip, #0x200 @ MOVW has bit 9 set, MVN has it clear
+ bne 0f @ skip if MOVW
+ tst r0, #0x200 @ need to convert MVN to MOV ?
+ bne .Lnext
+ eor ip, ip, #0x20 @ flip bit #5
+ARM_BE8(rev16 ip, ip)
+ strh ip, [r7]
+ b .Lnext
+0:
+#endif
ldrh ip, [r7, #2]
ARM_BE8(rev16 ip, ip)
- tst ip, #0x4000
- and ip, #0x8f00
- orrne ip, r6 @ mask in offset bits 31-24
- orreq ip, r0 @ mask in offset bits 7-0
+ orr ip, r6 @ mask in offset bits 31-24
ARM_BE8(rev16 ip, ip)
strh ip, [r7, #2]
- bne .Lnext
- ldrh ip, [r7]
+ ldrh ip, [r7, #6]
ARM_BE8(rev16 ip, ip)
- bic ip, #0x20
- orr ip, ip, r0, lsr #16
+ eor ip, ip, r0
ARM_BE8(rev16 ip, ip)
- strh ip, [r7]
+ strh ip, [r7, #6]
#else
#ifdef CONFIG_CPU_ENDIAN_BE8
@ in BE8, we load data in BE, but instructions still in LE
+#define PV_BIT20 0x00001000
#define PV_BIT22 0x00004000
+#define PV_BIT23_22 0x0000c000
#define PV_IMM8_MASK 0xff000000
#define PV_ROT_MASK 0x000f0000
#else
+#define PV_BIT20 0x00100000
#define PV_BIT22 0x00400000
+#define PV_BIT23_22 0x00c00000
#define PV_IMM8_MASK 0x000000ff
#define PV_ROT_MASK 0xf00
#endif
@@ -683,11 +685,26 @@ ARM_BE8(rev16 ip, ip)
moveq r0, #PV_BIT22 @ set bit 22, mov to mvn instruction
b .Lnext
.Lloop: ldr ip, [r7, r4]
+#ifdef CONFIG_ARM_LPAE
+ tst ip, #PV_BIT23_22 @ MOVW has bits 23:22 clear, MOV/ADD/SUB have at least one set
+ARM_BE8(rev ip, ip)
+ orreq ip, ip, r6
+ARM_BE8(rev ip, ip)
+ beq 2f
+ tst ip, #PV_BIT20 @ ADDS has bit 20 set
+ beq 1f
+ tst r0, #PV_BIT22 @ check whether to invert bits 23:22 (ADD -> SUB)
+ beq .Lnext
+ eor ip, ip, #PV_BIT23_22
+ b 2f
+1:
+#endif
bic ip, ip, #PV_IMM8_MASK
tst ip, #PV_ROT_MASK @ check the rotation field
orrne ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24
biceq ip, ip, #PV_BIT22 @ clear bit 22
orreq ip, ip, r0 @ mask in offset bits 7-0
+2:
str ip, [r7, r4]
add r4, r4, #4
#endif
--
2.17.1
WARNING: multiple messages have this Message-ID (diff)
From: Ard Biesheuvel <ardb@kernel.org>
To: linux-arm-kernel@lists.infradead.org
Cc: linux-efi@vger.kernel.org, Nicolas Pitre <nico@fluxnic.net>,
Linus Walleij <linus.walleij@linaro.org>,
Russell King <rmk+kernel@armlinux.org.uk>,
Santosh Shilimkar <santosh.shilimkar@ti.com>,
Zhen Lei <thunder.leizhen@huawei.com>,
Ard Biesheuvel <ardb@kernel.org>
Subject: [RFC/RFT PATCH 5/6] ARM: p2v: switch to MOVW for Thumb2 and ARM/LPAE
Date: Fri, 18 Sep 2020 13:31:01 +0300 [thread overview]
Message-ID: <20200918103102.18107-6-ardb@kernel.org> (raw)
In-Reply-To: <20200918103102.18107-1-ardb@kernel.org>
In preparation for reducing the phys-to-virt minimum relative alignment
from 16 MiB to 2 MiB, switch to patchable sequences involving MOVW
instructions that can more easily be manipulated to carry a 12-bit
immediate. Note that the non-LPAE ARM sequence is not updated: MOVW
may not be supported on non-LPAE platforms, and the sequence itself
can be updated more easily to apply the 12 bits of displacement.
For Thumb2, which has many more encodings of each opcode, switch to a sequence
that can be patched by the same patching code for both the LPAE and non-LPAE
versions, and use asm constraints and S-suffixed opcodes to force narrow
encodings to be selected.
Suggested-by: Russell King <linux@armlinux.org.uk>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/arm/include/asm/memory.h | 43 +++++++++++----
arch/arm/kernel/head.S | 57 +++++++++++++-------
2 files changed, 69 insertions(+), 31 deletions(-)
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 4121662dea5a..7184a2540816 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -183,6 +183,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
#define PHYS_OFFSET ((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
#define PHYS_PFN_OFFSET (__pv_phys_pfn_offset)
+#ifndef CONFIG_THUMB2_KERNEL
#define __pv_stub(from,to,instr) \
__asm__("@ __pv_stub\n" \
"1: " instr " %0, %1, %2\n" \
@@ -192,25 +193,46 @@ extern const void *__pv_table_begin, *__pv_table_end;
: "=r" (to) \
: "r" (from), "I" (__PV_BITS_31_24))
-#define __pv_stub_mov_hi(t) \
- __asm__ volatile("@ __pv_stub_mov\n" \
- "1: mov %R0, %1\n" \
+#define __pv_add_carry_stub(x, y) \
+ __asm__ volatile("@ __pv_add_carry_stub\n" \
+ "0: movw %R0, %2\n" \
+ "1: adds %Q0, %1, %R0, lsl #24\n" \
+ "2: mov %R0, %3\n" \
+ " adc %R0, %R0, #0\n" \
" .pushsection .pv_table,\"a\"\n" \
- " .long 1b - .\n" \
+ " .long 0b - ., 1b - ., 2b - .\n" \
" .popsection\n" \
- : "=r" (t) \
- : "I" (__PV_BITS_7_0))
+ : "=&r" (y) \
+ : "r" (x), "j" (0), "I" (__PV_BITS_7_0) \
+ : "cc")
+
+#else
+#define __pv_stub(from,to,instr) \
+ __asm__("@ __pv_stub\n" \
+ "0: movw %0, %2\n" \
+ " lsls %0, #24\n" \
+ " " instr "s %0, %1, %0\n" \
+ " .pushsection .pv_table,\"a\"\n" \
+ " .long 0b - .\n" \
+ " .popsection\n" \
+ : "=&l" (to) \
+ : "l" (from), "j" (0) \
+ : "cc")
#define __pv_add_carry_stub(x, y) \
__asm__ volatile("@ __pv_add_carry_stub\n" \
- "1: adds %Q0, %1, %2\n" \
+ "0: movw %R0, %2\n" \
+ " lsls %R0, #24\n" \
+ " adds %Q0, %1, %R0\n" \
+ "1: mvn %R0, #0\n" \
" adc %R0, %R0, #0\n" \
" .pushsection .pv_table,\"a\"\n" \
- " .long 1b - .\n" \
+ " .long 0b - ., 1b - .\n" \
" .popsection\n" \
- : "+r" (y) \
- : "r" (x), "I" (__PV_BITS_31_24) \
+ : "=&l" (y) \
+ : "l" (x), "j" (0) \
: "cc")
+#endif
static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
{
@@ -219,7 +241,6 @@ static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
if (sizeof(phys_addr_t) == 4) {
__pv_stub(x, t, "add");
} else {
- __pv_stub_mov_hi(t);
__pv_add_carry_stub(x, t);
}
return t;
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index d2bd3b258386..86cea608a5ea 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -639,43 +639,45 @@ __fixup_a_pv_table:
mov r6, r6, lsr #24
cmn r0, #1
#ifdef CONFIG_THUMB2_KERNEL
- moveq r0, #0x200000 @ set bit 21, mov to mvn instruction
- lsls r6, #24
- beq .Lnext
- clz r7, r6
- lsr r6, #24
- lsl r6, r7
- bic r6, #0x0080
- lsrs r7, #1
- orrcs r6, #0x0080
- orr r6, r6, r7, lsl #12
- orr r6, #0x4000
+ moveq r0, #0x200 @ bit 9, ADD to SUB instruction (T1 encoding)
b .Lnext
.Lloop: add r7, r4
add r4, #4
+#ifdef CONFIG_ARM_LPAE
+ ldrh ip, [r7]
+ARM_BE8(rev16 ip, ip)
+ tst ip, #0x200 @ MOVW has bit 9 set, MVN has it clear
+ bne 0f @ skip if MOVW
+ tst r0, #0x200 @ need to convert MVN to MOV ?
+ bne .Lnext
+ eor ip, ip, #0x20 @ flip bit #5
+ARM_BE8(rev16 ip, ip)
+ strh ip, [r7]
+ b .Lnext
+0:
+#endif
ldrh ip, [r7, #2]
ARM_BE8(rev16 ip, ip)
- tst ip, #0x4000
- and ip, #0x8f00
- orrne ip, r6 @ mask in offset bits 31-24
- orreq ip, r0 @ mask in offset bits 7-0
+ orr ip, r6 @ mask in offset bits 31-24
ARM_BE8(rev16 ip, ip)
strh ip, [r7, #2]
- bne .Lnext
- ldrh ip, [r7]
+ ldrh ip, [r7, #6]
ARM_BE8(rev16 ip, ip)
- bic ip, #0x20
- orr ip, ip, r0, lsr #16
+ eor ip, ip, r0
ARM_BE8(rev16 ip, ip)
- strh ip, [r7]
+ strh ip, [r7, #6]
#else
#ifdef CONFIG_CPU_ENDIAN_BE8
@ in BE8, we load data in BE, but instructions still in LE
+#define PV_BIT20 0x00001000
#define PV_BIT22 0x00004000
+#define PV_BIT23_22 0x0000c000
#define PV_IMM8_MASK 0xff000000
#define PV_ROT_MASK 0x000f0000
#else
+#define PV_BIT20 0x00100000
#define PV_BIT22 0x00400000
+#define PV_BIT23_22 0x00c00000
#define PV_IMM8_MASK 0x000000ff
#define PV_ROT_MASK 0xf00
#endif
@@ -683,11 +685,26 @@ ARM_BE8(rev16 ip, ip)
moveq r0, #PV_BIT22 @ set bit 22, mov to mvn instruction
b .Lnext
.Lloop: ldr ip, [r7, r4]
+#ifdef CONFIG_ARM_LPAE
+ tst ip, #PV_BIT23_22 @ MOVW has bits 23:22 clear, MOV/ADD/SUB have at least one set
+ARM_BE8(rev ip, ip)
+ orreq ip, ip, r6
+ARM_BE8(rev ip, ip)
+ beq 2f
+ tst ip, #PV_BIT20 @ ADDS has bit 20 set
+ beq 1f
+ tst r0, #PV_BIT22 @ check whether to invert bits 23:22 (ADD -> SUB)
+ beq .Lnext
+ eor ip, ip, #PV_BIT23_22
+ b 2f
+1:
+#endif
bic ip, ip, #PV_IMM8_MASK
tst ip, #PV_ROT_MASK @ check the rotation field
orrne ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24
biceq ip, ip, #PV_BIT22 @ clear bit 22
orreq ip, ip, r0 @ mask in offset bits 7-0
+2:
str ip, [r7, r4]
add r4, r4, #4
#endif
--
2.17.1
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
next prev parent reply other threads:[~2020-09-18 10:31 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-09-18 10:30 [RFC/RFT PATCH 0/6] ARM: p2v: reduce min alignment to 2 MiB Ard Biesheuvel
2020-09-18 10:30 ` Ard Biesheuvel
2020-09-18 10:30 ` [RFC/RFT PATCH 1/6] ARM: p2v: factor out shared loop processing Ard Biesheuvel
2020-09-18 10:30 ` Ard Biesheuvel
2020-09-18 10:30 ` [RFC/RFT PATCH 2/6] ARM: p2v: factor out BE8 handling Ard Biesheuvel
2020-09-18 10:30 ` Ard Biesheuvel
2020-09-18 10:30 ` [RFC/RFT PATCH 3/6] ARM: p2v: drop redundant 'type' argument from __pv_stub Ard Biesheuvel
2020-09-18 10:30 ` Ard Biesheuvel
2020-09-18 10:31 ` [RFC/RFT PATCH 4/6] ARM: p2v: use relative references in patch site arrays Ard Biesheuvel
2020-09-18 10:31 ` Ard Biesheuvel
2020-09-18 10:31 ` Ard Biesheuvel [this message]
2020-09-18 10:31 ` [RFC/RFT PATCH 5/6] ARM: p2v: switch to MOVW for Thumb2 and ARM/LPAE Ard Biesheuvel
2020-09-18 10:31 ` [RFC/RFT PATCH 6/6] ARM: p2v: reduce p2v alignment requirement to 2 MiB Ard Biesheuvel
2020-09-18 10:31 ` Ard Biesheuvel
2020-09-18 17:25 ` [RFC/RFT PATCH 0/6] ARM: p2v: reduce min alignment " Ard Biesheuvel
2020-09-18 17:25 ` Ard Biesheuvel
2020-09-19 23:49 ` Nicolas Pitre
2020-09-19 23:49 ` Nicolas Pitre
2020-09-20 7:50 ` Ard Biesheuvel
2020-09-20 7:50 ` Ard Biesheuvel
2020-09-20 8:57 ` Russell King - ARM Linux admin
2020-09-20 8:57 ` Russell King - ARM Linux admin
2020-09-20 10:06 ` Ard Biesheuvel
2020-09-20 10:06 ` Ard Biesheuvel
2020-09-20 15:34 ` Nicolas Pitre
2020-09-20 15:34 ` Nicolas Pitre
2020-09-20 8:55 ` Russell King - ARM Linux admin
2020-09-20 8:55 ` Russell King - ARM Linux admin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200918103102.18107-6-ardb@kernel.org \
--to=ardb@kernel.org \
--cc=linus.walleij@linaro.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-efi@vger.kernel.org \
--cc=nico@fluxnic.net \
--cc=rmk+kernel@armlinux.org.uk \
--cc=santosh.shilimkar@ti.com \
--cc=thunder.leizhen@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.