From: Ard Biesheuvel <ardb@kernel.org>
To: linux-arm-kernel@lists.infradead.org
Cc: Ard Biesheuvel <ardb@kernel.org>, Marc Zyngier <maz@kernel.org>,
Will Deacon <will@kernel.org>,
Mark Rutland <mark.rutland@arm.com>,
Kees Cook <keescook@chromium.org>,
Catalin Marinas <catalin.marinas@arm.com>,
Mark Brown <broonie@kernel.org>,
Anshuman Khandual <anshuman.khandual@arm.com>
Subject: [PATCH v6 5/9] arm64: head: move early kernel mapping and relocation code to C code
Date: Fri, 1 Jul 2022 15:04:40 +0200 [thread overview]
Message-ID: <20220701130444.2945106-6-ardb@kernel.org> (raw)
In-Reply-To: <20220701130444.2945106-1-ardb@kernel.org>
The asm version of the kernel mapping code works fine for creating a
coarse grained identity map, but for mapping the kernel down to its
exact boundaries with the right attributes, it is not suitable. This is
why we create a preliminary RWX kernel mapping first, and then rebuild
it from scratch later on.
So let's reimplement this, and along with it the relocation routines, in
C, in a way that will make it unnecessary to create the kernel page
tables yet another time in paging_init().
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/arm64/include/asm/pgtable-prot.h | 2 +
arch/arm64/kernel/Makefile | 4 +-
arch/arm64/kernel/head.S | 157 +--------
arch/arm64/kernel/image-vars.h | 16 +
arch/arm64/kernel/pi/Makefile | 2 +-
arch/arm64/kernel/pi/early_map_kernel.c | 368 ++++++++++++++++++++
arch/arm64/kernel/pi/kaslr_early.c | 112 ------
arch/arm64/kernel/vmlinux.lds.S | 13 +-
arch/arm64/mm/proc.S | 1 +
9 files changed, 410 insertions(+), 265 deletions(-)
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 62e0ebeed720..dd38f8e80fac 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -32,7 +32,9 @@
#include <asm/cpufeature.h>
#include <asm/pgtable-types.h>
+#ifndef arm64_use_ng_mappings
extern bool arm64_use_ng_mappings;
+#endif
#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 88a96511580e..802de025bbea 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -29,7 +29,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
syscall.o proton-pack.o idreg-override.o idle.o \
- patching.o
+ patching.o pi/
targets += efi-entry.o
@@ -59,7 +59,7 @@ obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o
obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o pi/
+obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index f80127df5846..b7f1bd07a647 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -81,8 +81,6 @@
* x20 primary_entry() .. __primary_switch() CPU boot mode
* x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0
* x22 create_idmap() .. start_kernel() ID map VA of the DT blob
- * x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset
- * x24 __primary_switch() linear map KASLR seed
* x25 primary_entry() .. start_kernel() supported VA size
* x28 create_idmap() callee preserved temp register
*/
@@ -153,17 +151,6 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
0: ret
SYM_CODE_END(preserve_boot_args)
-SYM_FUNC_START_LOCAL(clear_page_tables)
- /*
- * Clear the init page tables.
- */
- adrp x0, init_pg_dir
- adrp x1, init_pg_end
- sub x2, x1, x0
- mov x1, xzr
- b __pi_memset // tail call
-SYM_FUNC_END(clear_page_tables)
-
/*
* Macro to populate page table entries, these entries can be pointers to the next level
* or last level entries pointing to physical memory.
@@ -365,7 +352,7 @@ SYM_FUNC_START_LOCAL(create_idmap)
/* Remap the kernel page tables r/w in the ID map */
adrp x1, _text
adrp x2, init_pg_dir
- adrp x3, init_pg_end
+ adrp x3, _end
bic x4, x2, #SWAPPER_BLOCK_SIZE - 1
mov x5, SWAPPER_RW_MMUFLAGS
mov x6, #SWAPPER_BLOCK_SHIFT
@@ -396,22 +383,6 @@ SYM_FUNC_START_LOCAL(create_idmap)
0: ret x28
SYM_FUNC_END(create_idmap)
-SYM_FUNC_START_LOCAL(create_kernel_mapping)
- adrp x0, init_pg_dir
- mov_q x5, KIMAGE_VADDR // compile time __va(_text)
- add x5, x5, x23 // add KASLR displacement
- adrp x6, _end // runtime __pa(_end)
- adrp x3, _text // runtime __pa(_text)
- sub x6, x6, x3 // _end - _text
- add x6, x6, x5 // runtime __va(_end)
- mov x7, SWAPPER_RW_MMUFLAGS
-
- map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
-
- dsb ishst // sync with page table walker
- ret
-SYM_FUNC_END(create_kernel_mapping)
-
/*
* Initialize CPU registers with task-specific and cpu-specific context.
*
@@ -445,6 +416,7 @@ SYM_FUNC_END(create_kernel_mapping)
* The following fragment of code is executed with the MMU enabled.
*
* x0 = __pa(KERNEL_START)
+ * w1 = memstart_offset_seed
*/
SYM_FUNC_START_LOCAL(__primary_switched)
adr_l x4, init_task
@@ -454,6 +426,11 @@ SYM_FUNC_START_LOCAL(__primary_switched)
msr vbar_el1, x8 // vector table address
isb
+#ifdef CONFIG_RANDOMIZE_BASE
+ adrp x5, memstart_offset_seed // Save KASLR linear map seed
+ strh w1, [x5, :lo12:memstart_offset_seed]
+#endif
+
stp x29, x30, [sp, #-16]!
mov x29, sp
@@ -479,11 +456,6 @@ SYM_FUNC_START_LOCAL(__primary_switched)
str x25, [x8] // ... observes the correct value
dc civac, x8 // Make visible to booting secondaries
#endif
-
-#ifdef CONFIG_RANDOMIZE_BASE
- adrp x5, memstart_offset_seed // Save KASLR linear map seed
- strh w24, [x5, :lo12:memstart_offset_seed]
-#endif
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
bl kasan_early_init
#endif
@@ -747,123 +719,18 @@ SYM_FUNC_START_LOCAL(__no_granule_support)
b 1b
SYM_FUNC_END(__no_granule_support)
-#ifdef CONFIG_RELOCATABLE
-SYM_FUNC_START_LOCAL(__relocate_kernel)
- /*
- * Iterate over each entry in the relocation table, and apply the
- * relocations in place.
- */
- adr_l x9, __rela_start
- adr_l x10, __rela_end
- mov_q x11, KIMAGE_VADDR // default virtual offset
- add x11, x11, x23 // actual virtual offset
-
-0: cmp x9, x10
- b.hs 1f
- ldp x12, x13, [x9], #24
- ldr x14, [x9, #-8]
- cmp w13, #R_AARCH64_RELATIVE
- b.ne 0b
- add x14, x14, x23 // relocate
- str x14, [x12, x23]
- b 0b
-
-1:
-#ifdef CONFIG_RELR
- /*
- * Apply RELR relocations.
- *
- * RELR is a compressed format for storing relative relocations. The
- * encoded sequence of entries looks like:
- * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
- *
- * i.e. start with an address, followed by any number of bitmaps. The
- * address entry encodes 1 relocation. The subsequent bitmap entries
- * encode up to 63 relocations each, at subsequent offsets following
- * the last address entry.
- *
- * The bitmap entries must have 1 in the least significant bit. The
- * assumption here is that an address cannot have 1 in lsb. Odd
- * addresses are not supported. Any odd addresses are stored in the RELA
- * section, which is handled above.
- *
- * Excluding the least significant bit in the bitmap, each non-zero
- * bit in the bitmap represents a relocation to be applied to
- * a corresponding machine word that follows the base address
- * word. The second least significant bit represents the machine
- * word immediately following the initial address, and each bit
- * that follows represents the next word, in linear order. As such,
- * a single bitmap can encode up to 63 relocations in a 64-bit object.
- *
- * In this implementation we store the address of the next RELR table
- * entry in x9, the address being relocated by the current address or
- * bitmap entry in x13 and the address being relocated by the current
- * bit in x14.
- */
- adr_l x9, __relr_start
- adr_l x10, __relr_end
-
-2: cmp x9, x10
- b.hs 7f
- ldr x11, [x9], #8
- tbnz x11, #0, 3f // branch to handle bitmaps
- add x13, x11, x23
- ldr x12, [x13] // relocate address entry
- add x12, x12, x23
- str x12, [x13], #8 // adjust to start of bitmap
- b 2b
-
-3: mov x14, x13
-4: lsr x11, x11, #1
- cbz x11, 6f
- tbz x11, #0, 5f // skip bit if not set
- ldr x12, [x14] // relocate bit
- add x12, x12, x23
- str x12, [x14]
-
-5: add x14, x14, #8 // move to next bit's address
- b 4b
-
-6: /*
- * Move to the next bitmap's address. 8 is the word size, and 63 is the
- * number of significant bits in a bitmap entry.
- */
- add x13, x13, #(8 * 63)
- b 2b
-
-7:
-#endif
- ret
-
-SYM_FUNC_END(__relocate_kernel)
-#endif
-
SYM_FUNC_START_LOCAL(__primary_switch)
adrp x1, reserved_pg_dir
adrp x2, init_idmap_pg_dir
bl __enable_mmu
-#ifdef CONFIG_RELOCATABLE
- adrp x23, KERNEL_START
- and x23, x23, MIN_KIMG_ALIGN - 1
-#ifdef CONFIG_RANDOMIZE_BASE
- mov x0, x22
- adrp x1, init_pg_end
+
+ adrp x1, primary_init_stack
mov sp, x1
mov x29, xzr
- bl __pi_kaslr_early_init
- and x24, x0, #SZ_2M - 1 // capture memstart offset seed
- bic x0, x0, #SZ_2M - 1
- orr x23, x23, x0 // record kernel offset
-#endif
-#endif
- bl clear_page_tables
- bl create_kernel_mapping
+ mov x0, x22 // pass FDT pointer
+ bl __pi_early_map_kernel
+ mov w1, w0 // capture memstart offset seed
- adrp x1, init_pg_dir
- load_ttbr1 x1, x1, x2
-#ifdef CONFIG_RELOCATABLE
- bl __relocate_kernel
-#endif
ldr x8, =__primary_switched
adrp x0, KERNEL_START // __pa(KERNEL_START)
br x8
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index afa69e04e75e..032c3e1aff20 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -54,6 +54,22 @@ PROVIDE(__pi___memcpy = __pi_memcpy);
PROVIDE(__pi___memmove = __pi_memmove);
PROVIDE(__pi___memset = __pi_memset);
+/*
+ * The symbols below are used by the early C kernel mapping code.
+ */
+PROVIDE(__pi_init_pg_dir = init_pg_dir);
+PROVIDE(__pi_init_pg_end = init_pg_end);
+
+PROVIDE(__pi__text = _text);
+PROVIDE(__pi__stext = _stext);
+PROVIDE(__pi__etext = _etext);
+PROVIDE(__pi___start_rodata = __start_rodata);
+PROVIDE(__pi___inittext_begin = __inittext_begin);
+PROVIDE(__pi___inittext_end = __inittext_end);
+PROVIDE(__pi___initdata_begin = __initdata_begin);
+PROVIDE(__pi___initdata_end = __initdata_end);
+PROVIDE(__pi__data = _data);
+
#ifdef CONFIG_KVM
/*
diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile
index 839291430cb3..b88612eab1bc 100644
--- a/arch/arm64/kernel/pi/Makefile
+++ b/arch/arm64/kernel/pi/Makefile
@@ -29,5 +29,5 @@ $(obj)/%.pi.o: $(obj)/%.o FORCE
$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
$(call if_changed_rule,cc_o_c)
-obj-y := kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
+obj-y := early_map_kernel.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
diff --git a/arch/arm64/kernel/pi/early_map_kernel.c b/arch/arm64/kernel/pi/early_map_kernel.c
new file mode 100644
index 000000000000..4199548584fb
--- /dev/null
+++ b/arch/arm64/kernel/pi/early_map_kernel.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2022 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+// NOTE: code in this file runs *very* early, and is not permitted to use
+// global variables or anything that relies on absolute addressing.
+
+#define arm64_use_ng_mappings 0
+
+#include <linux/elf.h>
+#include <linux/libfdt.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
+
+#include <asm/archrandom.h>
+#include <asm/memory.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+/* taken from lib/string.c */
+static char *__strstr(const char *s1, const char *s2)
+{
+ size_t l1, l2;
+
+ l2 = strlen(s2);
+ if (!l2)
+ return (char *)s1;
+ l1 = strlen(s1);
+ while (l1 >= l2) {
+ l1--;
+ if (!memcmp(s1, s2, l2))
+ return (char *)s1;
+ s1++;
+ }
+ return NULL;
+}
+
+/*
+ * Returns whether @pfx appears in @string, either at the very start, or
+ * elsewhere but preceded by a space character.
+ */
+static bool string_contains_prefix(const u8 *string, const u8 *pfx)
+{
+ const u8 *str;
+
+ str = __strstr(string, pfx);
+ return str == string || (str > string && *(str - 1) == ' ');
+}
+
+static bool cmdline_has(void *fdt, const u8 *word)
+{
+ if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+ int node;
+ const u8 *prop;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ goto out;
+
+ prop = fdt_getprop(fdt, node, "bootargs", NULL);
+ if (!prop)
+ goto out;
+
+ if (string_contains_prefix(prop, word))
+ return true;
+
+ if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
+ goto out;
+
+ return false;
+ }
+out:
+ return string_contains_prefix(CONFIG_CMDLINE, word);
+}
+
+static u64 get_kaslr_seed(void *fdt)
+{
+ int node, len;
+ fdt64_t *prop;
+ u64 ret;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ return 0;
+
+ prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+ if (!prop || len != sizeof(u64))
+ return 0;
+
+ ret = fdt64_to_cpu(*prop);
+ *prop = 0;
+ return ret;
+}
+
+static u64 kaslr_early_init(void *fdt)
+{
+ u64 seed;
+
+ if (cmdline_has(fdt, "nokaslr"))
+ return 0;
+
+ seed = get_kaslr_seed(fdt);
+ if (!seed) {
+#ifdef CONFIG_ARCH_RANDOM
+ if (!__early_cpu_has_rndr() ||
+ !__arm64_rndr((unsigned long *)&seed))
+#endif
+ return 0;
+ }
+
+ /*
+ * OK, so we are proceeding with KASLR enabled. Calculate a suitable
+ * kernel image offset from the seed. Let's place the kernel in the
+ * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
+ * the lower and upper quarters to avoid colliding with other
+ * allocations.
+ */
+ return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
+}
+
+extern const Elf64_Rela rela_start[], rela_end[];
+extern const u64 relr_start[], relr_end[];
+
+static void relocate_kernel(u64 offset)
+{
+ const Elf64_Rela *rela;
+ const u64 *relr;
+ u64 *place;
+
+ for (rela = rela_start; rela < rela_end; rela++) {
+ if (ELF64_R_TYPE(rela->r_info) != R_AARCH64_RELATIVE)
+ continue;
+ place = (u64 *)(rela->r_offset + offset);
+ *place = rela->r_addend + offset;
+ }
+
+ if (!IS_ENABLED(CONFIG_RELR) || !offset)
+ return;
+
+ /*
+ * Apply RELR relocations.
+ *
+ * RELR is a compressed format for storing relative relocations. The
+ * encoded sequence of entries looks like:
+ * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
+ *
+ * i.e. start with an address, followed by any number of bitmaps. The
+ * address entry encodes 1 relocation. The subsequent bitmap entries
+ * encode up to 63 relocations each, at subsequent offsets following
+ * the last address entry.
+ *
+ * The bitmap entries must have 1 in the least significant bit. The
+ * assumption here is that an address cannot have 1 in lsb. Odd
+ * addresses are not supported. Any odd addresses are stored in the
+ * RELA section, which is handled above.
+ *
+ * Excluding the least significant bit in the bitmap, each non-zero bit
+ * in the bitmap represents a relocation to be applied to a
+ * corresponding machine word that follows the base address word. The
+ * second least significant bit represents the machine word immediately
+ * following the initial address, and each bit that follows represents
+ * the next word, in linear order. As such, a single bitmap can encode
+ * up to 63 relocations in a 64-bit object.
+ */
+ for (relr = relr_start; relr < relr_end; relr++) {
+ u64 *p, r = *relr;
+
+ if ((r & 1) == 0) {
+ place = (u64 *)(r + offset);
+ *place++ += offset;
+ } else {
+ for (p = place; r; p++) {
+ r >>= 1;
+ if (r & 1)
+ *p += offset;
+ }
+ place += 63;
+ }
+ }
+}
+
+extern void idmap_cpu_replace_ttbr1(void *pgdir);
+
+static void map_range(pgd_t **pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
+ int level, pte_t *tbl, bool may_use_cont)
+{
+ u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
+ u64 protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
+ int lshift = (3 - level) * (PAGE_SHIFT - 3);
+ u64 lmask = (PAGE_SIZE << lshift) - 1;
+
+ // Advance tbl to the entry that covers start
+ tbl += (start >> (lshift + PAGE_SHIFT)) % BIT(PAGE_SHIFT - 3);
+
+ // Set the right block/page bits for this level unless we are
+ // clearing the mapping
+ if (protval)
+ protval |= (level < 3) ? PMD_TYPE_SECT : PTE_TYPE_PAGE;
+
+ while (start < end) {
+ u64 next = min((start | lmask) + 1, end);
+
+ if (level < 3 &&
+ (start & lmask || next & lmask || pa & lmask)) {
+ // This chunk needs a finer grained mapping
+ // Put down a table mapping if necessary and recurse
+ if (pte_none(*tbl)) {
+ *tbl = __pte(__phys_to_pte_val((u64)*pgd) |
+ PMD_TYPE_TABLE);
+ *pgd += PTRS_PER_PTE;
+ }
+ map_range(pgd, start, next, pa, prot, level + 1,
+ (pte_t *)__pte_to_phys(*tbl), may_use_cont);
+ } else {
+ // Start a contiguous range if start and pa are
+ // suitably aligned
+ if (((start | pa) & cmask) == 0 && may_use_cont)
+ protval |= PTE_CONT;
+ // Clear the contiguous attribute if the remaining
+ // range does not cover a contiguous block
+ if ((end & ~cmask) <= start)
+ protval &= ~PTE_CONT;
+ // Put down a block or page mapping
+ *tbl = __pte(__phys_to_pte_val(pa) | protval);
+ }
+ pa += next - start;
+ start = next;
+ tbl++;
+ }
+}
+
+static void map_segment(pgd_t **pgd, u64 va_offset, void *start, void *end,
+ pgprot_t prot, bool may_use_cont)
+{
+ map_range(pgd, ((u64)start + va_offset) & ~PAGE_OFFSET,
+ ((u64)end + va_offset) & ~PAGE_OFFSET, (u64)start,
+ prot, 4 - CONFIG_PGTABLE_LEVELS, (pte_t *)init_pg_dir,
+ may_use_cont);
+}
+
+static void unmap_segment(u64 va_offset, void *start, void *end)
+{
+ map_range(NULL, ((u64)start + va_offset) & ~PAGE_OFFSET,
+ ((u64)end + va_offset) & ~PAGE_OFFSET, (u64)start,
+ __pgprot(0), 4 - CONFIG_PGTABLE_LEVELS, (pte_t *)init_pg_dir,
+ false);
+}
+
+/*
+ * Open coded check for BTI, only for use to determine configuration
+ * for early mappings for before the cpufeature code has run.
+ */
+static bool arm64_early_this_cpu_has_bti(void)
+{
+ u64 pfr1;
+
+ if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
+ return false;
+
+ pfr1 = read_sysreg_s(SYS_ID_AA64PFR1_EL1);
+ return cpuid_feature_extract_unsigned_field(pfr1,
+ ID_AA64PFR1_BT_SHIFT);
+}
+
+static bool arm64_early_this_cpu_has_e0pd(void)
+{
+ u64 mmfr2;
+
+ if (!IS_ENABLED(CONFIG_ARM64_E0PD))
+ return false;
+
+ mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
+ return cpuid_feature_extract_unsigned_field(mmfr2,
+ ID_AA64MMFR2_E0PD_SHIFT);
+}
+
+static void map_kernel(void *fdt, u64 kaslr_offset, u64 va_offset)
+{
+ pgd_t *pgdp = (void *)init_pg_dir + PAGE_SIZE;
+ pgprot_t text_prot = PAGE_KERNEL_ROX;
+ pgprot_t data_prot = PAGE_KERNEL;
+ pgprot_t prot;
+
+ if (cmdline_has(fdt, "rodata=off"))
+ text_prot = PAGE_KERNEL_EXEC;
+
+ // If we have a CPU that supports BTI and a kernel built for
+ // BTI then mark the kernel executable text as guarded pages
+ // now so we don't have to rewrite the page tables later.
+ if (arm64_early_this_cpu_has_bti() && !cmdline_has(fdt, "arm64.nobti"))
+ text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
+
+ // Assume that any CPU that does not implement E0PD needs KPTI to
+ // ensure that KASLR randomized addresses will not leak. This means we
+ // need to use non-global mappings for the kernel text and data.
+ if (!arm64_early_this_cpu_has_e0pd() && kaslr_offset >= MIN_KIMG_ALIGN) {
+ text_prot = __pgprot_modify(text_prot, PTE_NG, PTE_NG);
+ data_prot = __pgprot_modify(data_prot, PTE_NG, PTE_NG);
+ }
+
+ // Map all code read-write on the first pass for relocation processing
+ prot = IS_ENABLED(CONFIG_RELOCATABLE) ? data_prot : text_prot;
+
+ map_segment(&pgdp, va_offset, _stext, _etext, prot, true);
+ map_segment(&pgdp, va_offset, __start_rodata, __inittext_begin, data_prot, false);
+ map_segment(&pgdp, va_offset, __inittext_begin, __inittext_end, prot, false);
+ map_segment(&pgdp, va_offset, __initdata_begin, __initdata_end, data_prot, false);
+ map_segment(&pgdp, va_offset, _data, init_pg_dir, data_prot, true);
+ // omit [init_pg_dir, _end] - it doesn't need a kernel mapping
+ dsb(ishst);
+
+ idmap_cpu_replace_ttbr1(init_pg_dir);
+
+ if (IS_ENABLED(CONFIG_RELOCATABLE)) {
+ relocate_kernel(kaslr_offset);
+
+ // Unmap the text region before remapping it, to avoid
+ // potential TLB conflicts on the contiguous descriptors. This
+ // assumes that it is permitted to clear the valid bit on a
+ // live descriptor with the CONT bit set.
+ unmap_segment(va_offset, _stext, _etext);
+ dsb(ishst);
+ isb();
+ __tlbi(vmalle1);
+ isb();
+
+ // Remap these segments with different permissions
+ // No new page table allocations should be needed
+ map_segment(NULL, va_offset, _stext, _etext, text_prot, true);
+ map_segment(NULL, va_offset, __inittext_begin, __inittext_end,
+ text_prot, false);
+ dsb(ishst);
+ }
+}
+
+asmlinkage u64 early_map_kernel(void *fdt)
+{
+ u64 kaslr_seed = 0, kaslr_offset = 0;
+ u64 va_base = KIMAGE_VADDR;
+ u64 pa_base = (u64)&_text;
+
+ // Clear the initial page tables before populating them
+ memset(init_pg_dir, 0, init_pg_end - init_pg_dir);
+
+ // The virtual KASLR displacement modulo 2MiB is decided by the
+ // physical placement of the image, as otherwise, we might not be able
+ // to create the early kernel mapping using 2 MiB block descriptors. So
+ // take the low bits of the KASLR offset from the physical address, and
+ // fill in the high bits from the seed.
+ if (IS_ENABLED(CONFIG_RELOCATABLE)) {
+ kaslr_offset = pa_base & (MIN_KIMG_ALIGN - 1);
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+ kaslr_seed = kaslr_early_init(fdt);
+ kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1);
+ }
+ va_base += kaslr_offset;
+ }
+
+ map_kernel(fdt, kaslr_offset, va_base - pa_base);
+
+ // Return the lower 16 bits of the seed - this will be
+ // used to randomize the linear map
+ return kaslr_seed & U16_MAX;
+}
diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c
deleted file mode 100644
index 6c3855e69395..000000000000
--- a/arch/arm64/kernel/pi/kaslr_early.c
+++ /dev/null
@@ -1,112 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-// Copyright 2022 Google LLC
-// Author: Ard Biesheuvel <ardb@google.com>
-
-// NOTE: code in this file runs *very* early, and is not permitted to use
-// global variables or anything that relies on absolute addressing.
-
-#include <linux/libfdt.h>
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <linux/types.h>
-#include <linux/sizes.h>
-#include <linux/string.h>
-
-#include <asm/archrandom.h>
-#include <asm/memory.h>
-
-/* taken from lib/string.c */
-static char *__strstr(const char *s1, const char *s2)
-{
- size_t l1, l2;
-
- l2 = strlen(s2);
- if (!l2)
- return (char *)s1;
- l1 = strlen(s1);
- while (l1 >= l2) {
- l1--;
- if (!memcmp(s1, s2, l2))
- return (char *)s1;
- s1++;
- }
- return NULL;
-}
-static bool cmdline_contains_nokaslr(const u8 *cmdline)
-{
- const u8 *str;
-
- str = __strstr(cmdline, "nokaslr");
- return str == cmdline || (str > cmdline && *(str - 1) == ' ');
-}
-
-static bool is_kaslr_disabled_cmdline(void *fdt)
-{
- if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
- int node;
- const u8 *prop;
-
- node = fdt_path_offset(fdt, "/chosen");
- if (node < 0)
- goto out;
-
- prop = fdt_getprop(fdt, node, "bootargs", NULL);
- if (!prop)
- goto out;
-
- if (cmdline_contains_nokaslr(prop))
- return true;
-
- if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
- goto out;
-
- return false;
- }
-out:
- return cmdline_contains_nokaslr(CONFIG_CMDLINE);
-}
-
-static u64 get_kaslr_seed(void *fdt)
-{
- int node, len;
- fdt64_t *prop;
- u64 ret;
-
- node = fdt_path_offset(fdt, "/chosen");
- if (node < 0)
- return 0;
-
- prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
- if (!prop || len != sizeof(u64))
- return 0;
-
- ret = fdt64_to_cpu(*prop);
- *prop = 0;
- return ret;
-}
-
-asmlinkage u64 kaslr_early_init(void *fdt)
-{
- u64 seed;
-
- if (is_kaslr_disabled_cmdline(fdt))
- return 0;
-
- seed = get_kaslr_seed(fdt);
- if (!seed) {
-#ifdef CONFIG_ARCH_RANDOM
- if (!__early_cpu_has_rndr() ||
- !__arm64_rndr((unsigned long *)&seed))
-#endif
- return 0;
- }
-
- /*
- * OK, so we are proceeding with KASLR enabled. Calculate a suitable
- * kernel image offset from the seed. Let's place the kernel in the
- * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
- * the lower and upper quarters to avoid colliding with other
- * allocations.
- */
- return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
-}
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 5002d869fa7f..60641aa3ac07 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -257,15 +257,15 @@ SECTIONS
HYPERVISOR_RELOC_SECTION
.rela.dyn : ALIGN(8) {
- __rela_start = .;
+ __pi_rela_start = .;
*(.rela .rela*)
- __rela_end = .;
+ __pi_rela_end = .;
}
.relr.dyn : ALIGN(8) {
- __relr_start = .;
+ __pi_relr_start = .;
*(.relr.dyn)
- __relr_end = .;
+ __pi_relr_end = .;
}
. = ALIGN(SEGMENT_ALIGN);
@@ -309,11 +309,14 @@ SECTIONS
BSS_SECTION(SBSS_ALIGN, 0, 0)
- . = ALIGN(PAGE_SIZE);
+ . = ALIGN(SEGMENT_ALIGN);
init_pg_dir = .;
. += INIT_DIR_SIZE;
init_pg_end = .;
+ . += SZ_4K;
+ primary_init_stack = .;
+
. = ALIGN(SEGMENT_ALIGN);
__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
_end = .;
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 113a4fedf5b8..4322ddf5e02f 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -196,6 +196,7 @@ SYM_FUNC_START(idmap_cpu_replace_ttbr1)
ret
SYM_FUNC_END(idmap_cpu_replace_ttbr1)
+SYM_FUNC_ALIAS(__pi_idmap_cpu_replace_ttbr1, idmap_cpu_replace_ttbr1)
.popsection
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
--
2.35.1
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
next prev parent reply other threads:[~2022-07-01 13:08 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-07-01 13:04 [PATCH v6 0/9] arm64: add support for WXN Ard Biesheuvel
2022-07-01 13:04 ` [PATCH v6 1/9] arm64: kaslr: use an ordinary command line param for nokaslr Ard Biesheuvel
2022-07-01 14:07 ` Mark Brown
2022-07-01 13:04 ` [PATCH v6 2/9] arm64: kaslr: don't pretend KASLR is enabled if offset < MIN_KIMG_ALIGN Ard Biesheuvel
2022-07-01 14:12 ` Mark Brown
2022-07-01 13:04 ` [PATCH v6 3/9] arm64: kaslr: drop special case for ThunderX in kaslr_requires_kpti() Ard Biesheuvel
2022-07-01 13:04 ` [PATCH v6 4/9] arm64: head: allocate more pages for the kernel mapping Ard Biesheuvel
2022-07-01 13:04 ` Ard Biesheuvel [this message]
2022-07-01 13:04 ` [PATCH v6 6/9] arm64: mm: avoid fixmap for early swapper_pg_dir updates Ard Biesheuvel
2022-07-01 13:04 ` [PATCH v6 7/9] arm64: mm: omit redundant remap of kernel image Ard Biesheuvel
2022-07-01 13:04 ` [PATCH v6 8/9] mm: add arch hook to validate mmap() prot flags Ard Biesheuvel
2022-07-01 13:04 ` [PATCH v6 9/9] arm64: mm: add support for WXN memory translation attribute Ard Biesheuvel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220701130444.2945106-6-ardb@kernel.org \
--to=ardb@kernel.org \
--cc=anshuman.khandual@arm.com \
--cc=broonie@kernel.org \
--cc=catalin.marinas@arm.com \
--cc=keescook@chromium.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=mark.rutland@arm.com \
--cc=maz@kernel.org \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.