All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ard Biesheuvel <ardb@kernel.org>
To: linux-arm-kernel@lists.infradead.org
Cc: Ard Biesheuvel <ardb@kernel.org>, Marc Zyngier <maz@kernel.org>,
	Will Deacon <will@kernel.org>,
	Mark Rutland <mark.rutland@arm.com>,
	Kees Cook <keescook@chromium.org>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Mark Brown <broonie@kernel.org>,
	Anshuman Khandual <anshuman.khandual@arm.com>
Subject: [PATCH v6 5/9] arm64: head: move early kernel mapping and relocation code to C code
Date: Fri,  1 Jul 2022 15:04:40 +0200	[thread overview]
Message-ID: <20220701130444.2945106-6-ardb@kernel.org> (raw)
In-Reply-To: <20220701130444.2945106-1-ardb@kernel.org>

The asm version of the kernel mapping code works fine for creating a
coarse grained identity map, but for mapping the kernel down to its
exact boundaries with the right attributes, it is not suitable. This is
why we create a preliminary RWX kernel mapping first, and then rebuild
it from scratch later on.

So let's reimplement this, and along with it the relocation routines, in
C, in a way that will make it unnecessary to create the kernel page
tables yet another time in paging_init().

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/include/asm/pgtable-prot.h   |   2 +
 arch/arm64/kernel/Makefile              |   4 +-
 arch/arm64/kernel/head.S                | 157 +--------
 arch/arm64/kernel/image-vars.h          |  16 +
 arch/arm64/kernel/pi/Makefile           |   2 +-
 arch/arm64/kernel/pi/early_map_kernel.c | 368 ++++++++++++++++++++
 arch/arm64/kernel/pi/kaslr_early.c      | 112 ------
 arch/arm64/kernel/vmlinux.lds.S         |  13 +-
 arch/arm64/mm/proc.S                    |   1 +
 9 files changed, 410 insertions(+), 265 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 62e0ebeed720..dd38f8e80fac 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -32,7 +32,9 @@
 #include <asm/cpufeature.h>
 #include <asm/pgtable-types.h>
 
+#ifndef arm64_use_ng_mappings
 extern bool arm64_use_ng_mappings;
+#endif
 
 #define _PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define _PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 88a96511580e..802de025bbea 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -29,7 +29,7 @@ obj-y			:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   cpufeature.o alternative.o cacheinfo.o		\
 			   smp.o smp_spin_table.o topology.o smccc-call.o	\
 			   syscall.o proton-pack.o idreg-override.o idle.o	\
-			   patching.o
+			   patching.o pi/
 
 targets			+= efi-entry.o
 
@@ -59,7 +59,7 @@ obj-$(CONFIG_ACPI)			+= acpi.o
 obj-$(CONFIG_ACPI_NUMA)			+= acpi_numa.o
 obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)	+= acpi_parking_protocol.o
 obj-$(CONFIG_PARAVIRT)			+= paravirt.o
-obj-$(CONFIG_RANDOMIZE_BASE)		+= kaslr.o pi/
+obj-$(CONFIG_RANDOMIZE_BASE)		+= kaslr.o
 obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
 obj-$(CONFIG_ELF_CORE)			+= elfcore.o
 obj-$(CONFIG_KEXEC_CORE)		+= machine_kexec.o relocate_kernel.o	\
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index f80127df5846..b7f1bd07a647 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -81,8 +81,6 @@
 	 *  x20        primary_entry() .. __primary_switch()    CPU boot mode
 	 *  x21        primary_entry() .. start_kernel()        FDT pointer passed at boot in x0
 	 *  x22        create_idmap() .. start_kernel()         ID map VA of the DT blob
-	 *  x23        primary_entry() .. start_kernel()        physical misalignment/KASLR offset
-	 *  x24        __primary_switch()                       linear map KASLR seed
 	 *  x25        primary_entry() .. start_kernel()        supported VA size
 	 *  x28        create_idmap()                           callee preserved temp register
 	 */
@@ -153,17 +151,6 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
 0:	ret
 SYM_CODE_END(preserve_boot_args)
 
-SYM_FUNC_START_LOCAL(clear_page_tables)
-	/*
-	 * Clear the init page tables.
-	 */
-	adrp	x0, init_pg_dir
-	adrp	x1, init_pg_end
-	sub	x2, x1, x0
-	mov	x1, xzr
-	b	__pi_memset			// tail call
-SYM_FUNC_END(clear_page_tables)
-
 /*
  * Macro to populate page table entries, these entries can be pointers to the next level
  * or last level entries pointing to physical memory.
@@ -365,7 +352,7 @@ SYM_FUNC_START_LOCAL(create_idmap)
 	/* Remap the kernel page tables r/w in the ID map */
 	adrp	x1, _text
 	adrp	x2, init_pg_dir
-	adrp	x3, init_pg_end
+	adrp	x3, _end
 	bic	x4, x2, #SWAPPER_BLOCK_SIZE - 1
 	mov	x5, SWAPPER_RW_MMUFLAGS
 	mov	x6, #SWAPPER_BLOCK_SHIFT
@@ -396,22 +383,6 @@ SYM_FUNC_START_LOCAL(create_idmap)
 0:	ret	x28
 SYM_FUNC_END(create_idmap)
 
-SYM_FUNC_START_LOCAL(create_kernel_mapping)
-	adrp	x0, init_pg_dir
-	mov_q	x5, KIMAGE_VADDR		// compile time __va(_text)
-	add	x5, x5, x23			// add KASLR displacement
-	adrp	x6, _end			// runtime __pa(_end)
-	adrp	x3, _text			// runtime __pa(_text)
-	sub	x6, x6, x3			// _end - _text
-	add	x6, x6, x5			// runtime __va(_end)
-	mov	x7, SWAPPER_RW_MMUFLAGS
-
-	map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
-
-	dsb	ishst				// sync with page table walker
-	ret
-SYM_FUNC_END(create_kernel_mapping)
-
 	/*
 	 * Initialize CPU registers with task-specific and cpu-specific context.
 	 *
@@ -445,6 +416,7 @@ SYM_FUNC_END(create_kernel_mapping)
  * The following fragment of code is executed with the MMU enabled.
  *
  *   x0 = __pa(KERNEL_START)
+ *   w1 = memstart_offset_seed
  */
 SYM_FUNC_START_LOCAL(__primary_switched)
 	adr_l	x4, init_task
@@ -454,6 +426,11 @@ SYM_FUNC_START_LOCAL(__primary_switched)
 	msr	vbar_el1, x8			// vector table address
 	isb
 
+#ifdef CONFIG_RANDOMIZE_BASE
+	adrp	x5, memstart_offset_seed	// Save KASLR linear map seed
+	strh	w1, [x5, :lo12:memstart_offset_seed]
+#endif
+
 	stp	x29, x30, [sp, #-16]!
 	mov	x29, sp
 
@@ -479,11 +456,6 @@ SYM_FUNC_START_LOCAL(__primary_switched)
 	str	x25, [x8]			// ... observes the correct value
 	dc	civac, x8			// Make visible to booting secondaries
 #endif
-
-#ifdef CONFIG_RANDOMIZE_BASE
-	adrp	x5, memstart_offset_seed	// Save KASLR linear map seed
-	strh	w24, [x5, :lo12:memstart_offset_seed]
-#endif
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 	bl	kasan_early_init
 #endif
@@ -747,123 +719,18 @@ SYM_FUNC_START_LOCAL(__no_granule_support)
 	b	1b
 SYM_FUNC_END(__no_granule_support)
 
-#ifdef CONFIG_RELOCATABLE
-SYM_FUNC_START_LOCAL(__relocate_kernel)
-	/*
-	 * Iterate over each entry in the relocation table, and apply the
-	 * relocations in place.
-	 */
-	adr_l	x9, __rela_start
-	adr_l	x10, __rela_end
-	mov_q	x11, KIMAGE_VADDR		// default virtual offset
-	add	x11, x11, x23			// actual virtual offset
-
-0:	cmp	x9, x10
-	b.hs	1f
-	ldp	x12, x13, [x9], #24
-	ldr	x14, [x9, #-8]
-	cmp	w13, #R_AARCH64_RELATIVE
-	b.ne	0b
-	add	x14, x14, x23			// relocate
-	str	x14, [x12, x23]
-	b	0b
-
-1:
-#ifdef CONFIG_RELR
-	/*
-	 * Apply RELR relocations.
-	 *
-	 * RELR is a compressed format for storing relative relocations. The
-	 * encoded sequence of entries looks like:
-	 * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
-	 *
-	 * i.e. start with an address, followed by any number of bitmaps. The
-	 * address entry encodes 1 relocation. The subsequent bitmap entries
-	 * encode up to 63 relocations each, at subsequent offsets following
-	 * the last address entry.
-	 *
-	 * The bitmap entries must have 1 in the least significant bit. The
-	 * assumption here is that an address cannot have 1 in lsb. Odd
-	 * addresses are not supported. Any odd addresses are stored in the RELA
-	 * section, which is handled above.
-	 *
-	 * Excluding the least significant bit in the bitmap, each non-zero
-	 * bit in the bitmap represents a relocation to be applied to
-	 * a corresponding machine word that follows the base address
-	 * word. The second least significant bit represents the machine
-	 * word immediately following the initial address, and each bit
-	 * that follows represents the next word, in linear order. As such,
-	 * a single bitmap can encode up to 63 relocations in a 64-bit object.
-	 *
-	 * In this implementation we store the address of the next RELR table
-	 * entry in x9, the address being relocated by the current address or
-	 * bitmap entry in x13 and the address being relocated by the current
-	 * bit in x14.
-	 */
-	adr_l	x9, __relr_start
-	adr_l	x10, __relr_end
-
-2:	cmp	x9, x10
-	b.hs	7f
-	ldr	x11, [x9], #8
-	tbnz	x11, #0, 3f			// branch to handle bitmaps
-	add	x13, x11, x23
-	ldr	x12, [x13]			// relocate address entry
-	add	x12, x12, x23
-	str	x12, [x13], #8			// adjust to start of bitmap
-	b	2b
-
-3:	mov	x14, x13
-4:	lsr	x11, x11, #1
-	cbz	x11, 6f
-	tbz	x11, #0, 5f			// skip bit if not set
-	ldr	x12, [x14]			// relocate bit
-	add	x12, x12, x23
-	str	x12, [x14]
-
-5:	add	x14, x14, #8			// move to next bit's address
-	b	4b
-
-6:	/*
-	 * Move to the next bitmap's address. 8 is the word size, and 63 is the
-	 * number of significant bits in a bitmap entry.
-	 */
-	add	x13, x13, #(8 * 63)
-	b	2b
-
-7:
-#endif
-	ret
-
-SYM_FUNC_END(__relocate_kernel)
-#endif
-
 SYM_FUNC_START_LOCAL(__primary_switch)
 	adrp	x1, reserved_pg_dir
 	adrp	x2, init_idmap_pg_dir
 	bl	__enable_mmu
-#ifdef CONFIG_RELOCATABLE
-	adrp	x23, KERNEL_START
-	and	x23, x23, MIN_KIMG_ALIGN - 1
-#ifdef CONFIG_RANDOMIZE_BASE
-	mov	x0, x22
-	adrp	x1, init_pg_end
+
+	adrp	x1, primary_init_stack
 	mov	sp, x1
 	mov	x29, xzr
-	bl	__pi_kaslr_early_init
-	and	x24, x0, #SZ_2M - 1		// capture memstart offset seed
-	bic	x0, x0, #SZ_2M - 1
-	orr	x23, x23, x0			// record kernel offset
-#endif
-#endif
-	bl	clear_page_tables
-	bl	create_kernel_mapping
+	mov	x0, x22				// pass FDT pointer
+	bl	__pi_early_map_kernel
+	mov	w1, w0				// capture memstart offset seed
 
-	adrp	x1, init_pg_dir
-	load_ttbr1 x1, x1, x2
-#ifdef CONFIG_RELOCATABLE
-	bl	__relocate_kernel
-#endif
 	ldr	x8, =__primary_switched
 	adrp	x0, KERNEL_START		// __pa(KERNEL_START)
 	br	x8
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index afa69e04e75e..032c3e1aff20 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -54,6 +54,22 @@ PROVIDE(__pi___memcpy			= __pi_memcpy);
 PROVIDE(__pi___memmove			= __pi_memmove);
 PROVIDE(__pi___memset			= __pi_memset);
 
+/*
+ * The symbols below are used by the early C kernel mapping code.
+ */
+PROVIDE(__pi_init_pg_dir		= init_pg_dir);
+PROVIDE(__pi_init_pg_end		= init_pg_end);
+
+PROVIDE(__pi__text			= _text);
+PROVIDE(__pi__stext               	= _stext);
+PROVIDE(__pi__etext               	= _etext);
+PROVIDE(__pi___start_rodata       	= __start_rodata);
+PROVIDE(__pi___inittext_begin     	= __inittext_begin);
+PROVIDE(__pi___inittext_end       	= __inittext_end);
+PROVIDE(__pi___initdata_begin     	= __initdata_begin);
+PROVIDE(__pi___initdata_end       	= __initdata_end);
+PROVIDE(__pi__data                	= _data);
+
 #ifdef CONFIG_KVM
 
 /*
diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile
index 839291430cb3..b88612eab1bc 100644
--- a/arch/arm64/kernel/pi/Makefile
+++ b/arch/arm64/kernel/pi/Makefile
@@ -29,5 +29,5 @@ $(obj)/%.pi.o: $(obj)/%.o FORCE
 $(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
-obj-y		:= kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
+obj-y		:= early_map_kernel.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
 extra-y		:= $(patsubst %.pi.o,%.o,$(obj-y))
diff --git a/arch/arm64/kernel/pi/early_map_kernel.c b/arch/arm64/kernel/pi/early_map_kernel.c
new file mode 100644
index 000000000000..4199548584fb
--- /dev/null
+++ b/arch/arm64/kernel/pi/early_map_kernel.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2022 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+// NOTE: code in this file runs *very* early, and is not permitted to use
+// global variables or anything that relies on absolute addressing.
+
+#define arm64_use_ng_mappings 0
+
+#include <linux/elf.h>
+#include <linux/libfdt.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
+
+#include <asm/archrandom.h>
+#include <asm/memory.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+/* taken from lib/string.c */
+static char *__strstr(const char *s1, const char *s2)
+{
+	size_t l1, l2;
+
+	l2 = strlen(s2);
+	if (!l2)
+		return (char *)s1;
+	l1 = strlen(s1);
+	while (l1 >= l2) {
+		l1--;
+		if (!memcmp(s1, s2, l2))
+			return (char *)s1;
+		s1++;
+	}
+	return NULL;
+}
+
+/*
+ * Returns whether @pfx appears in @string, either at the very start, or
+ * elsewhere but preceded by a space character.
+ */
+static bool string_contains_prefix(const u8 *string, const u8 *pfx)
+{
+	const u8 *str;
+
+	str = __strstr(string, pfx);
+	return str == string || (str > string && *(str - 1) == ' ');
+}
+
+static bool cmdline_has(void *fdt, const u8 *word)
+{
+	if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+		int node;
+		const u8 *prop;
+
+		node = fdt_path_offset(fdt, "/chosen");
+		if (node < 0)
+			goto out;
+
+		prop = fdt_getprop(fdt, node, "bootargs", NULL);
+		if (!prop)
+			goto out;
+
+		if (string_contains_prefix(prop, word))
+			return true;
+
+		if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
+			goto out;
+
+		return false;
+	}
+out:
+	return string_contains_prefix(CONFIG_CMDLINE, word);
+}
+
+static u64 get_kaslr_seed(void *fdt)
+{
+	int node, len;
+	fdt64_t *prop;
+	u64 ret;
+
+	node = fdt_path_offset(fdt, "/chosen");
+	if (node < 0)
+		return 0;
+
+	prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+	if (!prop || len != sizeof(u64))
+		return 0;
+
+	ret = fdt64_to_cpu(*prop);
+	*prop = 0;
+	return ret;
+}
+
+static u64 kaslr_early_init(void *fdt)
+{
+	u64 seed;
+
+	if (cmdline_has(fdt, "nokaslr"))
+		return 0;
+
+	seed = get_kaslr_seed(fdt);
+	if (!seed) {
+#ifdef CONFIG_ARCH_RANDOM
+		 if (!__early_cpu_has_rndr() ||
+		     !__arm64_rndr((unsigned long *)&seed))
+#endif
+		return 0;
+	}
+
+	/*
+	 * OK, so we are proceeding with KASLR enabled. Calculate a suitable
+	 * kernel image offset from the seed. Let's place the kernel in the
+	 * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
+	 * the lower and upper quarters to avoid colliding with other
+	 * allocations.
+	 */
+	return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
+}
+
+extern const Elf64_Rela rela_start[], rela_end[];
+extern const u64 relr_start[], relr_end[];
+
+static void relocate_kernel(u64 offset)
+{
+	const Elf64_Rela *rela;
+	const u64 *relr;
+	u64 *place;
+
+	for (rela = rela_start; rela < rela_end; rela++) {
+		if (ELF64_R_TYPE(rela->r_info) != R_AARCH64_RELATIVE)
+			continue;
+		place = (u64 *)(rela->r_offset + offset);
+		*place = rela->r_addend + offset;
+	}
+
+	if (!IS_ENABLED(CONFIG_RELR) || !offset)
+		return;
+
+	/*
+	 * Apply RELR relocations.
+	 *
+	 * RELR is a compressed format for storing relative relocations. The
+	 * encoded sequence of entries looks like:
+	 * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
+	 *
+	 * i.e. start with an address, followed by any number of bitmaps. The
+	 * address entry encodes 1 relocation. The subsequent bitmap entries
+	 * encode up to 63 relocations each, at subsequent offsets following
+	 * the last address entry.
+	 *
+	 * The bitmap entries must have 1 in the least significant bit. The
+	 * assumption here is that an address cannot have 1 in lsb. Odd
+	 * addresses are not supported. Any odd addresses are stored in the
+	 * RELA section, which is handled above.
+	 *
+	 * Excluding the least significant bit in the bitmap, each non-zero bit
+	 * in the bitmap represents a relocation to be applied to a
+	 * corresponding machine word that follows the base address word. The
+	 * second least significant bit represents the machine word immediately
+	 * following the initial address, and each bit that follows represents
+	 * the next word, in linear order. As such, a single bitmap can encode
+	 * up to 63 relocations in a 64-bit object.
+	 */
+	for (relr = relr_start; relr < relr_end; relr++) {
+		u64 *p, r = *relr;
+
+		if ((r & 1) == 0) {
+			place = (u64 *)(r + offset);
+			*place++ += offset;
+		} else {
+			for (p = place; r; p++) {
+				r >>= 1;
+				if (r & 1)
+					*p += offset;
+			}
+			place += 63;
+		}
+	}
+}
+
+extern void idmap_cpu_replace_ttbr1(void *pgdir);
+
+static void map_range(pgd_t **pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
+		      int level, pte_t *tbl, bool may_use_cont)
+{
+	u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
+	u64 protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
+	int lshift = (3 - level) * (PAGE_SHIFT - 3);
+	u64 lmask = (PAGE_SIZE << lshift) - 1;
+
+	// Advance tbl to the entry that covers start
+	tbl += (start >> (lshift + PAGE_SHIFT)) % BIT(PAGE_SHIFT - 3);
+
+	// Set the right block/page bits for this level unless we are
+	// clearing the mapping
+	if (protval)
+		protval |= (level < 3) ? PMD_TYPE_SECT : PTE_TYPE_PAGE;
+
+	while (start < end) {
+		u64 next = min((start | lmask) + 1, end);
+
+		if (level < 3 &&
+		    (start & lmask || next & lmask || pa & lmask)) {
+			// This chunk needs a finer grained mapping
+			// Put down a table mapping if necessary and recurse
+			if (pte_none(*tbl)) {
+				*tbl = __pte(__phys_to_pte_val((u64)*pgd) |
+					     PMD_TYPE_TABLE);
+				*pgd += PTRS_PER_PTE;
+			}
+			map_range(pgd, start, next, pa, prot, level + 1,
+				  (pte_t *)__pte_to_phys(*tbl), may_use_cont);
+		} else {
+			// Start a contiguous range if start and pa are
+			// suitably aligned
+			if (((start | pa) & cmask) == 0 && may_use_cont)
+				protval |= PTE_CONT;
+			// Clear the contiguous attribute if the remaining
+			// range does not cover a contiguous block
+			if ((end & ~cmask) <= start)
+				protval &= ~PTE_CONT;
+			// Put down a block or page mapping
+			*tbl = __pte(__phys_to_pte_val(pa) | protval);
+		}
+		pa += next - start;
+		start = next;
+		tbl++;
+	}
+}
+
+static void map_segment(pgd_t **pgd, u64 va_offset, void *start, void *end,
+			pgprot_t prot, bool may_use_cont)
+{
+	map_range(pgd, ((u64)start + va_offset) & ~PAGE_OFFSET,
+		  ((u64)end + va_offset) & ~PAGE_OFFSET, (u64)start,
+		  prot, 4 - CONFIG_PGTABLE_LEVELS, (pte_t *)init_pg_dir,
+		  may_use_cont);
+}
+
+static void unmap_segment(u64 va_offset, void *start, void *end)
+{
+	map_range(NULL, ((u64)start + va_offset) & ~PAGE_OFFSET,
+		  ((u64)end + va_offset) & ~PAGE_OFFSET, (u64)start,
+		  __pgprot(0), 4 - CONFIG_PGTABLE_LEVELS, (pte_t *)init_pg_dir,
+		  false);
+}
+
+/*
+ * Open coded check for BTI, only for use to determine configuration
+ * for early mappings for before the cpufeature code has run.
+ */
+static bool arm64_early_this_cpu_has_bti(void)
+{
+	u64 pfr1;
+
+	if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
+		return false;
+
+	pfr1 = read_sysreg_s(SYS_ID_AA64PFR1_EL1);
+	return cpuid_feature_extract_unsigned_field(pfr1,
+						    ID_AA64PFR1_BT_SHIFT);
+}
+
+static bool arm64_early_this_cpu_has_e0pd(void)
+{
+	u64 mmfr2;
+
+	if (!IS_ENABLED(CONFIG_ARM64_E0PD))
+		return false;
+
+	mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
+	return cpuid_feature_extract_unsigned_field(mmfr2,
+						    ID_AA64MMFR2_E0PD_SHIFT);
+}
+
+static void map_kernel(void *fdt, u64 kaslr_offset, u64 va_offset)
+{
+	pgd_t *pgdp = (void *)init_pg_dir + PAGE_SIZE;
+	pgprot_t text_prot = PAGE_KERNEL_ROX;
+	pgprot_t data_prot = PAGE_KERNEL;
+	pgprot_t prot;
+
+	if (cmdline_has(fdt, "rodata=off"))
+		text_prot = PAGE_KERNEL_EXEC;
+
+	// If we have a CPU that supports BTI and a kernel built for
+	// BTI then mark the kernel executable text as guarded pages
+	// now so we don't have to rewrite the page tables later.
+	if (arm64_early_this_cpu_has_bti() && !cmdline_has(fdt, "arm64.nobti"))
+		text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
+
+	// Assume that any CPU that does not implement E0PD needs KPTI to
+	// ensure that KASLR randomized addresses will not leak. This means we
+	// need to use non-global mappings for the kernel text and data.
+	if (!arm64_early_this_cpu_has_e0pd() && kaslr_offset >= MIN_KIMG_ALIGN) {
+		text_prot = __pgprot_modify(text_prot, PTE_NG, PTE_NG);
+		data_prot = __pgprot_modify(data_prot, PTE_NG, PTE_NG);
+	}
+
+	// Map all code read-write on the first pass for relocation processing
+	prot = IS_ENABLED(CONFIG_RELOCATABLE) ? data_prot : text_prot;
+
+	map_segment(&pgdp, va_offset, _stext, _etext, prot, true);
+	map_segment(&pgdp, va_offset, __start_rodata, __inittext_begin, data_prot, false);
+	map_segment(&pgdp, va_offset, __inittext_begin, __inittext_end, prot, false);
+	map_segment(&pgdp, va_offset, __initdata_begin, __initdata_end, data_prot, false);
+	map_segment(&pgdp, va_offset, _data, init_pg_dir, data_prot, true);
+	// omit [init_pg_dir, _end] - it doesn't need a kernel mapping
+	dsb(ishst);
+
+	idmap_cpu_replace_ttbr1(init_pg_dir);
+
+	if (IS_ENABLED(CONFIG_RELOCATABLE)) {
+		relocate_kernel(kaslr_offset);
+
+		// Unmap the text region before remapping it, to avoid
+		// potential TLB conflicts on the contiguous descriptors. This
+		// assumes that it is permitted to clear the valid bit on a
+		// live descriptor with the CONT bit set.
+		unmap_segment(va_offset, _stext, _etext);
+		dsb(ishst);
+		isb();
+		__tlbi(vmalle1);
+		isb();
+
+		// Remap these segments with different permissions
+		// No new page table allocations should be needed
+		map_segment(NULL, va_offset, _stext, _etext, text_prot, true);
+		map_segment(NULL, va_offset, __inittext_begin, __inittext_end,
+			    text_prot, false);
+		dsb(ishst);
+	}
+}
+
+asmlinkage u64 early_map_kernel(void *fdt)
+{
+	u64 kaslr_seed = 0, kaslr_offset = 0;
+	u64 va_base = KIMAGE_VADDR;
+	u64 pa_base = (u64)&_text;
+
+	// Clear the initial page tables before populating them
+	memset(init_pg_dir, 0, init_pg_end - init_pg_dir);
+
+	// The virtual KASLR displacement modulo 2MiB is decided by the
+	// physical placement of the image, as otherwise, we might not be able
+	// to create the early kernel mapping using 2 MiB block descriptors. So
+	// take the low bits of the KASLR offset from the physical address, and
+	// fill in the high bits from the seed.
+	if (IS_ENABLED(CONFIG_RELOCATABLE)) {
+		kaslr_offset = pa_base & (MIN_KIMG_ALIGN - 1);
+		if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+			kaslr_seed = kaslr_early_init(fdt);
+			kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1);
+		}
+		va_base += kaslr_offset;
+	}
+
+	map_kernel(fdt, kaslr_offset, va_base - pa_base);
+
+	// Return the lower 16 bits of the seed - this will be
+	// used to randomize the linear map
+	return kaslr_seed & U16_MAX;
+}
diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c
deleted file mode 100644
index 6c3855e69395..000000000000
--- a/arch/arm64/kernel/pi/kaslr_early.c
+++ /dev/null
@@ -1,112 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-// Copyright 2022 Google LLC
-// Author: Ard Biesheuvel <ardb@google.com>
-
-// NOTE: code in this file runs *very* early, and is not permitted to use
-// global variables or anything that relies on absolute addressing.
-
-#include <linux/libfdt.h>
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <linux/types.h>
-#include <linux/sizes.h>
-#include <linux/string.h>
-
-#include <asm/archrandom.h>
-#include <asm/memory.h>
-
-/* taken from lib/string.c */
-static char *__strstr(const char *s1, const char *s2)
-{
-	size_t l1, l2;
-
-	l2 = strlen(s2);
-	if (!l2)
-		return (char *)s1;
-	l1 = strlen(s1);
-	while (l1 >= l2) {
-		l1--;
-		if (!memcmp(s1, s2, l2))
-			return (char *)s1;
-		s1++;
-	}
-	return NULL;
-}
-static bool cmdline_contains_nokaslr(const u8 *cmdline)
-{
-	const u8 *str;
-
-	str = __strstr(cmdline, "nokaslr");
-	return str == cmdline || (str > cmdline && *(str - 1) == ' ');
-}
-
-static bool is_kaslr_disabled_cmdline(void *fdt)
-{
-	if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
-		int node;
-		const u8 *prop;
-
-		node = fdt_path_offset(fdt, "/chosen");
-		if (node < 0)
-			goto out;
-
-		prop = fdt_getprop(fdt, node, "bootargs", NULL);
-		if (!prop)
-			goto out;
-
-		if (cmdline_contains_nokaslr(prop))
-			return true;
-
-		if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
-			goto out;
-
-		return false;
-	}
-out:
-	return cmdline_contains_nokaslr(CONFIG_CMDLINE);
-}
-
-static u64 get_kaslr_seed(void *fdt)
-{
-	int node, len;
-	fdt64_t *prop;
-	u64 ret;
-
-	node = fdt_path_offset(fdt, "/chosen");
-	if (node < 0)
-		return 0;
-
-	prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
-	if (!prop || len != sizeof(u64))
-		return 0;
-
-	ret = fdt64_to_cpu(*prop);
-	*prop = 0;
-	return ret;
-}
-
-asmlinkage u64 kaslr_early_init(void *fdt)
-{
-	u64 seed;
-
-	if (is_kaslr_disabled_cmdline(fdt))
-		return 0;
-
-	seed = get_kaslr_seed(fdt);
-	if (!seed) {
-#ifdef CONFIG_ARCH_RANDOM
-		 if (!__early_cpu_has_rndr() ||
-		     !__arm64_rndr((unsigned long *)&seed))
-#endif
-		return 0;
-	}
-
-	/*
-	 * OK, so we are proceeding with KASLR enabled. Calculate a suitable
-	 * kernel image offset from the seed. Let's place the kernel in the
-	 * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
-	 * the lower and upper quarters to avoid colliding with other
-	 * allocations.
-	 */
-	return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
-}
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 5002d869fa7f..60641aa3ac07 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -257,15 +257,15 @@ SECTIONS
 	HYPERVISOR_RELOC_SECTION
 
 	.rela.dyn : ALIGN(8) {
-		__rela_start = .;
+		__pi_rela_start = .;
 		*(.rela .rela*)
-		__rela_end = .;
+		__pi_rela_end = .;
 	}
 
 	.relr.dyn : ALIGN(8) {
-		__relr_start = .;
+		__pi_relr_start = .;
 		*(.relr.dyn)
-		__relr_end = .;
+		__pi_relr_end = .;
 	}
 
 	. = ALIGN(SEGMENT_ALIGN);
@@ -309,11 +309,14 @@ SECTIONS
 
 	BSS_SECTION(SBSS_ALIGN, 0, 0)
 
-	. = ALIGN(PAGE_SIZE);
+	. = ALIGN(SEGMENT_ALIGN);
 	init_pg_dir = .;
 	. += INIT_DIR_SIZE;
 	init_pg_end = .;
 
+	. += SZ_4K;
+	primary_init_stack = .;
+
 	. = ALIGN(SEGMENT_ALIGN);
 	__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
 	_end = .;
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 113a4fedf5b8..4322ddf5e02f 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -196,6 +196,7 @@ SYM_FUNC_START(idmap_cpu_replace_ttbr1)
 
 	ret
 SYM_FUNC_END(idmap_cpu_replace_ttbr1)
+SYM_FUNC_ALIAS(__pi_idmap_cpu_replace_ttbr1, idmap_cpu_replace_ttbr1)
 	.popsection
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-- 
2.35.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  parent reply	other threads:[~2022-07-01 13:08 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-01 13:04 [PATCH v6 0/9] arm64: add support for WXN Ard Biesheuvel
2022-07-01 13:04 ` [PATCH v6 1/9] arm64: kaslr: use an ordinary command line param for nokaslr Ard Biesheuvel
2022-07-01 14:07   ` Mark Brown
2022-07-01 13:04 ` [PATCH v6 2/9] arm64: kaslr: don't pretend KASLR is enabled if offset < MIN_KIMG_ALIGN Ard Biesheuvel
2022-07-01 14:12   ` Mark Brown
2022-07-01 13:04 ` [PATCH v6 3/9] arm64: kaslr: drop special case for ThunderX in kaslr_requires_kpti() Ard Biesheuvel
2022-07-01 13:04 ` [PATCH v6 4/9] arm64: head: allocate more pages for the kernel mapping Ard Biesheuvel
2022-07-01 13:04 ` Ard Biesheuvel [this message]
2022-07-01 13:04 ` [PATCH v6 6/9] arm64: mm: avoid fixmap for early swapper_pg_dir updates Ard Biesheuvel
2022-07-01 13:04 ` [PATCH v6 7/9] arm64: mm: omit redundant remap of kernel image Ard Biesheuvel
2022-07-01 13:04 ` [PATCH v6 8/9] mm: add arch hook to validate mmap() prot flags Ard Biesheuvel
2022-07-01 13:04 ` [PATCH v6 9/9] arm64: mm: add support for WXN memory translation attribute Ard Biesheuvel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220701130444.2945106-6-ardb@kernel.org \
    --to=ardb@kernel.org \
    --cc=anshuman.khandual@arm.com \
    --cc=broonie@kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=keescook@chromium.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=mark.rutland@arm.com \
    --cc=maz@kernel.org \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.