linux-arm-kernel.lists.infradead.org archive mirror
* [PATCHv4 1/7] arm64: Treat handle_arch_irq as a function pointer
       [not found] <1414440752-9411-1-git-send-email-lauraa@codeaurora.org>
@ 2014-10-27 20:12 ` Laura Abbott
  2014-10-28  8:11   ` Ard Biesheuvel
  2014-10-28 10:25   ` Mark Rutland
  2014-10-27 20:12 ` [PATCHv4 2/7] arm64: Switch to ldr for loading the stub vectors Laura Abbott
                   ` (6 subsequent siblings)
  7 siblings, 2 replies; 30+ messages in thread
From: Laura Abbott @ 2014-10-27 20:12 UTC (permalink / raw)
  To: linux-arm-kernel

handle_arch_irq isn't actually text; it's just a function pointer.
It doesn't need to be stored in the text section, and doing so
causes problems if we ever want to make the kernel text read-only.
Declare handle_arch_irq as a proper function pointer stored in
the data section.
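
For context, a minimal C sketch of the resulting shape (illustrative only:
the __init annotation and kernel headers are left out, and the body of
set_handle_irq is paraphrased from the hunk below):

#include <stddef.h>

struct pt_regs;

/* An ordinary C object: the pointer lands in .bss/.data, so the text
 * section never has to be writable in order to update it. */
void (*handle_arch_irq)(struct pt_regs *) = NULL;

void set_handle_irq(void (*handle_irq)(struct pt_regs *))
{
	if (handle_arch_irq)
		return;			/* a handler is already installed */
	handle_arch_irq = handle_irq;
}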

Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
---
v4: style comments
---
 arch/arm64/include/asm/irq.h | 1 -
 arch/arm64/kernel/entry.S    | 6 ++----
 arch/arm64/kernel/irq.c      | 2 ++
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index e1f7ecd..1eebf5b 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -3,7 +3,6 @@
 
 #include <asm-generic/irq.h>
 
-extern void (*handle_arch_irq)(struct pt_regs *);
 extern void migrate_irqs(void);
 extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
 
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 726b910..ee27f39 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -168,7 +168,8 @@ tsk	.req	x28		// current thread_info
  * Interrupt handling.
  */
 	.macro	irq_handler
-	ldr	x1, handle_arch_irq
+	adrp	x1, handle_arch_irq
+	ldr	x1, [x1, #:lo12:handle_arch_irq]
 	mov	x0, sp
 	blr	x1
 	.endm
@@ -695,6 +696,3 @@ ENTRY(sys_rt_sigreturn_wrapper)
 	mov	x0, sp
 	b	sys_rt_sigreturn
 ENDPROC(sys_rt_sigreturn_wrapper)
-
-ENTRY(handle_arch_irq)
-	.quad	0
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 071a6ec..240b75c 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -40,6 +40,8 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	return 0;
 }
 
+void (*handle_arch_irq)(struct pt_regs *) = NULL;
+
 void __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
 {
 	if (handle_arch_irq)
-- 
Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project


* [PATCHv4 2/7] arm64: Switch to ldr for loading the stub vectors
       [not found] <1414440752-9411-1-git-send-email-lauraa@codeaurora.org>
  2014-10-27 20:12 ` [PATCHv4 1/7] arm64: Treat handle_arch_irq as a function pointer Laura Abbott
@ 2014-10-27 20:12 ` Laura Abbott
  2014-10-28  8:23   ` Ard Biesheuvel
                     ` (2 more replies)
  2014-10-27 20:12 ` [PATCHv4 3/7] arm64: Move cpu_resume into the text section Laura Abbott
                   ` (5 subsequent siblings)
  7 siblings, 3 replies; 30+ messages in thread
From: Laura Abbott @ 2014-10-27 20:12 UTC (permalink / raw)
  To: linux-arm-kernel

The hyp stub vectors are currently loaded using adr. This
instruction has a +/- 1MB range for the loading address. If
the alignment for sections is changed the address may be more
than 1MB away, resulting in relocation errors. Switch to using
adrp for getting the address to ensure we aren't affected by the
location of __hyp_stub_vectors.
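
As a rough illustration of the ranges involved (plain C, not kernel code:
the two helpers below only mimic the address arithmetic the instructions
perform), adr encodes a 21-bit signed byte offset from the PC, while adrp
encodes a 21-bit signed 4KB-page offset from the PC's page, with the low
12 bits supplied separately by the #:lo12: add:

#include <stdint.h>

/* adr: 21-bit signed byte offset => +/- 1MB reach */
static uint64_t adr_target(uint64_t pc, int64_t imm21)
{
	return pc + imm21;
}

/* adrp + add #:lo12:sym: 21-bit signed page offset => +/- 4GB reach */
static uint64_t adrp_add_target(uint64_t pc, int64_t imm21, uint16_t lo12)
{
	return (pc & ~0xfffULL) + imm21 * 4096 + lo12;
}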

Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
---
v4: Commit text tweaks
---
 arch/arm64/kernel/head.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 0a6e4f9..10f5cc0 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -331,7 +331,8 @@ CPU_LE(	movk	x0, #0x30d0, lsl #16	)	// Clear EE and E0E on LE systems
 	msr	vttbr_el2, xzr
 
 	/* Hypervisor stub */
-	adr	x0, __hyp_stub_vectors
+	adrp	x0, __hyp_stub_vectors
+	add	x0, x0, #:lo12:__hyp_stub_vectors
 	msr	vbar_el2, x0
 
 	/* spsr */
-- 
Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project


* [PATCHv4 3/7] arm64: Move cpu_resume into the text section
       [not found] <1414440752-9411-1-git-send-email-lauraa@codeaurora.org>
  2014-10-27 20:12 ` [PATCHv4 1/7] arm64: Treat handle_arch_irq as a function pointer Laura Abbott
  2014-10-27 20:12 ` [PATCHv4 2/7] arm64: Switch to ldr for loading the stub vectors Laura Abbott
@ 2014-10-27 20:12 ` Laura Abbott
  2014-10-28  8:10   ` Ard Biesheuvel
  2014-10-27 20:12 ` [PATCHv4 4/7] arm64: Move some head.text functions to executable section Laura Abbott
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 30+ messages in thread
From: Laura Abbott @ 2014-10-27 20:12 UTC (permalink / raw)
  To: linux-arm-kernel

The function cpu_resume currently lives in the .data
section. There's no reason for it to be there since
we can use relative instructions without a problem.
Move it to the text section where it belongs.

Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
---
 arch/arm64/kernel/sleep.S | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index a564b44..5762b16 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -147,12 +147,12 @@ cpu_resume_after_mmu:
 	ret
 ENDPROC(cpu_resume_after_mmu)
 
-	.data
 ENTRY(cpu_resume)
 	bl	el2_setup		// if in EL2 drop to EL1 cleanly
 #ifdef CONFIG_SMP
 	mrs	x1, mpidr_el1
-	adr	x4, mpidr_hash_ptr
+	adrp	x4, mpidr_hash_ptr
+	add	x4, x4, #:lo12:mpidr_hash_ptr
 	ldr	x5, [x4]
 	add	x8, x4, x5		// x8 = struct mpidr_hash phys address
         /* retrieve mpidr_hash members to compute the hash */
@@ -164,14 +164,15 @@ ENTRY(cpu_resume)
 #else
 	mov	x7, xzr
 #endif
-	adr	x0, sleep_save_sp
+	adrp	x0, sleep_save_sp
+	add	x0, x0, #:lo12:sleep_save_sp
 	ldr	x0, [x0, #SLEEP_SAVE_SP_PHYS]
 	ldr	x0, [x0, x7, lsl #3]
 	/* load sp from context */
 	ldr	x2, [x0, #CPU_CTX_SP]
-	adr	x1, sleep_idmap_phys
+	adrp	x1, sleep_idmap_phys
 	/* load physical address of identity map page table in x1 */
-	ldr	x1, [x1]
+	ldr	x1, [x1, #:lo12:sleep_idmap_phys]
 	mov	sp, x2
 	/*
 	 * cpu_do_resume expects x0 to contain context physical address
@@ -181,6 +182,7 @@ ENTRY(cpu_resume)
 	b	cpu_resume_mmu		// Resume MMU, never returns
 ENDPROC(cpu_resume)
 
+	.data
 	.align 3
 mpidr_hash_ptr:
 	/*
-- 
Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project


* [PATCHv4 4/7] arm64: Move some head.text functions to executable section
       [not found] <1414440752-9411-1-git-send-email-lauraa@codeaurora.org>
                   ` (2 preceding siblings ...)
  2014-10-27 20:12 ` [PATCHv4 3/7] arm64: Move cpu_resume into the text section Laura Abbott
@ 2014-10-27 20:12 ` Laura Abbott
  2014-10-28  8:35   ` Ard Biesheuvel
  2014-10-27 20:12 ` [PATCHv4 5/7] arm64: Factor out fixmap initialization from ioremap Laura Abbott
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 30+ messages in thread
From: Laura Abbott @ 2014-10-27 20:12 UTC (permalink / raw)
  To: linux-arm-kernel

The head.text section is intended to be run at early bootup
before any of the regular kernel mappings have been set up.
Parts of head.text may be freed back into the buddy allocator
due to TEXT_OFFSET, so for security reasons this memory must
not be executable. The suspend/resume/hotplug code path,
however, requires some of these head.S functions to run, which
means they need to be executable. Support these conflicting
requirements by moving the few head.text functions that need
to be executable to the text section, which has the appropriate
page table permissions.

Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
---
v4: New approach based on discussions with Mark
---
 arch/arm64/kernel/head.S | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 10f5cc0..dc362da 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -432,12 +432,14 @@ ENTRY(secondary_startup)
 	b	__enable_mmu
 ENDPROC(secondary_startup)
 
+	.pushsection	.text, "ax"
 ENTRY(__secondary_switched)
 	ldr	x0, [x21]			// get secondary_data.stack
 	mov	sp, x0
 	mov	x29, #0
 	b	secondary_start_kernel
 ENDPROC(__secondary_switched)
+	.popsection
 #endif	/* CONFIG_SMP */
 
 /*
@@ -471,11 +473,13 @@ ENDPROC(__enable_mmu)
  * table to map the entire function.
  */
 	.align	4
+	.pushsection	.text, "ax"
 __turn_mmu_on:
 	msr	sctlr_el1, x0
 	isb
 	br	x27
 ENDPROC(__turn_mmu_on)
+	.popsection
 
 /*
  * Calculate the start of physical memory.
-- 
Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project


* [PATCHv4 5/7] arm64: Factor out fixmap initialization from ioremap
       [not found] <1414440752-9411-1-git-send-email-lauraa@codeaurora.org>
                   ` (3 preceding siblings ...)
  2014-10-27 20:12 ` [PATCHv4 4/7] arm64: Move some head.text functions to executable section Laura Abbott
@ 2014-10-27 20:12 ` Laura Abbott
  2014-10-28 14:17   ` Mark Rutland
  2014-10-27 20:12 ` [PATCHv4 6/7] arm64: use fixmap for text patching when text is RO Laura Abbott
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 30+ messages in thread
From: Laura Abbott @ 2014-10-27 20:12 UTC (permalink / raw)
  To: linux-arm-kernel

The fixmap API was originally added to arm64 for
early_ioremap purposes. It can be used for other purposes too,
so move the initialization from ioremap to somewhere more
generic. This makes it obvious where the fixmap is being set
up and allows for a cleaner implementation of __set_fixmap.
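
As a usage sketch (hypothetical caller, not part of this patch): once
early_fixmap_init() has populated the boot page tables, early code can map
a physical page at a fixed virtual slot via the asm-generic fixmap helpers,
which now resolve to the __set_fixmap() added to mmu.c below. FIX_BTMAP_BEGIN
is normally owned by early_ioremap and is borrowed here purely for
illustration:

#include <linux/types.h>
#include <asm/fixmap.h>

static void __iomem *peek_phys_page(phys_addr_t phys)
{
	/* install a pte for this (page-aligned) physical address */
	set_fixmap_io(FIX_BTMAP_BEGIN, phys);
	return (void __iomem *)fix_to_virt(FIX_BTMAP_BEGIN);
}

static void unpeek_phys_page(void)
{
	/* clear the pte again and flush the TLB for that slot */
	clear_fixmap(FIX_BTMAP_BEGIN);
}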

Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
---
 arch/arm64/include/asm/fixmap.h |  7 +--
 arch/arm64/kernel/setup.c       |  3 +-
 arch/arm64/mm/ioremap.c         | 93 ++--------------------------------------
 arch/arm64/mm/mmu.c             | 94 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 103 insertions(+), 94 deletions(-)

diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 5f7bfe6..db26a2f2 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -56,10 +56,11 @@ enum fixed_addresses {
 
 #define FIXMAP_PAGE_IO     __pgprot(PROT_DEVICE_nGnRE)
 
-extern void __early_set_fixmap(enum fixed_addresses idx,
-			       phys_addr_t phys, pgprot_t flags);
+void __init early_fixmap_init(void);
 
-#define __set_fixmap __early_set_fixmap
+#define __early_set_fixmap __set_fixmap
+
+extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
 
 #include <asm-generic/fixmap.h>
 
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 2437196..efc8bc5 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -376,7 +376,8 @@ void __init setup_arch(char **cmdline_p)
 
 	*cmdline_p = boot_command_line;
 
-	early_ioremap_init();
+	early_fixmap_init();
+	early_ioremap_setup();
 
 	parse_early_param();
 
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index fa324bd..cbb99c8 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -103,97 +103,10 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size)
 }
 EXPORT_SYMBOL(ioremap_cache);
 
-static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
-static pte_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
-#endif
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
-static pte_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
-#endif
-
-static inline pud_t * __init early_ioremap_pud(unsigned long addr)
-{
-	pgd_t *pgd;
-
-	pgd = pgd_offset_k(addr);
-	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
-
-	return pud_offset(pgd, addr);
-}
-
-static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
-{
-	pud_t *pud = early_ioremap_pud(addr);
-
-	BUG_ON(pud_none(*pud) || pud_bad(*pud));
-
-	return pmd_offset(pud, addr);
-}
-
-static inline pte_t * __init early_ioremap_pte(unsigned long addr)
-{
-	pmd_t *pmd = early_ioremap_pmd(addr);
-
-	BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
-
-	return pte_offset_kernel(pmd, addr);
-}
-
+/*
+ * Must be called after early_fixmap_init
+ */
 void __init early_ioremap_init(void)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	unsigned long addr = fix_to_virt(FIX_BTMAP_BEGIN);
-
-	pgd = pgd_offset_k(addr);
-	pgd_populate(&init_mm, pgd, bm_pud);
-	pud = pud_offset(pgd, addr);
-	pud_populate(&init_mm, pud, bm_pmd);
-	pmd = pmd_offset(pud, addr);
-	pmd_populate_kernel(&init_mm, pmd, bm_pte);
-
-	/*
-	 * The boot-ioremap range spans multiple pmds, for which
-	 * we are not prepared:
-	 */
-	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
-		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
-
-	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
-		WARN_ON(1);
-		pr_warn("pmd %p != %p\n",
-			pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
-		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
-			fix_to_virt(FIX_BTMAP_BEGIN));
-		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
-			fix_to_virt(FIX_BTMAP_END));
-
-		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
-		pr_warn("FIX_BTMAP_BEGIN:     %d\n",
-			FIX_BTMAP_BEGIN);
-	}
-
 	early_ioremap_setup();
 }
-
-void __init __early_set_fixmap(enum fixed_addresses idx,
-			       phys_addr_t phys, pgprot_t flags)
-{
-	unsigned long addr = __fix_to_virt(idx);
-	pte_t *pte;
-
-	if (idx >= __end_of_fixed_addresses) {
-		BUG();
-		return;
-	}
-
-	pte = early_ioremap_pte(addr);
-
-	if (pgprot_val(flags))
-		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
-	else {
-		pte_clear(&init_mm, addr, pte);
-		flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
-	}
-}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 6894ef3..e92f633 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -28,6 +28,7 @@
 #include <linux/io.h>
 
 #include <asm/cputype.h>
+#include <asm/fixmap.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/sizes.h>
@@ -459,3 +460,96 @@ void vmemmap_free(unsigned long start, unsigned long end)
 {
 }
 #endif	/* CONFIG_SPARSEMEM_VMEMMAP */
+
+static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
+#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+static pte_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
+#endif
+#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+static pte_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
+#endif
+
+static inline pud_t * fixmap_pud(unsigned long addr)
+{
+	pgd_t *pgd = pgd_offset_k(addr);
+
+	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
+
+	return pud_offset(pgd, addr);
+}
+
+static inline pmd_t * fixmap_pmd(unsigned long addr)
+{
+	pud_t *pud = fixmap_pud(addr);
+
+	BUG_ON(pud_none(*pud) || pud_bad(*pud));
+
+	return pmd_offset(pud, addr);
+}
+
+static inline pte_t * fixmap_pte(unsigned long addr)
+{
+	pmd_t *pmd = fixmap_pmd(addr);
+
+	BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
+
+	return pte_offset_kernel(pmd, addr);
+}
+
+void __init early_fixmap_init(void)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long addr = FIXADDR_START;
+
+	pgd = pgd_offset_k(addr);
+	pgd_populate(&init_mm, pgd, bm_pud);
+	pud = pud_offset(pgd, addr);
+	pud_populate(&init_mm, pud, bm_pmd);
+	pmd = pmd_offset(pud, addr);
+	pmd_populate_kernel(&init_mm, pmd, bm_pte);
+
+	/*
+	 * The boot-ioremap range spans multiple pmds, for which
+	 * we are not prepared:
+	 */
+	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+
+	if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
+	     || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
+		WARN_ON(1);
+		pr_warn("pmd %p != %p, %p\n",
+			pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
+			fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
+		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+			fix_to_virt(FIX_BTMAP_BEGIN));
+		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
+			fix_to_virt(FIX_BTMAP_END));
+
+		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
+		pr_warn("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
+	}
+}
+
+void __set_fixmap(enum fixed_addresses idx,
+			       phys_addr_t phys, pgprot_t flags)
+{
+	unsigned long addr = __fix_to_virt(idx);
+	pte_t *pte;
+
+	if (idx >= __end_of_fixed_addresses) {
+		BUG();
+		return;
+	}
+
+	pte = fixmap_pte(addr);
+
+	if (pgprot_val(flags)) {
+		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
+	} else {
+		pte_clear(&init_mm, addr, pte);
+		flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
+	}
+}
-- 
Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project


* [PATCHv4 6/7] arm64: use fixmap for text patching when text is RO
       [not found] <1414440752-9411-1-git-send-email-lauraa@codeaurora.org>
                   ` (4 preceding siblings ...)
  2014-10-27 20:12 ` [PATCHv4 5/7] arm64: Factor out fixmap initialization from ioremap Laura Abbott
@ 2014-10-27 20:12 ` Laura Abbott
  2014-10-27 20:12 ` [PATCHv4 7/7] arm64: add better page protections to arm64 Laura Abbott
  2014-10-27 20:19 ` [PATCHv4 0/7] Better page protections for arm64 Laura Abbott
  7 siblings, 0 replies; 30+ messages in thread
From: Laura Abbott @ 2014-10-27 20:12 UTC (permalink / raw)
  To: linux-arm-kernel

When kernel text is marked as read-only, it cannot be modified directly.
Instead, use a fixmap to modify the text, in a similar manner to
x86 and arm.
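
For illustration, a hedged sketch of a caller going through the new path
(aarch64_insn_gen_nop() is an existing helper; the function below is made
up for the example):

#include <linux/types.h>
#include <asm/insn.h>

/* Overwrite one instruction with a NOP.  aarch64_insn_patch_text_nosync()
 * calls aarch64_insn_write(), which with DEBUG_RODATA maps the target page
 * writable at FIX_TEXT_POKE0 under patch_lock, pokes the word through that
 * alias, tears the mapping down again and then flushes the icache. */
static int patch_one_nop(void *addr)
{
	u32 nop = aarch64_insn_gen_nop();

	return aarch64_insn_patch_text_nosync(addr, nop);
}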

Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
---
v4: Dropped the __flush_dcache_area
---
 arch/arm64/include/asm/fixmap.h |  1 +
 arch/arm64/include/asm/insn.h   |  2 ++
 arch/arm64/kernel/insn.c        | 72 +++++++++++++++++++++++++++++++++++++++--
 arch/arm64/kernel/jump_label.c  |  2 +-
 4 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index db26a2f2..2cd4b0d 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -48,6 +48,7 @@ enum fixed_addresses {
 
 	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
 	FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
+	FIX_TEXT_POKE0,
 	__end_of_fixed_addresses
 };
 
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 56a9e63..f66853b 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -282,6 +282,7 @@ bool aarch64_insn_is_nop(u32 insn);
 
 int aarch64_insn_read(void *addr, u32 *insnp);
 int aarch64_insn_write(void *addr, u32 insn);
+int aarch64_insn_write_early(void *addr, u32 insn);
 enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
 u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
 				  u32 insn, u64 imm);
@@ -352,6 +353,7 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
 bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
 
 int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
+int __aarch64_insn_patch_text_nosync(void *addr, u32 insn, bool early);
 int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
 int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
 #endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index e007714..07affef 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -19,12 +19,15 @@
 #include <linux/bitops.h>
 #include <linux/compiler.h>
 #include <linux/kernel.h>
+#include <linux/mm.h>
 #include <linux/smp.h>
+#include <linux/spinlock.h>
 #include <linux/stop_machine.h>
 #include <linux/uaccess.h>
 
 #include <asm/cacheflush.h>
 #include <asm/debug-monitors.h>
+#include <asm/fixmap.h>
 #include <asm/insn.h>
 
 #define AARCH64_INSN_SF_BIT	BIT(31)
@@ -72,6 +75,36 @@ bool __kprobes aarch64_insn_is_nop(u32 insn)
 	}
 }
 
+static DEFINE_SPINLOCK(patch_lock);
+
+static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
+{
+	unsigned long uintaddr = (uintptr_t) addr;
+	bool module = !core_kernel_text(uintaddr);
+	struct page *page;
+
+	if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX))
+		page = vmalloc_to_page(addr);
+	else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA))
+		page = virt_to_page(addr);
+	else
+		return addr;
+
+	if (flags)
+		spin_lock_irqsave(&patch_lock, *flags);
+
+	set_fixmap(fixmap, page_to_phys(page));
+
+	return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK));
+}
+
+static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
+{
+	clear_fixmap(fixmap);
+
+	if (flags)
+		spin_unlock_irqrestore(&patch_lock, *flags);
+}
 /*
  * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always
  * little-endian.
@@ -88,10 +121,34 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
 	return ret;
 }
 
+static int __kprobes __aarch64_insn_write(void *addr, u32 insn, bool patch)
+{
+	void *waddr = addr;
+	unsigned long flags;
+	int ret;
+
+	if (patch)
+		waddr = patch_map(addr, FIX_TEXT_POKE0, &flags);
+
+	ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE);
+
+	if (waddr != addr)
+		patch_unmap(FIX_TEXT_POKE0, &flags);
+
+	return ret;
+}
+
 int __kprobes aarch64_insn_write(void *addr, u32 insn)
 {
 	insn = cpu_to_le32(insn);
-	return probe_kernel_write(addr, &insn, AARCH64_INSN_SIZE);
+	return __aarch64_insn_write(addr, insn, true);
+}
+
+int __kprobes aarch64_insn_write_early(void *addr, u32 insn)
+{
+	insn = cpu_to_le32(insn);
+	return __aarch64_insn_write(addr, insn, false);
+
 }
 
 static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn)
@@ -124,7 +181,7 @@ bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn)
 	       __aarch64_insn_hotpatch_safe(new_insn);
 }
 
-int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
+int __kprobes __aarch64_insn_patch_text_nosync(void *addr, u32 insn, bool early)
 {
 	u32 *tp = addr;
 	int ret;
@@ -133,7 +190,11 @@ int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
 	if ((uintptr_t)tp & 0x3)
 		return -EINVAL;
 
-	ret = aarch64_insn_write(tp, insn);
+	if (early)
+		ret = aarch64_insn_write_early(tp, insn);
+	else
+		ret = aarch64_insn_write(tp, insn);
+
 	if (ret == 0)
 		flush_icache_range((uintptr_t)tp,
 				   (uintptr_t)tp + AARCH64_INSN_SIZE);
@@ -141,6 +202,11 @@ int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
 	return ret;
 }
 
+int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
+{
+	return __aarch64_insn_patch_text_nosync(addr, insn, false);
+}
+
 struct aarch64_insn_patch {
 	void		**text_addrs;
 	u32		*new_insns;
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index 263a166..9ac30bb 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -38,7 +38,7 @@ static void __arch_jump_label_transform(struct jump_entry *entry,
 	}
 
 	if (is_static)
-		aarch64_insn_patch_text_nosync(addr, insn);
+		__aarch64_insn_patch_text_nosync(addr, insn, true);
 	else
 		aarch64_insn_patch_text(&addr, &insn, 1);
 }
-- 
Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project


* [PATCHv4 7/7] arm64: add better page protections to arm64
       [not found] <1414440752-9411-1-git-send-email-lauraa@codeaurora.org>
                   ` (5 preceding siblings ...)
  2014-10-27 20:12 ` [PATCHv4 6/7] arm64: use fixmap for text patching when text is RO Laura Abbott
@ 2014-10-27 20:12 ` Laura Abbott
  2014-10-28 11:29   ` Steve Capper
  2014-10-28 14:20   ` Mark Rutland
  2014-10-27 20:19 ` [PATCHv4 0/7] Better page protections for arm64 Laura Abbott
  7 siblings, 2 replies; 30+ messages in thread
From: Laura Abbott @ 2014-10-27 20:12 UTC (permalink / raw)
  To: linux-arm-kernel

Add page protections for arm64 similar to those in arm.
This is for security reasons to prevent certain classes
of exploits. The current method:

- Map all memory as either RWX or RW. We round out to the nearest
  section boundary to avoid creating page tables before everything is
  mapped (a simplified sketch of this step follows the list)
- Once everything is mapped, if either end of the RWX section should
  not be X, we split the PMD and remap as necessary
- When initmem is to be freed, we change the permissions back to
  RW (using stop machine if necessary to flush the TLB)
- If CONFIG_DEBUG_RODATA is set, the read-only sections are marked
  read-only in the page tables.
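
A simplified sketch of the first step (illustrative only: it collapses the
partial-overlap handling that the real __map_memblock() in this patch does
into a single executable mapping):

/* Memory overlapping the section-rounded kernel image stays RWX for now;
 * everything else is mapped RW/NX from the start. */
static void map_one_memblock(phys_addr_t start, phys_addr_t end)
{
	phys_addr_t x_start = round_down(__pa(_stext), SECTION_SIZE);
	phys_addr_t x_end   = round_up(__pa(__init_end), SECTION_SIZE);

	if (end <= x_start || start >= x_end)
		create_mapping(start, __phys_to_virt(start), end - start,
			       PROT_SECT_NORMAL, PAGE_KERNEL);
	else
		create_mapping(start, __phys_to_virt(start), end - start,
			       PROT_SECT_NORMAL_EXEC, PAGE_KERNEL_EXEC);
}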

Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
---
v4: Combined the Kconfig options
---
 arch/arm64/Kconfig.debug            |  23 +++
 arch/arm64/include/asm/cacheflush.h |   4 +
 arch/arm64/kernel/vmlinux.lds.S     |  17 ++
 arch/arm64/mm/init.c                |   1 +
 arch/arm64/mm/mm.h                  |   2 +
 arch/arm64/mm/mmu.c                 | 303 +++++++++++++++++++++++++++++++-----
 6 files changed, 314 insertions(+), 36 deletions(-)

diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index 0a12933..1b25015 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -54,4 +54,27 @@ config DEBUG_SET_MODULE_RONX
           against certain classes of kernel exploits.
           If in doubt, say "N".
 
+config DEBUG_RODATA
+	bool "Make kernel text and rodata read-only"
+	help
+	  If this is set, kernel text and rodata will be made read-only. This
+	  is to help catch accidental or malicious attempts to change the
+	  kernel's executable code. Additionally splits rodata from kernel
+	  text so it can be made explicitly non-executable.
+
+          If in doubt, say Y
+
+config DEBUG_ALIGN_RODATA
+	depends on DEBUG_RODATA
+	bool "Align linker sections up to SECTION_SIZE"
+	help
+	  If this option is enabled, sections that may potentially be marked as
+	  read only or non-executable will be aligned up to the section size of
+	  the kernel. This prevents sections from being split into pages and
+	  avoids a potential TLB penalty. The downside is an increase in
+	  alignment and potentially wasted space. Turn on this option if
+	  performance is more important than memory pressure.
+
+	  If in doubt, say N
+
 endmenu
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 689b637..0014207 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -152,4 +152,8 @@ int set_memory_ro(unsigned long addr, int numpages);
 int set_memory_rw(unsigned long addr, int numpages);
 int set_memory_x(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
+
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void);
+#endif
 #endif
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index edf8715..83424ad 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -8,6 +8,7 @@
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/page.h>
+#include <asm/pgtable.h>
 
 #include "image.h"
 
@@ -54,6 +55,9 @@ SECTIONS
 		_text = .;
 		HEAD_TEXT
 	}
+#ifdef CONFIG_DEBUG_ALIGN_RODATA
+	. = ALIGN(1<<SECTION_SHIFT);
+#endif
 	.text : {			/* Real text segment		*/
 		_stext = .;		/* Text and read-only data	*/
 			__exception_text_start = .;
@@ -70,19 +74,32 @@ SECTIONS
 		*(.got)			/* Global offset table		*/
 	}
 
+#ifdef CONFIG_DEBUG_ALIGN_RODATA
+	. = ALIGN(1<<SECTION_SHIFT);
+#endif
 	RO_DATA(PAGE_SIZE)
 	EXCEPTION_TABLE(8)
 	NOTES
 	_etext = .;			/* End of text and rodata section */
 
+#ifdef CONFIG_DEBUG_ALIGN_RODATA
+	. = ALIGN(1<<SECTION_SHIFT);
+#else
 	. = ALIGN(PAGE_SIZE);
+#endif
 	__init_begin = .;
 
 	INIT_TEXT_SECTION(8)
 	.exit.text : {
 		ARM_EXIT_KEEP(EXIT_TEXT)
 	}
+
+#ifdef CONFIG_DEBUG_ALIGN_RODATA
+	. = ALIGN(1<<SECTION_SHIFT);
+	__init_data_begin = .;
+#else
 	. = ALIGN(16);
+#endif
 	.init.data : {
 		INIT_DATA
 		INIT_SETUP(16)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 494297c..61f44c7 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -324,6 +324,7 @@ void __init mem_init(void)
 
 void free_initmem(void)
 {
+	fixup_init();
 	free_initmem_default(0);
 }
 
diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h
index d519f4f..82347d8 100644
--- a/arch/arm64/mm/mm.h
+++ b/arch/arm64/mm/mm.h
@@ -1,2 +1,4 @@
 extern void __init bootmem_init(void);
 extern void __init arm64_swiotlb_init(void);
+
+void fixup_init(void);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index e92f633..0428294 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -26,6 +26,7 @@
 #include <linux/memblock.h>
 #include <linux/fs.h>
 #include <linux/io.h>
+#include <linux/stop_machine.h>
 
 #include <asm/cputype.h>
 #include <asm/fixmap.h>
@@ -137,17 +138,55 @@ static void __init *early_alloc(unsigned long sz)
 	return ptr;
 }
 
-static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
+/*
+ * remap a PMD into pages
+ */
+static noinline void __ref split_pmd(pmd_t *pmd, bool early)
+{
+	pte_t *pte, *start_pte;
+	unsigned long pfn;
+	int i = 0;
+
+	if (early)
+		start_pte = pte = early_alloc(PTRS_PER_PTE*sizeof(pte_t));
+	else
+		start_pte = pte = (pte_t *)__get_free_page(PGALLOC_GFP);
+
+	BUG_ON(!pte);
+
+	pfn = pmd_pfn(*pmd);
+
+	do {
+		/*
+		 * Need to have the least restrictive permissions available;
+		 * permissions will be fixed up later.
+		 */
+		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+		pfn++;
+	} while (pte++, i++, i < PTRS_PER_PTE);
+
+
+	__pmd_populate(pmd, __pa(start_pte), PMD_TYPE_TABLE);
+	flush_tlb_all();
+}
+
+static void __ref alloc_init_pte(pmd_t *pmd, unsigned long addr,
 				  unsigned long end, unsigned long pfn,
-				  pgprot_t prot)
+				  pgprot_t prot, bool early)
 {
 	pte_t *pte;
 
 	if (pmd_none(*pmd)) {
-		pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t));
+		if (early)
+			pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t));
+		else
+			pte = (pte_t *)__get_free_page(PGALLOC_GFP);
+		BUG_ON(!pte);
 		__pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
 	}
-	BUG_ON(pmd_bad(*pmd));
+
+	if (pmd_bad(*pmd))
+		split_pmd(pmd, early);
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
@@ -156,29 +195,52 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 }
 
-static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
+void __ref split_pud(pud_t *old_pud, pmd_t *pmd)
+{
+	unsigned long pfn = pud_pfn(*old_pud);
+        const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN |
+                              PMD_SECT_RDONLY | PMD_SECT_PROT_NONE |
+                              PMD_SECT_VALID;
+	pgprot_t prot = __pgprot(pud_val(*old_pud) & mask);
+	int i = 0;
+
+	do {
+		set_pmd(pmd, pfn_pmd(pfn, prot));
+		pfn++;
+	} while (pmd++, i++, i < PTRS_PER_PMD);
+}
+
+static void __ref alloc_init_pmd(pud_t *pud, unsigned long addr,
 				  unsigned long end, phys_addr_t phys,
-				  int map_io)
+				  pgprot_t sect_prot, pgprot_t pte_prot,
+				  bool early, int map_io)
 {
 	pmd_t *pmd;
 	unsigned long next;
-	pmdval_t prot_sect;
-	pgprot_t prot_pte;
 
 	if (map_io) {
-		prot_sect = PROT_SECT_DEVICE_nGnRE;
-		prot_pte = __pgprot(PROT_DEVICE_nGnRE);
-	} else {
-		prot_sect = PROT_SECT_NORMAL_EXEC;
-		prot_pte = PAGE_KERNEL_EXEC;
+		sect_prot = PROT_SECT_DEVICE_nGnRE;
+		pte_prot = __pgprot(PROT_DEVICE_nGnRE);
 	}
 
 	/*
 	 * Check for initial section mappings in the pgd/pud and remove them.
 	 */
 	if (pud_none(*pud) || pud_bad(*pud)) {
-		pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t));
+		if (early)
+			pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t));
+		else
+			pmd = pmd_alloc_one(&init_mm, addr);
+		BUG_ON(!pmd);
+		if (pud_sect(*pud)) {
+			/*
+			 * need to have the 1G of mappings continue to be
+			 * present
+			 */
+			split_pud(pud, pmd);
+		}
 		pud_populate(&init_mm, pud, pmd);
+		flush_tlb_all();
 	}
 
 	pmd = pmd_offset(pud, addr);
@@ -186,8 +248,8 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
 		next = pmd_addr_end(addr, end);
 		/* try section mapping first */
 		if (((addr | next | phys) & ~SECTION_MASK) == 0) {
-			pmd_t old_pmd =*pmd;
-			set_pmd(pmd, __pmd(phys | prot_sect));
+			pmd_t old_pmd = *pmd;
+			set_pmd(pmd, __pmd(phys | sect_prot));
 			/*
 			 * Check for previous table entries created during
 			 * boot (__create_page_tables) and flush them.
@@ -196,15 +258,16 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
 				flush_tlb_all();
 		} else {
 			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
-				       prot_pte);
+					pte_prot, early);
 		}
 		phys += next - addr;
 	} while (pmd++, addr = next, addr != end);
 }
 
-static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
+static void __ref alloc_init_pud(pgd_t *pgd, unsigned long addr,
 				  unsigned long end, unsigned long phys,
-				  int map_io)
+				  pgprot_t sect_prot, pgprot_t pte_prot,
+				  bool early, int map_io)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -222,10 +285,10 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
 		/*
 		 * For 4K granule only, attempt to put down a 1GB block
 		 */
-		if (!map_io && (PAGE_SHIFT == 12) &&
+		if (!map_io && early && (PAGE_SHIFT == 12) &&
 		    ((addr | next | phys) & ~PUD_MASK) == 0) {
 			pud_t old_pud = *pud;
-			set_pud(pud, __pud(phys | PROT_SECT_NORMAL_EXEC));
+			set_pud(pud, __pud(phys | sect_prot));
 
 			/*
 			 * If we have an old value for a pud, it will
@@ -240,7 +303,8 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
 				flush_tlb_all();
 			}
 		} else {
-			alloc_init_pmd(pud, addr, next, phys, map_io);
+			alloc_init_pmd(pud, addr, next, phys, sect_prot,
+					pte_prot, early, map_io);
 		}
 		phys += next - addr;
 	} while (pud++, addr = next, addr != end);
@@ -250,9 +314,11 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
  * Create the page directory entries and any necessary page tables for the
  * mapping specified by 'md'.
  */
-static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
-				    unsigned long virt, phys_addr_t size,
-				    int map_io)
+static void __ref __create_mapping(pgd_t *pgd, phys_addr_t phys,
+				  unsigned long virt,
+				  phys_addr_t size,
+				  pgprot_t sect_prot, pgprot_t pte_prot,
+				  int map_io, bool early)
 {
 	unsigned long addr, length, end, next;
 
@@ -262,31 +328,140 @@ static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
 	end = addr + length;
 	do {
 		next = pgd_addr_end(addr, end);
-		alloc_init_pud(pgd, addr, next, phys, map_io);
+		alloc_init_pud(pgd, addr, next, phys, sect_prot, pte_prot,
+				early, map_io);
 		phys += next - addr;
 	} while (pgd++, addr = next, addr != end);
 }
 
-static void __init create_mapping(phys_addr_t phys, unsigned long virt,
-				  phys_addr_t size)
+void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
+{
+	if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) {
+		pr_warn("BUG: not creating id mapping for %pa\n", &addr);
+		return;
+	}
+	__create_mapping(&idmap_pg_dir[pgd_index(addr)],
+			 addr, addr, size, PROT_SECT_NORMAL_EXEC,
+			 PAGE_KERNEL_EXEC, map_io, false);
+}
+
+static inline pmd_t *pmd_off_k(unsigned long virt)
+{
+	return pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt);
+}
+
+#ifdef CONFIG_EARLY_PRINTK
+/*
+ * Create an early I/O mapping using the pgd/pmd entries already populated
+ * in head.S as this function is called too early to allocate any memory. The
+ * mapping size is 2MB with 4KB pages or 64KB with 64KB pages.
+ */
+void __iomem * __init early_io_map(phys_addr_t phys, unsigned long virt)
+{
+	unsigned long size, mask;
+	bool page64k = IS_ENABLED(CONFIG_ARM64_64K_PAGES);
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	/*
+	 * No early pte entries with !ARM64_64K_PAGES configuration, so using
+	 * sections (pmd).
+	 */
+	size = page64k ? PAGE_SIZE : SECTION_SIZE;
+	mask = ~(size - 1);
+
+	pgd = pgd_offset_k(virt);
+	pud = pud_offset(pgd, virt);
+	if (pud_none(*pud))
+		return NULL;
+	pmd = pmd_offset(pud, virt);
+
+	if (page64k) {
+		if (pmd_none(*pmd))
+			return NULL;
+		pte = pte_offset_kernel(pmd, virt);
+		set_pte(pte, __pte((phys & mask) | PROT_DEVICE_nGnRE));
+	} else {
+		set_pmd(pmd, __pmd((phys & mask) | PROT_SECT_DEVICE_nGnRE));
+	}
+
+	return (void __iomem *)((virt & mask) + (phys & ~mask));
+}
+#endif
+
+static void __ref create_mapping(phys_addr_t phys, unsigned long virt,
+				  phys_addr_t size,
+				  pgprot_t sect_prot, pgprot_t pte_prot)
 {
 	if (virt < VMALLOC_START) {
 		pr_warn("BUG: not creating mapping for %pa@0x%016lx - outside kernel range\n",
 			&phys, virt);
 		return;
 	}
-	__create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt, size, 0);
+
+	return __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt,
+				size, sect_prot, pte_prot, 0, true);
 }
 
-void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
+static void __ref create_mapping_late(phys_addr_t phys, unsigned long virt,
+				  phys_addr_t size,
+				  pgprot_t sect_prot, pgprot_t pte_prot)
 {
-	if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) {
-		pr_warn("BUG: not creating id mapping for %pa\n", &addr);
+	if (virt < VMALLOC_START) {
+		pr_warn("BUG: not creating mapping for %pa@0x%016lx - outside kernel range\n",
+			&phys, virt);
 		return;
 	}
-	__create_mapping(&idmap_pg_dir[pgd_index(addr)],
-			 addr, addr, size, map_io);
+
+	return __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt,
+				size, sect_prot, pte_prot, 0, false);
+}
+
+#ifdef CONFIG_DEBUG_RODATA
+static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
+{
+	/*
+	 * Set up the executable regions using the existing section mappings
+	 * for now. This will get more fine grained later once all memory
+	 * is mapped
+	 */
+	unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
+	unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
+
+	if (end < kernel_x_start) {
+		create_mapping(start, __phys_to_virt(start),
+			end - start, PROT_SECT_NORMAL, PAGE_KERNEL);
+	} else if (start >= kernel_x_end) {
+		create_mapping(start, __phys_to_virt(start),
+			end - start, PROT_SECT_NORMAL, PAGE_KERNEL);
+	} else {
+		if (start < kernel_x_start)
+			create_mapping(start, __phys_to_virt(start),
+				kernel_x_start - start,
+				PROT_SECT_NORMAL,
+				PAGE_KERNEL);
+		create_mapping(kernel_x_start,
+				__phys_to_virt(kernel_x_start),
+				kernel_x_end - kernel_x_start,
+				PROT_SECT_NORMAL_EXEC, PAGE_KERNEL_EXEC);
+		if (kernel_x_end < end)
+			create_mapping(kernel_x_end,
+				__phys_to_virt(kernel_x_end),
+				end - kernel_x_end,
+				PROT_SECT_NORMAL,
+				PAGE_KERNEL);
+	}
+
+}
+#else
+static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
+{
+	create_mapping(start, __phys_to_virt(start), end - start,
+			PROT_SECT_NORMAL_EXEC, PAGE_KERNEL_EXEC);
 }
+#endif
 
 static void __init map_mem(void)
 {
@@ -328,14 +503,69 @@ static void __init map_mem(void)
 			memblock_set_current_limit(limit);
 		}
 #endif
-
-		create_mapping(start, __phys_to_virt(start), end - start);
+		__map_memblock(start, end);
 	}
 
 	/* Limit no longer required. */
 	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
 }
 
+void __init fixup_executable(void)
+{
+#ifdef CONFIG_DEBUG_RODATA
+	/* now that we are actually fully mapped, make the start/end more fine grained */
+	if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) {
+		unsigned long aligned_start = round_down(__pa(_stext),
+							SECTION_SIZE);
+
+		create_mapping(aligned_start, __phys_to_virt(aligned_start),
+				__pa(_stext) - aligned_start,
+				PROT_SECT_NORMAL,
+				PAGE_KERNEL);
+	}
+
+	if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) {
+		unsigned long aligned_end = round_up(__pa(__init_end),
+							SECTION_SIZE);
+		create_mapping(__pa(__init_end), (unsigned long)__init_end,
+				aligned_end - __pa(__init_end),
+				PROT_SECT_NORMAL,
+				PAGE_KERNEL);
+	}
+#endif
+}
+
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void)
+{
+	create_mapping_late(__pa(_stext), (unsigned long)_stext,
+				(unsigned long)_etext - (unsigned long)_stext,
+				PROT_SECT_NORMAL_EXEC | PMD_SECT_RDONLY,
+				PAGE_KERNEL_EXEC | PTE_RDONLY);
+
+}
+#endif
+
+static int __flush_mappings(void *unused)
+{
+	flush_tlb_kernel_range((unsigned long)__init_begin,
+				(unsigned long)__init_end);
+	return 0;
+}
+
+void __ref fixup_init(void)
+{
+	phys_addr_t start = __pa(__init_begin);
+	phys_addr_t end = __pa(__init_end);
+
+	create_mapping_late(start, (unsigned long)__init_begin,
+			end - start,
+			PROT_SECT_NORMAL,
+			PAGE_KERNEL);
+	if (!IS_ALIGNED(start, SECTION_SIZE) || !IS_ALIGNED(end, SECTION_SIZE))
+		stop_machine(__flush_mappings, NULL, NULL);
+}
+
 /*
  * paging_init() sets up the page tables, initialises the zone memory
  * maps and sets up the zero page.
@@ -345,6 +575,7 @@ void __init paging_init(void)
 	void *zero_page;
 
 	map_mem();
+	fixup_executable();
 
 	/*
 	 * Finally flush the caches and tlb to ensure that we're in a
-- 
Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project


* [PATCHv4 0/7] Better page protections for arm64
       [not found] <1414440752-9411-1-git-send-email-lauraa@codeaurora.org>
                   ` (6 preceding siblings ...)
  2014-10-27 20:12 ` [PATCHv4 7/7] arm64: add better page protections to arm64 Laura Abbott
@ 2014-10-27 20:19 ` Laura Abbott
  7 siblings, 0 replies; 30+ messages in thread
From: Laura Abbott @ 2014-10-27 20:19 UTC (permalink / raw)
  To: linux-arm-kernel

Apologies for the spam for those on the direct mail; I had a typo in the mailing list name.

On 10/27/2014 1:12 PM, Laura Abbott wrote:
> Hi,
>
> This is v4 of the series to add stricter page protections for arm64.
> The goal is to have text be RO/X and everything else be RW/NX.
> More testing is still needed all around, especially with additional
> page table levels. Some of this work may conflict with ongoing work
> to support ARCH_DEBUG_PAGEALLOC on arm64[1] so depending on where
> that goes I may need to rebase or re-work the code.
>
>
> Thanks,
> Laura
>
> [1] http://www.spinics.net/lists/arm-kernel/msg372216.html
>
> Laura Abbott (7):
>    arm64: Treat handle_arch_irq as a function pointer
>    arm64: Switch to ldr for loading the stub vectors
>    arm64: Move cpu_resume into the text section
>    arm64: Move some head.text functions to executable section
>    arm64: Factor out fixmap initialization from ioremap
>    arm64: use fixmap for text patching when text is RO
>    arm64: add better page protections to arm64
>
>   arch/arm64/Kconfig.debug            |  23 +++
>   arch/arm64/include/asm/cacheflush.h |   4 +
>   arch/arm64/include/asm/fixmap.h     |   8 +-
>   arch/arm64/include/asm/insn.h       |   2 +
>   arch/arm64/include/asm/irq.h        |   1 -
>   arch/arm64/kernel/entry.S           |   6 +-
>   arch/arm64/kernel/head.S            |   7 +-
>   arch/arm64/kernel/insn.c            |  72 ++++++-
>   arch/arm64/kernel/irq.c             |   2 +
>   arch/arm64/kernel/jump_label.c      |   2 +-
>   arch/arm64/kernel/setup.c           |   3 +-
>   arch/arm64/kernel/sleep.S           |  12 +-
>   arch/arm64/kernel/vmlinux.lds.S     |  17 ++
>   arch/arm64/mm/init.c                |   1 +
>   arch/arm64/mm/ioremap.c             |  93 +--------
>   arch/arm64/mm/mm.h                  |   2 +
>   arch/arm64/mm/mmu.c                 | 397 ++++++++++++++++++++++++++++++++----
>   17 files changed, 507 insertions(+), 145 deletions(-)
>


-- 
Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


* [PATCHv4 3/7] arm64: Move cpu_resume into the text section
  2014-10-27 20:12 ` [PATCHv4 3/7] arm64: Move cpu_resume into the text section Laura Abbott
@ 2014-10-28  8:10   ` Ard Biesheuvel
  2014-10-28  8:22     ` Ard Biesheuvel
  0 siblings, 1 reply; 30+ messages in thread
From: Ard Biesheuvel @ 2014-10-28  8:10 UTC (permalink / raw)
  To: linux-arm-kernel

On 27 October 2014 21:12, Laura Abbott <lauraa@codeaurora.org> wrote:
> The function cpu_resume currently lives in the .data
> section. There's no reason for it to be there since
> we can use relative instructions without a problem.
> Move it to the text section where it belongs.
>
> Reviewed-by: Kees Cook <keescook@chromium.org>
> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
> ---
>  arch/arm64/kernel/sleep.S | 12 +++++++-----
>  1 file changed, 7 insertions(+), 5 deletions(-)
>

Hi Laura,

Apologies for waiting until v4 to bring this up, but I have some minor comments.

> diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
> index a564b44..5762b16 100644
> --- a/arch/arm64/kernel/sleep.S
> +++ b/arch/arm64/kernel/sleep.S
> @@ -147,12 +147,12 @@ cpu_resume_after_mmu:
>         ret
>  ENDPROC(cpu_resume_after_mmu)
>
> -       .data
>  ENTRY(cpu_resume)
>         bl      el2_setup               // if in EL2 drop to EL1 cleanly
>  #ifdef CONFIG_SMP
>         mrs     x1, mpidr_el1
> -       adr     x4, mpidr_hash_ptr
> +       adrp    x4, mpidr_hash_ptr
> +       add     x4, x4, #:lo12:mpidr_hash_ptr

Instead of this change, you could put mpidr_hash_ptr itself in .text as well.

>         ldr     x5, [x4]
>         add     x8, x4, x5              // x8 = struct mpidr_hash phys address
>          /* retrieve mpidr_hash members to compute the hash */
> @@ -164,14 +164,15 @@ ENTRY(cpu_resume)
>  #else
>         mov     x7, xzr
>  #endif
> -       adr     x0, sleep_save_sp
> +       adrp    x0, sleep_save_sp
> +       add     x0, x0, #:lo12:sleep_save_sp
>         ldr     x0, [x0, #SLEEP_SAVE_SP_PHYS]

Nit:
ldr x0, [x0, #:lo12:sleep_save_sp + SLEEP_SAVE_SP_PHYS]

>         ldr     x0, [x0, x7, lsl #3]
>         /* load sp from context */
>         ldr     x2, [x0, #CPU_CTX_SP]
> -       adr     x1, sleep_idmap_phys
> +       adrp    x1, sleep_idmap_phys
>         /* load physical address of identity map page table in x1 */
> -       ldr     x1, [x1]
> +       ldr     x1, [x1, #:lo12:sleep_idmap_phys]
>         mov     sp, x2
>         /*
>          * cpu_do_resume expects x0 to contain context physical address
> @@ -181,6 +182,7 @@ ENTRY(cpu_resume)
>         b       cpu_resume_mmu          // Resume MMU, never returns
>  ENDPROC(cpu_resume)
>
> +       .data
>         .align 3
>  mpidr_hash_ptr:
>         /*

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>


* [PATCHv4 1/7] arm64: Treat handle_arch_irq as a function pointer
  2014-10-27 20:12 ` [PATCHv4 1/7] arm64: Treat handle_arch_irq as a function pointer Laura Abbott
@ 2014-10-28  8:11   ` Ard Biesheuvel
  2014-10-28 10:25   ` Mark Rutland
  1 sibling, 0 replies; 30+ messages in thread
From: Ard Biesheuvel @ 2014-10-28  8:11 UTC (permalink / raw)
  To: linux-arm-kernel

On 27 October 2014 21:12, Laura Abbott <lauraa@codeaurora.org> wrote:
> handle_arch_irq isn't actually text, it's just a function pointer.
> It doesn't need to be stored in the text section and doing so
> causes problems if we ever want to make the kernel text read only.
> Declare handle_arch_irq as a proper function pointer stored in
> the data section.
>
> Reviewed-by: Kees Cook <keescook@chromium.org>
> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

> ---
> v4: style comments
> ---
>  arch/arm64/include/asm/irq.h | 1 -
>  arch/arm64/kernel/entry.S    | 6 ++----
>  arch/arm64/kernel/irq.c      | 2 ++
>  3 files changed, 4 insertions(+), 5 deletions(-)
>
> diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
> index e1f7ecd..1eebf5b 100644
> --- a/arch/arm64/include/asm/irq.h
> +++ b/arch/arm64/include/asm/irq.h
> @@ -3,7 +3,6 @@
>
>  #include <asm-generic/irq.h>
>
> -extern void (*handle_arch_irq)(struct pt_regs *);
>  extern void migrate_irqs(void);
>  extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
>
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 726b910..ee27f39 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -168,7 +168,8 @@ tsk .req    x28             // current thread_info
>   * Interrupt handling.
>   */
>         .macro  irq_handler
> -       ldr     x1, handle_arch_irq
> +       adrp    x1, handle_arch_irq
> +       ldr     x1, [x1, #:lo12:handle_arch_irq]
>         mov     x0, sp
>         blr     x1
>         .endm
> @@ -695,6 +696,3 @@ ENTRY(sys_rt_sigreturn_wrapper)
>         mov     x0, sp
>         b       sys_rt_sigreturn
>  ENDPROC(sys_rt_sigreturn_wrapper)
> -
> -ENTRY(handle_arch_irq)
> -       .quad   0
> diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
> index 071a6ec..240b75c 100644
> --- a/arch/arm64/kernel/irq.c
> +++ b/arch/arm64/kernel/irq.c
> @@ -40,6 +40,8 @@ int arch_show_interrupts(struct seq_file *p, int prec)
>         return 0;
>  }
>
> +void (*handle_arch_irq)(struct pt_regs *) = NULL;
> +
>  void __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
>  {
>         if (handle_arch_irq)
> --
> Qualcomm Innovation Center, Inc.
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel


* [PATCHv4 3/7] arm64: Move cpu_resume into the text section
  2014-10-28  8:10   ` Ard Biesheuvel
@ 2014-10-28  8:22     ` Ard Biesheuvel
  2014-10-28 12:43       ` Mark Rutland
  0 siblings, 1 reply; 30+ messages in thread
From: Ard Biesheuvel @ 2014-10-28  8:22 UTC (permalink / raw)
  To: linux-arm-kernel

On 28 October 2014 09:10, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> On 27 October 2014 21:12, Laura Abbott <lauraa@codeaurora.org> wrote:
>> The function cpu_resume currently lives in the .data
>> section. There's no reason for it to be there since
>> we can use relative instructions without a problem.
>> Move it to the text section where it belongs.
>>
>> Reviewed-by: Kees Cook <keescook@chromium.org>
>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
>> ---
>>  arch/arm64/kernel/sleep.S | 12 +++++++-----
>>  1 file changed, 7 insertions(+), 5 deletions(-)
>>
>
> Hi Laura,
>
> Apologies for waiting until v4 to bring this up, but I have some minor comments.
>
>> diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
>> index a564b44..5762b16 100644
>> --- a/arch/arm64/kernel/sleep.S
>> +++ b/arch/arm64/kernel/sleep.S
>> @@ -147,12 +147,12 @@ cpu_resume_after_mmu:
>>         ret
>>  ENDPROC(cpu_resume_after_mmu)
>>
>> -       .data
>>  ENTRY(cpu_resume)
>>         bl      el2_setup               // if in EL2 drop to EL1 cleanly
>>  #ifdef CONFIG_SMP
>>         mrs     x1, mpidr_el1
>> -       adr     x4, mpidr_hash_ptr
>> +       adrp    x4, mpidr_hash_ptr
>> +       add     x4, x4, #:lo12:mpidr_hash_ptr
>
> Instead of this change, you could put mpidr_hash_ptr itself in .text as well.
>

Actually, looking more closely, it appears mpidr_hash_ptr can be
dropped completely:
the address obtained by adrp/add is PC relative, so the whole sequence
could just be

adrp x8, mpidr_hash
add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address

(and the ldr and add below can be dropped, and so can mpidr_hash_ptr at the end)

>>         ldr     x5, [x4]
>>         add     x8, x4, x5              // x8 = struct mpidr_hash phys address
>>          /* retrieve mpidr_hash members to compute the hash */
>> @@ -164,14 +164,15 @@ ENTRY(cpu_resume)
>>  #else
>>         mov     x7, xzr
>>  #endif
>> -       adr     x0, sleep_save_sp
>> +       adrp    x0, sleep_save_sp
>> +       add     x0, x0, #:lo12:sleep_save_sp
>>         ldr     x0, [x0, #SLEEP_SAVE_SP_PHYS]
>
> Nit:
> ldr x0, [x0, #:lo12:sleep_save_sp + SLEEP_SAVE_SP_PHYS]
>
>>         ldr     x0, [x0, x7, lsl #3]
>>         /* load sp from context */
>>         ldr     x2, [x0, #CPU_CTX_SP]
>> -       adr     x1, sleep_idmap_phys
>> +       adrp    x1, sleep_idmap_phys
>>         /* load physical address of identity map page table in x1 */
>> -       ldr     x1, [x1]
>> +       ldr     x1, [x1, #:lo12:sleep_idmap_phys]
>>         mov     sp, x2
>>         /*
>>          * cpu_do_resume expects x0 to contain context physical address
>> @@ -181,6 +182,7 @@ ENTRY(cpu_resume)
>>         b       cpu_resume_mmu          // Resume MMU, never returns
>>  ENDPROC(cpu_resume)
>>
>> +       .data
>>         .align 3
>>  mpidr_hash_ptr:
>>         /*
>
> Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>


* [PATCHv4 2/7] arm64: Switch to ldr for loading the stub vectors
  2014-10-27 20:12 ` [PATCHv4 2/7] arm64: Switch to ldr for loading the stub vectors Laura Abbott
@ 2014-10-28  8:23   ` Ard Biesheuvel
  2014-10-28  9:51   ` Marc Zyngier
  2014-10-28 10:27   ` Mark Rutland
  2 siblings, 0 replies; 30+ messages in thread
From: Ard Biesheuvel @ 2014-10-28  8:23 UTC (permalink / raw)
  To: linux-arm-kernel

On 27 October 2014 21:12, Laura Abbott <lauraa@codeaurora.org> wrote:
> The hyp stub vectors are currently loaded using adr. This
> instruction has a +/- 1MB range for the loading address. If
> the alignment for sections is changed the address may be more
> than 1MB away, resulting in reclocation errors. Switch to using
> adrp for getting the address to ensure we aren't affected by the
> location of the __hyp_stub_vectors.
>
> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
> ---
> v4: Commit text tweaks

Your subject line still says 'ldr'

Other than that:
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>


> ---
>  arch/arm64/kernel/head.S | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 0a6e4f9..10f5cc0 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -331,7 +331,8 @@ CPU_LE(     movk    x0, #0x30d0, lsl #16    )       // Clear EE and E0E on LE systems
>         msr     vttbr_el2, xzr
>
>         /* Hypervisor stub */
> -       adr     x0, __hyp_stub_vectors
> +       adrp    x0, __hyp_stub_vectors
> +       add     x0, x0, #:lo12:__hyp_stub_vectors
>         msr     vbar_el2, x0
>
>         /* spsr */
> --
> Qualcomm Innovation Center, Inc.
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel


* [PATCHv4 4/7] arm64: Move some head.text functions to executable section
  2014-10-27 20:12 ` [PATCHv4 4/7] arm64: Move some head.text functions to executable section Laura Abbott
@ 2014-10-28  8:35   ` Ard Biesheuvel
  2014-10-28 11:10     ` Mark Rutland
  0 siblings, 1 reply; 30+ messages in thread
From: Ard Biesheuvel @ 2014-10-28  8:35 UTC (permalink / raw)
  To: linux-arm-kernel

On 27 October 2014 21:12, Laura Abbott <lauraa@codeaurora.org> wrote:
> The head.text section is intended to be run at early bootup
> before any of the regular kernel mappings have been setup.
> Parts of head.text may be freed back into the buddy allocator
> due to TEXT_OFFSET so for security requirements this memory
> must not be executable. The suspend/resume/hotplug code path
> requires some of these head.S functions to run however which
> means they need to be executable. Support these conflicting
> requirements by moving the few head.text functions that need
> to be executable to the text section which has the appropriate
> page table permissions.
>
> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
> ---
> v4: New apprach based on discussions with Mark
> ---
>  arch/arm64/kernel/head.S | 4 ++++
>  1 file changed, 4 insertions(+)
>
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 10f5cc0..dc362da 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -432,12 +432,14 @@ ENTRY(secondary_startup)
>         b       __enable_mmu
>  ENDPROC(secondary_startup)
>
> +       .pushsection    .text, "ax"
>  ENTRY(__secondary_switched)
>         ldr     x0, [x21]                       // get secondary_data.stack
>         mov     sp, x0
>         mov     x29, #0
>         b       secondary_start_kernel
>  ENDPROC(__secondary_switched)
> +       .popsection
>  #endif /* CONFIG_SMP */
>
>  /*
> @@ -471,11 +473,13 @@ ENDPROC(__enable_mmu)
>   * table to map the entire function.
>   */
>         .align  4
> +       .pushsection    .text, "ax"

There is a comment before this .align that explains why it is
separated from __enable_mmu, and I think jumping into another section
right after it kind of defeats the purpose.
Perhaps it is better to put the pushsection before __enable_mmu instead?

>  __turn_mmu_on:
>         msr     sctlr_el1, x0
>         isb
>         br      x27
>  ENDPROC(__turn_mmu_on)
> +       .popsection
>
>  /*
>   * Calculate the start of physical memory.
> --
> Qualcomm Innovation Center, Inc.
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCHv4 2/7] arm64: Switch to ldr for loading the stub vectors
  2014-10-27 20:12 ` [PATCHv4 2/7] arm64: Switch to ldr for loading the stub vectors Laura Abbott
  2014-10-28  8:23   ` Ard Biesheuvel
@ 2014-10-28  9:51   ` Marc Zyngier
  2014-10-28 10:27   ` Mark Rutland
  2 siblings, 0 replies; 30+ messages in thread
From: Marc Zyngier @ 2014-10-28  9:51 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Laura,

On 27/10/14 20:12, Laura Abbott wrote:
> The hyp stub vectors are currently loaded using adr. This
> instruction has a +/- 1MB range for the loading address. If
> the alignment for sections is changed the address may be more
> than 1MB away, resulting in reclocation errors. Switch to using
                              relocation
> adrp for getting the address to ensure we aren't affected by the
> location of the __hyp_stub_vectors.
> 
> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>

Apart from the above typo and the subject line (which Ard already
mentioned), this looks good.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>

> ---
> v4: Commit text tweaks
> ---
>  arch/arm64/kernel/head.S | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 0a6e4f9..10f5cc0 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -331,7 +331,8 @@ CPU_LE(	movk	x0, #0x30d0, lsl #16	)	// Clear EE and E0E on LE systems
>  	msr	vttbr_el2, xzr
>  
>  	/* Hypervisor stub */
> -	adr	x0, __hyp_stub_vectors
> +	adrp	x0, __hyp_stub_vectors
> +	add	x0, x0, #:lo12:__hyp_stub_vectors
>  	msr	vbar_el2, x0
>  
>  	/* spsr */
> 


-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCHv4 1/7] arm64: Treat handle_arch_irq as a function pointer
  2014-10-27 20:12 ` [PATCHv4 1/7] arm64: Treat handle_arch_irq as a function pointer Laura Abbott
  2014-10-28  8:11   ` Ard Biesheuvel
@ 2014-10-28 10:25   ` Mark Rutland
  1 sibling, 0 replies; 30+ messages in thread
From: Mark Rutland @ 2014-10-28 10:25 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Oct 27, 2014 at 08:12:26PM +0000, Laura Abbott wrote:
> handle_arch_irq isn't actually text, it's just a function pointer.
> It doesn't need to be stored in the text section and doing so
> causes problems if we ever want to make the kernel text read only.
> Declare handle_arch_irq as a proper function pointer stored in
> the data section.
> 
> Reviewed-by: Kees Cook <keescook@chromium.org>
> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>

This looks good to me, compiles cleanly on v3.18-rc2, and my Juno
continues to boot, so:

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>

Mark.

> ---
> v4: style comments
> ---
>  arch/arm64/include/asm/irq.h | 1 -
>  arch/arm64/kernel/entry.S    | 6 ++----
>  arch/arm64/kernel/irq.c      | 2 ++
>  3 files changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
> index e1f7ecd..1eebf5b 100644
> --- a/arch/arm64/include/asm/irq.h
> +++ b/arch/arm64/include/asm/irq.h
> @@ -3,7 +3,6 @@
>  
>  #include <asm-generic/irq.h>
>  
> -extern void (*handle_arch_irq)(struct pt_regs *);
>  extern void migrate_irqs(void);
>  extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
>  
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 726b910..ee27f39 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -168,7 +168,8 @@ tsk	.req	x28		// current thread_info
>   * Interrupt handling.
>   */
>  	.macro	irq_handler
> -	ldr	x1, handle_arch_irq
> +	adrp	x1, handle_arch_irq
> +	ldr	x1, [x1, #:lo12:handle_arch_irq]
>  	mov	x0, sp
>  	blr	x1
>  	.endm
> @@ -695,6 +696,3 @@ ENTRY(sys_rt_sigreturn_wrapper)
>  	mov	x0, sp
>  	b	sys_rt_sigreturn
>  ENDPROC(sys_rt_sigreturn_wrapper)
> -
> -ENTRY(handle_arch_irq)
> -	.quad	0
> diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
> index 071a6ec..240b75c 100644
> --- a/arch/arm64/kernel/irq.c
> +++ b/arch/arm64/kernel/irq.c
> @@ -40,6 +40,8 @@ int arch_show_interrupts(struct seq_file *p, int prec)
>  	return 0;
>  }
>  
> +void (*handle_arch_irq)(struct pt_regs *) = NULL;
> +
>  void __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
>  {
>  	if (handle_arch_irq)
> -- 
> Qualcomm Innovation Center, Inc.
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
> 
> 

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCHv4 2/7] arm64: Switch to ldr for loading the stub vectors
  2014-10-27 20:12 ` [PATCHv4 2/7] arm64: Switch to ldr for loading the stub vectors Laura Abbott
  2014-10-28  8:23   ` Ard Biesheuvel
  2014-10-28  9:51   ` Marc Zyngier
@ 2014-10-28 10:27   ` Mark Rutland
  2 siblings, 0 replies; 30+ messages in thread
From: Mark Rutland @ 2014-10-28 10:27 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Oct 27, 2014 at 08:12:27PM +0000, Laura Abbott wrote:
> The hyp stub vectors are currently loaded using adr. This
> instruction has a +/- 1MB range for the loading address. If
> the alignment for sections is changed the address may be more
> than 1MB away, resulting in reclocation errors. Switch to using
> adrp for getting the address to ensure we aren't affected by the
> location of the __hyp_stub_vectors.
> 
> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>

I've just given this a spin and nothing has blown up on my Juno, so:

Tested-by: Mark Rutland <mark.rutland@arm.com>

Mark.

> ---
> v4: Commit text tweaks
> ---
>  arch/arm64/kernel/head.S | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 0a6e4f9..10f5cc0 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -331,7 +331,8 @@ CPU_LE(	movk	x0, #0x30d0, lsl #16	)	// Clear EE and E0E on LE systems
>  	msr	vttbr_el2, xzr
>  
>  	/* Hypervisor stub */
> -	adr	x0, __hyp_stub_vectors
> +	adrp	x0, __hyp_stub_vectors
> +	add	x0, x0, #:lo12:__hyp_stub_vectors
>  	msr	vbar_el2, x0
>  
>  	/* spsr */
> -- 
> Qualcomm Innovation Center, Inc.
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
> 
> 

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCHv4 4/7] arm64: Move some head.text functions to executable section
  2014-10-28  8:35   ` Ard Biesheuvel
@ 2014-10-28 11:10     ` Mark Rutland
  2014-10-30 17:06       ` Laura Abbott
  0 siblings, 1 reply; 30+ messages in thread
From: Mark Rutland @ 2014-10-28 11:10 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Oct 28, 2014 at 08:35:37AM +0000, Ard Biesheuvel wrote:
> On 27 October 2014 21:12, Laura Abbott <lauraa@codeaurora.org> wrote:
> > The head.text section is intended to be run at early bootup
> > before any of the regular kernel mappings have been setup.
> > Parts of head.text may be freed back into the buddy allocator
> > due to TEXT_OFFSET so for security requirements this memory
> > must not be executable. The suspend/resume/hotplug code path
> > requires some of these head.S functions to run however which
> > means they need to be executable. Support these conflicting
> > requirements by moving the few head.text functions that need
> > to be executable to the text section which has the appropriate
> > page table permissions.
> >
> > Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
> > ---
> > v4: New approach based on discussions with Mark
> > ---
> >  arch/arm64/kernel/head.S | 4 ++++
> >  1 file changed, 4 insertions(+)
> >
> > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> > index 10f5cc0..dc362da 100644
> > --- a/arch/arm64/kernel/head.S
> > +++ b/arch/arm64/kernel/head.S
> > @@ -432,12 +432,14 @@ ENTRY(secondary_startup)
> >         b       __enable_mmu
> >  ENDPROC(secondary_startup)
> >
> > +       .pushsection    .text, "ax"
> >  ENTRY(__secondary_switched)
> >         ldr     x0, [x21]                       // get secondary_data.stack
> >         mov     sp, x0
> >         mov     x29, #0
> >         b       secondary_start_kernel
> >  ENDPROC(__secondary_switched)
> > +       .popsection
> >  #endif /* CONFIG_SMP */
> >
> >  /*
> > @@ -471,11 +473,13 @@ ENDPROC(__enable_mmu)
> >   * table to map the entire function.
> >   */
> >         .align  4
> > +       .pushsection    .text, "ax"
> 
> There is a comment before this .align that explains why it is
> separated from __enable_mmu, and I think jumping into another section
> right after it kind of defeats the purpose.
> Perhaps it is better to put the pushsection before __enable_mmu instead?

To keep the alignment correct we just need to move the .align after the
pushsection. With that changed I think this patch is Ok.

As __enable_mmu is only executed with the MMU off it doesn't need to be
moved into an executable section to prevent the MMU from blowing up in
our faces -- it would be wrong to call it with the MMU on anyway.

However, this does raise a potential problem in that an attacker could
scribble over code executed before the MMU is on. Then they just have to
wait for the next CPU hotplug or suspend/resume for it to be executed.
So some functions including __enable_mmu and el2_setup aren't
necessarily safe in their current location.

There are a few ways of solving that, either moving stuff around or
releasing less memory for allocation.

Mark.

> 
> >  __turn_mmu_on:
> >         msr     sctlr_el1, x0
> >         isb
> >         br      x27
> >  ENDPROC(__turn_mmu_on)
> > +       .popsection
> >
> >  /*
> >   * Calculate the start of physical memory.
> > --
> > Qualcomm Innovation Center, Inc.
> > Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
> >
> >
> > _______________________________________________
> > linux-arm-kernel mailing list
> > linux-arm-kernel at lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCHv4 7/7] arm64: add better page protections to arm64
  2014-10-27 20:12 ` [PATCHv4 7/7] arm64: add better page protections to arm64 Laura Abbott
@ 2014-10-28 11:29   ` Steve Capper
  2014-10-28 11:44     ` Ard Biesheuvel
  2014-10-30 17:12     ` Laura Abbott
  2014-10-28 14:20   ` Mark Rutland
  1 sibling, 2 replies; 30+ messages in thread
From: Steve Capper @ 2014-10-28 11:29 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Oct 27, 2014 at 01:12:32PM -0700, Laura Abbott wrote:
> Add page protections for arm64 similar to those in arm.
> This is for security reasons to prevent certain classes
> of exploits. The current method:
> 
> - Map all memory as either RWX or RW. We round to the nearest
>   section to avoid creating page tables before everything is mapped
> - Once everything is mapped, if either end of the RWX section should
>   not be X, we split the PMD and remap as necessary
> - When initmem is to be freed, we change the permissions back to
>   RW (using stop machine if necessary to flush the TLB)
> - If CONFIG_DEBUG_RODATA is set, the read only sections are set
>   read only.
> 
> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>

Hi Laura,
I have some comments below.

> ---
> v4: Combined the Kconfig options
> ---
>  arch/arm64/Kconfig.debug            |  23 +++
>  arch/arm64/include/asm/cacheflush.h |   4 +
>  arch/arm64/kernel/vmlinux.lds.S     |  17 ++
>  arch/arm64/mm/init.c                |   1 +
>  arch/arm64/mm/mm.h                  |   2 +
>  arch/arm64/mm/mmu.c                 | 303 +++++++++++++++++++++++++++++++-----
>  6 files changed, 314 insertions(+), 36 deletions(-)
> 
> diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
> index 0a12933..1b25015 100644
> --- a/arch/arm64/Kconfig.debug
> +++ b/arch/arm64/Kconfig.debug
> @@ -54,4 +54,27 @@ config DEBUG_SET_MODULE_RONX
>            against certain classes of kernel exploits.
>            If in doubt, say "N".
>  
> +config DEBUG_RODATA
> +	bool "Make kernel text and rodata read-only"
> +	help
> +	  If this is set, kernel text and rodata will be made read-only. This
> +	  is to help catch accidental or malicious attempts to change the
> +	  kernel's executable code. Additionally splits rodata from kernel
> +	  text so it can be made explicitly non-executable.
> +
> +          If in doubt, say Y
> +
> +config DEBUG_ALIGN_RODATA
> +	depends on DEBUG_RODATA
> +	bool "Align linker sections up to SECTION_SIZE"
> +	help
> +	  If this option is enabled, sections that may potentially be marked as
> +	  read only or non-executable will be aligned up to the section size of
> +	  the kernel. This prevents sections from being split into pages and
> +	  avoids a potential TLB penalty. The downside is an increase in
> +	  alignment and potentially wasted space. Turn on this option if
> +	  performance is more important than memory pressure.
> +
> +	  If in doubt, say N
> +
>  endmenu
> diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
> index 689b637..0014207 100644
> --- a/arch/arm64/include/asm/cacheflush.h
> +++ b/arch/arm64/include/asm/cacheflush.h
> @@ -152,4 +152,8 @@ int set_memory_ro(unsigned long addr, int numpages);
>  int set_memory_rw(unsigned long addr, int numpages);
>  int set_memory_x(unsigned long addr, int numpages);
>  int set_memory_nx(unsigned long addr, int numpages);
> +
> +#ifdef CONFIG_DEBUG_RODATA
> +void mark_rodata_ro(void);
> +#endif
>  #endif
> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index edf8715..83424ad 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -8,6 +8,7 @@
>  #include <asm/thread_info.h>
>  #include <asm/memory.h>
>  #include <asm/page.h>
> +#include <asm/pgtable.h>
>  
>  #include "image.h"
>  
> @@ -54,6 +55,9 @@ SECTIONS
>  		_text = .;
>  		HEAD_TEXT
>  	}
> +#ifdef DEBUG_ALIGN_RODATA
> +	. = ALIGN(1<<SECTION_SHIFT);
> +#endif
>  	.text : {			/* Real text segment		*/
>  		_stext = .;		/* Text and read-only data	*/
>  			__exception_text_start = .;
> @@ -70,19 +74,32 @@ SECTIONS
>  		*(.got)			/* Global offset table		*/
>  	}
>  
> +#ifdef DEBUG_ALIGN_RODATA
> +	. = ALIGN(1<<SECTION_SHIFT);
> +#endif
>  	RO_DATA(PAGE_SIZE)
>  	EXCEPTION_TABLE(8)
>  	NOTES
>  	_etext = .;			/* End of text and rodata section */
>  
> +#ifdef DEBUG_ALIGN_RODATA
> +	. = ALIGN(1<<SECTION_SHIFT);
> +#else
>  	. = ALIGN(PAGE_SIZE);
> +#endif
>  	__init_begin = .;
>  
>  	INIT_TEXT_SECTION(8)
>  	.exit.text : {
>  		ARM_EXIT_KEEP(EXIT_TEXT)
>  	}
> +
> +#ifdef DEBUG_ALIGN_RODATA
> +	. = ALIGN(1<<SECTION_SHIFT);
> +	__init_data_begin = .;
> +#else
>  	. = ALIGN(16);
> +#endif
>  	.init.data : {
>  		INIT_DATA
>  		INIT_SETUP(16)
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 494297c..61f44c7 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -324,6 +324,7 @@ void __init mem_init(void)
>  
>  void free_initmem(void)
>  {
> +	fixup_init();
>  	free_initmem_default(0);
>  }
>  
> diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h
> index d519f4f..82347d8 100644
> --- a/arch/arm64/mm/mm.h
> +++ b/arch/arm64/mm/mm.h
> @@ -1,2 +1,4 @@
>  extern void __init bootmem_init(void);
>  extern void __init arm64_swiotlb_init(void);
> +
> +void fixup_init(void);
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index e92f633..0428294 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -26,6 +26,7 @@
>  #include <linux/memblock.h>
>  #include <linux/fs.h>
>  #include <linux/io.h>
> +#include <linux/stop_machine.h>
>  
>  #include <asm/cputype.h>
>  #include <asm/fixmap.h>
> @@ -137,17 +138,55 @@ static void __init *early_alloc(unsigned long sz)
>  	return ptr;
>  }
>  
> -static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
> +/*
> + * remap a PMD into pages
> + */
> +static noinline void __ref split_pmd(pmd_t *pmd, bool early)
> +{
> +	pte_t *pte, *start_pte;
> +	unsigned long pfn;
> +	int i = 0;
> +
> +	if (early)
> +		start_pte = pte = early_alloc(PTRS_PER_PTE*sizeof(pte_t));
> +	else
> +		start_pte = pte = (pte_t *)__get_free_page(PGALLOC_GFP);
> +
> +	BUG_ON(!pte);
> +
> +	pfn = pmd_pfn(*pmd);
> +
> +	do {
> +		/*
> +		 * Need to have the least restrictive permissions available
> +		 * permissions will be fixed up later
> +		 */
> +		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
> +		pfn++;
> +	} while (pte++, i++, i < PTRS_PER_PTE);
> +
> +
> +	__pmd_populate(pmd, __pa(start_pte), PMD_TYPE_TABLE);
> +	flush_tlb_all();
> +}
> +
> +static void __ref alloc_init_pte(pmd_t *pmd, unsigned long addr,
>  				  unsigned long end, unsigned long pfn,
> -				  pgprot_t prot)
> +				  pgprot_t prot, bool early)
>  {
>  	pte_t *pte;
>  
>  	if (pmd_none(*pmd)) {
> -		pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t));
> +		if (early)
> +			pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t));
> +		else
> +			pte = (pte_t *)__get_free_page(PGALLOC_GFP);
> +		BUG_ON(!pte);
>  		__pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
>  	}
> -	BUG_ON(pmd_bad(*pmd));
> +
> +	if (pmd_bad(*pmd))
> +		split_pmd(pmd, early);
>  
>  	pte = pte_offset_kernel(pmd, addr);
>  	do {
> @@ -156,29 +195,52 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
>  	} while (pte++, addr += PAGE_SIZE, addr != end);
>  }
>  
> -static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
> +void __ref split_pud(pud_t *old_pud, pmd_t *pmd)
> +{
> +	unsigned long pfn = pud_pfn(*old_pud);
> +        const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN |
> +                              PMD_SECT_RDONLY | PMD_SECT_PROT_NONE |
> +                              PMD_SECT_VALID;
> +	pgprot_t prot = __pgprot(pud_val(*old_pud) & mask);

This looks a little fragile, if I've understood things correctly then
all we want to do is create a set of pmds with the same pgprot_t as the
original pud?

In that case it would probably be easier to simply mask out the pfn
from our pud to create the pgprot rather than mask in a set of
attributes (that may change in future)?
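
Something along these lines is what I have in mind (just a sketch, the
helper name is made up here; PHYS_MASK and PAGE_MASK are only used to
cover the descriptor's output-address field):

/*
 * Keep every bit of the old pud except the output address, so any
 * attribute bits added in future are carried over automatically.
 */
static pgprot_t pud_pgprot(pud_t pud)
{
	return __pgprot(pud_val(pud) & ~(PHYS_MASK & PAGE_MASK));
}

split_pud() could then simply do set_pmd(pmd, pfn_pmd(pfn,
pud_pgprot(*old_pud))) without naming individual PMD_SECT_* bits.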

> +	int i = 0;
> +
> +	do {
> +		set_pmd(pmd, pfn_pmd(pfn, prot));
> +		pfn++;
> +	} while (pmd++, i++, i < PTRS_PER_PMD);
> +}
> +
> +static void __ref alloc_init_pmd(pud_t *pud, unsigned long addr,
>  				  unsigned long end, phys_addr_t phys,
> -				  int map_io)
> +				  pgprot_t sect_prot, pgprot_t pte_prot,
> +				  bool early, int map_io)
>  {
>  	pmd_t *pmd;
>  	unsigned long next;
> -	pmdval_t prot_sect;
> -	pgprot_t prot_pte;
>  
>  	if (map_io) {
> -		prot_sect = PROT_SECT_DEVICE_nGnRE;
> -		prot_pte = __pgprot(PROT_DEVICE_nGnRE);
> -	} else {
> -		prot_sect = PROT_SECT_NORMAL_EXEC;
> -		prot_pte = PAGE_KERNEL_EXEC;
> +		sect_prot = PROT_SECT_DEVICE_nGnRE;
> +		pte_prot = __pgprot(PROT_DEVICE_nGnRE);
>  	}

I think we should wipe out map_io from all these functions as it's
causing too much complexity. It's enough to have just sect_prot and
pte_prot. I posted some similar feedback to Ard:
http://lists.infradead.org/pipermail/linux-arm-kernel/2014-October/296918.html
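
Roughly, the idea is that the prototypes shrink to (sketch only):

static void alloc_init_pmd(pud_t *pud, unsigned long addr,
			   unsigned long end, phys_addr_t phys,
			   pgprot_t sect_prot, pgprot_t pte_prot,
			   bool early);

and callers that need a device mapping (create_id_mapping() with map_io
set today) pass the attributes explicitly, e.g.:

	__create_mapping(&idmap_pg_dir[pgd_index(addr)], addr, addr, size,
			 __pgprot(PROT_SECT_DEVICE_nGnRE),
			 __pgprot(PROT_DEVICE_nGnRE), false);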

>  
>  	/*
>  	 * Check for initial section mappings in the pgd/pud and remove them.
>  	 */
>  	if (pud_none(*pud) || pud_bad(*pud)) {
> -		pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t));
> +		if (early)
> +			pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t));
> +		else
> +			pmd = pmd_alloc_one(&init_mm, addr);
> +		BUG_ON(!pmd);
> +		if (pud_sect(*pud)) {
> +			/*
> +			 * need to have the 1G of mappings continue to be
> +			 * present
> +			 */
> +			split_pud(pud, pmd);
> +		}
>  		pud_populate(&init_mm, pud, pmd);
> +		flush_tlb_all();
>  	}
>  
>  	pmd = pmd_offset(pud, addr);
> @@ -186,8 +248,8 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
>  		next = pmd_addr_end(addr, end);
>  		/* try section mapping first */
>  		if (((addr | next | phys) & ~SECTION_MASK) == 0) {
> -			pmd_t old_pmd =*pmd;
> -			set_pmd(pmd, __pmd(phys | prot_sect));
> +			pmd_t old_pmd = *pmd;
> +			set_pmd(pmd, __pmd(phys | sect_prot));
>  			/*
>  			 * Check for previous table entries created during
>  			 * boot (__create_page_tables) and flush them.
> @@ -196,15 +258,16 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
>  				flush_tlb_all();
>  		} else {
>  			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
> -				       prot_pte);
> +					pte_prot, early);
>  		}
>  		phys += next - addr;
>  	} while (pmd++, addr = next, addr != end);
>  }
>  
> -static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
> +static void __ref alloc_init_pud(pgd_t *pgd, unsigned long addr,
>  				  unsigned long end, unsigned long phys,
> -				  int map_io)
> +				  pgprot_t sect_prot, pgprot_t pte_prot,
> +				  bool early, int map_io)
>  {
>  	pud_t *pud;
>  	unsigned long next;
> @@ -222,10 +285,10 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
>  		/*
>  		 * For 4K granule only, attempt to put down a 1GB block
>  		 */
> -		if (!map_io && (PAGE_SHIFT == 12) &&
> +		if (!map_io && early && (PAGE_SHIFT == 12) &&
>  		    ((addr | next | phys) & ~PUD_MASK) == 0) {
>  			pud_t old_pud = *pud;
> -			set_pud(pud, __pud(phys | PROT_SECT_NORMAL_EXEC));
> +			set_pud(pud, __pud(phys | sect_prot));
>  
>  			/*
>  			 * If we have an old value for a pud, it will
> @@ -240,7 +303,8 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
>  				flush_tlb_all();
>  			}
>  		} else {
> -			alloc_init_pmd(pud, addr, next, phys, map_io);
> +			alloc_init_pmd(pud, addr, next, phys, sect_prot,
> +					pte_prot, early, map_io);
>  		}
>  		phys += next - addr;
>  	} while (pud++, addr = next, addr != end);
> @@ -250,9 +314,11 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
>   * Create the page directory entries and any necessary page tables for the
>   * mapping specified by 'md'.
>   */
> -static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
> -				    unsigned long virt, phys_addr_t size,
> -				    int map_io)
> +static void __ref __create_mapping(pgd_t *pgd, phys_addr_t phys,
> +				  unsigned long virt,
> +				  phys_addr_t size,
> +				  pgprot_t sect_prot, pgprot_t pte_prot,
> +				  int map_io, bool early)
>  {
>  	unsigned long addr, length, end, next;
>  
> @@ -262,31 +328,140 @@ static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
>  	end = addr + length;
>  	do {
>  		next = pgd_addr_end(addr, end);
> -		alloc_init_pud(pgd, addr, next, phys, map_io);
> +		alloc_init_pud(pgd, addr, next, phys, sect_prot, pte_prot,
> +				early, map_io);
>  		phys += next - addr;
>  	} while (pgd++, addr = next, addr != end);
>  }
>  
> -static void __init create_mapping(phys_addr_t phys, unsigned long virt,
> -				  phys_addr_t size)
> +void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
> +{
> +	if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) {
> +		pr_warn("BUG: not creating id mapping for %pa\n", &addr);
> +		return;
> +	}
> +	__create_mapping(&idmap_pg_dir[pgd_index(addr)],
> +			 addr, addr, size, PROT_SECT_NORMAL_EXEC,
> +			 PAGE_KERNEL_EXEC, map_io, false);
> +}
> +
> +static inline pmd_t *pmd_off_k(unsigned long virt)
> +{
> +	return pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt);
> +}
> +
> +#ifdef CONFIG_EARLY_PRINTK
> +/*
> + * Create an early I/O mapping using the pgd/pmd entries already populated
> + * in head.S as this function is called too early to allocate any memory. The
> + * mapping size is 2MB with 4KB pages or 64KB with 64KB pages.
> + */
> +void __iomem * __init early_io_map(phys_addr_t phys, unsigned long virt)
> +{
> +	unsigned long size, mask;
> +	bool page64k = IS_ENABLED(CONFIG_ARM64_64K_PAGES);
> +	pgd_t *pgd;
> +	pud_t *pud;
> +	pmd_t *pmd;
> +	pte_t *pte;
> +
> +	/*
> +	 * No early pte entries with !ARM64_64K_PAGES configuration, so using
> +	 * sections (pmd).
> +	 */
> +	size = page64k ? PAGE_SIZE : SECTION_SIZE;
> +	mask = ~(size - 1);
> +
> +	pgd = pgd_offset_k(virt);
> +	pud = pud_offset(pgd, virt);
> +	if (pud_none(*pud))
> +		return NULL;
> +	pmd = pmd_offset(pud, virt);
> +
> +	if (page64k) {
> +		if (pmd_none(*pmd))
> +			return NULL;
> +		pte = pte_offset_kernel(pmd, virt);
> +		set_pte(pte, __pte((phys & mask) | PROT_DEVICE_nGnRE));
> +	} else {
> +		set_pmd(pmd, __pmd((phys & mask) | PROT_SECT_DEVICE_nGnRE));
> +	}
> +
> +	return (void __iomem *)((virt & mask) + (phys & ~mask));
> +}
> +#endif
> +
> +static void __ref create_mapping(phys_addr_t phys, unsigned long virt,
> +				  phys_addr_t size,
> +				  pgprot_t sect_prot, pgprot_t pte_prot)
>  {
>  	if (virt < VMALLOC_START) {
>  		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
>  			&phys, virt);
>  		return;
>  	}
> -	__create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt, size, 0);
> +
> +	return __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt,
> +				size, sect_prot, pte_prot, 0, true);
>  }
>  
> -void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
> +static void __ref create_mapping_late(phys_addr_t phys, unsigned long virt,
> +				  phys_addr_t size,
> +				  pgprot_t sect_prot, pgprot_t pte_prot)
>  {
> -	if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) {
> -		pr_warn("BUG: not creating id mapping for %pa\n", &addr);
> +	if (virt < VMALLOC_START) {
> +		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
> +			&phys, virt);
>  		return;
>  	}
> -	__create_mapping(&idmap_pg_dir[pgd_index(addr)],
> -			 addr, addr, size, map_io);
> +
> +	return __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt,
> +				size, sect_prot, pte_prot, 0, false);
> +}
> +
> +#ifdef CONFIG_DEBUG_RODATA
> +static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
> +{
> +	/*
> +	 * Set up the executable regions using the existing section mappings
> +	 * for now. This will get more fine grained later once all memory
> +	 * is mapped
> +	 */
> +	unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
> +	unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
> +
> +	if (end < kernel_x_start) {
> +		create_mapping(start, __phys_to_virt(start),
> +			end - start, PROT_SECT_NORMAL, PAGE_KERNEL);
> +	} else if (start >= kernel_x_end) {
> +		create_mapping(start, __phys_to_virt(start),
> +			end - start, PROT_SECT_NORMAL, PAGE_KERNEL);
> +	} else {
> +		if (start < kernel_x_start)
> +			create_mapping(start, __phys_to_virt(start),
> +				kernel_x_start - start,
> +				PROT_SECT_NORMAL,
> +				PAGE_KERNEL);
> +		create_mapping(kernel_x_start,
> +				__phys_to_virt(kernel_x_start),
> +				kernel_x_end - kernel_x_start,
> +				PROT_SECT_NORMAL_EXEC, PAGE_KERNEL_EXEC);
> +		if (kernel_x_end < end)
> +			create_mapping(kernel_x_end,
> +				__phys_to_virt(kernel_x_end),
> +				end - kernel_x_end,
> +				PROT_SECT_NORMAL,
> +				PAGE_KERNEL);
> +	}
> +
> +}
> +#else
> +static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
> +{
> +	create_mapping(start, __phys_to_virt(start), end - start,
> +			PROT_SECT_NORMAL_EXEC, PAGE_KERNEL_EXEC);
>  }
> +#endif
>  
>  static void __init map_mem(void)
>  {
> @@ -328,14 +503,69 @@ static void __init map_mem(void)
>  			memblock_set_current_limit(limit);
>  		}
>  #endif
> -
> -		create_mapping(start, __phys_to_virt(start), end - start);
> +		__map_memblock(start, end);
>  	}
>  
>  	/* Limit no longer required. */
>  	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
>  }
>  
> +void __init fixup_executable(void)
> +{
> +#ifdef CONFIG_DEBUG_RODATA
> +	/* now that we are actually fully mapped, make the start/end more fine grained */
> +	if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) {
> +		unsigned long aligned_start = round_down(__pa(_stext),
> +							SECTION_SIZE);
> +
> +		create_mapping(aligned_start, __phys_to_virt(aligned_start),
> +				__pa(_stext) - aligned_start,
> +				PROT_SECT_NORMAL,
> +				PAGE_KERNEL);
> +	}
> +
> +	if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) {
> +		unsigned long aligned_end = round_up(__pa(__init_end),
> +							SECTION_SIZE);
> +		create_mapping(__pa(__init_end), (unsigned long)__init_end,
> +				aligned_end - __pa(__init_end),
> +				PROT_SECT_NORMAL,
> +				PAGE_KERNEL);
> +	}
> +#endif
> +}
> +
> +#ifdef CONFIG_DEBUG_RODATA
> +void mark_rodata_ro(void)
> +{
> +	create_mapping_late(__pa(_stext), (unsigned long)_stext,
> +				(unsigned long)_etext - (unsigned long)_stext,
> +				PROT_SECT_NORMAL_EXEC | PMD_SECT_RDONLY,
> +				PAGE_KERNEL_EXEC | PTE_RDONLY);
> +
> +}
> +#endif
> +
> +static int __flush_mappings(void *unused)
> +{
> +	flush_tlb_kernel_range((unsigned long)__init_begin,
> +				(unsigned long)__init_end);
> +	return 0;
> +}
> +
> +void __ref fixup_init(void)
> +{
> +	phys_addr_t start = __pa(__init_begin);
> +	phys_addr_t end = __pa(__init_end);
> +
> +	create_mapping_late(start, (unsigned long)__init_begin,
> +			end - start,
> +			PROT_SECT_NORMAL,
> +			PAGE_KERNEL);
> +	if (!IS_ALIGNED(start, SECTION_SIZE) || !IS_ALIGNED(end, SECTION_SIZE))
> +		stop_machine(__flush_mappings, NULL, NULL);
> +}
> +
>  /*
>   * paging_init() sets up the page tables, initialises the zone memory
>   * maps and sets up the zero page.
> @@ -345,6 +575,7 @@ void __init paging_init(void)
>  	void *zero_page;
>  
>  	map_mem();
> +	fixup_executable();
>  
>  	/*
>  	 * Finally flush the caches and tlb to ensure that we're in a
> -- 
> Qualcomm Innovation Center, Inc.
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
> 

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCHv4 7/7] arm64: add better page protections to arm64
  2014-10-28 11:29   ` Steve Capper
@ 2014-10-28 11:44     ` Ard Biesheuvel
  2014-10-28 13:40       ` Steve Capper
  2014-10-30 17:12     ` Laura Abbott
  1 sibling, 1 reply; 30+ messages in thread
From: Ard Biesheuvel @ 2014-10-28 11:44 UTC (permalink / raw)
  To: linux-arm-kernel

On 28 October 2014 12:29, Steve Capper <steve.capper@linaro.org> wrote:
> On Mon, Oct 27, 2014 at 01:12:32PM -0700, Laura Abbott wrote:
>> Add page protections for arm64 similar to those in arm.
>> This is for security reasons to prevent certain classes
>> of exploits. The current method:
>>
>> - Map all memory as either RWX or RW. We round to the nearest
>>   section to avoid creating page tables before everything is mapped
>> - Once everything is mapped, if either end of the RWX section should
>>   not be X, we split the PMD and remap as necessary
>> - When initmem is to be freed, we change the permissions back to
>>   RW (using stop machine if necessary to flush the TLB)
>> - If CONFIG_DEBUG_RODATA is set, the read only sections are set
>>   read only.
>>
>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
>
> Hi Laura,
> I have some comments below.
>
>> ---
>> v4: Combined the Kconfig options
>> ---
>>  arch/arm64/Kconfig.debug            |  23 +++
>>  arch/arm64/include/asm/cacheflush.h |   4 +
>>  arch/arm64/kernel/vmlinux.lds.S     |  17 ++
>>  arch/arm64/mm/init.c                |   1 +
>>  arch/arm64/mm/mm.h                  |   2 +
>>  arch/arm64/mm/mmu.c                 | 303 +++++++++++++++++++++++++++++++-----
>>  6 files changed, 314 insertions(+), 36 deletions(-)
>>
[...]
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index e92f633..0428294 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -26,6 +26,7 @@
>>  #include <linux/memblock.h>
>>  #include <linux/fs.h>
>>  #include <linux/io.h>
>> +#include <linux/stop_machine.h>
>>
>>  #include <asm/cputype.h>
>>  #include <asm/fixmap.h>
>> @@ -137,17 +138,55 @@ static void __init *early_alloc(unsigned long sz)
>>       return ptr;
>>  }
>>
>> -static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
>> +/*
>> + * remap a PMD into pages
>> + */
>> +static noinline void __ref split_pmd(pmd_t *pmd, bool early)
>> +{
>> +     pte_t *pte, *start_pte;
>> +     unsigned long pfn;
>> +     int i = 0;
>> +
>> +     if (early)
>> +             start_pte = pte = early_alloc(PTRS_PER_PTE*sizeof(pte_t));
>> +     else
>> +             start_pte = pte = (pte_t *)__get_free_page(PGALLOC_GFP);
>> +
>> +     BUG_ON(!pte);
>> +
>> +     pfn = pmd_pfn(*pmd);
>> +
>> +     do {
>> +             /*
>> +              * Need to have the least restrictive permissions available
>> +              * permissions will be fixed up later
>> +              */
>> +             set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
>> +             pfn++;
>> +     } while (pte++, i++, i < PTRS_PER_PTE);
>> +
>> +
>> +     __pmd_populate(pmd, __pa(start_pte), PMD_TYPE_TABLE);
>> +     flush_tlb_all();
>> +}
>> +
>> +static void __ref alloc_init_pte(pmd_t *pmd, unsigned long addr,
>>                                 unsigned long end, unsigned long pfn,
>> -                               pgprot_t prot)
>> +                               pgprot_t prot, bool early)
>>  {
>>       pte_t *pte;
>>
>>       if (pmd_none(*pmd)) {
>> -             pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t));
>> +             if (early)
>> +                     pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t));
>> +             else
>> +                     pte = (pte_t *)__get_free_page(PGALLOC_GFP);
>> +             BUG_ON(!pte);
>>               __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
>>       }
>> -     BUG_ON(pmd_bad(*pmd));
>> +
>> +     if (pmd_bad(*pmd))
>> +             split_pmd(pmd, early);
>>
>>       pte = pte_offset_kernel(pmd, addr);
>>       do {
>> @@ -156,29 +195,52 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
>>       } while (pte++, addr += PAGE_SIZE, addr != end);
>>  }
>>
>> -static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
>> +void __ref split_pud(pud_t *old_pud, pmd_t *pmd)
>> +{
>> +     unsigned long pfn = pud_pfn(*old_pud);
>> +        const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN |
>> +                              PMD_SECT_RDONLY | PMD_SECT_PROT_NONE |
>> +                              PMD_SECT_VALID;
>> +     pgprot_t prot = __pgprot(pud_val(*old_pud) & mask);
>
> This looks a little fragile, if I've understood things correctly then
> all we want to do is create a set of pmds with the same pgprot_t as the
> original pud?
>
> In that case it would probably be easier to simply mask out the pfn
> from our pud to create the pgprot rather than mask in a set of
> attributes (that may change in future)?
>
>> +     int i = 0;
>> +
>> +     do {
>> +             set_pmd(pmd, pfn_pmd(pfn, prot));
>> +             pfn++;
>> +     } while (pmd++, i++, i < PTRS_PER_PMD);
>> +}
>> +
>> +static void __ref alloc_init_pmd(pud_t *pud, unsigned long addr,
>>                                 unsigned long end, phys_addr_t phys,
>> -                               int map_io)
>> +                               pgprot_t sect_prot, pgprot_t pte_prot,
>> +                               bool early, int map_io)
>>  {
>>       pmd_t *pmd;
>>       unsigned long next;
>> -     pmdval_t prot_sect;
>> -     pgprot_t prot_pte;
>>
>>       if (map_io) {
>> -             prot_sect = PROT_SECT_DEVICE_nGnRE;
>> -             prot_pte = __pgprot(PROT_DEVICE_nGnRE);
>> -     } else {
>> -             prot_sect = PROT_SECT_NORMAL_EXEC;
>> -             prot_pte = PAGE_KERNEL_EXEC;
>> +             sect_prot = PROT_SECT_DEVICE_nGnRE;
>> +             pte_prot = __pgprot(PROT_DEVICE_nGnRE);
>>       }
>
> I think we should wipe out map_io from all these functions as it's
> causing too much complexity. It's enough to have just sect_prot and
> pte_prot. I posted some similar feedback to Ard:
> http://lists.infradead.org/pipermail/linux-arm-kernel/2014-October/296918.html
>

I do agree. But could we have a single enum that maps onto {sect_prot,
pte_prot} tuples rather than having to pass both values everywhere we
call any of these functions?
I.e., { MMU_PROT_DEFAULT, MMU_PROT_READONLY, MMU_PROT_READWRITE,
MMU_PROT_READEXEC, MMU_PROT_DEVICE }, with the mapping local to mmu.c?
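
For the sake of discussion, something like the below is what I mean
(names illustrative, the pgprot pairs simply lifted from the values this
patch already passes around):

enum mmu_map_type {
	MMU_PROT_DEFAULT,	/* RWX, as today */
	MMU_PROT_READONLY,	/* R */
	MMU_PROT_READWRITE,	/* RW */
	MMU_PROT_READEXEC,	/* RX */
	MMU_PROT_DEVICE,	/* device nGnRE */
};

static const struct {
	pgprot_t	sect;
	pgprot_t	pte;
} mmu_prots[] = {
	[MMU_PROT_DEFAULT]   = { __pgprot(PROT_SECT_NORMAL_EXEC),
				 PAGE_KERNEL_EXEC },
	[MMU_PROT_READONLY]  = { __pgprot(PROT_SECT_NORMAL | PMD_SECT_RDONLY),
				 __pgprot(pgprot_val(PAGE_KERNEL) | PTE_RDONLY) },
	[MMU_PROT_READWRITE] = { __pgprot(PROT_SECT_NORMAL),
				 PAGE_KERNEL },
	[MMU_PROT_READEXEC]  = { __pgprot(PROT_SECT_NORMAL_EXEC | PMD_SECT_RDONLY),
				 __pgprot(pgprot_val(PAGE_KERNEL_EXEC) | PTE_RDONLY) },
	[MMU_PROT_DEVICE]    = { __pgprot(PROT_SECT_DEVICE_nGnRE),
				 __pgprot(PROT_DEVICE_nGnRE) },
};

__create_mapping() and friends would then take a single enum mmu_map_type
and do the lookup locally, instead of threading two pgprot_t arguments
(plus map_io) through every level.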

>>
>>       /*
>>        * Check for initial section mappings in the pgd/pud and remove them.
>>        */
>>       if (pud_none(*pud) || pud_bad(*pud)) {
>> -             pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t));
>> +             if (early)
>> +                     pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t));
>> +             else
>> +                     pmd = pmd_alloc_one(&init_mm, addr);
>> +             BUG_ON(!pmd);
>> +             if (pud_sect(*pud)) {
>> +                     /*
>> +                      * need to have the 1G of mappings continue to be
>> +                      * present
>> +                      */
>> +                     split_pud(pud, pmd);
>> +             }
>>               pud_populate(&init_mm, pud, pmd);
>> +             flush_tlb_all();
>>       }
>>
>>       pmd = pmd_offset(pud, addr);
>> @@ -186,8 +248,8 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
>>               next = pmd_addr_end(addr, end);
>>               /* try section mapping first */
>>               if (((addr | next | phys) & ~SECTION_MASK) == 0) {
>> -                     pmd_t old_pmd =*pmd;
>> -                     set_pmd(pmd, __pmd(phys | prot_sect));
>> +                     pmd_t old_pmd = *pmd;
>> +                     set_pmd(pmd, __pmd(phys | sect_prot));
>>                       /*
>>                        * Check for previous table entries created during
>>                        * boot (__create_page_tables) and flush them.
>> @@ -196,15 +258,16 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
>>                               flush_tlb_all();
>>               } else {
>>                       alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
>> -                                    prot_pte);
>> +                                     pte_prot, early);
>>               }
>>               phys += next - addr;
>>       } while (pmd++, addr = next, addr != end);
>>  }
>>
>> -static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
>> +static void __ref alloc_init_pud(pgd_t *pgd, unsigned long addr,
>>                                 unsigned long end, unsigned long phys,
>> -                               int map_io)
>> +                               pgprot_t sect_prot, pgprot_t pte_prot,
>> +                               bool early, int map_io)
>>  {
>>       pud_t *pud;
>>       unsigned long next;
>> @@ -222,10 +285,10 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
>>               /*
>>                * For 4K granule only, attempt to put down a 1GB block
>>                */
>> -             if (!map_io && (PAGE_SHIFT == 12) &&
>> +             if (!map_io && early && (PAGE_SHIFT == 12) &&
>>                   ((addr | next | phys) & ~PUD_MASK) == 0) {
>>                       pud_t old_pud = *pud;
>> -                     set_pud(pud, __pud(phys | PROT_SECT_NORMAL_EXEC));
>> +                     set_pud(pud, __pud(phys | sect_prot));
>>
>>                       /*
>>                        * If we have an old value for a pud, it will
>> @@ -240,7 +303,8 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
>>                               flush_tlb_all();
>>                       }
>>               } else {
>> -                     alloc_init_pmd(pud, addr, next, phys, map_io);
>> +                     alloc_init_pmd(pud, addr, next, phys, sect_prot,
>> +                                     pte_prot, early, map_io);
>>               }
>>               phys += next - addr;
>>       } while (pud++, addr = next, addr != end);
>> @@ -250,9 +314,11 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
>>   * Create the page directory entries and any necessary page tables for the
>>   * mapping specified by 'md'.
>>   */
>> -static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
>> -                                 unsigned long virt, phys_addr_t size,
>> -                                 int map_io)
>> +static void __ref __create_mapping(pgd_t *pgd, phys_addr_t phys,
>> +                               unsigned long virt,
>> +                               phys_addr_t size,
>> +                               pgprot_t sect_prot, pgprot_t pte_prot,
>> +                               int map_io, bool early)
>>  {
>>       unsigned long addr, length, end, next;
>>
>> @@ -262,31 +328,140 @@ static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
>>       end = addr + length;
>>       do {
>>               next = pgd_addr_end(addr, end);
>> -             alloc_init_pud(pgd, addr, next, phys, map_io);
>> +             alloc_init_pud(pgd, addr, next, phys, sect_prot, pte_prot,
>> +                             early, map_io);
>>               phys += next - addr;
>>       } while (pgd++, addr = next, addr != end);
>>  }
>>
>> -static void __init create_mapping(phys_addr_t phys, unsigned long virt,
>> -                               phys_addr_t size)
>> +void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
>> +{
>> +     if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) {
>> +             pr_warn("BUG: not creating id mapping for %pa\n", &addr);
>> +             return;
>> +     }
>> +     __create_mapping(&idmap_pg_dir[pgd_index(addr)],
>> +                      addr, addr, size, PROT_SECT_NORMAL_EXEC,
>> +                      PAGE_KERNEL_EXEC, map_io, false);
>> +}
>> +
>> +static inline pmd_t *pmd_off_k(unsigned long virt)
>> +{
>> +     return pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt);
>> +}
>> +
>> +#ifdef CONFIG_EARLY_PRINTK
>> +/*
>> + * Create an early I/O mapping using the pgd/pmd entries already populated
>> + * in head.S as this function is called too early to allocate any memory. The
>> + * mapping size is 2MB with 4KB pages or 64KB with 64KB pages.
>> + */
>> +void __iomem * __init early_io_map(phys_addr_t phys, unsigned long virt)
>> +{
>> +     unsigned long size, mask;
>> +     bool page64k = IS_ENABLED(CONFIG_ARM64_64K_PAGES);
>> +     pgd_t *pgd;
>> +     pud_t *pud;
>> +     pmd_t *pmd;
>> +     pte_t *pte;
>> +
>> +     /*
>> +      * No early pte entries with !ARM64_64K_PAGES configuration, so using
>> +      * sections (pmd).
>> +      */
>> +     size = page64k ? PAGE_SIZE : SECTION_SIZE;
>> +     mask = ~(size - 1);
>> +
>> +     pgd = pgd_offset_k(virt);
>> +     pud = pud_offset(pgd, virt);
>> +     if (pud_none(*pud))
>> +             return NULL;
>> +     pmd = pmd_offset(pud, virt);
>> +
>> +     if (page64k) {
>> +             if (pmd_none(*pmd))
>> +                     return NULL;
>> +             pte = pte_offset_kernel(pmd, virt);
>> +             set_pte(pte, __pte((phys & mask) | PROT_DEVICE_nGnRE));
>> +     } else {
>> +             set_pmd(pmd, __pmd((phys & mask) | PROT_SECT_DEVICE_nGnRE));
>> +     }
>> +
>> +     return (void __iomem *)((virt & mask) + (phys & ~mask));
>> +}
>> +#endif
>> +
>> +static void __ref create_mapping(phys_addr_t phys, unsigned long virt,
>> +                               phys_addr_t size,
>> +                               pgprot_t sect_prot, pgprot_t pte_prot)
>>  {
>>       if (virt < VMALLOC_START) {
>>               pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
>>                       &phys, virt);
>>               return;
>>       }
>> -     __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt, size, 0);
>> +
>> +     return __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt,
>> +                             size, sect_prot, pte_prot, 0, true);
>>  }
>>
>> -void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
>> +static void __ref create_mapping_late(phys_addr_t phys, unsigned long virt,
>> +                               phys_addr_t size,
>> +                               pgprot_t sect_prot, pgprot_t pte_prot)
>>  {
>> -     if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) {
>> -             pr_warn("BUG: not creating id mapping for %pa\n", &addr);
>> +     if (virt < VMALLOC_START) {
>> +             pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
>> +                     &phys, virt);
>>               return;
>>       }
>> -     __create_mapping(&idmap_pg_dir[pgd_index(addr)],
>> -                      addr, addr, size, map_io);
>> +
>> +     return __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt,
>> +                             size, sect_prot, pte_prot, 0, false);
>> +}
>> +
>> +#ifdef CONFIG_DEBUG_RODATA
>> +static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
>> +{
>> +     /*
>> +      * Set up the executable regions using the existing section mappings
>> +      * for now. This will get more fine grained later once all memory
>> +      * is mapped
>> +      */
>> +     unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
>> +     unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
>> +
>> +     if (end < kernel_x_start) {
>> +             create_mapping(start, __phys_to_virt(start),
>> +                     end - start, PROT_SECT_NORMAL, PAGE_KERNEL);
>> +     } else if (start >= kernel_x_end) {
>> +             create_mapping(start, __phys_to_virt(start),
>> +                     end - start, PROT_SECT_NORMAL, PAGE_KERNEL);
>> +     } else {
>> +             if (start < kernel_x_start)
>> +                     create_mapping(start, __phys_to_virt(start),
>> +                             kernel_x_start - start,
>> +                             PROT_SECT_NORMAL,
>> +                             PAGE_KERNEL);
>> +             create_mapping(kernel_x_start,
>> +                             __phys_to_virt(kernel_x_start),
>> +                             kernel_x_end - kernel_x_start,
>> +                             PROT_SECT_NORMAL_EXEC, PAGE_KERNEL_EXEC);
>> +             if (kernel_x_end < end)
>> +                     create_mapping(kernel_x_end,
>> +                             __phys_to_virt(kernel_x_end),
>> +                             end - kernel_x_end,
>> +                             PROT_SECT_NORMAL,
>> +                             PAGE_KERNEL);
>> +     }
>> +
>> +}
>> +#else
>> +static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
>> +{
>> +     create_mapping(start, __phys_to_virt(start), end - start,
>> +                     PROT_SECT_NORMAL_EXEC, PAGE_KERNEL_EXEC);
>>  }
>> +#endif
>>
>>  static void __init map_mem(void)
>>  {
>> @@ -328,14 +503,69 @@ static void __init map_mem(void)
>>                       memblock_set_current_limit(limit);
>>               }
>>  #endif
>> -
>> -             create_mapping(start, __phys_to_virt(start), end - start);
>> +             __map_memblock(start, end);
>>       }
>>
>>       /* Limit no longer required. */
>>       memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
>>  }
>>
>> +void __init fixup_executable(void)
>> +{
>> +#ifdef CONFIG_DEBUG_RODATA
>> +     /* now that we are actually fully mapped, make the start/end more fine grained */
>> +     if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) {
>> +             unsigned long aligned_start = round_down(__pa(_stext),
>> +                                                     SECTION_SIZE);
>> +
>> +             create_mapping(aligned_start, __phys_to_virt(aligned_start),
>> +                             __pa(_stext) - aligned_start,
>> +                             PROT_SECT_NORMAL,
>> +                             PAGE_KERNEL);
>> +     }
>> +
>> +     if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) {
>> +             unsigned long aligned_end = round_up(__pa(__init_end),
>> +                                                     SECTION_SIZE);
>> +             create_mapping(__pa(__init_end), (unsigned long)__init_end,
>> +                             aligned_end - __pa(__init_end),
>> +                             PROT_SECT_NORMAL,
>> +                             PAGE_KERNEL);
>> +     }
>> +#endif
>> +}
>> +
>> +#ifdef CONFIG_DEBUG_RODATA
>> +void mark_rodata_ro(void)
>> +{
>> +     create_mapping_late(__pa(_stext), (unsigned long)_stext,
>> +                             (unsigned long)_etext - (unsigned long)_stext,
>> +                             PROT_SECT_NORMAL_EXEC | PMD_SECT_RDONLY,
>> +                             PAGE_KERNEL_EXEC | PTE_RDONLY);
>> +
>> +}
>> +#endif
>> +
>> +static int __flush_mappings(void *unused)
>> +{
>> +     flush_tlb_kernel_range((unsigned long)__init_begin,
>> +                             (unsigned long)__init_end);
>> +     return 0;
>> +}
>> +
>> +void __ref fixup_init(void)
>> +{
>> +     phys_addr_t start = __pa(__init_begin);
>> +     phys_addr_t end = __pa(__init_end);
>> +
>> +     create_mapping_late(start, (unsigned long)__init_begin,
>> +                     end - start,
>> +                     PROT_SECT_NORMAL,
>> +                     PAGE_KERNEL);
>> +     if (!IS_ALIGNED(start, SECTION_SIZE) || !IS_ALIGNED(end, SECTION_SIZE))
>> +             stop_machine(__flush_mappings, NULL, NULL);
>> +}
>> +
>>  /*
>>   * paging_init() sets up the page tables, initialises the zone memory
>>   * maps and sets up the zero page.
>> @@ -345,6 +575,7 @@ void __init paging_init(void)
>>       void *zero_page;
>>
>>       map_mem();
>> +     fixup_executable();
>>
>>       /*
>>        * Finally flush the caches and tlb to ensure that we're in a
>> --
>> Qualcomm Innovation Center, Inc.
>> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
>>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCHv4 3/7] arm64: Move cpu_resume into the text section
  2014-10-28  8:22     ` Ard Biesheuvel
@ 2014-10-28 12:43       ` Mark Rutland
  2014-10-28 15:26         ` Lorenzo Pieralisi
  0 siblings, 1 reply; 30+ messages in thread
From: Mark Rutland @ 2014-10-28 12:43 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Oct 28, 2014 at 08:22:00AM +0000, Ard Biesheuvel wrote:
> On 28 October 2014 09:10, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> > On 27 October 2014 21:12, Laura Abbott <lauraa@codeaurora.org> wrote:
> >> The function cpu_resume currently lives in the .data
> >> section. There's no reason for it to be there since
> >> we can use relative instructions without a problem.
> >> Move it to the text section where it belongs.
> >>
> >> Reviewed-by: Kees Cook <keescook@chromium.org>
> >> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
> >> ---
> >>  arch/arm64/kernel/sleep.S | 12 +++++++-----
> >>  1 file changed, 7 insertions(+), 5 deletions(-)
> >>
> >
> > Hi Laura,
> >
> > Apologies for waiting until v4 to bring this up, but I have some minor comments.
> >
> >> diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
> >> index a564b44..5762b16 100644
> >> --- a/arch/arm64/kernel/sleep.S
> >> +++ b/arch/arm64/kernel/sleep.S
> >> @@ -147,12 +147,12 @@ cpu_resume_after_mmu:
> >>         ret
> >>  ENDPROC(cpu_resume_after_mmu)
> >>
> >> -       .data
> >>  ENTRY(cpu_resume)
> >>         bl      el2_setup               // if in EL2 drop to EL1 cleanly
> >>  #ifdef CONFIG_SMP
> >>         mrs     x1, mpidr_el1
> >> -       adr     x4, mpidr_hash_ptr
> >> +       adrp    x4, mpidr_hash_ptr
> >> +       add     x4, x4, #:lo12:mpidr_hash_ptr
> >
> > Instead of this change, you could put mpidr_hash_ptr itself in .text as well.
> >
> 
> Actually, looking more closely, it appears mpidr_hash_ptr can be
> dropped completely:
> the address obtained by adrp/add is PC relative, so the whole sequence
> could just be
> 
> adrp x8, mpidr_hash
> add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address
> 
> (and the ldr and add below can be dropped, and so can mpidr_hash_ptr at the end)

Also, if you update arch/arm64/kernel/suspend.c to drop the extern from
sleep_save_sp and sleep_idmap_phys, you can drop all of the .data from
sleep.S
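
i.e. suspend.c would carry the definitions itself, roughly (sketch,
reusing the struct sleep_save_sp type already in asm/suspend.h):

struct sleep_save_sp sleep_save_sp;
phys_addr_t sleep_idmap_phys;

so sleep.S no longer needs any .data entries of its own.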

I gave all of that a go locally and my Juno happily booted to userspace
with cpuidle running. I had to apply Marc's timer fix to get cpuidle to
work at all with v3.18-rc2, but that should be fixed for -rc3.

So with those changes:

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCHv4 7/7] arm64: add better page protections to arm64
  2014-10-28 11:44     ` Ard Biesheuvel
@ 2014-10-28 13:40       ` Steve Capper
  0 siblings, 0 replies; 30+ messages in thread
From: Steve Capper @ 2014-10-28 13:40 UTC (permalink / raw)
  To: linux-arm-kernel

On 28 October 2014 11:44, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> On 28 October 2014 12:29, Steve Capper <steve.capper@linaro.org> wrote:
>> On Mon, Oct 27, 2014 at 01:12:32PM -0700, Laura Abbott wrote:

[...]

>>
>> I think we should wipe out map_io from all these functions as it's
>> causing too much complexity. It's enough to have just sect_prot and
>> pte_prot. I posted some similar feedback to Ard:
>> http://lists.infradead.org/pipermail/linux-arm-kernel/2014-October/296918.html
>>
>
> I do agree. But could we have a single enum that maps onto {sect_prot,
> pte_prot} tuples rather than having to pass both values everywhere we
> call any of these functions?
> I.e., { MMU_PROT_DEFAULT, MMU_PROT_READONLY, MMU_PROT_READWRITE,
> MMU_PROT_READEXEC, MMU_PROT_DEVICE }, with the mapping local to mmu.c?
>

arch/arm has a mem_types array; I'm not sure how applicable that would
be for arm64.
One could also be (slightly) filthy and promote a pte_prot into a
sect_prot via manipulation of the lower 2 bits (see for example
pte_mkhuge).
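
In descriptor terms that promotion is just dropping the table bit, e.g.
(sketch, relying on the usual encoding: page/table descriptors carry 0b11
in bits [1:0], block descriptors 0b01, and the remaining lower attribute
bits line up):

static inline pgprot_t pte_prot_to_sect_prot(pgprot_t prot)
{
	/* same trick as pte_mkhuge(): clear PTE_TABLE_BIT */
	return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT);
}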

Basically as long as the mapping logic doesn't make decisions about
which pgprots to use, then I'm happy :-).

Cheers,
--
Steve

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCHv4 5/7] arm64: Factor out fixmap initialization from ioremap
  2014-10-27 20:12 ` [PATCHv4 5/7] arm64: Factor out fixmap initialization from ioremap Laura Abbott
@ 2014-10-28 14:17   ` Mark Rutland
  0 siblings, 0 replies; 30+ messages in thread
From: Mark Rutland @ 2014-10-28 14:17 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Laura,

On Mon, Oct 27, 2014 at 08:12:30PM +0000, Laura Abbott wrote:
> The fixmap API was originally added for arm64 for
> early_ioremap purposes. It can be used for other purposes too
> so move the initialization from ioremap to somewhere more
> generic. This makes it obvious where the fixmap is being set
> up and allows for a cleaner implementation of __set_fixmap.
> 
> Reviewed-by: Kees Cook <keescook@chromium.org>
> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
> ---
>  arch/arm64/include/asm/fixmap.h |  7 +--
>  arch/arm64/kernel/setup.c       |  3 +-
>  arch/arm64/mm/ioremap.c         | 93 ++--------------------------------------
>  arch/arm64/mm/mmu.c             | 94 +++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 103 insertions(+), 94 deletions(-)

This doesn't apply cleanly to v3.18-rc2 due to a conflict with
c0260ba906c4dfbc (arm64: mm: Correct fixmap pagetable types). Luckily
the difference is trivial (the types of bm_pmd and bm_pud were updated
from pte_t[] to pmd_t[] and pud_t[] respectively), so I fixed that up
locally, and my Juno boots happily.
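
(For reference, the fixed-up declarations end up looking roughly like
this, keeping the same #if guards as in the hunk below:)

    static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
    static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;  /* was pte_t */
    static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;  /* was pte_t */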

Otherwise this looks fine to me.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>

Mark.

> 
> diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
> index 5f7bfe6..db26a2f2 100644
> --- a/arch/arm64/include/asm/fixmap.h
> +++ b/arch/arm64/include/asm/fixmap.h
> @@ -56,10 +56,11 @@ enum fixed_addresses {
>  
>  #define FIXMAP_PAGE_IO     __pgprot(PROT_DEVICE_nGnRE)
>  
> -extern void __early_set_fixmap(enum fixed_addresses idx,
> -			       phys_addr_t phys, pgprot_t flags);
> +void __init early_fixmap_init(void);
>  
> -#define __set_fixmap __early_set_fixmap
> +#define __early_set_fixmap __set_fixmap
> +
> +extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
>  
>  #include <asm-generic/fixmap.h>
>  
> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
> index 2437196..efc8bc5 100644
> --- a/arch/arm64/kernel/setup.c
> +++ b/arch/arm64/kernel/setup.c
> @@ -376,7 +376,8 @@ void __init setup_arch(char **cmdline_p)
>  
>  	*cmdline_p = boot_command_line;
>  
> -	early_ioremap_init();
> +	early_fixmap_init();
> +	early_ioremap_setup();
>  
>  	parse_early_param();
>  
> diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
> index fa324bd..cbb99c8 100644
> --- a/arch/arm64/mm/ioremap.c
> +++ b/arch/arm64/mm/ioremap.c
> @@ -103,97 +103,10 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size)
>  }
>  EXPORT_SYMBOL(ioremap_cache);
>  
> -static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
> -#if CONFIG_ARM64_PGTABLE_LEVELS > 2
> -static pte_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
> -#endif
> -#if CONFIG_ARM64_PGTABLE_LEVELS > 3
> -static pte_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
> -#endif
> -
> -static inline pud_t * __init early_ioremap_pud(unsigned long addr)
> -{
> -	pgd_t *pgd;
> -
> -	pgd = pgd_offset_k(addr);
> -	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
> -
> -	return pud_offset(pgd, addr);
> -}
> -
> -static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
> -{
> -	pud_t *pud = early_ioremap_pud(addr);
> -
> -	BUG_ON(pud_none(*pud) || pud_bad(*pud));
> -
> -	return pmd_offset(pud, addr);
> -}
> -
> -static inline pte_t * __init early_ioremap_pte(unsigned long addr)
> -{
> -	pmd_t *pmd = early_ioremap_pmd(addr);
> -
> -	BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
> -
> -	return pte_offset_kernel(pmd, addr);
> -}
> -
> +/*
> + * Must be called after early_fixmap_init
> + */
>  void __init early_ioremap_init(void)
>  {
> -	pgd_t *pgd;
> -	pud_t *pud;
> -	pmd_t *pmd;
> -	unsigned long addr = fix_to_virt(FIX_BTMAP_BEGIN);
> -
> -	pgd = pgd_offset_k(addr);
> -	pgd_populate(&init_mm, pgd, bm_pud);
> -	pud = pud_offset(pgd, addr);
> -	pud_populate(&init_mm, pud, bm_pmd);
> -	pmd = pmd_offset(pud, addr);
> -	pmd_populate_kernel(&init_mm, pmd, bm_pte);
> -
> -	/*
> -	 * The boot-ioremap range spans multiple pmds, for which
> -	 * we are not prepared:
> -	 */
> -	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
> -		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
> -
> -	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
> -		WARN_ON(1);
> -		pr_warn("pmd %p != %p\n",
> -			pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
> -		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
> -			fix_to_virt(FIX_BTMAP_BEGIN));
> -		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
> -			fix_to_virt(FIX_BTMAP_END));
> -
> -		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
> -		pr_warn("FIX_BTMAP_BEGIN:     %d\n",
> -			FIX_BTMAP_BEGIN);
> -	}
> -
>  	early_ioremap_setup();
>  }
> -
> -void __init __early_set_fixmap(enum fixed_addresses idx,
> -			       phys_addr_t phys, pgprot_t flags)
> -{
> -	unsigned long addr = __fix_to_virt(idx);
> -	pte_t *pte;
> -
> -	if (idx >= __end_of_fixed_addresses) {
> -		BUG();
> -		return;
> -	}
> -
> -	pte = early_ioremap_pte(addr);
> -
> -	if (pgprot_val(flags))
> -		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
> -	else {
> -		pte_clear(&init_mm, addr, pte);
> -		flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
> -	}
> -}
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 6894ef3..e92f633 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -28,6 +28,7 @@
>  #include <linux/io.h>
>  
>  #include <asm/cputype.h>
> +#include <asm/fixmap.h>
>  #include <asm/sections.h>
>  #include <asm/setup.h>
>  #include <asm/sizes.h>
> @@ -459,3 +460,96 @@ void vmemmap_free(unsigned long start, unsigned long end)
>  {
>  }
>  #endif	/* CONFIG_SPARSEMEM_VMEMMAP */
> +
> +static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
> +#if CONFIG_ARM64_PGTABLE_LEVELS > 2
> +static pte_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
> +#endif
> +#if CONFIG_ARM64_PGTABLE_LEVELS > 3
> +static pte_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
> +#endif
> +
> +static inline pud_t * fixmap_pud(unsigned long addr)
> +{
> +	pgd_t *pgd = pgd_offset_k(addr);
> +
> +	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
> +
> +	return pud_offset(pgd, addr);
> +}
> +
> +static inline pmd_t * fixmap_pmd(unsigned long addr)
> +{
> +	pud_t *pud = fixmap_pud(addr);
> +
> +	BUG_ON(pud_none(*pud) || pud_bad(*pud));
> +
> +	return pmd_offset(pud, addr);
> +}
> +
> +static inline pte_t * fixmap_pte(unsigned long addr)
> +{
> +	pmd_t *pmd = fixmap_pmd(addr);
> +
> +	BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
> +
> +	return pte_offset_kernel(pmd, addr);
> +}
> +
> +void __init early_fixmap_init(void)
> +{
> +	pgd_t *pgd;
> +	pud_t *pud;
> +	pmd_t *pmd;
> +	unsigned long addr = FIXADDR_START;
> +
> +	pgd = pgd_offset_k(addr);
> +	pgd_populate(&init_mm, pgd, bm_pud);
> +	pud = pud_offset(pgd, addr);
> +	pud_populate(&init_mm, pud, bm_pmd);
> +	pmd = pmd_offset(pud, addr);
> +	pmd_populate_kernel(&init_mm, pmd, bm_pte);
> +
> +	/*
> +	 * The boot-ioremap range spans multiple pmds, for which
> +	 * we are not prepared:
> +	 */
> +	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
> +		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
> +
> +	if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
> +	     || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
> +		WARN_ON(1);
> +		pr_warn("pmd %p != %p, %p\n",
> +			pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
> +			fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
> +		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
> +			fix_to_virt(FIX_BTMAP_BEGIN));
> +		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
> +			fix_to_virt(FIX_BTMAP_END));
> +
> +		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
> +		pr_warn("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
> +	}
> +}
> +
> +void __set_fixmap(enum fixed_addresses idx,
> +			       phys_addr_t phys, pgprot_t flags)
> +{
> +	unsigned long addr = __fix_to_virt(idx);
> +	pte_t *pte;
> +
> +	if (idx >= __end_of_fixed_addresses) {
> +		BUG();
> +		return;
> +	}
> +
> +	pte = fixmap_pte(addr);
> +
> +	if (pgprot_val(flags)) {
> +		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
> +	} else {
> +		pte_clear(&init_mm, addr, pte);
> +		flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
> +	}
> +}
> -- 
> Qualcomm Innovation Center, Inc.
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
> 
> 

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCHv4 7/7] arm64: add better page protections to arm64
  2014-10-27 20:12 ` [PATCHv4 7/7] arm64: add better page protections to arm64 Laura Abbott
  2014-10-28 11:29   ` Steve Capper
@ 2014-10-28 14:20   ` Mark Rutland
  2014-10-30 17:17     ` Laura Abbott
  1 sibling, 1 reply; 30+ messages in thread
From: Mark Rutland @ 2014-10-28 14:20 UTC (permalink / raw)
  To: linux-arm-kernel

[resending due to previous header destruction]

Hi Laura,

On Mon, Oct 27, 2014 at 08:12:32PM +0000, Laura Abbott wrote:
> Add page protections for arm64 similar to those in arm.
> This is for security reasons to prevent certain classes
> of exploits. The current method:
>
> - Map all memory as either RWX or RW. We round to the nearest
>   section to avoid creating page tables before everything is mapped
> - Once everything is mapped, if either end of the RWX section should
>   not be X, we split the PMD and remap as necessary
> - When initmem is to be freed, we change the permissions back to
>   RW (using stop machine if necessary to flush the TLB)
> - If CONFIG_DEBUG_RODATA is set, the read only sections are set
>   read only.
>
> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
> ---
> v4: Combined the Kconfig options
> ---
>  arch/arm64/Kconfig.debug            |  23 +++
>  arch/arm64/include/asm/cacheflush.h |   4 +
>  arch/arm64/kernel/vmlinux.lds.S     |  17 ++
>  arch/arm64/mm/init.c                |   1 +
>  arch/arm64/mm/mm.h                  |   2 +
>  arch/arm64/mm/mmu.c                 | 303 +++++++++++++++++++++++++++++++-----
>  6 files changed, 314 insertions(+), 36 deletions(-)

With this patch applied to v3.18-rc2, my board blows up at boot when
using UEFI (without DEBUG_RODATA selected):

---->8----
EFI stub: Booting Linux Kernel...
Initializing cgroup subsys cpu
Linux version 3.18.0-rc2+ (mark at leverpostej) (gcc version 4.9.2 20140904 (prerelease) (crosstool-NG linaro-1.13.1-4.9-2014.09 - Linaro GCC 4.9-2014.09) ) #112 SMP PREEMPT Tue Oct 28 13:58:41 GMT 2014
CPU: AArch64 Processor [410fd030] revision 0
Detected VIPT I-cache on CPU0
bootconsole [uart0] enabled
efi: Getting EFI parameters from FDT:
EFI v2.40 by ARM Juno EFI Oct  7 2014 15:05:42
efi:  ACPI=0xfebdc000  ACPI 2.0=0xfebdc014
cma: Reserved 16 MiB at fd800000
BUG: failure at arch/arm64/mm/mmu.c:234/alloc_init_pmd()!
Kernel panic - not syncing: BUG!
CPU: 0 PID: 0 Comm: swapper Not tainted 3.18.0-rc2+ #112
Call trace:
[<ffffffc000087ec8>] dump_backtrace+0x0/0x124
[<ffffffc000087ffc>] show_stack+0x10/0x1c
[<ffffffc0004ebd58>] dump_stack+0x74/0xb8
[<ffffffc0004eb018>] panic+0xe0/0x220
[<ffffffc0004e8e08>] alloc_init_pmd+0x1cc/0x1dc
[<ffffffc0004e8e3c>] alloc_init_pud+0x24/0x6c
[<ffffffc0004e8f54>] __create_mapping+0xd0/0xf0
[<ffffffc00069a0a0>] create_id_mapping+0x5c/0x68
[<ffffffc00069964c>] efi_idmap_init+0x54/0xd8
[<ffffffc0006978a8>] setup_arch+0x408/0x5c0
[<ffffffc00069566c>] start_kernel+0x94/0x3a0
---[ end Kernel panic - not syncing: BUG!
---->8----

I've not yet figured out precisely why. I haven't tried a !EFI boot
because of the way my board is set up at the moment.

Mark.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCHv4 3/7] arm64: Move cpu_resume into the text section
  2014-10-28 12:43       ` Mark Rutland
@ 2014-10-28 15:26         ` Lorenzo Pieralisi
  2014-10-28 15:31           ` Mark Rutland
  0 siblings, 1 reply; 30+ messages in thread
From: Lorenzo Pieralisi @ 2014-10-28 15:26 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Oct 28, 2014 at 12:43:15PM +0000, Mark Rutland wrote:
> On Tue, Oct 28, 2014 at 08:22:00AM +0000, Ard Biesheuvel wrote:
> > On 28 October 2014 09:10, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> > > On 27 October 2014 21:12, Laura Abbott <lauraa@codeaurora.org> wrote:
> > >> The function cpu_resume currently lives in the .data
> > >> section. There's no reason for it to be there since
> > >> we can use relative instructions without a problem.
> > >> Move it to the text section where it belongs.
> > >>
> > >> Reviewed-by: Kees Cook <keescook@chromium.org>
> > >> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
> > >> ---
> > >>  arch/arm64/kernel/sleep.S | 12 +++++++-----
> > >>  1 file changed, 7 insertions(+), 5 deletions(-)
> > >>
> > >
> > > Hi Laura,
> > >
> > > Apologies for waiting until v4 to bring this up, but I have some minor comments.
> > >
> > >> diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
> > >> index a564b44..5762b16 100644
> > >> --- a/arch/arm64/kernel/sleep.S
> > >> +++ b/arch/arm64/kernel/sleep.S
> > >> @@ -147,12 +147,12 @@ cpu_resume_after_mmu:
> > >>         ret
> > >>  ENDPROC(cpu_resume_after_mmu)
> > >>
> > >> -       .data
> > >>  ENTRY(cpu_resume)
> > >>         bl      el2_setup               // if in EL2 drop to EL1 cleanly
> > >>  #ifdef CONFIG_SMP
> > >>         mrs     x1, mpidr_el1
> > >> -       adr     x4, mpidr_hash_ptr
> > >> +       adrp    x4, mpidr_hash_ptr
> > >> +       add     x4, x4, #:lo12:mpidr_hash_ptr
> > >
> > > Instead of this change, you could put mpidr_hash_ptr itself in .text as well.
> > >
> > 
> > Actually, looking more closely, it appears mpidr_hash_ptr can be
> > dropped completely:
> > the address obtained by adrp/add is PC relative, so the whole sequence
> > could just be
> > 
> > adrp x8, mpidr_hash
> > add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address
> > 
> > (and the ldr and add below can be dropped, and so can mpidr_hash_ptr at the end)
> 
> Also, if you update arch/arm64/kernel/suspend.c to drop the extern from
> sleep_save_sp and sleep_idmap_phys, you can drop all of the .data from
> sleep.S

If by that you also mean that all adr references to those data structures
in the resume path should become adrp, then I agree; otherwise we might
have an offset issue.

Please respin the patch with the above changes. I tested by applying
the local changes myself, and I will add my tags to the final version.

Thanks,
Lorenzo

> I gave all of that a go locally and my Juno happily booted to userspace
> with cpuidle running. I had to apply Marc's timer fix to get cpuidle to
> work at all with v3.18-rc2, but that should be fixed for -rc3.
> 
> So with those changes:
> 
> Reviewed-by: Mark Rutland <mark.rutland@arm.com>
> Tested-by: Mark Rutland <mark.rutland@arm.com>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCHv4 3/7] arm64: Move cpu_resume into the text section
  2014-10-28 15:26         ` Lorenzo Pieralisi
@ 2014-10-28 15:31           ` Mark Rutland
  2014-10-30 16:49             ` Laura Abbott
  0 siblings, 1 reply; 30+ messages in thread
From: Mark Rutland @ 2014-10-28 15:31 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Oct 28, 2014 at 03:26:56PM +0000, Lorenzo Pieralisi wrote:
> On Tue, Oct 28, 2014 at 12:43:15PM +0000, Mark Rutland wrote:
> > On Tue, Oct 28, 2014 at 08:22:00AM +0000, Ard Biesheuvel wrote:
> > > On 28 October 2014 09:10, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> > > > On 27 October 2014 21:12, Laura Abbott <lauraa@codeaurora.org> wrote:
> > > >> The function cpu_resume currently lives in the .data
> > > >> section. There's no reason for it to be there since
> > > >> we can use relative instructions without a problem.
> > > >> Move it to the text section where it belongs.
> > > >>
> > > >> Reviewed-by: Kees Cook <keescook@chromium.org>
> > > >> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
> > > >> ---
> > > >>  arch/arm64/kernel/sleep.S | 12 +++++++-----
> > > >>  1 file changed, 7 insertions(+), 5 deletions(-)
> > > >>
> > > >
> > > > Hi Laura,
> > > >
> > > > Apologies for waiting until v4 to bring this up, but I have some minor comments.
> > > >
> > > >> diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
> > > >> index a564b44..5762b16 100644
> > > >> --- a/arch/arm64/kernel/sleep.S
> > > >> +++ b/arch/arm64/kernel/sleep.S
> > > >> @@ -147,12 +147,12 @@ cpu_resume_after_mmu:
> > > >>         ret
> > > >>  ENDPROC(cpu_resume_after_mmu)
> > > >>
> > > >> -       .data
> > > >>  ENTRY(cpu_resume)
> > > >>         bl      el2_setup               // if in EL2 drop to EL1 cleanly
> > > >>  #ifdef CONFIG_SMP
> > > >>         mrs     x1, mpidr_el1
> > > >> -       adr     x4, mpidr_hash_ptr
> > > >> +       adrp    x4, mpidr_hash_ptr
> > > >> +       add     x4, x4, #:lo12:mpidr_hash_ptr
> > > >
> > > > Instead of this change, you could put mpidr_hash_ptr itself in .text as well.
> > > >
> > > 
> > > Actually, looking more closely, it appears mpidr_hash_ptr can be
> > > dropped completely:
> > > the address obtained by adrp/add is PC relative, so the whole sequence
> > > could just be
> > > 
> > > adrp x8, mpidr_hash
> > > add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address
> > > 
> > > (and the ldr and add below can be dropped, and so can mpidr_hash_ptr at the end)
> > 
> > Also, if you update arch/arm64/kernel/suspend.c to drop the extern from
> > sleep_save_sp and sleep_idmap_phys, you can drop all of the .data from
> > sleep.S
> 
> If by that you also mean that all adr references to those data structures
> in the resume path should become adrp, then I agree; otherwise we might
> have an offset issue.

Yes. The original patch had those modifications already, so I assumed
they would remain.

Mark.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCHv4 3/7] arm64: Move cpu_resume into the text section
  2014-10-28 15:31           ` Mark Rutland
@ 2014-10-30 16:49             ` Laura Abbott
  0 siblings, 0 replies; 30+ messages in thread
From: Laura Abbott @ 2014-10-30 16:49 UTC (permalink / raw)
  To: linux-arm-kernel

On 10/28/2014 8:31 AM, Mark Rutland wrote:
> On Tue, Oct 28, 2014 at 03:26:56PM +0000, Lorenzo Pieralisi wrote:
>> On Tue, Oct 28, 2014 at 12:43:15PM +0000, Mark Rutland wrote:
>>> On Tue, Oct 28, 2014 at 08:22:00AM +0000, Ard Biesheuvel wrote:
>>>> On 28 October 2014 09:10, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
>>>>> On 27 October 2014 21:12, Laura Abbott <lauraa@codeaurora.org> wrote:
>>>>>> The function cpu_resume currently lives in the .data
>>>>>> section. There's no reason for it to be there since
>>>>>> we can use relative instructions without a problem.
>>>>>> Move it to the text section where it belongs.
>>>>>>
>>>>>> Reviewed-by: Kees Cook <keescook@chromium.org>
>>>>>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
>>>>>> ---
>>>>>>   arch/arm64/kernel/sleep.S | 12 +++++++-----
>>>>>>   1 file changed, 7 insertions(+), 5 deletions(-)
>>>>>>
>>>>>
>>>>> Hi Laura,
>>>>>
>>>>> Apologies for waiting until v4 to bring this up, but I have some minor comments.
>>>>>
>>>>>> diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
>>>>>> index a564b44..5762b16 100644
>>>>>> --- a/arch/arm64/kernel/sleep.S
>>>>>> +++ b/arch/arm64/kernel/sleep.S
>>>>>> @@ -147,12 +147,12 @@ cpu_resume_after_mmu:
>>>>>>          ret
>>>>>>   ENDPROC(cpu_resume_after_mmu)
>>>>>>
>>>>>> -       .data
>>>>>>   ENTRY(cpu_resume)
>>>>>>          bl      el2_setup               // if in EL2 drop to EL1 cleanly
>>>>>>   #ifdef CONFIG_SMP
>>>>>>          mrs     x1, mpidr_el1
>>>>>> -       adr     x4, mpidr_hash_ptr
>>>>>> +       adrp    x4, mpidr_hash_ptr
>>>>>> +       add     x4, x4, #:lo12:mpidr_hash_ptr
>>>>>
>>>>> Instead of this change, you could put mpidr_hash_ptr itself in .text as well.
>>>>>
>>>>
>>>> Actually, looking more closely, it appears mpidr_hash_ptr can be
>>>> dropped completely:
>>>> the address obtained by adrp/add is PC relative, so the whole sequence
>>>> could just be
>>>>
>>>> adrp x8, mpidr_hash
>>>> add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address
>>>>
>>>> (and the ldr and add below can be dropped, and so can mpidr_hash_ptr at the end)
>>>
>>> Also, if you update arch/arm64/kernel/suspend.c to drop the extern from
>>> sleep_save_sp and sleep_idmap_phys, you can drop all of the .data from
>>> sleep.S
>>
>> If by that you also mean that all adr references to those data structures
>> in the resume path should become adrp, then I agree; otherwise we might
>> have an offset issue.
>
> Yes. The original patch had those modifications already, so I assumed
> they would remain.
>

Thanks all. I'll incorporate the suggestions in the next version.

Laura

-- 
Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCHv4 4/7] arm64: Move some head.text functions to executable section
  2014-10-28 11:10     ` Mark Rutland
@ 2014-10-30 17:06       ` Laura Abbott
  2014-10-30 18:45         ` Mark Rutland
  0 siblings, 1 reply; 30+ messages in thread
From: Laura Abbott @ 2014-10-30 17:06 UTC (permalink / raw)
  To: linux-arm-kernel

On 10/28/2014 4:10 AM, Mark Rutland wrote:
> On Tue, Oct 28, 2014 at 08:35:37AM +0000, Ard Biesheuvel wrote:
>> On 27 October 2014 21:12, Laura Abbott <lauraa@codeaurora.org> wrote:
>>> The head.text section is intended to be run at early bootup
>>> before any of the regular kernel mappings have been setup.
>>> Parts of head.text may be freed back into the buddy allocator
>>> due to TEXT_OFFSET so for security requirements this memory
>>> must not be executable. The suspend/resume/hotplug code path
>>> requires some of these head.S functions to run however which
>>> means they need to be executable. Support these conflicting
>>> requirements by moving the few head.text functions that need
>>> to be executable to the text section which has the appropriate
>>> page table permissions.
>>>
>>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
>>> ---
>>> v4: New approach based on discussions with Mark
>>> ---
>>>   arch/arm64/kernel/head.S | 4 ++++
>>>   1 file changed, 4 insertions(+)
>>>
>>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>>> index 10f5cc0..dc362da 100644
>>> --- a/arch/arm64/kernel/head.S
>>> +++ b/arch/arm64/kernel/head.S
>>> @@ -432,12 +432,14 @@ ENTRY(secondary_startup)
>>>          b       __enable_mmu
>>>   ENDPROC(secondary_startup)
>>>
>>> +       .pushsection    .text, "ax"
>>>   ENTRY(__secondary_switched)
>>>          ldr     x0, [x21]                       // get secondary_data.stack
>>>          mov     sp, x0
>>>          mov     x29, #0
>>>          b       secondary_start_kernel
>>>   ENDPROC(__secondary_switched)
>>> +       .popsection
>>>   #endif /* CONFIG_SMP */
>>>
>>>   /*
>>> @@ -471,11 +473,13 @@ ENDPROC(__enable_mmu)
>>>    * table to map the entire function.
>>>    */
>>>          .align  4
>>> +       .pushsection    .text, "ax"
>>
>> There is a comment before this .align that explains why it is
>> separated from __enable_mmu, and I think jumping into another section
>> right after it kind of defeats the purpose.
>> Perhaps it is better to put the pushsection before __enable_mmu instead?
>
> To keep the alignment correct we just need to move the .align after the
> pushsection. With that changed I think this patch is Ok.
>
> As __enable_mmu is only executed with the MMU off it doesn't need to be
> moved into an executable section to prevent the MMU from blowing up in
> our faces -- it would be wrong to call it with the MMU on anyway.
>
> However, this does raise a potential problem in that an attacker could
> scribble over code executed before the MMU is on. Then they just have to
> wait for the next CPU hotplug or suspend/resume for it to be executed.
> So some functions including __enable_mmu and el2_setup aren't
> necessarily safe in their current location.
>
> There are a few ways of solving that, either moving stuff around or
> releasing less memory for allocation.
>

My original version of the patch did move everything around to keep
__enable_mmu and others in the text section so they couldn't
be modified. It seemed rather unwieldy though so I went with this
approach. Releasing less memory for allocation is less than optimal
so I'd prefer if we moved code around to the appropriate section.
Any opinions about going back to my original approach of moving things
around vs. going with more pushsection annotations?

Thanks,
Laura

-- 
Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCHv4 7/7] arm64: add better page protections to arm64
  2014-10-28 11:29   ` Steve Capper
  2014-10-28 11:44     ` Ard Biesheuvel
@ 2014-10-30 17:12     ` Laura Abbott
  1 sibling, 0 replies; 30+ messages in thread
From: Laura Abbott @ 2014-10-30 17:12 UTC (permalink / raw)
  To: linux-arm-kernel

On 10/28/2014 4:29 AM, Steve Capper wrote:
> On Mon, Oct 27, 2014 at 01:12:32PM -0700, Laura Abbott wrote:
>> Add page protections for arm64 similar to those in arm.
>> This is for security reasons to prevent certain classes
>> of exploits. The current method:
>>
>> - Map all memory as either RWX or RW. We round to the nearest
>>    section to avoid creating page tables before everything is mapped
>> - Once everything is mapped, if either end of the RWX section should
>>    not be X, we split the PMD and remap as necessary
>> - When initmem is to be freed, we change the permissions back to
>>    RW (using stop machine if necessary to flush the TLB)
>> - If CONFIG_DEBUG_RODATA is set, the read only sections are set
>>    read only.
>>
>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
>
> Hi Laura,
> I have some comments below.
>
...
>> @@ -156,29 +195,52 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
>>   	} while (pte++, addr += PAGE_SIZE, addr != end);
>>   }
>>
>> -static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
>> +void __ref split_pud(pud_t *old_pud, pmd_t *pmd)
>> +{
>> +	unsigned long pfn = pud_pfn(*old_pud);
>> +        const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN |
>> +                              PMD_SECT_RDONLY | PMD_SECT_PROT_NONE |
>> +                              PMD_SECT_VALID;
>> +	pgprot_t prot = __pgprot(pud_val(*old_pud) & mask);
>
> This looks a little fragile; if I've understood things correctly, then
> all we want to do is create a set of pmds with the same pgprot_t as the
> original pud?
>
> In that case it would probably be easier to simply mask out the pfn
> from our pud to create the pgprot rather than mask in a set of
> attributes (that may change in future)?
>

Good suggestion. I'll check that in the next version.
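
Something along these lines, perhaps (only a sketch of the
mask-out-the-pfn idea, not what the next version will actually do):

    /* recover the attribute bits by stripping the pfn out of the pud
     * value, rather than whitelisting individual PMD_SECT_* bits */
    unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT;
    pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr);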

>> +	int i = 0;
>> +
>> +	do {
>> +		set_pmd(pmd, pfn_pmd(pfn, prot));
>> +		pfn++;
>> +	} while (pmd++, i++, i < PTRS_PER_PMD);
>> +}
>> +
>> +static void __ref alloc_init_pmd(pud_t *pud, unsigned long addr,
>>   				  unsigned long end, phys_addr_t phys,
>> -				  int map_io)
>> +				  pgprot_t sect_prot, pgprot_t pte_prot,
>> +				  bool early, int map_io)
>>   {
>>   	pmd_t *pmd;
>>   	unsigned long next;
>> -	pmdval_t prot_sect;
>> -	pgprot_t prot_pte;
>>
>>   	if (map_io) {
>> -		prot_sect = PROT_SECT_DEVICE_nGnRE;
>> -		prot_pte = __pgprot(PROT_DEVICE_nGnRE);
>> -	} else {
>> -		prot_sect = PROT_SECT_NORMAL_EXEC;
>> -		prot_pte = PAGE_KERNEL_EXEC;
>> +		sect_prot = PROT_SECT_DEVICE_nGnRE;
>> +		pte_prot = __pgprot(PROT_DEVICE_nGnRE);
>>   	}
>
> I think we should wipe out map_io from all these functions as it's
> causing too much complexity. It's enough to have just sect_prot and
> pte_prot. I posted some similar feedback to Ard:
> http://lists.infradead.org/pipermail/linux-arm-kernel/2014-October/296918.html
>

Sounds fine. I think my series would conflict with Ard's, so I should
give that series a review and see if it makes sense to base this
series on top of it. Part of this work might also be relevant
for adding DEBUG_PAGEALLOC for arm64, so I might split that out into
a separate patch as well.

Thanks,
Laura

-- 
Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCHv4 7/7] arm64: add better page protections to arm64
  2014-10-28 14:20   ` Mark Rutland
@ 2014-10-30 17:17     ` Laura Abbott
  0 siblings, 0 replies; 30+ messages in thread
From: Laura Abbott @ 2014-10-30 17:17 UTC (permalink / raw)
  To: linux-arm-kernel

On 10/28/2014 7:20 AM, Mark Rutland wrote:
> [resending due to previous header destruction]
>
> Hi Laura,
>
> On Mon, Oct 27, 2014 at 08:12:32PM +0000, Laura Abbott wrote:
>> Add page protections for arm64 similar to those in arm.
>> This is for security reasons to prevent certain classes
>> of exploits. The current method:
>>
>> - Map all memory as either RWX or RW. We round to the nearest
>>    section to avoid creating page tables before everything is mapped
>> - Once everything is mapped, if either end of the RWX section should
>>    not be X, we split the PMD and remap as necessary
>> - When initmem is to be freed, we change the permissions back to
>>    RW (using stop machine if necessary to flush the TLB)
>> - If CONFIG_DEBUG_RODATA is set, the read only sections are set
>>    read only.
>>
>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
>> ---
>> v4: Combined the Kconfig options
>> ---
>>   arch/arm64/Kconfig.debug            |  23 +++
>>   arch/arm64/include/asm/cacheflush.h |   4 +
>>   arch/arm64/kernel/vmlinux.lds.S     |  17 ++
>>   arch/arm64/mm/init.c                |   1 +
>>   arch/arm64/mm/mm.h                  |   2 +
>>   arch/arm64/mm/mmu.c                 | 303 +++++++++++++++++++++++++++++++-----
>>   6 files changed, 314 insertions(+), 36 deletions(-)
>
> With this patch applied to v3.18-rc2, my board blows up at boot when
> using UEFI (without DEBUG_RODATA selected):
>
> ---->8----
> EFI stub: Booting Linux Kernel...
> Initializing cgroup subsys cpu
> Linux version 3.18.0-rc2+ (mark at leverpostej) (gcc version 4.9.2 20140904 (prerelease) (crosstool-NG linaro-1.13.1-4.9-2014.09 - Linaro GCC 4.9-2014.09) ) #112 SMP PREEMPT Tue Oct 28 13:58:41 GMT 2014
> CPU: AArch64 Processor [410fd030] revision 0
> Detected VIPT I-cache on CPU0
> bootconsole [uart0] enabled
> efi: Getting EFI parameters from FDT:
> EFI v2.40 by ARM Juno EFI Oct  7 2014 15:05:42
> efi:  ACPI=0xfebdc000  ACPI 2.0=0xfebdc014
> cma: Reserved 16 MiB at fd800000
> BUG: failure at arch/arm64/mm/mmu.c:234/alloc_init_pmd()!
> Kernel panic - not syncing: BUG!
> CPU: 0 PID: 0 Comm: swapper Not tainted 3.18.0-rc2+ #112
> Call trace:
> [<ffffffc000087ec8>] dump_backtrace+0x0/0x124
> [<ffffffc000087ffc>] show_stack+0x10/0x1c
> [<ffffffc0004ebd58>] dump_stack+0x74/0xb8
> [<ffffffc0004eb018>] panic+0xe0/0x220
> [<ffffffc0004e8e08>] alloc_init_pmd+0x1cc/0x1dc
> [<ffffffc0004e8e3c>] alloc_init_pud+0x24/0x6c
> [<ffffffc0004e8f54>] __create_mapping+0xd0/0xf0
> [<ffffffc00069a0a0>] create_id_mapping+0x5c/0x68
> [<ffffffc00069964c>] efi_idmap_init+0x54/0xd8
> [<ffffffc0006978a8>] setup_arch+0x408/0x5c0
> [<ffffffc00069566c>] start_kernel+0x94/0x3a0
> ---[ end Kernel panic - not syncing: BUG!
> ---->8----
>
> I've not yet figured out precisely why. I haven't tried a !EFI boot
> because of the way my board is set up at the moment.
>

I think it's because the idmap is now being created earlier than
expected so it's trying to get memory from the normal allocator
and dying. I'll have to see if I can get my hands on an EFI
board and test it out.
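
Very roughly, the split being tripped over looks like this (the helper
names are illustrative, not the exact code at mmu.c:234):

    /* the early path uses the memblock-backed allocator; the late
     * path needs the page allocator, which is not up yet when the
     * EFI idmap is created from setup_arch() */
    if (early)
        pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t));
    else
        pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);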

Thanks,
Laura

-- 
Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCHv4 4/7] arm64: Move some head.text functions to executable section
  2014-10-30 17:06       ` Laura Abbott
@ 2014-10-30 18:45         ` Mark Rutland
  0 siblings, 0 replies; 30+ messages in thread
From: Mark Rutland @ 2014-10-30 18:45 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Oct 30, 2014 at 05:06:05PM +0000, Laura Abbott wrote:
> On 10/28/2014 4:10 AM, Mark Rutland wrote:
> > On Tue, Oct 28, 2014 at 08:35:37AM +0000, Ard Biesheuvel wrote:
> >> On 27 October 2014 21:12, Laura Abbott <lauraa@codeaurora.org> wrote:
> >>> The head.text section is intended to be run at early bootup
> >>> before any of the regular kernel mappings have been setup.
> >>> Parts of head.text may be freed back into the buddy allocator
> >>> due to TEXT_OFFSET so for security requirements this memory
> >>> must not be executable. The suspend/resume/hotplug code path
> >>> requires some of these head.S functions to run however which
> >>> means they need to be executable. Support these conflicting
> >>> requirements by moving the few head.text functions that need
> >>> to be executable to the text section which has the appropriate
> >>> page table permissions.
> >>>
> >>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
> >>> ---
> >>> v4: New approach based on discussions with Mark
> >>> ---
> >>>   arch/arm64/kernel/head.S | 4 ++++
> >>>   1 file changed, 4 insertions(+)
> >>>
> >>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> >>> index 10f5cc0..dc362da 100644
> >>> --- a/arch/arm64/kernel/head.S
> >>> +++ b/arch/arm64/kernel/head.S
> >>> @@ -432,12 +432,14 @@ ENTRY(secondary_startup)
> >>>          b       __enable_mmu
> >>>   ENDPROC(secondary_startup)
> >>>
> >>> +       .pushsection    .text, "ax"
> >>>   ENTRY(__secondary_switched)
> >>>          ldr     x0, [x21]                       // get secondary_data.stack
> >>>          mov     sp, x0
> >>>          mov     x29, #0
> >>>          b       secondary_start_kernel
> >>>   ENDPROC(__secondary_switched)
> >>> +       .popsection
> >>>   #endif /* CONFIG_SMP */
> >>>
> >>>   /*
> >>> @@ -471,11 +473,13 @@ ENDPROC(__enable_mmu)
> >>>    * table to map the entire function.
> >>>    */
> >>>          .align  4
> >>> +       .pushsection    .text, "ax"
> >>
> >> There is a comment before this .align that explains why it is
> >> separated from __enable_mmu, and I think jumping into another section
> >> right after it kind of defeats the purpose.
> >> Perhaps it is better to put the pushsection before __enable_mmu instead?
> >
> > To keep the alignment correct we just need to move the .align after the
> > pushsection. With that changed I think this patch is Ok.
> >
> > As __enable_mmu is only executed with the MMU off it doesn't need to be
> > moved into an executable section to prevent the MMU from blowing up in
> > our faces -- it would be wrong to call it with the MMU on anyway.
> >
> > However, this does raise a potential problem in that an attacker could
> > scribble over code executed before the MMU is on. Then they just have to
> > wait for the next CPU hotplug or suspend/resume for it to be executed.
> > So some functions including __enable_mmu and el2_setup aren't
> > necessarily safe in their current location.
> >
> > There are a few ways of solving that, either moving stuff around or
> > releasing less memory for allocation.
> >
> 
> My original version of the patch did move everything around to keep
> __enable_mmu and others in the text section so they couldn't
> be modified. It seemed rather unwieldy though so I went with this
> approach. Releasing less memory for allocation is less than optimal
> so I'd prefer if we moved code around to the appropriate section.
> Any opinions about going back to my original approach of moving things
> around vs. going with more pushsection annotations?

I guess moving things around is the way to go. That will create a clear
distinction between one-time boot code and the portions which get used
repeatedly (and perhaps we can release the memory used by the former).

Looking back at v3, I think we don't need to duplicate any functions
(e.g. enable_mmu) if we replace adr uses with adrp and a :lo12:
immediate, and I think we can place the reused portions straight in
.text rather than adding .latehead.text. That should make the diffstat a
little nicer.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2014-10-30 18:45 UTC | newest]

Thread overview: 30+ messages
     [not found] <1414440752-9411-1-git-send-email-lauraa@codeaurora.org>
2014-10-27 20:12 ` [PATCHv4 1/7] arm64: Treat handle_arch_irq as a function pointer Laura Abbott
2014-10-28  8:11   ` Ard Biesheuvel
2014-10-28 10:25   ` Mark Rutland
2014-10-27 20:12 ` [PATCHv4 2/7] arm64: Switch to ldr for loading the stub vectors Laura Abbott
2014-10-28  8:23   ` Ard Biesheuvel
2014-10-28  9:51   ` Marc Zyngier
2014-10-28 10:27   ` Mark Rutland
2014-10-27 20:12 ` [PATCHv4 3/7] arm64: Move cpu_resume into the text section Laura Abbott
2014-10-28  8:10   ` Ard Biesheuvel
2014-10-28  8:22     ` Ard Biesheuvel
2014-10-28 12:43       ` Mark Rutland
2014-10-28 15:26         ` Lorenzo Pieralisi
2014-10-28 15:31           ` Mark Rutland
2014-10-30 16:49             ` Laura Abbott
2014-10-27 20:12 ` [PATCHv4 4/7] arm64: Move some head.text functions to executable section Laura Abbott
2014-10-28  8:35   ` Ard Biesheuvel
2014-10-28 11:10     ` Mark Rutland
2014-10-30 17:06       ` Laura Abbott
2014-10-30 18:45         ` Mark Rutland
2014-10-27 20:12 ` [PATCHv4 5/7] arm64: Factor out fixmap initialiation from ioremap Laura Abbott
2014-10-28 14:17   ` Mark Rutland
2014-10-27 20:12 ` [PATCHv4 6/7] arm64: use fixmap for text patching when text is RO Laura Abbott
2014-10-27 20:12 ` [PATCHv4 7/7] arm64: add better page protections to arm64 Laura Abbott
2014-10-28 11:29   ` Steve Capper
2014-10-28 11:44     ` Ard Biesheuvel
2014-10-28 13:40       ` Steve Capper
2014-10-30 17:12     ` Laura Abbott
2014-10-28 14:20   ` Mark Rutland
2014-10-30 17:17     ` Laura Abbott
2014-10-27 20:19 ` [PATCHv4 0/7] Better page protections for arm64 Laura Abbott
