linux-arm-kernel.lists.infradead.org archive mirror
* [RFC PATCH] ARM: mm: implement CONFIG_DEBUG_RODATA
@ 2014-04-04  2:15 Kees Cook
  2014-04-04  2:15 ` [PATCH 1/2] ARM: mm: mark non-text sections non-executable Kees Cook
  2014-04-04  2:15 ` [PATCH 2/2] ARM: mm: make text and rodata read-only Kees Cook
  0 siblings, 2 replies; 13+ messages in thread
From: Kees Cook @ 2014-04-04  2:15 UTC (permalink / raw)
  To: linux-arm-kernel

This is the next round of trying to implement RODATA. This series works
for the page tables, but I can't make ftrace work. I would love to know
what I should fix here. :)

Thanks to everyone who's been helping with this series!

-Kees


* [PATCH 1/2] ARM: mm: mark non-text sections non-executable
  2014-04-04  2:15 [RFC PATCH] ARM: mm: implement CONFIG_DEBUG_RODATA Kees Cook
@ 2014-04-04  2:15 ` Kees Cook
  2014-04-04  2:15 ` [PATCH 2/2] ARM: mm: make text and rodata read-only Kees Cook
  1 sibling, 0 replies; 13+ messages in thread
From: Kees Cook @ 2014-04-04  2:15 UTC (permalink / raw)
  To: linux-arm-kernel

Adds CONFIG_ARM_KERNMEM_PERMS to separate the kernel memory regions
into section-sized areas that can have different permissions. Performs
the NX permission changes during free_initmem, so that init memory can be
reclaimed.

This uses section size instead of PMD size to reduce the memory lost
to alignment padding on non-LPAE systems.
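
For a rough sense of the cost difference (a back-of-the-envelope
illustration only, not kernel code; the 1MiB non-LPAE section size, the
2MiB PMD size, and the three aligned boundaries added by this patch are
the assumptions):

#include <stdio.h>

/* Worst case, each aligned boundary can waste up to one full alignment
 * unit. This patch adds three such boundaries (before .text,
 * __init_begin, and __data_loc). */
int main(void)
{
	unsigned long section = 1UL << 20;	/* 1 MiB non-LPAE section */
	unsigned long pmd     = 2UL << 20;	/* 2 MiB PMD */
	int boundaries = 3;

	printf("worst-case padding, section-aligned: %lu MiB\n",
	       (unsigned long)boundaries * section >> 20);
	printf("worst-case padding, PMD-aligned:     %lu MiB\n",
	       (unsigned long)boundaries * pmd >> 20);
	return 0;
}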

Based on work by Brad Spengler, Larry Bassel, and Laura Abbott.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/arm/kernel/vmlinux.lds.S |   17 ++++++++
 arch/arm/mm/Kconfig           |    9 ++++
 arch/arm/mm/init.c            |   92 +++++++++++++++++++++++++++++++++++++++++
 arch/arm/mm/mmu.c             |   13 +++++-
 4 files changed, 130 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 7bcee5c9b604..08fa667ef2f1 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -8,6 +8,9 @@
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/page.h>
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+#include <asm/pgtable.h>
+#endif
 	
 #define PROC_INFO							\
 	. = ALIGN(4);							\
@@ -90,6 +93,11 @@ SECTIONS
 		_text = .;
 		HEAD_TEXT
 	}
+
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+	. = ALIGN(1<<SECTION_SHIFT);
+#endif
+
 	.text : {			/* Real text segment		*/
 		_stext = .;		/* Text and read-only data	*/
 			__exception_text_start = .;
@@ -145,7 +153,11 @@ SECTIONS
 	_etext = .;			/* End of text and rodata section */
 
 #ifndef CONFIG_XIP_KERNEL
+# ifdef CONFIG_ARM_KERNMEM_PERMS
+	. = ALIGN(1<<SECTION_SHIFT);
+# else
 	. = ALIGN(PAGE_SIZE);
+# endif
 	__init_begin = .;
 #endif
 	/*
@@ -220,7 +232,12 @@ SECTIONS
 	. = PAGE_OFFSET + TEXT_OFFSET;
 #else
 	__init_end = .;
+
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+	. = ALIGN(1<<SECTION_SHIFT);
+#else
 	. = ALIGN(THREAD_SIZE);
+#endif
 	__data_loc = .;
 #endif
 
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 1f8fed94c2a4..8848d7b73e66 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -958,3 +958,12 @@ config ARCH_SUPPORTS_BIG_ENDIAN
 	help
 	  This option specifies the architecture can support big endian
 	  operation.
+
+config ARM_KERNMEM_PERMS
+	bool "Restrict kernel memory permissions"
+	help
+	  If this is set, kernel memory other than kernel text (and rodata)
+	  will be made non-executable. The tradeoff is that each region is
+	  padded to section-size (1MiB) boundaries (because their permissions
+	  are different and splitting the 1M pages into 4K ones causes TLB
+	  performance problems), wasting memory.
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 804d61566a53..8539eb2a01ad 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -31,6 +31,11 @@
 #include <asm/tlb.h>
 #include <asm/fixmap.h>
 
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+#include <asm/system_info.h>
+#include <asm/cp15.h>
+#endif
+
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 
@@ -621,11 +626,98 @@ void __init mem_init(void)
 	}
 }
 
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+struct section_perm {
+	unsigned long start;
+	unsigned long end;
+	pmdval_t mask;
+	pmdval_t prot;
+};
+
+struct section_perm section_perms[] = {
+	/* Make page tables, etc. before _stext RW (set NX). */
+	{
+		.start	= PAGE_OFFSET,
+		.end	= (unsigned long)_stext,
+		.mask	= ~PMD_SECT_XN,
+		.prot	= PMD_SECT_XN,
+	},
+	/* Make init RW (set NX). */
+	{
+		.start	= (unsigned long)__init_begin,
+		.end	= (unsigned long)_sdata,
+		.mask	= ~PMD_SECT_XN,
+		.prot	= PMD_SECT_XN,
+	},
+};
+
+static inline void section_update(unsigned long addr, pmdval_t mask,
+				  pmdval_t prot)
+{
+	pmd_t *pmd = pmd_off_k(addr);
+
+#ifdef CONFIG_ARM_LPAE
+	pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
+#else
+	if (addr & SECTION_SIZE)
+		pmd[1] = __pmd((pmd_val(pmd[1]) & mask) | prot);
+	else
+		pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
+#endif
+	flush_pmd_entry(pmd);
+}
+
+static inline bool arch_has_strict_perms(void)
+{
+	unsigned int cr;
+
+	if (cpu_architecture() < CPU_ARCH_ARMv6)
+		return false;
+
+	cr = get_cr();
+	if (!(cr & CR_XP))
+		return false;
+
+	return true;
+}
+
+static inline void fix_kernmem_perms(void)
+{
+	unsigned long addr;
+	unsigned int i;
+
+	if (!arch_has_strict_perms())
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(section_perms); i++) {
+		if (!IS_ALIGNED(section_perms[i].start, SECTION_SIZE) ||
+		    !IS_ALIGNED(section_perms[i].end, SECTION_SIZE)) {
+			pr_err("BUG: section %lx-%lx not aligned to %lx\n",
+				section_perms[i].start, section_perms[i].end,
+				SECTION_SIZE);
+			continue;
+		}
+
+		for (addr = section_perms[i].start;
+		     addr < section_perms[i].end;
+		     addr += SECTION_SIZE)
+			section_update(addr, section_perms[i].mask,
+				       section_perms[i].prot);
+	}
+}
+#else
+static inline void fix_kernmem_perms(void) { }
+#endif /* CONFIG_ARM_KERNMEM_PERMS */
+
 void free_initmem(void)
 {
 #ifdef CONFIG_HAVE_TCM
 	extern char __tcm_start, __tcm_end;
+#endif
+
+	fix_kernmem_perms();
 
+#ifdef CONFIG_HAVE_TCM
 	poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start);
 	free_reserved_area(&__tcm_start, &__tcm_end, -1, "TCM link");
 #endif
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index a623cb3ad012..9d89de8cc349 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1369,13 +1369,24 @@ static void __init map_lowmem(void)
 		if (start >= end)
 			break;
 
-		if (end < kernel_x_start || start >= kernel_x_end) {
+		if (end < kernel_x_start) {
 			map.pfn = __phys_to_pfn(start);
 			map.virtual = __phys_to_virt(start);
 			map.length = end - start;
 			map.type = MT_MEMORY_RWX;
 
 			create_mapping(&map);
+		} else if (start >= kernel_x_end) {
+			map.pfn = __phys_to_pfn(start);
+			map.virtual = __phys_to_virt(start);
+			map.length = end - start;
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+			map.type = MT_MEMORY_RW;
+#else
+			map.type = MT_MEMORY_RWX;
+#endif
+
+			create_mapping(&map);
 		} else {
 			/* This better cover the entire kernel */
 			if (start < kernel_x_start) {
-- 
1.7.9.5


* [PATCH 2/2] ARM: mm: make text and rodata read-only
  2014-04-04  2:15 [RFC PATCH] ARM: mm: implement CONFIG_DEBUG_RODATA Kees Cook
  2014-04-04  2:15 ` [PATCH 1/2] ARM: mm: mark non-text sections non-executable Kees Cook
@ 2014-04-04  2:15 ` Kees Cook
  2014-04-04 19:58   ` Rabin Vincent
  1 sibling, 1 reply; 13+ messages in thread
From: Kees Cook @ 2014-04-04  2:15 UTC (permalink / raw)
  To: linux-arm-kernel

This introduces CONFIG_DEBUG_RODATA, making kernel text and rodata
read-only. It splits rodata from text so that rodata can also be NX.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/arm/include/asm/cacheflush.h |    9 ++++
 arch/arm/kernel/ftrace.c          |   17 +++++++
 arch/arm/kernel/vmlinux.lds.S     |    3 ++
 arch/arm/mm/Kconfig               |   11 +++++
 arch/arm/mm/init.c                |   97 +++++++++++++++++++++++++++++--------
 5 files changed, 117 insertions(+), 20 deletions(-)

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 8b8b61685a34..b6fea0a1a88b 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -487,4 +487,13 @@ int set_memory_rw(unsigned long addr, int numpages);
 int set_memory_x(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
 
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void);
+void set_kernel_text_rw(void);
+void set_kernel_text_ro(void);
+#else
+static inline void set_kernel_text_rw(void) { }
+static inline void set_kernel_text_ro(void) { }
+#endif
+
 #endif
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 34e56647dcee..4ae343c1e2a3 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -14,6 +14,7 @@
 
 #include <linux/ftrace.h>
 #include <linux/uaccess.h>
+#include <linux/stop_machine.h>
 
 #include <asm/cacheflush.h>
 #include <asm/opcodes.h>
@@ -34,6 +35,22 @@
 
 #define	OLD_NOP		0xe1a00000	/* mov r0, r0 */
 
+static int __ftrace_modify_code(void *data)
+{
+	int *command = data;
+
+	set_kernel_text_rw();
+	ftrace_modify_all_code(*command);
+	set_kernel_text_ro();
+
+	return 0;
+}
+
+void arch_ftrace_update_code(int command)
+{
+	stop_machine(__ftrace_modify_code, &command, NULL);
+}
+
 static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec)
 {
 	return rec->arch.old_mcount ? OLD_NOP : NOP;
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 08fa667ef2f1..ec79e7268e09 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -120,6 +120,9 @@ SECTIONS
 			ARM_CPU_KEEP(PROC_INFO)
 	}
 
+#ifdef CONFIG_DEBUG_RODATA
+	. = ALIGN(1<<SECTION_SHIFT);
+#endif
 	RO_DATA(PAGE_SIZE)
 
 	. = ALIGN(4);
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 8848d7b73e66..3c7adea7e2f6 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -967,3 +967,14 @@ config ARM_KERNMEM_PERMS
 	  padded to section-size (1MiB) boundaries (because their permissions
 	  are different and splitting the 1M pages into 4K ones causes TLB
 	  performance problems), wasting memory.
+
+config DEBUG_RODATA
+	bool "Make kernel text and rodata read-only"
+	depends on ARM_KERNMEM_PERMS && KEXEC=n && KPROBES=n
+	default y
+	help
+	  If this is set, kernel text and rodata will be made read-only.
+	  This additionally splits rodata from kernel text so it can be made
+	  non-executable. This creates another section-size padded region,
+	  so it can waste more memory space while gaining a pure read-only
+	  rodata region.
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 8539eb2a01ad..3baac4ad165f 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -632,9 +632,10 @@ struct section_perm {
 	unsigned long end;
 	pmdval_t mask;
 	pmdval_t prot;
+	pmdval_t clear;
 };
 
-struct section_perm section_perms[] = {
+struct section_perm nx_perms[] = {
 	/* Make page tables, etc. before _stext RW (set NX). */
 	{
 		.start	= PAGE_OFFSET,
@@ -649,12 +650,46 @@ struct section_perm section_perms[] = {
 		.mask	= ~PMD_SECT_XN,
 		.prot	= PMD_SECT_XN,
 	},
+#ifdef CONFIG_DEBUG_RODATA
+	/* Make rodata NX (set RO in ro_perms below). */
+	{
+		.start  = (unsigned long)__start_rodata,
+		.end    = (unsigned long)__init_begin,
+		.mask   = ~PMD_SECT_XN,
+		.prot   = PMD_SECT_XN,
+	},
+#endif
+};
+
+#ifdef CONFIG_DEBUG_RODATA
+struct section_perm ro_perms[] = {
+	/*
+	 * Make kernel code and rodata RX (set RO).
+	 * This entry must be first for set_kernel_text_rw() to find it.
+	 */
+	{
+		.start  = (unsigned long)_stext,
+		.end    = (unsigned long)__init_begin,
+#ifdef CONFIG_ARM_LPAE
+		.mask   = ~PMD_SECT_RDONLY,
+		.prot   = PMD_SECT_RDONLY,
+#else
+		.mask   = ~(PMD_SECT_APX | PMD_SECT_AP_WRITE),
+		.prot   = PMD_SECT_APX | PMD_SECT_AP_WRITE,
+		.clear  = PMD_SECT_AP_WRITE,
+#endif
+	},
 };
+#endif
 
 static inline void section_update(unsigned long addr, pmdval_t mask,
 				  pmdval_t prot)
 {
-	pmd_t *pmd = pmd_off_k(addr);
+	struct mm_struct *mm;
+	pmd_t *pmd;
+
+	mm = current->active_mm;
+	pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr);
 
 #ifdef CONFIG_ARM_LPAE
 	pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
@@ -681,30 +716,52 @@ static inline bool arch_has_strict_perms(void)
 	return true;
 }
 
+#define set_section_perms(perms, field)	{				\
+	size_t i;							\
+	unsigned long addr;						\
+									\
+	if (!arch_has_strict_perms())					\
+		return;							\
+									\
+	for (i = 0; i < ARRAY_SIZE(perms); i++) {			\
+		if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) ||	\
+		    !IS_ALIGNED(perms[i].end, SECTION_SIZE)) {		\
+			pr_err("BUG: section %lx-%lx not aligned to %lx\n", \
+				perms[i].start, perms[i].end,		\
+				SECTION_SIZE);				\
+			continue;					\
+		}							\
+									\
+		for (addr = perms[i].start;				\
+		     addr < perms[i].end;				\
+		     addr += SECTION_SIZE)				\
+			section_update(addr, perms[i].mask,		\
+				       perms[i].field);			\
+	}								\
+}
+
 static inline void fix_kernmem_perms(void)
 {
-	unsigned long addr;
-	unsigned int i;
+	set_section_perms(nx_perms, prot);
+}
 
-	if (!arch_has_strict_perms())
-		return;
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void)
+{
+	set_section_perms(ro_perms, prot);
+}
 
-	for (i = 0; i < ARRAY_SIZE(section_perms); i++) {
-		if (!IS_ALIGNED(section_perms[i].start, SECTION_SIZE) ||
-		    !IS_ALIGNED(section_perms[i].end, SECTION_SIZE)) {
-			pr_err("BUG: section %lx-%lx not aligned to %lx\n",
-				section_perms[i].start, section_perms[i].end,
-				SECTION_SIZE);
-			continue;
-		}
+void set_kernel_text_rw(void)
+{
+	set_section_perms(ro_perms, clear);
+}
 
-		for (addr = section_perms[i].start;
-		     addr < section_perms[i].end;
-		     addr += SECTION_SIZE)
-			section_update(addr, section_perms[i].mask,
-				       section_perms[i].prot);
-	}
+void set_kernel_text_ro(void)
+{
+	set_section_perms(ro_perms, prot);
 }
+#endif /* CONFIG_DEBUG_RODATA */
+
 #else
 static inline void fix_kernmem_perms(void) { }
 #endif /* CONFIG_ARM_KERNMEM_PERMS */
-- 
1.7.9.5


* [PATCH 2/2] ARM: mm: make text and rodata read-only
  2014-04-04  2:15 ` [PATCH 2/2] ARM: mm: make text and rodata read-only Kees Cook
@ 2014-04-04 19:58   ` Rabin Vincent
  2014-04-05  0:07     ` Kees Cook
  0 siblings, 1 reply; 13+ messages in thread
From: Rabin Vincent @ 2014-04-04 19:58 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Apr 03, 2014 at 07:15:19PM -0700, Kees Cook wrote:
> diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
> index 34e56647dcee..4ae343c1e2a3 100644
> --- a/arch/arm/kernel/ftrace.c
> +++ b/arch/arm/kernel/ftrace.c
> @@ -14,6 +14,7 @@
>  
>  #include <linux/ftrace.h>
>  #include <linux/uaccess.h>
> +#include <linux/stop_machine.h>
>  
>  #include <asm/cacheflush.h>
>  #include <asm/opcodes.h>
> @@ -34,6 +35,22 @@
>  
>  #define	OLD_NOP		0xe1a00000	/* mov r0, r0 */
>  
> +static int __ftrace_modify_code(void *data)

This is in the CONFIG_OLD_MCOUNT ifdef, but should be in the outer ifdef
(CONFIG_DYNAMIC_FTRACE) instead, otherwise it will not get enabled for,
for example, Thumb-2 kernels.  This was wrong in my example patch too.
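
Roughly, the intended placement is something like this (a sketch only;
the surrounding context is from memory and the OLD_MCOUNT-specific
pieces are elided):

#ifdef CONFIG_DYNAMIC_FTRACE

static int __ftrace_modify_code(void *data)
{
	int *command = data;

	set_kernel_text_rw();
	ftrace_modify_all_code(*command);
	set_kernel_text_ro();

	return 0;
}

void arch_ftrace_update_code(int command)
{
	stop_machine(__ftrace_modify_code, &command, NULL);
}

#ifdef CONFIG_OLD_MCOUNT
/* OLD_NOP and the other OLD_MCOUNT-only definitions stay in this inner
 * block, so the functions above are still built for Thumb-2 kernels. */
...
#endif
...
#endif /* CONFIG_DYNAMIC_FTRACE */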

> diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
> index 8539eb2a01ad..3baac4ad165f 100644
> --- a/arch/arm/mm/init.c
> +++ b/arch/arm/mm/init.c
> @@ -681,30 +716,52 @@ static inline bool arch_has_strict_perms(void)
>  	return true;
>  }
>  
> +#define set_section_perms(perms, field)	{				\
> +	size_t i;							\
> +	unsigned long addr;						\
> +									\
> +	if (!arch_has_strict_perms())					\
> +		return;							\
> +									\
> +	for (i = 0; i < ARRAY_SIZE(perms); i++) {			\
> +		if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) ||	\
> +		    !IS_ALIGNED(perms[i].end, SECTION_SIZE)) {		\
> +			pr_err("BUG: section %lx-%lx not aligned to %lx\n", \
> +				perms[i].start, perms[i].end,		\
> +				SECTION_SIZE);				\
> +			continue;					\
> +		}							\
> +									\
> +		for (addr = perms[i].start;				\
> +		     addr < perms[i].end;				\
> +		     addr += SECTION_SIZE)				\
> +			section_update(addr, perms[i].mask,		\
> +				       perms[i].field);			\
> +	}								\
> +}
> +
>  static inline void fix_kernmem_perms(void)
>  {
> -	unsigned long addr;
> -	unsigned int i;
> +	set_section_perms(nx_perms, prot);
> +}
>  
> -	if (!arch_has_strict_perms())
> -		return;
> +#ifdef CONFIG_DEBUG_RODATA
> +void mark_rodata_ro(void)
> +{
> +	set_section_perms(ro_perms, prot);
> +}
>  
> -	for (i = 0; i < ARRAY_SIZE(section_perms); i++) {
> -		if (!IS_ALIGNED(section_perms[i].start, SECTION_SIZE) ||
> -		    !IS_ALIGNED(section_perms[i].end, SECTION_SIZE)) {
> -			pr_err("BUG: section %lx-%lx not aligned to %lx\n",
> -				section_perms[i].start, section_perms[i].end,
> -				SECTION_SIZE);
> -			continue;
> -		}
> +void set_kernel_text_rw(void)
> +{
> +	set_section_perms(ro_perms, clear);
> +}

You need a TLB flush.  I had a flush_tlb_all() in my example patch,
http://lists.infradead.org/pipermail/linux-arm-kernel/2014-April/244335.html,
but the following is probably nicer (on top of this patch):

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 9bea524..a92c45a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -741,6 +741,8 @@ static inline bool arch_has_strict_perms(void)
 		     addr += SECTION_SIZE)				\
 			section_update(addr, perms[i].mask,		\
 				       perms[i].field);			\
+									\
+		flush_tlb_kernel_range(perms[i].start, perms[i].end);	\
 	}								\
 }
 


* [PATCH 2/2] ARM: mm: make text and rodata read-only
  2014-04-04 19:58   ` Rabin Vincent
@ 2014-04-05  0:07     ` Kees Cook
  2014-04-08 12:41       ` Jon Medhurst (Tixy)
  0 siblings, 1 reply; 13+ messages in thread
From: Kees Cook @ 2014-04-05  0:07 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Apr 4, 2014 at 12:58 PM, Rabin Vincent <rabin@rab.in> wrote:
> On Thu, Apr 03, 2014 at 07:15:19PM -0700, Kees Cook wrote:
>> diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
>> index 34e56647dcee..4ae343c1e2a3 100644
>> --- a/arch/arm/kernel/ftrace.c
>> +++ b/arch/arm/kernel/ftrace.c
>> @@ -14,6 +14,7 @@
>>
>>  #include <linux/ftrace.h>
>>  #include <linux/uaccess.h>
>> +#include <linux/stop_machine.h>
>>
>>  #include <asm/cacheflush.h>
>>  #include <asm/opcodes.h>
>> @@ -34,6 +35,22 @@
>>
>>  #define      OLD_NOP         0xe1a00000      /* mov r0, r0 */
>>
>> +static int __ftrace_modify_code(void *data)
>
> This is in the CONFIG_OLD_MCOUNT ifdef, but should be in the outer ifdef
> (CONFIG_DYNAMIC_FTRACE) instead, otherwise it will not get enabled for,
> for example, Thumb-2 kernels.  This was wrong in my example patch too.

Ah! Yes, good point. I've moved this now.

>> diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
>> index 8539eb2a01ad..3baac4ad165f 100644
>> --- a/arch/arm/mm/init.c
>> +++ b/arch/arm/mm/init.c
>> @@ -681,30 +716,52 @@ static inline bool arch_has_strict_perms(void)
>>       return true;
>>  }
>>
>> +#define set_section_perms(perms, field)      {                               \
>> +     size_t i;                                                       \
>> +     unsigned long addr;                                             \
>> +                                                                     \
>> +     if (!arch_has_strict_perms())                                   \
>> +             return;                                                 \
>> +                                                                     \
>> +     for (i = 0; i < ARRAY_SIZE(perms); i++) {                       \
>> +             if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) ||        \
>> +                 !IS_ALIGNED(perms[i].end, SECTION_SIZE)) {          \
>> +                     pr_err("BUG: section %lx-%lx not aligned to %lx\n", \
>> +                             perms[i].start, perms[i].end,           \
>> +                             SECTION_SIZE);                          \
>> +                     continue;                                       \
>> +             }                                                       \
>> +                                                                     \
>> +             for (addr = perms[i].start;                             \
>> +                  addr < perms[i].end;                               \
>> +                  addr += SECTION_SIZE)                              \
>> +                     section_update(addr, perms[i].mask,             \
>> +                                    perms[i].field);                 \
>> +     }                                                               \
>> +}
>> +
>>  static inline void fix_kernmem_perms(void)
>>  {
>> -     unsigned long addr;
>> -     unsigned int i;
>> +     set_section_perms(nx_perms, prot);
>> +}
>>
>> -     if (!arch_has_strict_perms())
>> -             return;
>> +#ifdef CONFIG_DEBUG_RODATA
>> +void mark_rodata_ro(void)
>> +{
>> +     set_section_perms(ro_perms, prot);
>> +}
>>
>> -     for (i = 0; i < ARRAY_SIZE(section_perms); i++) {
>> -             if (!IS_ALIGNED(section_perms[i].start, SECTION_SIZE) ||
>> -                 !IS_ALIGNED(section_perms[i].end, SECTION_SIZE)) {
>> -                     pr_err("BUG: section %lx-%lx not aligned to %lx\n",
>> -                             section_perms[i].start, section_perms[i].end,
>> -                             SECTION_SIZE);
>> -                     continue;
>> -             }
>> +void set_kernel_text_rw(void)
>> +{
>> +     set_section_perms(ro_perms, clear);
>> +}
>
> You need a TLB flush.  I had a flush_tlb_all() in my example patch,
> http://lists.infradead.org/pipermail/linux-arm-kernel/2014-April/244335.html,
> but the following is probably nicer (on top of this patch):
>
> diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
> index 9bea524..a92c45a 100644
> --- a/arch/arm/mm/init.c
> +++ b/arch/arm/mm/init.c
> @@ -741,6 +741,8 @@ static inline bool arch_has_strict_perms(void)
>                      addr += SECTION_SIZE)                              \
>                         section_update(addr, perms[i].mask,             \
>                                        perms[i].field);                 \
> +                                                                       \
> +               flush_tlb_kernel_range(perms[i].start, perms[i].end);   \
>         }                                                               \
>  }
>

When I do this, I hang the system, and get a WARN due to the tlb call
attempting to flush on all CPUs, I think:

[   34.246034] WARNING: at
/mnt/host/source/src/third_party/kernel-next/kernel/smp.c:466
smp_call_function_many+0xac/0x26c()
...
[   34.246617] Backtrace:
[   34.246697] [<c010d3b8>] (unwind_backtrace+0x0/0x118) from
[<c060b9d8>] (dump_stack+0x28/0x30)
[   34.246765] [<c060b9d8>] (dump_stack+0x28/0x30) from [<c0123044>]
(warn_slowpath_null+0x44/0x5c)
[   34.246824] [<c0123044>] (warn_slowpath_null+0x44/0x5c) from
[<c017426c>] (smp_call_function_many+0xac/0x26c)
[   34.246881] [<c017426c>] (smp_call_function_many+0xac/0x26c) from
[<c0174468>] (smp_call_function+0x3c/0x48)
[   34.246937] [<c0174468>] (smp_call_function+0x3c/0x48) from
[<c010c0fc>] (broadcast_tlb_a15_erratum+0x40/0x4c)
[   34.246994] [<c010c0fc>] (broadcast_tlb_a15_erratum+0x40/0x4c) from
[<c010c590>] (flush_tlb_kernel_range+0x74/0xa0)
[   34.247046] [<c010c590>] (flush_tlb_kernel_range+0x74/0xa0) from
[<c011403c>] (set_kernel_text_rw+0xd8/0xec)
[   34.247099] [<c011403c>] (set_kernel_text_rw+0xd8/0xec) from
[<c010c878>] (__ftrace_modify_code+0x14/0x28)
[   34.247156] [<c010c878>] (__ftrace_modify_code+0x14/0x28) from
[<c0184318>] (stop_machine_cpu_stop+0xc0/0x114)
[   34.247212] [<c0184318>] (stop_machine_cpu_stop+0xc0/0x114) from
[<c01841cc>] (cpu_stopper_thread+0xd8/0x164)
[   34.247266] [<c01841cc>] (cpu_stopper_thread+0xd8/0x164) from
[<c0145c14>] (kthread+0xc8/0xd8)
[   34.247323] [<c0145c14>] (kthread+0xc8/0xd8) from [<c0106118>]
(ret_from_fork+0x14/0x20)

Using local_flush_tlb_kernel_range() fixed it though. Thank you for
your help on this! :)
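
(Concretely, the only change relative to the hunk above is roughly:

-		flush_tlb_kernel_range(perms[i].start, perms[i].end);	\
+		local_flush_tlb_kernel_range(perms[i].start, perms[i].end);	\

i.e. the flush stays at the end of the per-region loop in
set_section_perms(), but only touches the local CPU's TLB.)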

-Kees

-- 
Kees Cook
Chrome OS Security


* [PATCH 2/2] ARM: mm: make text and rodata read-only
  2014-04-05  0:07     ` Kees Cook
@ 2014-04-08 12:41       ` Jon Medhurst (Tixy)
  2014-04-08 16:01         ` Kees Cook
  0 siblings, 1 reply; 13+ messages in thread
From: Jon Medhurst (Tixy) @ 2014-04-08 12:41 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 2014-04-04 at 17:07 -0700, Kees Cook wrote:
> On Fri, Apr 4, 2014 at 12:58 PM, Rabin Vincent <rabin@rab.in> wrote:
[...]
> > You need a TLB flush.  I had a flush_tlb_all() in my example patch,
> > http://lists.infradead.org/pipermail/linux-arm-kernel/2014-April/244335.html,
> > but the following is probably nicer (on top of this patch):
> >
> > diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
> > index 9bea524..a92c45a 100644
> > --- a/arch/arm/mm/init.c
> > +++ b/arch/arm/mm/init.c
> > @@ -741,6 +741,8 @@ static inline bool arch_has_strict_perms(void)
> >                      addr += SECTION_SIZE)                              \
> >                         section_update(addr, perms[i].mask,             \
> >                                        perms[i].field);                 \
> > +                                                                       \
> > +               flush_tlb_kernel_range(perms[i].start, perms[i].end);   \
> >         }                                                               \
> >  }
> >
> 
> When I do this, I hang the system, and get a WARN due to the tlb call
> attempting to flush on all CPUs, I think:
> 
> [   34.246034] WARNING: at
> /mnt/host/source/src/third_party/kernel-next/kernel/smp.c:466
> smp_call_function_many+0xac/0x26c()
> ...
> [   34.246617] Backtrace:
> [   34.246697] [<c010d3b8>] (unwind_backtrace+0x0/0x118) from
> [<c060b9d8>] (dump_stack+0x28/0x30)
> [   34.246765] [<c060b9d8>] (dump_stack+0x28/0x30) from [<c0123044>]
> (warn_slowpath_null+0x44/0x5c)
> [   34.246824] [<c0123044>] (warn_slowpath_null+0x44/0x5c) from
> [<c017426c>] (smp_call_function_many+0xac/0x26c)
> [   34.246881] [<c017426c>] (smp_call_function_many+0xac/0x26c) from
> [<c0174468>] (smp_call_function+0x3c/0x48)
> [   34.246937] [<c0174468>] (smp_call_function+0x3c/0x48) from
> [<c010c0fc>] (broadcast_tlb_a15_erratum+0x40/0x4c)
> [   34.246994] [<c010c0fc>] (broadcast_tlb_a15_erratum+0x40/0x4c) from
> [<c010c590>] (flush_tlb_kernel_range+0x74/0xa0)
> [   34.247046] [<c010c590>] (flush_tlb_kernel_range+0x74/0xa0) from
> [<c011403c>] (set_kernel_text_rw+0xd8/0xec)
> [   34.247099] [<c011403c>] (set_kernel_text_rw+0xd8/0xec) from
> [<c010c878>] (__ftrace_modify_code+0x14/0x28)
> [   34.247156] [<c010c878>] (__ftrace_modify_code+0x14/0x28) from
> [<c0184318>] (stop_machine_cpu_stop+0xc0/0x114)
> [   34.247212] [<c0184318>] (stop_machine_cpu_stop+0xc0/0x114) from
> [<c01841cc>] (cpu_stopper_thread+0xd8/0x164)
> [   34.247266] [<c01841cc>] (cpu_stopper_thread+0xd8/0x164) from
> [<c0145c14>] (kthread+0xc8/0xd8)
> [   34.247323] [<c0145c14>] (kthread+0xc8/0xd8) from [<c0106118>]
> (ret_from_fork+0x14/0x20)
> 
> Using local_flush_tlb_kernel_range() fixed it though.

What about if another CPU had a TLB entry with the old permissions in?
Or do you consider that the likelihood and consequences of that aren't
significant?

-- 
Tixy


* [PATCH 2/2] ARM: mm: make text and rodata read-only
  2014-04-08 12:41       ` Jon Medhurst (Tixy)
@ 2014-04-08 16:01         ` Kees Cook
  2014-04-08 16:12           ` Jon Medhurst (Tixy)
  0 siblings, 1 reply; 13+ messages in thread
From: Kees Cook @ 2014-04-08 16:01 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Apr 8, 2014 at 5:41 AM, Jon Medhurst (Tixy) <tixy@linaro.org> wrote:
> On Fri, 2014-04-04 at 17:07 -0700, Kees Cook wrote:
>> On Fri, Apr 4, 2014 at 12:58 PM, Rabin Vincent <rabin@rab.in> wrote:
> [...]
>> > You need a TLB flush.  I had a flush_tlb_all() in my example patch,
>> > http://lists.infradead.org/pipermail/linux-arm-kernel/2014-April/244335.html,
>> > but the following is probably nicer (on top of this patch):
>> >
>> > diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
>> > index 9bea524..a92c45a 100644
>> > --- a/arch/arm/mm/init.c
>> > +++ b/arch/arm/mm/init.c
>> > @@ -741,6 +741,8 @@ static inline bool arch_has_strict_perms(void)
>> >                      addr += SECTION_SIZE)                              \
>> >                         section_update(addr, perms[i].mask,             \
>> >                                        perms[i].field);                 \
>> > +                                                                       \
>> > +               flush_tlb_kernel_range(perms[i].start, perms[i].end);   \
>> >         }                                                               \
>> >  }
>> >
>>
>> When I do this, I hang the system, and get a WARN due to the tlb call
>> attempting to flush on all CPUs, I think:
>>
>> [   34.246034] WARNING: at
>> /mnt/host/source/src/third_party/kernel-next/kernel/smp.c:466
>> smp_call_function_many+0xac/0x26c()
>> ...
>> [   34.246617] Backtrace:
>> [   34.246697] [<c010d3b8>] (unwind_backtrace+0x0/0x118) from
>> [<c060b9d8>] (dump_stack+0x28/0x30)
>> [   34.246765] [<c060b9d8>] (dump_stack+0x28/0x30) from [<c0123044>]
>> (warn_slowpath_null+0x44/0x5c)
>> [   34.246824] [<c0123044>] (warn_slowpath_null+0x44/0x5c) from
>> [<c017426c>] (smp_call_function_many+0xac/0x26c)
>> [   34.246881] [<c017426c>] (smp_call_function_many+0xac/0x26c) from
>> [<c0174468>] (smp_call_function+0x3c/0x48)
>> [   34.246937] [<c0174468>] (smp_call_function+0x3c/0x48) from
>> [<c010c0fc>] (broadcast_tlb_a15_erratum+0x40/0x4c)
>> [   34.246994] [<c010c0fc>] (broadcast_tlb_a15_erratum+0x40/0x4c) from
>> [<c010c590>] (flush_tlb_kernel_range+0x74/0xa0)
>> [   34.247046] [<c010c590>] (flush_tlb_kernel_range+0x74/0xa0) from
>> [<c011403c>] (set_kernel_text_rw+0xd8/0xec)
>> [   34.247099] [<c011403c>] (set_kernel_text_rw+0xd8/0xec) from
>> [<c010c878>] (__ftrace_modify_code+0x14/0x28)
>> [   34.247156] [<c010c878>] (__ftrace_modify_code+0x14/0x28) from
>> [<c0184318>] (stop_machine_cpu_stop+0xc0/0x114)
>> [   34.247212] [<c0184318>] (stop_machine_cpu_stop+0xc0/0x114) from
>> [<c01841cc>] (cpu_stopper_thread+0xd8/0x164)
>> [   34.247266] [<c01841cc>] (cpu_stopper_thread+0xd8/0x164) from
>> [<c0145c14>] (kthread+0xc8/0xd8)
>> [   34.247323] [<c0145c14>] (kthread+0xc8/0xd8) from [<c0106118>]
>> (ret_from_fork+0x14/0x20)
>>
>> Using local_flush_tlb_kernel_range() fixed it though.
>
> What about if another CPU had a TLB entry with the old permissions in?
> Or do you consider that the likelihood and consequences of that aren't
> significant?

The purpose of the function is to temporarily make text writable, do
the write, and then restore read-only. Since only the writer needs to
care about TLB state, this works fine. It's actually nice that only
the current CPU can make text writes.

-Kees

-- 
Kees Cook
Chrome OS Security


* [PATCH 2/2] ARM: mm: make text and rodata read-only
  2014-04-08 16:01         ` Kees Cook
@ 2014-04-08 16:12           ` Jon Medhurst (Tixy)
  2014-04-08 16:59             ` Kees Cook
  0 siblings, 1 reply; 13+ messages in thread
From: Jon Medhurst (Tixy) @ 2014-04-08 16:12 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 2014-04-08 at 09:01 -0700, Kees Cook wrote:
> On Tue, Apr 8, 2014 at 5:41 AM, Jon Medhurst (Tixy) <tixy@linaro.org> wrote:
> > On Fri, 2014-04-04 at 17:07 -0700, Kees Cook wrote:
> >> On Fri, Apr 4, 2014 at 12:58 PM, Rabin Vincent <rabin@rab.in> wrote:
> > [...]
> >> > You need a TLB flush.  I had a flush_tlb_all() in my example patch,
> >> > http://lists.infradead.org/pipermail/linux-arm-kernel/2014-April/244335.html,
> >> > but the following is probably nicer (on top of this patch):
> >> >
> >> > diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
> >> > index 9bea524..a92c45a 100644
> >> > --- a/arch/arm/mm/init.c
> >> > +++ b/arch/arm/mm/init.c
> >> > @@ -741,6 +741,8 @@ static inline bool arch_has_strict_perms(void)
> >> >                      addr += SECTION_SIZE)                              \
> >> >                         section_update(addr, perms[i].mask,             \
> >> >                                        perms[i].field);                 \
> >> > +                                                                       \
> >> > +               flush_tlb_kernel_range(perms[i].start, perms[i].end);   \
> >> >         }                                                               \
> >> >  }
> >> >
> >>
> >> When I do this, I hang the system, and get a WARN due to the tlb call
> >> attempting to flush on all CPUs, I think:
> >>
> >> [   34.246034] WARNING: at
> >> /mnt/host/source/src/third_party/kernel-next/kernel/smp.c:466
> >> smp_call_function_many+0xac/0x26c()
> >> ...
> >> [   34.246617] Backtrace:
> >> [   34.246697] [<c010d3b8>] (unwind_backtrace+0x0/0x118) from
> >> [<c060b9d8>] (dump_stack+0x28/0x30)
> >> [   34.246765] [<c060b9d8>] (dump_stack+0x28/0x30) from [<c0123044>]
> >> (warn_slowpath_null+0x44/0x5c)
> >> [   34.246824] [<c0123044>] (warn_slowpath_null+0x44/0x5c) from
> >> [<c017426c>] (smp_call_function_many+0xac/0x26c)
> >> [   34.246881] [<c017426c>] (smp_call_function_many+0xac/0x26c) from
> >> [<c0174468>] (smp_call_function+0x3c/0x48)
> >> [   34.246937] [<c0174468>] (smp_call_function+0x3c/0x48) from
> >> [<c010c0fc>] (broadcast_tlb_a15_erratum+0x40/0x4c)
> >> [   34.246994] [<c010c0fc>] (broadcast_tlb_a15_erratum+0x40/0x4c) from
> >> [<c010c590>] (flush_tlb_kernel_range+0x74/0xa0)
> >> [   34.247046] [<c010c590>] (flush_tlb_kernel_range+0x74/0xa0) from
> >> [<c011403c>] (set_kernel_text_rw+0xd8/0xec)
> >> [   34.247099] [<c011403c>] (set_kernel_text_rw+0xd8/0xec) from
> >> [<c010c878>] (__ftrace_modify_code+0x14/0x28)
> >> [   34.247156] [<c010c878>] (__ftrace_modify_code+0x14/0x28) from
> >> [<c0184318>] (stop_machine_cpu_stop+0xc0/0x114)
> >> [   34.247212] [<c0184318>] (stop_machine_cpu_stop+0xc0/0x114) from
> >> [<c01841cc>] (cpu_stopper_thread+0xd8/0x164)
> >> [   34.247266] [<c01841cc>] (cpu_stopper_thread+0xd8/0x164) from
> >> [<c0145c14>] (kthread+0xc8/0xd8)
> >> [   34.247323] [<c0145c14>] (kthread+0xc8/0xd8) from [<c0106118>]
> >> (ret_from_fork+0x14/0x20)
> >>
> >> Using local_flush_tlb_kernel_range() fixed it though.
> >
> > What about if another CPU had a TLB entry with the old permissions in?
> > Or do you consider that the likelihood and consequences of that aren't
> > significant?
> 
> The purpose of the function is to temporarily make text writable, do
> the write, and then restore read-only. Since only the writer needs to
> care about TLB state, this works fine. It's actually nice that only
> the current CPU can make text writes.

And is the page table being modified unique to the current CPU? I
thought a common set of page tables was shared across all of them. If
that is the case then one CPU can modify the PTE to be writeable,
another CPU take a TLB miss and pull in that writeable entry, which will
stay there until it drops out the TLB at some indefinite point in the
future. That's the scenario I was getting at with my previous comment.

-- 
Tixy


* [PATCH 2/2] ARM: mm: make text and rodata read-only
  2014-04-08 16:12           ` Jon Medhurst (Tixy)
@ 2014-04-08 16:59             ` Kees Cook
  2014-04-08 19:48               ` Rabin Vincent
  0 siblings, 1 reply; 13+ messages in thread
From: Kees Cook @ 2014-04-08 16:59 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Apr 8, 2014 at 9:12 AM, Jon Medhurst (Tixy) <tixy@linaro.org> wrote:
> On Tue, 2014-04-08 at 09:01 -0700, Kees Cook wrote:
>> On Tue, Apr 8, 2014 at 5:41 AM, Jon Medhurst (Tixy) <tixy@linaro.org> wrote:
>> > On Fri, 2014-04-04 at 17:07 -0700, Kees Cook wrote:
>> >> On Fri, Apr 4, 2014 at 12:58 PM, Rabin Vincent <rabin@rab.in> wrote:
>> > [...]
>> >> > You need a TLB flush.  I had a flush_tlb_all() in my example patch,
>> >> > http://lists.infradead.org/pipermail/linux-arm-kernel/2014-April/244335.html,
>> >> > but the following is probably nicer (on top of this patch):
>> >> >
>> >> > diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
>> >> > index 9bea524..a92c45a 100644
>> >> > --- a/arch/arm/mm/init.c
>> >> > +++ b/arch/arm/mm/init.c
>> >> > @@ -741,6 +741,8 @@ static inline bool arch_has_strict_perms(void)
>> >> >                      addr += SECTION_SIZE)                              \
>> >> >                         section_update(addr, perms[i].mask,             \
>> >> >                                        perms[i].field);                 \
>> >> > +                                                                       \
>> >> > +               flush_tlb_kernel_range(perms[i].start, perms[i].end);   \
>> >> >         }                                                               \
>> >> >  }
>> >> >
>> >>
>> >> When I do this, I hang the system, and get a WARN due to the tlb call
>> >> attempting to flush on all CPUs, I think:
>> >>
>> >> [   34.246034] WARNING: at
>> >> /mnt/host/source/src/third_party/kernel-next/kernel/smp.c:466
>> >> smp_call_function_many+0xac/0x26c()
>> >> ...
>> >> [   34.246617] Backtrace:
>> >> [   34.246697] [<c010d3b8>] (unwind_backtrace+0x0/0x118) from
>> >> [<c060b9d8>] (dump_stack+0x28/0x30)
>> >> [   34.246765] [<c060b9d8>] (dump_stack+0x28/0x30) from [<c0123044>]
>> >> (warn_slowpath_null+0x44/0x5c)
>> >> [   34.246824] [<c0123044>] (warn_slowpath_null+0x44/0x5c) from
>> >> [<c017426c>] (smp_call_function_many+0xac/0x26c)
>> >> [   34.246881] [<c017426c>] (smp_call_function_many+0xac/0x26c) from
>> >> [<c0174468>] (smp_call_function+0x3c/0x48)
>> >> [   34.246937] [<c0174468>] (smp_call_function+0x3c/0x48) from
>> >> [<c010c0fc>] (broadcast_tlb_a15_erratum+0x40/0x4c)
>> >> [   34.246994] [<c010c0fc>] (broadcast_tlb_a15_erratum+0x40/0x4c) from
>> >> [<c010c590>] (flush_tlb_kernel_range+0x74/0xa0)
>> >> [   34.247046] [<c010c590>] (flush_tlb_kernel_range+0x74/0xa0) from
>> >> [<c011403c>] (set_kernel_text_rw+0xd8/0xec)
>> >> [   34.247099] [<c011403c>] (set_kernel_text_rw+0xd8/0xec) from
>> >> [<c010c878>] (__ftrace_modify_code+0x14/0x28)
>> >> [   34.247156] [<c010c878>] (__ftrace_modify_code+0x14/0x28) from
>> >> [<c0184318>] (stop_machine_cpu_stop+0xc0/0x114)
>> >> [   34.247212] [<c0184318>] (stop_machine_cpu_stop+0xc0/0x114) from
>> >> [<c01841cc>] (cpu_stopper_thread+0xd8/0x164)
>> >> [   34.247266] [<c01841cc>] (cpu_stopper_thread+0xd8/0x164) from
>> >> [<c0145c14>] (kthread+0xc8/0xd8)
>> >> [   34.247323] [<c0145c14>] (kthread+0xc8/0xd8) from [<c0106118>]
>> >> (ret_from_fork+0x14/0x20)
>> >>
>> >> Using local_flush_tlb_kernel_range() fixed it though.
>> >
>> > What about if another CPU had a TLB entry with the old permissions in?
>> > Or do you consider that the likelihood and consequences of that aren't
>> > significant?
>>
>> The purpose of the function is to temporarily make text writable, do
>> the write, and then restore read-only. Since only the writer needs to
>> care about TLB state, this works fine. It's actually nice that only
>> the current CPU can make text writes.
>
> And is the page table being modified unique to the current CPU? I
> thought a common set of page tables was shared across all of them. If
> that is the case then one CPU can modify the PTE to be writeable,
> another CPU take a TLB miss and pull in that writeable entry, which will
> stay there until it drops out the TLB at some indefinite point in the
> future. That's the scenario I was getting at with my previous comment.

As I understood it, this would be true for small PTEs, but sections
are fully duplicated on each CPU so we don't run that risk. This was
the whole source of my problem with this patch series: even a full
all-CPU TLB flush wasn't working -- the section permissions were
unique to the CPU since the entries were duplicated.

-Kees

-- 
Kees Cook
Chrome OS Security


* [PATCH 2/2] ARM: mm: make text and rodata read-only
  2014-04-08 16:59             ` Kees Cook
@ 2014-04-08 19:48               ` Rabin Vincent
  2014-04-08 20:19                 ` Kees Cook
  2014-04-09 10:29                 ` Jon Medhurst (Tixy)
  0 siblings, 2 replies; 13+ messages in thread
From: Rabin Vincent @ 2014-04-08 19:48 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Apr 08, 2014 at 09:59:07AM -0700, Kees Cook wrote:
> On Tue, Apr 8, 2014 at 9:12 AM, Jon Medhurst (Tixy) <tixy@linaro.org> wrote:
> > And is the page table being modified unique to the current CPU? I
> > thought a common set of page tables was shared across all of them. If
> > that is the case then one CPU can modify the PTE to be writeable,
> > another CPU take a TLB miss and pull in that writeable entry, which will
> > stay there until it drops out the TLB at some indefinite point in the
> > future. That's the scenario I was getting at with my previous comment.
> 
> As I understood it, this would be true for small PTEs, but sections
> are fully duplicated on each CPU so we don't run that risk. This was
> the whole source of my problem with this patch series: even a full
> all-CPU TLB flush wasn't working -- the section permissions were
> unique to the CPU since the entries were duplicated.

The PGD is per-mm_struct.  mm_structs can be shared between processes.
So the PGD is not per CPU.

This set_kernel_text_rw() is called from ftrace in stop_machine() on one
CPU.  All other CPUs will be spinning in kernel threads inside the loop
in multi_cpu_stop(), with interrupts disabled.  Since kernel threads use
the last process' mm, it is possible for the other CPU(s) to be
currently using the same mm as the modifying CPU.

For any other CPU to pull in the writable entry it would have to get a
TLB miss inside the loop in multi_cpu_stop(), after the state transition
to MULTI_STOP_RUN and before the state transition to MULTI_STOP_EXIT.
This is unlikely, but theoretically possible, for example if
multi_cpu_stop() straddles sections.

To prevent any stale entries being used indefinitely, perhaps the all
CPU TLB flush can be inserted into
ftrace_arch_code_modify_post_process(), which is called after the
stop_machine() and which is where x86 for example makes the entries
read-only again.


* [PATCH 2/2] ARM: mm: make text and rodata read-only
  2014-04-08 19:48               ` Rabin Vincent
@ 2014-04-08 20:19                 ` Kees Cook
  2014-04-14 21:08                   ` Rabin Vincent
  2014-04-09 10:29                 ` Jon Medhurst (Tixy)
  1 sibling, 1 reply; 13+ messages in thread
From: Kees Cook @ 2014-04-08 20:19 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Apr 8, 2014 at 12:48 PM, Rabin Vincent <rabin@rab.in> wrote:
> On Tue, Apr 08, 2014 at 09:59:07AM -0700, Kees Cook wrote:
>> On Tue, Apr 8, 2014 at 9:12 AM, Jon Medhurst (Tixy) <tixy@linaro.org> wrote:
>> > And is the page table being modified unique to the current CPU? I
>> > thought a common set of page tables was shared across all of them. If
>> > that is the case then one CPU can modify the PTE to be writeable,
>> > another CPU take a TLB miss and pull in that writeable entry, which will
>> > stay there until it drops out the TLB at some indefinite point in the
>> > future. That's the scenario I was getting at with my previous comment.
>>
>> As I understood it, this would be true for small PTEs, but sections
>> are fully duplicated on each CPU so we don't run that risk. This was
>> the whole source of my problem with this patch series: even a full
>> all-CPU TLB flush wasn't working -- the section permissions were
>> unique to the CPU since the entries were duplicated.
>
> The PGD is per-mm_struct.  mm_structs can be shared between processes.
> So the PGD is not per CPU.
>
> This set_kernel_text_rw() is called from ftrace in stop_machine() on one
> CPU.  All other CPUs will be spinning in kernel threads inside the loop
> in multi_cpu_stop(), with interrupts disabled.  Since kernel threads use
> the last process' mm, it is possible for the other CPU(s) to be
> currently using the same mm as the modifying CPU.
>
> For any other CPU to pull in the writable entry it would have to get a
> TLB miss inside the loop in multi_cpu_stop(), after the state transition
> to MULTI_STOP_RUN and before the state transition to MULTI_STOP_EXIT.
> This is unlikely, but theoretically possible, for example if
> multi_cpu_stop() straddles sections.

Ah! Now I understand. Thanks for the clarification.

> To prevent any stale entries being used indefinitely, perhaps the all
> CPU TLB flush can be inserted into
> ftrace_arch_code_modify_post_process(), which is called after the
> stop_machine() and which is where x86 for example makes the entries
> read-only again.

Do you mean something like this?

diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index ea446ae09c89..b8c75e45a950 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -90,6 +90,8 @@ int ftrace_arch_code_modify_prepare(void)
 int ftrace_arch_code_modify_post_process(void)
 {
        set_all_modules_text_ro();
+       /* Make sure any TLB misses during machine stop are cleared. */
+       flush_tlb_all();
        return 0;
 }

Thanks!

-Kees

-- 
Kees Cook
Chrome OS Security


* [PATCH 2/2] ARM: mm: make text and rodata read-only
  2014-04-08 19:48               ` Rabin Vincent
  2014-04-08 20:19                 ` Kees Cook
@ 2014-04-09 10:29                 ` Jon Medhurst (Tixy)
  1 sibling, 0 replies; 13+ messages in thread
From: Jon Medhurst (Tixy) @ 2014-04-09 10:29 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 2014-04-08 at 21:48 +0200, Rabin Vincent wrote:
[...]
> For any other CPU to pull in the writable entry it would have to get a
> TLB miss inside the loop in multi_cpu_stop(), after the state transition
> to MULTI_STOP_RUN and before the state transition to MULTI_STOP_EXIT.
> This is unlikely, but theoretically possible, for example if
> multi_cpu_stop() straddles sections.

With speculative execution it is also possible for the CPU to fill the
TLB with entries for a memory address that the program would never
actually access. Basically, whatever is in the MMU registers and page
tables at any given time, the CPU can speculatively use that address
translation and read that memory. And if it's marked cacheable, pull it
into the cache. Oh, and if there is a dirty cacheline in another
CPU's/cluster's cache, move that dirty entry over into its own cache (I
believe).

-- 
Tixy


* [PATCH 2/2] ARM: mm: make text and rodata read-only
  2014-04-08 20:19                 ` Kees Cook
@ 2014-04-14 21:08                   ` Rabin Vincent
  0 siblings, 0 replies; 13+ messages in thread
From: Rabin Vincent @ 2014-04-14 21:08 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Apr 08, 2014 at 01:19:01PM -0700, Kees Cook wrote:
> > To prevent any stale entries being used indefinitely, perhaps the all
> > CPU TLB flush can be inserted into
> > ftrace_arch_code_modify_post_process(), which is called after the
> > stop_machine() and which is where x86 for example makes the entries
> > read-only again.
> 
> Do you mean something like this?

Yes, something like that should probably be sufficient.

> 
> diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
> index ea446ae09c89..b8c75e45a950 100644
> --- a/arch/arm/kernel/ftrace.c
> +++ b/arch/arm/kernel/ftrace.c
> @@ -90,6 +90,8 @@ int ftrace_arch_code_modify_prepare(void)
>  int ftrace_arch_code_modify_post_process(void)
>  {
>         set_all_modules_text_ro();
> +       /* Make sure any TLB misses during machine stop are cleared. */
> +       flush_tlb_all();
>         return 0;
>  }
