public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC] stack and heap are executable on x86_64
@ 2012-12-21  3:00 Kees Cook
  2012-12-21  3:06 ` Kees Cook
  2012-12-21  4:44 ` H. Peter Anvin
  0 siblings, 2 replies; 14+ messages in thread
From: Kees Cook @ 2012-12-21  3:00 UTC (permalink / raw)
  To: linux-kernel; +Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86, Kees Cook

This patch only works up until 706276543b699d80f546e45f8b12574e7b18d952
(v3.5), where the exception tables are made relative. Prior to that,
stock test_nx didn't work because of 84e1c6bb38eb318e456558b610396d9f1afaabf0
(v2.6.38) makes the table read-only.

While trying to fix test_nx, I discovered that it looks like stack and
heap are executable again (at least on x86_64). :( I tried to bisect
this, but ran into a lot of trouble in the x86/x86_64 merging, where
things either didn't compile or didn't boot. Regardless:

1a98fd1 bad
...
736f12b good

So it looks like it broke in 2.6.27, and no one noticed until 2.6.38 when
it wasn't possible to run the test any more, and then stayed unnoticed.

This change for pre-v3.5 creates a new exception table instead of
trying to rewrite the old one. Since the tables are now relative,
we can't actually set up an exception for things in stack and heap on
x86_64 since the distance between the address and the table's .insn is
more than INT_MAX. And if the table were moved into the stack or heap,
then the fixup couldn't point back to the module's text segment. For
v3.5 and later, I'm not sure what to do...

So, mainly two things:
- we need to fix the stack/heap markings.
- it'd be nice to get test_nx back on its feet again.

Thoughts?

-Kees


Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/x86/kernel/test_nx.c |  107 ++++++++++++++++++++++++++++++---------------
 1 file changed, 71 insertions(+), 36 deletions(-)

diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index 3f92ce0..cbaa557 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -9,6 +9,9 @@
  * as published by the Free Software Foundation; version 2
  * of the License.
  */
+
+#define pr_fmt(fmt) "test_nx: " fmt
+
 #include <linux/module.h>
 #include <linux/sort.h>
 #include <linux/slab.h>
@@ -16,6 +19,11 @@
 #include <asm/uaccess.h>
 #include <asm/asm.h>
 
+/* 0xC3 is the opcode for "ret" */
+#define INSN_RET	0xC3
+/* 0x90 is the opcode for "nop" */
+#define INSN_NOP	0x90
+
 extern int rodata_test_data;
 
 /*
@@ -34,18 +42,15 @@ extern int rodata_test_data;
  * (otherwise we'd have to sort and that's just too messy)
  */
 
-
-
 /*
  * We want to set up an exception handling point on our stack,
- * which means a variable value. This function is rather dirty
- * and walks the exception table of the module, looking for a magic
- * marker and replaces it with a specific function.
+ * which means a variable value.
  */
-static void fudze_exception_table(void *marker, void *new)
+static struct exception_table_entry *original_extable;
+static struct exception_table_entry replaced_extable;
+static void fake_exception_table(void *new)
 {
 	struct module *mod = THIS_MODULE;
-	struct exception_table_entry *extable;
 
 	/*
 	 * Note: This module has only 1 exception table entry,
@@ -54,12 +59,30 @@ static void fudze_exception_table(void *marker, void *new)
 	 * table.
 	 */
 	if (mod->num_exentries > 1) {
-		printk(KERN_ERR "test_nx: too many exception table entries!\n");
-		printk(KERN_ERR "test_nx: test results are not reliable.\n");
+		pr_err("too many exception table entries!\n");
+		pr_err("test results are not reliable.\n");
 		return;
 	}
-	extable = (struct exception_table_entry *)mod->extable;
-	extable[0].insn = (unsigned long)new;
+	/* Save original exception table when first called. */
+	if (original_extable == NULL)
+		original_extable = mod->extable;
+	/* Replace the original exception table with a duplicate. */
+	if (mod->extable == original_extable)
+		mod->extable = &replaced_extable;
+
+	/* Record new insn address. */
+	mod->extable->insn = (unsigned long)new;
+
+	/* Record new fixup address. */
+	mod->extable->fixup = original_extable->fixup;
+}
+
+static void restore_exception_table(void)
+{
+	struct module *mod = THIS_MODULE;
+
+	if (original_extable)
+		mod->extable = original_extable;
 }
 
 
@@ -82,7 +105,8 @@ static noinline int test_address(void *address)
 	unsigned long result;
 
 	/* Set up an exception table entry for our address */
-	fudze_exception_table(&foo_label, address);
+	fake_exception_table(address);
+	pr_info("calling %p (0x%02X)\n", address, *(unsigned char *)address);
 	result = 1;
 	asm volatile(
 		"foo_label:\n"
@@ -97,76 +121,87 @@ static noinline int test_address(void *address)
 		: [fake_code] "r" (address), [zero] "r" (0UL), "0" (result)
 	);
 	/* change the exception table back for the next round */
-	fudze_exception_table(address, &foo_label);
+	restore_exception_table();
 
 	if (result)
 		return -ENODEV;
 	return 0;
 }
 
-static unsigned char test_data = 0xC3; /* 0xC3 is the opcode for "ret" */
+static unsigned char test_data;
+static const int module_rodata_test_data = INSN_RET;
 
 static int test_NX(void)
 {
 	int ret = 0;
-	/* 0xC3 is the opcode for "ret" */
-	char stackcode[] = {0xC3, 0x90, 0 };
+	char stackcode[] = {INSN_RET, INSN_NOP, 0 };
 	char *heap;
 
-	test_data = 0xC3;
+	test_data = INSN_RET;
 
-	printk(KERN_INFO "Testing NX protection\n");
+	pr_info("Testing NX protection ...\n");
 
 	/* Test 1: check if the stack is not executable */
 	if (test_address(&stackcode)) {
-		printk(KERN_ERR "test_nx: stack was executable\n");
+		pr_err("stack is executable\n");
 		ret = -ENODEV;
 	}
 
-
 	/* Test 2: Check if the heap is executable */
 	heap = kmalloc(64, GFP_KERNEL);
 	if (!heap)
 		return -ENOMEM;
-	heap[0] = 0xC3; /* opcode for "ret" */
+	heap[0] = INSN_RET;
 
 	if (test_address(heap)) {
-		printk(KERN_ERR "test_nx: heap was executable\n");
+		pr_err("heap is executable\n");
 		ret = -ENODEV;
 	}
 	kfree(heap);
 
-	/*
-	 * The following 2 tests currently fail, this needs to get fixed
-	 * Until then, don't run them to avoid too many people getting scared
-	 * by the error message
-	 */
-
 #ifdef CONFIG_DEBUG_RODATA
-	/* Test 3: Check if the .rodata section is executable */
-	if (rodata_test_data != 0xC3) {
-		printk(KERN_ERR "test_nx: .rodata marker has invalid value\n");
+	/* Test 3: Check if the kernel .rodata section is executable */
+	if (rodata_test_data != INSN_RET) {
+		pr_err("kernel .rodata marker has invalid value\n");
 		ret = -ENODEV;
 	} else if (test_address(&rodata_test_data)) {
-		printk(KERN_ERR "test_nx: .rodata section is executable\n");
+		pr_err("kernel .rodata section is executable\n");
 		ret = -ENODEV;
 	}
+#else
+	pr_err("kernel .rodata section executable: missing " \
+		"CONFIG_DEBUG_RODATA\n");
+	ret = -ENODEV;
 #endif
 
-#if 0
+#ifdef CONFIG_DEBUG_SET_MODULE_RONX
 	/* Test 4: Check if the .data section of a module is executable */
 	if (test_address(&test_data)) {
-		printk(KERN_ERR "test_nx: .data section is executable\n");
+		pr_err(".data section is executable\n");
 		ret = -ENODEV;
 	}
 
+	/* Test 5: Check if the .rodata section of a module is executable */
+	if (module_rodata_test_data != INSN_RET) {
+		pr_err("module .rodata marker has invalid value\n");
+		ret = -ENODEV;
+	} else if (test_address((void *)&module_rodata_test_data)) {
+		pr_err("module .rodata section is executable\n");
+		ret = -ENODEV;
+	}
+#else
+	pr_err("module .data and .rodata sections executable: missing " \
+		"CONFIG_DEBUG_SET_MODULE_RONX\n");
+	ret = -ENODEV;
 #endif
+
+	pr_info("Done testing.\n");
+
 	return ret;
 }
 
 static void test_exit(void)
-{
-}
+{ }
 
 module_init(test_NX);
 module_exit(test_exit);
-- 
1.7.9.5


-- 
Kees Cook
Chrome OS Security

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [RFC] stack and heap are executable on x86_64
  2012-12-21  3:00 [RFC] stack and heap are executable on x86_64 Kees Cook
@ 2012-12-21  3:06 ` Kees Cook
  2012-12-21  3:30   ` H. Peter Anvin
  2012-12-21  4:44 ` H. Peter Anvin
  1 sibling, 1 reply; 14+ messages in thread
From: Kees Cook @ 2012-12-21  3:06 UTC (permalink / raw)
  To: LKML; +Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86, Kees Cook

On Thu, Dec 20, 2012 at 7:00 PM, Kees Cook <keescook@chromium.org> wrote:
> While trying to fix test_nx, I discovered that it looks like stack and
> heap are executable again (at least on x86_64). :( I tried to bisect

Btw, this ends up looking like this on v3.4:

[    2.486223] test_nx: Testing NX protection ...
[    2.487468] test_nx: calling ffff88001dd57e45 (0xC3)
[    2.488766] test_nx: stack is executable
[    2.489802] test_nx: calling ffff88001e2c7580 (0xC3)
[    2.491106] test_nx: heap is executable
[    2.492086] test_nx: calling ffffffff81803c80 (0xC3)
[    2.493201] test_nx: calling ffffffffa0002220 (0xC3)
[    2.493843] test_nx: calling ffffffffa0001224 (0xC3)
[    2.494549] test_nx: Done testing.

-Kees

--
Kees Cook
Chrome OS Security

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] stack and heap are executable on x86_64
  2012-12-21  3:06 ` Kees Cook
@ 2012-12-21  3:30   ` H. Peter Anvin
  0 siblings, 0 replies; 14+ messages in thread
From: H. Peter Anvin @ 2012-12-21  3:30 UTC (permalink / raw)
  To: Kees Cook, LKML; +Cc: Thomas Gleixner, Ingo Molnar, x86

Wtf...

Kees Cook <keescook@chromium.org> wrote:

>On Thu, Dec 20, 2012 at 7:00 PM, Kees Cook <keescook@chromium.org>
>wrote:
>> While trying to fix test_nx, I discovered that it looks like stack
>and
>> heap are executable again (at least on x86_64). :( I tried to bisect
>
>Btw, this ends up looking like this on v3.4:
>
>[    2.486223] test_nx: Testing NX protection ...
>[    2.487468] test_nx: calling ffff88001dd57e45 (0xC3)
>[    2.488766] test_nx: stack is executable
>[    2.489802] test_nx: calling ffff88001e2c7580 (0xC3)
>[    2.491106] test_nx: heap is executable
>[    2.492086] test_nx: calling ffffffff81803c80 (0xC3)
>[    2.493201] test_nx: calling ffffffffa0002220 (0xC3)
>[    2.493843] test_nx: calling ffffffffa0001224 (0xC3)
>[    2.494549] test_nx: Done testing.
>
>-Kees
>
>--
>Kees Cook
>Chrome OS Security

-- 
Sent from my mobile phone. Please excuse brevity and lack of formatting.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] stack and heap are executable on x86_64
  2012-12-21  3:00 [RFC] stack and heap are executable on x86_64 Kees Cook
  2012-12-21  3:06 ` Kees Cook
@ 2012-12-21  4:44 ` H. Peter Anvin
  2012-12-21  6:27   ` Yinghai Lu
  1 sibling, 1 reply; 14+ messages in thread
From: H. Peter Anvin @ 2012-12-21  4:44 UTC (permalink / raw)
  To: Kees Cook
  Cc: linux-kernel, Thomas Gleixner, Ingo Molnar, x86, Jim Kukunas,
	Arjan van de Ven

On 12/20/2012 07:00 PM, Kees Cook wrote:
>
> This change for pre-v3.5 creates a new exception table instead of
> trying to rewrite the old one. Since the tables are now relative,
> we can't actually set up an exception for things in stack and heap on
> x86_64 since the distance between the address and the table's .insn is
> more than INT_MAX. And if the table were moved into the stack or heap,
> then the fixup couldn't point back to the module's text segment. For
> v3.5 and later, I'm not sure what to do...
>
> So, mainly two things:
> - we need to fix the stack/heap markings.
> - it'd be nice to get test_nx back on its feet again.
>

I just looked at a /sys/kernel/debug/kernel_page_tables dump.... and 
there are a bunch of pages which are RWX:

0xffff880000000000-0xffff880000097000  604K RW             GLB x  pte
0xffff88000009d000-0xffff880000200000 1420K RW             GLB x  pte
0xffff880000200000-0xffff880001000000   14M RW         PSE GLB x  pmd
0xffff880001c00000-0xffff880035e00000  834M RW         PSE GLB x  pmd
0xffff880035e00000-0xffff880035ffe000 2040K RW             GLB x  pte
0xffff880036ff7000-0xffff880037000000   36K RW             GLB x  pte
0xffff880037000000-0xffff880040000000  144M RW         PSE GLB x  pmd
0xffffffff81c00000-0xffffffff81cea000  936K RW             GLB x  pte
0xffffffff81dfd000-0xffffffff81e00000   12K RW             GLB x  pte
0xffffffff81e00000-0xffffffff82000000    2M RW         PSE GLB x  pmd

One particular piece of idiocy is that we tied marking the pages 
executable into the PAT system, which means that if it is executable 
anywhere in the kernel it is executable everywhere.  That being said, I 
don't think that is the only thing at play here.

On the other hand... do we *ever* have a legitimate reason to run code 
below the -2G point?  If so we could/should probably mark those NX 
already at the higher paging levels...

	-hpa

-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] stack and heap are executable on x86_64
  2012-12-21  4:44 ` H. Peter Anvin
@ 2012-12-21  6:27   ` Yinghai Lu
  2012-12-21 17:01     ` H. Peter Anvin
  0 siblings, 1 reply; 14+ messages in thread
From: Yinghai Lu @ 2012-12-21  6:27 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Kees Cook, linux-kernel, Thomas Gleixner, Ingo Molnar, x86,
	Jim Kukunas, Arjan van de Ven

On Thu, Dec 20, 2012 at 8:44 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> I just looked at a /sys/kernel/debug/kernel_page_tables dump.... and there
> are a bunch of pages which are RWX:
>
> 0xffff880000000000-0xffff880000097000  604K RW             GLB x  pte
> 0xffff88000009d000-0xffff880000200000 1420K RW             GLB x  pte
> 0xffff880000200000-0xffff880001000000   14M RW         PSE GLB x  pmd
> 0xffff880001c00000-0xffff880035e00000  834M RW         PSE GLB x  pmd
> 0xffff880035e00000-0xffff880035ffe000 2040K RW             GLB x  pte
> 0xffff880036ff7000-0xffff880037000000   36K RW             GLB x  pte
> 0xffff880037000000-0xffff880040000000  144M RW         PSE GLB x  pmd
> 0xffffffff81c00000-0xffffffff81cea000  936K RW             GLB x  pte
> 0xffffffff81dfd000-0xffffffff81e00000   12K RW             GLB x  pte
> 0xffffffff81e00000-0xffffffff82000000    2M RW         PSE GLB x  pmd

after for-x86-boot we will have
---[ Low Kernel Mapping ]---
0xffff880000000000-0xffff880000099000         612K     RW             GLB NX pte
0xffff880000099000-0xffff88000009a000           4K     ro             GLB NX pte
0xffff88000009a000-0xffff88000009b000           4K     ro             GLB x  pte
0xffff88000009b000-0xffff880000200000        1428K     RW             GLB NX pte
0xffff880000200000-0xffff8800dfe00000        3580M     RW         PSE GLB NX pmd
0xffff8800dfe00000-0xffff8800dfffe000        2040K     RW             GLB NX pte
0xffff8800dfffe000-0xffff8800e0000000           8K                           pte
0xffff8800e0000000-0xffff880100000000         512M                           pmd
0xffff880100000000-0xffff8801a0000000        2560M     RW         PSE GLB NX pmd
---[ High Kernel Mapping ]---
0xffffffff80000000-0xffffffff81000000          16M                           pmd
0xffffffff81000000-0xffffffff82a00000          26M     RW         PSE GLB x  pmd
0xffffffff82a00000-0xffffffff82b21000        1156K     RW             GLB x  pte
0xffffffff82b21000-0xffffffff82c00000         892K     RW             GLB NX pte
0xffffffff82c00000-0xffffffff82e00000           2M     RW         PSE GLB NX pmd
0xffffffff82e00000-0xffffffff82e92000         584K     RW             GLB NX pte
0xffffffff82e92000-0xffffffff83000000        1464K     RW             GLB x  pte
0xffffffff83000000-0xffffffff83c00000          12M     RW         PSE GLB x  pmd
0xffffffff83c00000-0xffffffffa0000000         452M                           pmd

so low mapping will only have trampoline get x set.
is that expected ?

Do we need to set low mapping corresponding to kernel range to x?

Yinghai

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] stack and heap are executable on x86_64
  2012-12-21  6:27   ` Yinghai Lu
@ 2012-12-21 17:01     ` H. Peter Anvin
  2012-12-21 17:28       ` Yinghai Lu
  0 siblings, 1 reply; 14+ messages in thread
From: H. Peter Anvin @ 2012-12-21 17:01 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Kees Cook, linux-kernel, Thomas Gleixner, Ingo Molnar, x86,
	Jim Kukunas, Arjan van de Ven

On 12/20/2012 10:27 PM, Yinghai Lu wrote:
>
> after for-x86-boot we will have
> ---[ Low Kernel Mapping ]---
> 0xffff880000000000-0xffff880000099000         612K     RW             GLB NX pte
> 0xffff880000099000-0xffff88000009a000           4K     ro             GLB NX pte
> 0xffff88000009a000-0xffff88000009b000           4K     ro             GLB x  pte
> 0xffff88000009b000-0xffff880000200000        1428K     RW             GLB NX pte
> 0xffff880000200000-0xffff8800dfe00000        3580M     RW         PSE GLB NX pmd
> 0xffff8800dfe00000-0xffff8800dfffe000        2040K     RW             GLB NX pte
> 0xffff8800dfffe000-0xffff8800e0000000           8K                           pte
> 0xffff8800e0000000-0xffff880100000000         512M                           pmd
> 0xffff880100000000-0xffff8801a0000000        2560M     RW         PSE GLB NX pmd
> ---[ High Kernel Mapping ]---
> 0xffffffff80000000-0xffffffff81000000          16M                           pmd
> 0xffffffff81000000-0xffffffff82a00000          26M     RW         PSE GLB x  pmd
> 0xffffffff82a00000-0xffffffff82b21000        1156K     RW             GLB x  pte
> 0xffffffff82b21000-0xffffffff82c00000         892K     RW             GLB NX pte
> 0xffffffff82c00000-0xffffffff82e00000           2M     RW         PSE GLB NX pmd
> 0xffffffff82e00000-0xffffffff82e92000         584K     RW             GLB NX pte
> 0xffffffff82e92000-0xffffffff83000000        1464K     RW             GLB x  pte
> 0xffffffff83000000-0xffffffff83c00000          12M     RW         PSE GLB x  pmd
> 0xffffffff83c00000-0xffffffffa0000000         452M                           pmd
>
> so low mapping will only have trampoline get x set.
> is that expected ?
>

Yes.

> Do we need to set low mapping corresponding to kernel range to x?

No; we probably should never have the low mappings set to X, which comes 
down to what I said earlier... we should mark the low mapping NX at the 
PGD/PML4 level.

However, this isn't good enough.  You still have a large number of pages 
which are RWX, and we should *never* have RWX pages, period, full stop, 
and your map above sill have megabytes of them.

Furthermore, just saying "we applied this patchset and it seems to go 
away" isn't good enough... we need an understanding of *why* it makes 
things go away and how that makes it safe.

	-hpa

-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] stack and heap are executable on x86_64
  2012-12-21 17:01     ` H. Peter Anvin
@ 2012-12-21 17:28       ` Yinghai Lu
  2012-12-21 17:36         ` H. Peter Anvin
  0 siblings, 1 reply; 14+ messages in thread
From: Yinghai Lu @ 2012-12-21 17:28 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Kees Cook, linux-kernel, Thomas Gleixner, Ingo Molnar, x86,
	Jim Kukunas, Arjan van de Ven

On Fri, Dec 21, 2012 at 9:01 AM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 12/20/2012 10:27 PM, Yinghai Lu wrote:
>>
>>
>> after for-x86-boot we will have
>> ---[ Low Kernel Mapping ]---
>> 0xffff880000000000-0xffff880000099000         612K     RW             GLB
>> NX pte
>> 0xffff880000099000-0xffff88000009a000           4K     ro             GLB
>> NX pte
>> 0xffff88000009a000-0xffff88000009b000           4K     ro             GLB
>> x  pte
>> 0xffff88000009b000-0xffff880000200000        1428K     RW             GLB
>> NX pte
>> 0xffff880000200000-0xffff8800dfe00000        3580M     RW         PSE GLB
>> NX pmd
>> 0xffff8800dfe00000-0xffff8800dfffe000        2040K     RW             GLB
>> NX pte
>> 0xffff8800dfffe000-0xffff8800e0000000           8K
>> pte
>> 0xffff8800e0000000-0xffff880100000000         512M
>> pmd
>> 0xffff880100000000-0xffff8801a0000000        2560M     RW         PSE GLB
>> NX pmd
>> ---[ High Kernel Mapping ]---
>> 0xffffffff80000000-0xffffffff81000000          16M
>> pmd
>> 0xffffffff81000000-0xffffffff82a00000          26M     RW         PSE GLB
>> x  pmd
>> 0xffffffff82a00000-0xffffffff82b21000        1156K     RW             GLB
>> x  pte
>> 0xffffffff82b21000-0xffffffff82c00000         892K     RW             GLB
>> NX pte
>> 0xffffffff82c00000-0xffffffff82e00000           2M     RW         PSE GLB
>> NX pmd
>> 0xffffffff82e00000-0xffffffff82e92000         584K     RW             GLB
>> NX pte
>> 0xffffffff82e92000-0xffffffff83000000        1464K     RW             GLB
>> x  pte
>> 0xffffffff83000000-0xffffffff83c00000          12M     RW         PSE GLB
>> x  pmd
>> 0xffffffff83c00000-0xffffffffa0000000         452M
>> pmd
>>
>> so low mapping will only have trampoline get x set.
>> is that expected ?
>>
>
> Yes.
>
>
>> Do we need to set low mapping corresponding to kernel range to x?
>
>
> No; we probably should never have the low mappings set to X, which comes
> down to what I said earlier... we should mark the low mapping NX at the
> PGD/PML4 level.
>
> However, this isn't good enough.  You still have a large number of pages
> which are RWX, and we should *never* have RWX pages, period, full stop, and
> your map above sill have megabytes of them.

which line?

 0xffffffff83000000-0xffffffff83c00000          12M     RW         PSE
GLB  x  pmd

my kernel INIT_SIZE is 27M, and it includes everything that i needed
in the kernel.

>
> Furthermore, just saying "we applied this patchset and it seems to go away"
> isn't good enough... we need an understanding of *why* it makes things go
> away and how that makes it safe.

i know the reason:

current kernel set x from trampoline to be x too late. So it set EXEC
in head_64.c for all 1G range.

so patch
http://git.kernel.org/?p=linux/kernel/git/yinghai/linux-yinghai.git;a=commitdiff;h=b927466d72df250fa9b79fce3624cd5efd1317b7
set that x early.
and in #PF handler version:
http://git.kernel.org/?p=linux/kernel/git/yinghai/linux-yinghai.git;a=commitdiff;h=3c06147be8e6db30dba4cafb170ad66f7baffbfc
init_mapping_kernel will set mapping for kernel  and first 2M to
LARGEPAGE only without EXEC.

that is the reason why #PF handler version does not work with SMP.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] stack and heap are executable on x86_64
  2012-12-21 17:28       ` Yinghai Lu
@ 2012-12-21 17:36         ` H. Peter Anvin
  2012-12-21 21:26           ` Yinghai Lu
  0 siblings, 1 reply; 14+ messages in thread
From: H. Peter Anvin @ 2012-12-21 17:36 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Kees Cook, linux-kernel, Thomas Gleixner, Ingo Molnar, x86,
	Jim Kukunas, Arjan van de Ven

On 12/21/2012 09:28 AM, Yinghai Lu wrote:
> 
> which line?
> 
>  0xffffffff83000000-0xffffffff83c00000          12M     RW         PSE
> GLB  x  pmd
> 
> my kernel INIT_SIZE is 27M, and it includes everything that i needed
> in the kernel.
> 

We should NEVER have RW + x at the same time (at least when the kernel
is compiled properly.)  Looks like your patch does get rid of a bunch of
stuff in the low mapping -- although the low mapping really should never
be +x at all -- but there are still problems with the high mapping.

Oh yes, and then there is EFI.  If that means we need to keep a
completely separate page tables for EFI than so be it...

	-hpa


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] stack and heap are executable on x86_64
  2012-12-21 17:36         ` H. Peter Anvin
@ 2012-12-21 21:26           ` Yinghai Lu
  2012-12-21 22:22             ` Yinghai Lu
  0 siblings, 1 reply; 14+ messages in thread
From: Yinghai Lu @ 2012-12-21 21:26 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Kees Cook, linux-kernel, Thomas Gleixner, Ingo Molnar, x86,
	Jim Kukunas, Arjan van de Ven

On Fri, Dec 21, 2012 at 9:36 AM, H. Peter Anvin <hpa@zytor.com> wrote:
> We should NEVER have RW + x at the same time (at least when the kernel
> is compiled properly.)  Looks like your patch does get rid of a bunch of
> stuff in the low mapping -- although the low mapping really should never
> be +x at all -- but there are still problems with the high mapping

after enabling CONFIG_DEBUG_RODATA

get
---[ High Kernel Mapping ]---
0xffffffff80000000-0xffffffff81000000          16M                           pmd
0xffffffff81000000-0xffffffff82200000          18M     ro         PSE GLB x  pmd
0xffffffff82200000-0xffffffff82c00000          10M     ro         PSE GLB NX pmd
0xffffffff82c00000-0xffffffff82dc9000        1828K     RW             GLB x  pte
0xffffffff82dc9000-0xffffffff82e00000         220K     RW             GLB NX pte
0xffffffff82e00000-0xffffffff83000000           2M     RW         PSE GLB NX pmd
0xffffffff83000000-0xffffffff8313a000        1256K     RW             GLB NX pte
0xffffffff8313a000-0xffffffff83200000         792K     RW             GLB x  pte
0xffffffff83200000-0xffffffff83e00000          12M     RW         PSE GLB x  pmd
0xffffffff83e00000-0xffffffffa0000000         450M                           pmd

for
[    0.000000] Kernel Layout:
[    0.000000]   .text: [0x01000000-0x021434f8]
[    0.000000] .rodata: [0x02200000-0x02a13fff]
[    0.000000]   .data: [0x02c00000-0x02dc763f]
[    0.000000]   .init: [0x02dc9000-0x0312cfff]
[    0.000000]    .bss: [0x0313b000-0x03dd6fff]
[    0.000000]    .brk: [0x03dd7000-0x03dfffff]

looks like .data, .bss, .brk should not have x set.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] stack and heap are executable on x86_64
  2012-12-21 21:26           ` Yinghai Lu
@ 2012-12-21 22:22             ` Yinghai Lu
  2012-12-21 22:23               ` H. Peter Anvin
  0 siblings, 1 reply; 14+ messages in thread
From: Yinghai Lu @ 2012-12-21 22:22 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Kees Cook, linux-kernel, Thomas Gleixner, Ingo Molnar, x86,
	Jim Kukunas, Arjan van de Ven

[-- Attachment #1: Type: text/plain, Size: 1833 bytes --]

On Fri, Dec 21, 2012 at 1:26 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Fri, Dec 21, 2012 at 9:36 AM, H. Peter Anvin <hpa@zytor.com> wrote:
>> We should NEVER have RW + x at the same time (at least when the kernel
>> is compiled properly.)  Looks like your patch does get rid of a bunch of
>> stuff in the low mapping -- although the low mapping really should never
>> be +x at all -- but there are still problems with the high mapping
>
> after enabling CONFIG_DEBUG_RODATA
>
> get
> ---[ High Kernel Mapping ]---
> 0xffffffff80000000-0xffffffff81000000          16M                           pmd
> 0xffffffff81000000-0xffffffff82200000          18M     ro         PSE GLB x  pmd
> 0xffffffff82200000-0xffffffff82c00000          10M     ro         PSE GLB NX pmd
> 0xffffffff82c00000-0xffffffff82dc9000        1828K     RW             GLB x  pte
> 0xffffffff82dc9000-0xffffffff82e00000         220K     RW             GLB NX pte
> 0xffffffff82e00000-0xffffffff83000000           2M     RW         PSE GLB NX pmd
> 0xffffffff83000000-0xffffffff8313a000        1256K     RW             GLB NX pte
> 0xffffffff8313a000-0xffffffff83200000         792K     RW             GLB x  pte
> 0xffffffff83200000-0xffffffff83e00000          12M     RW         PSE GLB x  pmd
> 0xffffffff83e00000-0xffffffffa0000000         450M                           pmd
>
> for
> [    0.000000] Kernel Layout:
> [    0.000000]   .text: [0x01000000-0x021434f8]
> [    0.000000] .rodata: [0x02200000-0x02a13fff]
> [    0.000000]   .data: [0x02c00000-0x02dc763f]
> [    0.000000]   .init: [0x02dc9000-0x0312cfff]
> [    0.000000]    .bss: [0x0313b000-0x03dd6fff]
> [    0.000000]    .brk: [0x03dd7000-0x03dfffff]
>
> looks like .data, .bss, .brk should not have x set.

please check attached patch that set NX for data/bss/brk with 64bit.

Thanks

Yinghai

[-- Attachment #2: no_x_for_data_bss_brk.patch --]
[-- Type: application/octet-stream, Size: 3185 bytes --]

Subject: [PATCH] x86, 64bit, mm: Mark data/bss/brk to nx

HPA said, we should not have RW and +x set at the time.

for kernel layout:
[    0.000000] Kernel Layout:
[    0.000000]   .text: [0x01000000-0x021434f8]
[    0.000000] .rodata: [0x02200000-0x02a13fff]
[    0.000000]   .data: [0x02c00000-0x02dc763f]
[    0.000000]   .init: [0x02dc9000-0x0312cfff]
[    0.000000]    .bss: [0x0313b000-0x03dd6fff]
[    0.000000]    .brk: [0x03dd7000-0x03dfffff]

before the patch, we have
---[ High Kernel Mapping ]---
0xffffffff80000000-0xffffffff81000000          16M                           pmd
0xffffffff81000000-0xffffffff82200000          18M     ro         PSE GLB x  pmd
0xffffffff82200000-0xffffffff82c00000          10M     ro         PSE GLB NX pmd
0xffffffff82c00000-0xffffffff82dc9000        1828K     RW             GLB x  pte
0xffffffff82dc9000-0xffffffff82e00000         220K     RW             GLB NX pte
0xffffffff82e00000-0xffffffff83000000           2M     RW         PSE GLB NX pmd
0xffffffff83000000-0xffffffff8313a000        1256K     RW             GLB NX pte
0xffffffff8313a000-0xffffffff83200000         792K     RW             GLB x  pte
0xffffffff83200000-0xffffffff83e00000          12M     RW         PSE GLB x  pmd
0xffffffff83e00000-0xffffffffa0000000         450M                           pmd

after patch,, we get
---[ High Kernel Mapping ]---
0xffffffff80000000-0xffffffff81000000          16M                           pmd
0xffffffff81000000-0xffffffff82200000          18M     ro         PSE GLB x  pmd
0xffffffff82200000-0xffffffff82c00000          10M     ro         PSE GLB NX pmd
0xffffffff82c00000-0xffffffff82e00000           2M     RW             GLB NX pte
0xffffffff82e00000-0xffffffff83000000           2M     RW         PSE GLB NX pmd
0xffffffff83000000-0xffffffff83200000           2M     RW             GLB NX pte
0xffffffff83200000-0xffffffff83e00000          12M     RW         PSE GLB NX pmd
0xffffffff83e00000-0xffffffffa0000000         450M                           pmd

so data, bss, brk get NX ...

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 arch/x86/mm/init_64.c |    7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -811,6 +811,7 @@ void mark_rodata_ro(void)
 	unsigned long end = (unsigned long) &__end_rodata_hpage_align;
 	unsigned long text_end = PFN_ALIGN(&__stop___ex_table);
 	unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
+	unsigned long all_end = PFN_ALIGN(&_end);
 
 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
 	       (end - start) >> 10);
@@ -819,10 +820,10 @@ void mark_rodata_ro(void)
 	kernel_set_to_readonly = 1;
 
 	/*
-	 * The rodata section (but not the kernel text!) should also be
-	 * not-executable.
+	 * The rodata/data/bss/brk section (but not the kernel text!)
+	 * should also be not-executable.
 	 */
-	set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT);
+	set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT);
 
 	rodata_test();
 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] stack and heap are executable on x86_64
  2012-12-21 22:22             ` Yinghai Lu
@ 2012-12-21 22:23               ` H. Peter Anvin
  2012-12-21 22:26                 ` Yinghai Lu
  0 siblings, 1 reply; 14+ messages in thread
From: H. Peter Anvin @ 2012-12-21 22:23 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Kees Cook, linux-kernel, Thomas Gleixner, Ingo Molnar, x86,
	Jim Kukunas, Arjan van de Ven

On 12/21/2012 02:22 PM, Yinghai Lu wrote:
> 
> please check attached patch that set NX for data/bss/brk with 64bit.
> 

This is on top of for-x86-boot I presume?

	-hpa



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] stack and heap are executable on x86_64
  2012-12-21 22:23               ` H. Peter Anvin
@ 2012-12-21 22:26                 ` Yinghai Lu
  2012-12-21 22:28                   ` H. Peter Anvin
  0 siblings, 1 reply; 14+ messages in thread
From: Yinghai Lu @ 2012-12-21 22:26 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Kees Cook, linux-kernel, Thomas Gleixner, Ingo Molnar, x86,
	Jim Kukunas, Arjan van de Ven

On Fri, Dec 21, 2012 at 2:23 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 12/21/2012 02:22 PM, Yinghai Lu wrote:
>>
>> please check attached patch that set NX for data/bss/brk with 64bit.
>>
>
> This is on top of for-x86-boot I presume?

yes, but should work on current linus tree.

Do we want one that handle low mapping for current linus tree ?

Yinghai

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] stack and heap are executable on x86_64
  2012-12-21 22:26                 ` Yinghai Lu
@ 2012-12-21 22:28                   ` H. Peter Anvin
  2012-12-21 22:30                     ` Yinghai Lu
  0 siblings, 1 reply; 14+ messages in thread
From: H. Peter Anvin @ 2012-12-21 22:28 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Kees Cook, linux-kernel, Thomas Gleixner, Ingo Molnar, x86,
	Jim Kukunas, Arjan van de Ven

On 12/21/2012 02:26 PM, Yinghai Lu wrote:
> On Fri, Dec 21, 2012 at 2:23 PM, H. Peter Anvin <hpa@zytor.com> wrote:
>> On 12/21/2012 02:22 PM, Yinghai Lu wrote:
>>>
>>> please check attached patch that set NX for data/bss/brk with 64bit.
>>>
>>
>> This is on top of for-x86-boot I presume?
> 
> yes, but should work on current linus tree.
> 
> Do we want one that handle low mapping for current linus tree ?
> 

Let's worry about that one after New Year.  I am hoping to get all these
patchsets into -tip either the week of Jan 7 or Jan 14 (I currently
don't know when I will be back.)

	-hpa



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] stack and heap are executable on x86_64
  2012-12-21 22:28                   ` H. Peter Anvin
@ 2012-12-21 22:30                     ` Yinghai Lu
  0 siblings, 0 replies; 14+ messages in thread
From: Yinghai Lu @ 2012-12-21 22:30 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Kees Cook, linux-kernel, Thomas Gleixner, Ingo Molnar, x86,
	Jim Kukunas, Arjan van de Ven

On Fri, Dec 21, 2012 at 2:28 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 12/21/2012 02:26 PM, Yinghai Lu wrote:

>> On Fri, Dec 21, 2012 at 2:23 PM, H. Peter Anvin <hpa@zytor.com> wrote:
>>> On 12/21/2012 02:22 PM, Yinghai Lu wrote:
>>>>
>>>> please check attached patch that set NX for data/bss/brk with 64bit.
>>>>
>>>
>>> This is on top of for-x86-boot I presume?
>>
>> yes, but should work on current linus tree.
>>
>> Do we want one that handle low mapping for current linus tree ?
>>
>
> Let's worry about that one after New Year.  I am hoping to get all these
> patchsets into -tip either the week of Jan 7 or Jan 14 (I currently
> don't know when I will be back.)

ok.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2012-12-21 22:30 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-12-21  3:00 [RFC] stack and heap are executable on x86_64 Kees Cook
2012-12-21  3:06 ` Kees Cook
2012-12-21  3:30   ` H. Peter Anvin
2012-12-21  4:44 ` H. Peter Anvin
2012-12-21  6:27   ` Yinghai Lu
2012-12-21 17:01     ` H. Peter Anvin
2012-12-21 17:28       ` Yinghai Lu
2012-12-21 17:36         ` H. Peter Anvin
2012-12-21 21:26           ` Yinghai Lu
2012-12-21 22:22             ` Yinghai Lu
2012-12-21 22:23               ` H. Peter Anvin
2012-12-21 22:26                 ` Yinghai Lu
2012-12-21 22:28                   ` H. Peter Anvin
2012-12-21 22:30                     ` Yinghai Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox