public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] x86/asm: Pin sensitive CR4 bits
@ 2019-02-21 18:09 Kees Cook
  2019-02-23 10:16 ` [tip:x86/asm] " tip-bot for Kees Cook
  0 siblings, 1 reply; 3+ messages in thread
From: Kees Cook @ 2019-02-21 18:09 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Jann Horn, Sean Christopherson, Dominik Brodowski, LKML,
	Kernel Hardening, X86 ML

Several recent exploits have used direct calls to the native_write_cr4()
function to disable SMEP and SMAP before then continuing their exploits
using userspace memory access. This pins bits of cr4 so that they cannot
be changed through a common function. This is not intended to be general
ROP protection (which would require CFI to defend against properly), but
rather a way to avoid trivial direct function calling (or CFI bypassing
via a matching function prototype) as seen in:

https://googleprojectzero.blogspot.com/2017/05/exploiting-linux-kernel-via-packet.html
(https://github.com/xairy/kernel-exploits/tree/master/CVE-2017-7308)

The goals of this change:
 - pin specific bits (SMEP, SMAP, and UMIP) when writing cr4.
 - avoid setting the bits too early (they must become pinned only after
   first being used).
 - pinning mask needs to be read-only during normal runtime.
 - pinning needs to be rechecked after set to avoid jumps into the middle
   of the function.

Using __ro_after_init on the mask is done so it can't be first disabled
with a malicious write. And since it becomes read-only, we must avoid
writing to it later (hence the check for bits already having been set
instead of unconditionally writing to the mask).

The use of volatile is done to force the compiler to perform a full reload
of the mask after setting cr4 (to protect against just jumping into the
function past where the masking happens; we must check that the mask was
applied after we do the set). Due to how this function can be built by the
compiler (especially due to the removal of frame pointers), jumping into
the middle of the function frequently doesn't require stack manipulation
to construct a stack frame (there may only a retq without pops, which is
sufficient for use with exploits like timer overwrites mentioned above).

For example, without the recheck, the function may appear as:

   native_write_cr4:
      mov [pin], %rbx
      or  %rbx, %rdi
   1: mov %rdi, %cr4
      retq

The masking "or" could be trivially bypassed by just calling to label "1"
instead of "native_write_cr4". (CFI will force calls to only be able to
call into native_write_cr4, but CFI and CET are uncommon currently.)

Signed-off-by: Kees Cook <keescook@chromium.org>
---
v3: improve WARN message (Sean Christopherson)
v2: fix think-o in cr4_pin recheck (Jann Horn)
---
 arch/x86/include/asm/special_insns.h | 13 +++++++++++++
 arch/x86/kernel/cpu/common.c         | 12 +++++++++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 43c029cdc3fe..fabda1400137 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -72,9 +72,22 @@ static inline unsigned long native_read_cr4(void)
 	return val;
 }
 
+extern volatile unsigned long cr4_pin;
+
 static inline void native_write_cr4(unsigned long val)
 {
+again:
+	val |= cr4_pin;
 	asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
+	/*
+	 * If the MOV above was used directly as a ROP gadget we can
+	 * notice the lack of pinned bits in "val" and start the function
+	 * from the beginning to gain the cr4_pin bits for sure.
+	 */
+	if (WARN_ONCE((val & cr4_pin) != cr4_pin,
+		      "Attempt to unpin cr4 bits: %lx, cr4 bypass attack?!",
+		      ~val & cr4_pin))
+		goto again;
 }
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cb28e98a0659..7e0ea4470f8e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -312,10 +312,16 @@ static __init int setup_disable_smep(char *arg)
 }
 __setup("nosmep", setup_disable_smep);
 
+volatile unsigned long cr4_pin __ro_after_init;
+EXPORT_SYMBOL_GPL(cr4_pin);
+
 static __always_inline void setup_smep(struct cpuinfo_x86 *c)
 {
-	if (cpu_has(c, X86_FEATURE_SMEP))
+	if (cpu_has(c, X86_FEATURE_SMEP)) {
+		if (!(cr4_pin & X86_CR4_SMEP))
+			cr4_pin |= X86_CR4_SMEP;
 		cr4_set_bits(X86_CR4_SMEP);
+	}
 }
 
 static __init int setup_disable_smap(char *arg)
@@ -334,6 +340,8 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 
 	if (cpu_has(c, X86_FEATURE_SMAP)) {
 #ifdef CONFIG_X86_SMAP
+		if (!(cr4_pin & X86_CR4_SMAP))
+			cr4_pin |= X86_CR4_SMAP;
 		cr4_set_bits(X86_CR4_SMAP);
 #else
 		cr4_clear_bits(X86_CR4_SMAP);
@@ -351,6 +359,8 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
 	if (!cpu_has(c, X86_FEATURE_UMIP))
 		goto out;
 
+	if (!(cr4_pin & X86_CR4_UMIP))
+		cr4_pin |= X86_CR4_UMIP;
 	cr4_set_bits(X86_CR4_UMIP);
 
 	pr_info_once("x86/cpu: User Mode Instruction Prevention (UMIP) activated\n");
-- 
2.17.1


-- 
Kees Cook

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [tip:x86/asm] x86/asm: Pin sensitive CR4 bits
  2019-02-21 18:09 [PATCH v3] x86/asm: Pin sensitive CR4 bits Kees Cook
@ 2019-02-23 10:16 ` tip-bot for Kees Cook
  0 siblings, 0 replies; 3+ messages in thread
From: tip-bot for Kees Cook @ 2019-02-23 10:16 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: kernel-hardening, keescook, mingo, jannh, hpa,
	sean.j.christopherson, tglx, linux-kernel, linux

Commit-ID:  679cd5ce3bc7755bfe29ec22fa8d2cebede0d7c0
Gitweb:     https://git.kernel.org/tip/679cd5ce3bc7755bfe29ec22fa8d2cebede0d7c0
Author:     Kees Cook <keescook@chromium.org>
AuthorDate: Thu, 21 Feb 2019 10:09:47 -0800
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Sat, 23 Feb 2019 11:07:11 +0100

x86/asm: Pin sensitive CR4 bits

Several recent exploits have used direct calls to the native_write_cr4()
function to disable SMEP and SMAP before then continuing their exploits
using userspace memory access. This pins bits of cr4 so that they cannot
be changed through a common function. This is not intended to be general
ROP protection (which would require CFI to defend against properly), but
rather a way to avoid trivial direct function calling (or CFI bypassing
via a matching function prototype) as seen in:

https://googleprojectzero.blogspot.com/2017/05/exploiting-linux-kernel-via-packet.html
(https://github.com/xairy/kernel-exploits/tree/master/CVE-2017-7308)

The goals of this change:
 - pin specific bits (SMEP, SMAP, and UMIP) when writing cr4.
 - avoid setting the bits too early (they must become pinned only after
   first being used).
 - pinning mask needs to be read-only during normal runtime.
 - pinning needs to be rechecked after set to avoid jumps into the middle
   of the function.

Using __ro_after_init on the mask is done so it can't be first disabled
with a malicious write. And since it becomes read-only, it must be avoided
writing to it later (hence the check for bits already having been set
instead of unconditionally writing to the mask).

The use of volatile is done to force the compiler to perform a full reload
of the mask after setting cr4 (to protect against just jumping into the
function past where the masking happens; it must check that the mask was
applied after the set). Due to how this function can be built by the
compiler (especially due to the removal of frame pointers), jumping into
the middle of the function frequently doesn't require stack manipulation to
construct a stack frame (there may only a retq without pops, which is
sufficient for use with exploits like timer overwrites mentioned above).

For example, without the recheck, the function may appear as:

   native_write_cr4:
      mov [pin], %rbx
      or  %rbx, %rdi
   1: mov %rdi, %cr4
      retq

The masking "or" could be trivially bypassed by just calling to label "1"
instead of "native_write_cr4". (CFI will force calls to only be able to
call into native_write_cr4, but CFI and CET are uncommon currently.)

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jann Horn <jannh@google.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Kernel Hardening <kernel-hardening@lists.openwall.com>
Link: https://lkml.kernel.org/r/20190221180947.GA24138@beast

---
 arch/x86/include/asm/special_insns.h | 13 +++++++++++++
 arch/x86/kernel/cpu/common.c         | 12 +++++++++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 43c029cdc3fe..fabda1400137 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -72,9 +72,22 @@ static inline unsigned long native_read_cr4(void)
 	return val;
 }
 
+extern volatile unsigned long cr4_pin;
+
 static inline void native_write_cr4(unsigned long val)
 {
+again:
+	val |= cr4_pin;
 	asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
+	/*
+	 * If the MOV above was used directly as a ROP gadget we can
+	 * notice the lack of pinned bits in "val" and start the function
+	 * from the beginning to gain the cr4_pin bits for sure.
+	 */
+	if (WARN_ONCE((val & cr4_pin) != cr4_pin,
+		      "Attempt to unpin cr4 bits: %lx, cr4 bypass attack?!",
+		      ~val & cr4_pin))
+		goto again;
 }
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cb28e98a0659..7e0ea4470f8e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -312,10 +312,16 @@ static __init int setup_disable_smep(char *arg)
 }
 __setup("nosmep", setup_disable_smep);
 
+volatile unsigned long cr4_pin __ro_after_init;
+EXPORT_SYMBOL_GPL(cr4_pin);
+
 static __always_inline void setup_smep(struct cpuinfo_x86 *c)
 {
-	if (cpu_has(c, X86_FEATURE_SMEP))
+	if (cpu_has(c, X86_FEATURE_SMEP)) {
+		if (!(cr4_pin & X86_CR4_SMEP))
+			cr4_pin |= X86_CR4_SMEP;
 		cr4_set_bits(X86_CR4_SMEP);
+	}
 }
 
 static __init int setup_disable_smap(char *arg)
@@ -334,6 +340,8 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 
 	if (cpu_has(c, X86_FEATURE_SMAP)) {
 #ifdef CONFIG_X86_SMAP
+		if (!(cr4_pin & X86_CR4_SMAP))
+			cr4_pin |= X86_CR4_SMAP;
 		cr4_set_bits(X86_CR4_SMAP);
 #else
 		cr4_clear_bits(X86_CR4_SMAP);
@@ -351,6 +359,8 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
 	if (!cpu_has(c, X86_FEATURE_UMIP))
 		goto out;
 
+	if (!(cr4_pin & X86_CR4_UMIP))
+		cr4_pin |= X86_CR4_UMIP;
 	cr4_set_bits(X86_CR4_UMIP);
 
 	pr_info_once("x86/cpu: User Mode Instruction Prevention (UMIP) activated\n");

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [tip:x86/asm] x86/asm: Pin sensitive CR4 bits
  2019-06-18  4:55 [PATCH v3 2/3] " Kees Cook
@ 2019-06-22  9:58 ` tip-bot for Kees Cook
  0 siblings, 0 replies; 3+ messages in thread
From: tip-bot for Kees Cook @ 2019-06-22  9:58 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: keescook, hpa, mingo, torvalds, dave.hansen, linux-kernel, tglx,
	peterz

Commit-ID:  873d50d58f67ef15d2777b5e7f7a5268bb1fbae2
Gitweb:     https://git.kernel.org/tip/873d50d58f67ef15d2777b5e7f7a5268bb1fbae2
Author:     Kees Cook <keescook@chromium.org>
AuthorDate: Mon, 17 Jun 2019 21:55:02 -0700
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Sat, 22 Jun 2019 11:55:22 +0200

x86/asm: Pin sensitive CR4 bits

Several recent exploits have used direct calls to the native_write_cr4()
function to disable SMEP and SMAP before then continuing their exploits
using userspace memory access.

Direct calls of this form can be mitigate by pinning bits of CR4 so that
they cannot be changed through a common function. This is not intended to
be a general ROP protection (which would require CFI to defend against
properly), but rather a way to avoid trivial direct function calling (or
CFI bypasses via a matching function prototype) as seen in:

https://googleprojectzero.blogspot.com/2017/05/exploiting-linux-kernel-via-packet.html
(https://github.com/xairy/kernel-exploits/tree/master/CVE-2017-7308)

The goals of this change:

 - Pin specific bits (SMEP, SMAP, and UMIP) when writing CR4.

 - Avoid setting the bits too early (they must become pinned only after
   CPU feature detection and selection has finished).

 - Pinning mask needs to be read-only during normal runtime.

 - Pinning needs to be checked after write to validate the cr4 state

Using __ro_after_init on the mask is done so it can't be first disabled
with a malicious write.

Since these bits are global state (once established by the boot CPU and
kernel boot parameters), they are safe to write to secondary CPUs before
those CPUs have finished feature detection. As such, the bits are set at
the first cr4 write, so that cr4 write bugs can be detected (instead of
silently papered over). This uses a few bytes less storage of a location we
don't have: read-only per-CPU data.

A check is performed after the register write because an attack could just
skip directly to the register write. Such a direct jump is possible because
of how this function may be built by the compiler (especially due to the
removal of frame pointers) where it doesn't add a stack frame (function
exit may only be a retq without pops) which is sufficient for trivial
exploitation like in the timer overwrites mentioned above).

The asm argument constraints gain the "+" modifier to convince the compiler
that it shouldn't make ordering assumptions about the arguments or memory,
and treat them as changed.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: https://lkml.kernel.org/r/20190618045503.39105-3-keescook@chromium.org

---
 arch/x86/include/asm/special_insns.h | 22 +++++++++++++++++++++-
 arch/x86/kernel/cpu/common.c         | 20 ++++++++++++++++++++
 arch/x86/kernel/smpboot.c            |  8 +++++++-
 3 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 0a3c4cab39db..c8c8143ab27b 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -6,6 +6,8 @@
 #ifdef __KERNEL__
 
 #include <asm/nops.h>
+#include <asm/processor-flags.h>
+#include <linux/jump_label.h>
 
 /*
  * Volatile isn't enough to prevent the compiler from reordering the
@@ -16,6 +18,10 @@
  */
 extern unsigned long __force_order;
 
+/* Starts false and gets enabled once CPU feature detection is done. */
+DECLARE_STATIC_KEY_FALSE(cr_pinning);
+extern unsigned long cr4_pinned_bits;
+
 static inline unsigned long native_read_cr0(void)
 {
 	unsigned long val;
@@ -74,7 +80,21 @@ static inline unsigned long native_read_cr4(void)
 
 static inline void native_write_cr4(unsigned long val)
 {
-	asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
+	unsigned long bits_missing = 0;
+
+set_register:
+	asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
+
+	if (static_branch_likely(&cr_pinning)) {
+		if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
+			bits_missing = ~val & cr4_pinned_bits;
+			val |= bits_missing;
+			goto set_register;
+		}
+		/* Warn after we've set the missing bits. */
+		WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
+			  bits_missing);
+	}
 }
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2c57fffebf9b..c578addfcf8a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -366,6 +366,25 @@ out:
 	cr4_clear_bits(X86_CR4_UMIP);
 }
 
+DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
+EXPORT_SYMBOL(cr_pinning);
+unsigned long cr4_pinned_bits __ro_after_init;
+EXPORT_SYMBOL(cr4_pinned_bits);
+
+/*
+ * Once CPU feature detection is finished (and boot params have been
+ * parsed), record any of the sensitive CR bits that are set, and
+ * enable CR pinning.
+ */
+static void __init setup_cr_pinning(void)
+{
+	unsigned long mask;
+
+	mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP);
+	cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask;
+	static_key_enable(&cr_pinning.key);
+}
+
 /*
  * Protection Keys are not available in 32-bit mode.
  */
@@ -1464,6 +1483,7 @@ void __init identify_boot_cpu(void)
 	enable_sep_cpu();
 #endif
 	cpu_detect_tlb(&boot_cpu_data);
+	setup_cr_pinning();
 }
 
 void identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 362dd8953f48..1af7a2d89419 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -205,13 +205,19 @@ static int enable_start_cpu0;
  */
 static void notrace start_secondary(void *unused)
 {
+	unsigned long cr4 = __read_cr4();
+
 	/*
 	 * Don't put *anything* except direct CPU state initialization
 	 * before cpu_init(), SMP booting is too fragile that we want to
 	 * limit the things done here to the most necessary things.
 	 */
 	if (boot_cpu_has(X86_FEATURE_PCID))
-		__write_cr4(__read_cr4() | X86_CR4_PCIDE);
+		cr4 |= X86_CR4_PCIDE;
+	if (static_branch_likely(&cr_pinning))
+		cr4 |= cr4_pinned_bits;
+
+	__write_cr4(cr4);
 
 #ifdef CONFIG_X86_32
 	/* switch away from the initial page table */

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-06-22  9:58 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-02-21 18:09 [PATCH v3] x86/asm: Pin sensitive CR4 bits Kees Cook
2019-02-23 10:16 ` [tip:x86/asm] " tip-bot for Kees Cook
  -- strict thread matches above, loose matches on Subject: below --
2019-06-18  4:55 [PATCH v3 2/3] " Kees Cook
2019-06-22  9:58 ` [tip:x86/asm] " tip-bot for Kees Cook

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox