public inbox for kernel-hardening@lists.openwall.com
 help / color / mirror / Atom feed
* [kernel-hardening] PAX_USERCOPY
@ 2011-06-28 17:21 Vasiliy Kulikov
  2011-07-14 16:13 ` [kernel-hardening] PAX_USERCOPY testing Vasiliy Kulikov
  0 siblings, 1 reply; 3+ messages in thread
From: Vasiliy Kulikov @ 2011-06-28 17:21 UTC (permalink / raw)
  To: kernel-hardening

Hi,

This is an initial version of PAX_USERCOPY port on Linux 3.0.

It lacks:

1) white list of allowed slabs (SLAB_USERCOPY).

2) current task termination and stack unrolling on a failed check.
   (I think it will be rejected in upstream anyway).

3) mem/kmem handling.  As they read/write into the raw memory, it should be
   copied to/from temporary buffer before userspace interaction.


The basic tests are passed.  BTW, a stack protector catches the smaller
overflows, however it is bypassable.

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 99ddd14..e78057b 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -10,6 +10,9 @@
 #include <asm/asm.h>
 #include <asm/page.h>
 
+extern bool slab_access_ok(const void *ptr, unsigned long len);
+extern bool stack_access_ok(const void *ptr, unsigned long len);
+
 #define VERIFY_READ 0
 #define VERIFY_WRITE 1
 
@@ -78,6 +81,55 @@
  */
 #define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
 
+#if defined(CONFIG_FRAME_POINTER)
+/* MUST be always_inline to correctly count stack frame numbers */
+inline static __attribute__((always_inline))
+bool arch_check_object_on_stack_frame(const void *stack,
+	     const void *stackend, const void *obj, unsigned long len)
+{
+	const void *frame = NULL;
+	const void *oldframe;
+
+	/*
+	  low ----------------------------------------------> high
+	  [saved bp][saved ip][args][local vars][saved bp][saved ip]
+			      ^----------------^
+			  allow copies only within here
+	*/
+
+#if 0
+	oldframe = __builtin_frame_address(1);
+	if (oldframe)
+		frame = __builtin_frame_address(2);
+#endif
+
+	/*
+	 * Get the stack_access_ok() caller frame.
+	 * __builtin_frame_address(0) returns stack_access_ok() frame
+	 * as arch_ is inline and stack_ is noinline.
+	 */
+	oldframe = __builtin_frame_address(0);
+	frame = __builtin_frame_address(1);
+
+	/* if obj + len extends past the last frame, this
+	   check won't pass and the next frame will be 0,
+	   causing us to bail out and correctly report
+	   the copy as invalid
+	*/
+	while (stack <= frame && frame < stackend) {
+		if (obj + len <= frame) {
+			if (obj >= oldframe + 2 * sizeof(void *))
+				return true;
+			return false;
+		}
+		oldframe = frame;
+		frame = *(const void * const *)frame;
+	}
+	return false;
+}
+#define arch_check_object_on_stack_frame arch_check_object_on_stack_frame
+#endif
+
 /*
  * The exception table consists of pairs of addresses: the first is the
  * address of an instruction that is allowed to fault, and the second is
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 566e803..9a9df71 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -61,6 +61,10 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 			return ret;
 		}
 	}
+
+	if (!slab_access_ok(from, n) || !stack_access_ok(from, n))
+		return n;
+
 	return __copy_to_user_ll(to, from, n);
 }
 
@@ -108,6 +112,10 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 			return ret;
 		}
 	}
+
+	if (!slab_access_ok(to, n) || !stack_access_ok(to, n))
+		return n;
+
 	return __copy_from_user_ll_nozero(to, from, n);
 }
 
@@ -152,6 +160,10 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
 			return ret;
 		}
 	}
+
+	if (!slab_access_ok(to, n) || !stack_access_ok(to, n))
+		return n;
+
 	return __copy_from_user_ll(to, from, n);
 }
 
@@ -174,6 +186,10 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to,
 			return ret;
 		}
 	}
+
+	if (!slab_access_ok(to, n) || !stack_access_ok(to, n))
+		return n;
+
 	return __copy_from_user_ll_nocache(to, from, n);
 }
 
@@ -181,7 +197,10 @@ static __always_inline unsigned long
 __copy_from_user_inatomic_nocache(void *to, const void __user *from,
 				  unsigned long n)
 {
-       return __copy_from_user_ll_nocache_nozero(to, from, n);
+	if (!slab_access_ok(to, n) || !stack_access_ok(to, n))
+		return n;
+
+	return __copy_from_user_ll_nocache_nozero(to, from, n);
 }
 
 unsigned long __must_check copy_to_user(void __user *to,
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 1c66d30..6753a24 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -50,8 +50,11 @@ static inline unsigned long __must_check copy_from_user(void *to,
 	int sz = __compiletime_object_size(to);
 
 	might_fault();
-	if (likely(sz == -1 || sz >= n))
+	if (likely(sz == -1 || sz >= n)) {
+		if (!slab_access_ok(to, n) || !stack_access_ok(to, n))
+			return n;
 		n = _copy_from_user(to, from, n);
+	}
 #ifdef CONFIG_DEBUG_VM
 	else
 		WARN(1, "Buffer overflow detected!\n");
@@ -64,6 +67,9 @@ int copy_to_user(void __user *dst, const void *src, unsigned size)
 {
 	might_fault();
 
+	if (!slab_access_ok(src, size) || !stack_access_ok(src, size))
+		return size;
+	
 	return _copy_to_user(dst, src, size);
 }
 
@@ -73,6 +79,10 @@ int __copy_from_user(void *dst, const void __user *src, unsigned size)
 	int ret = 0;
 
 	might_fault();
+
+	if (!slab_access_ok(dst, size) || !stack_access_ok(dst, size))
+		return size;
+
 	if (!__builtin_constant_p(size))
 		return copy_user_generic(dst, (__force void *)src, size);
 	switch (size) {
@@ -117,6 +127,10 @@ int __copy_to_user(void __user *dst, const void *src, unsigned size)
 	int ret = 0;
 
 	might_fault();
+
+	if (!slab_access_ok(src, size) || !stack_access_ok(src, size))
+		return size;
+
 	if (!__builtin_constant_p(size))
 		return copy_user_generic((__force void *)dst, src, size);
 	switch (size) {
@@ -221,12 +235,18 @@ __must_check unsigned long __clear_user(void __user *mem, unsigned long len);
 static __must_check __always_inline int
 __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size)
 {
+	if (!slab_access_ok(dst, size) || !stack_access_ok(dst, size))
+		return size;
+
 	return copy_user_generic(dst, (__force const void *)src, size);
 }
 
 static __must_check __always_inline int
 __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size)
 {
+	if (!slab_access_ok(src, size) || !stack_access_ok(src, size))
+		return size;
+
 	return copy_user_generic((__force void *)dst, src, size);
 }
 
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index b7c2849..699f45b 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -42,6 +42,10 @@ long
 __strncpy_from_user(char *dst, const char __user *src, long count)
 {
 	long res;
+
+	if (!slab_access_ok(dst, count) || !stack_access_ok(dst, count))
+		return count;
+
 	__do_strncpy_from_user(dst, src, count, res);
 	return res;
 }
diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h
index ac68c99..7124db6 100644
--- a/include/asm-generic/uaccess.h
+++ b/include/asm-generic/uaccess.h
@@ -50,6 +50,15 @@ static inline int __access_ok(unsigned long addr, unsigned long size)
 }
 #endif
 
+#ifndef arch_check_object_on_stack_frame
+static inline bool arch_check_object_on_stack_frame(const void *stack,
+	     const void *stackend, const void *obj, unsigned long len)
+{
+	return true;
+}
+#define arch_check_object_on_stack_frame arch_check_object_on_stack_frame
+#endif /* arch_check_object_on_stack_frame */
+
 /*
  * The exception table consists of pairs of addresses: the first is the
  * address of an instruction that is allowed to fault, and the second is
@@ -99,6 +108,9 @@ static inline __must_check long __copy_from_user(void *to,
 		}
 	}
 
+	if (!slab_access_ok(to, n) || !stack_access_ok(to, n))
+		return n;
+
 	memcpy(to, (const void __force *)from, n);
 	return 0;
 }
@@ -129,6 +141,9 @@ static inline __must_check long __copy_to_user(void __user *to,
 		}
 	}
 
+	if (!slab_access_ok(from, n) || !stack_access_ok(from, n))
+		return n;
+
 	memcpy((void __force *)to, from, n);
 	return 0;
 }
@@ -268,6 +283,10 @@ static inline long
 __strncpy_from_user(char *dst, const char __user *src, long count)
 {
 	char *tmp;
+
+	if (!slab_access_ok(dst, count) || !stack_access_ok(dst, count))
+		return count;
+
 	strncpy(dst, (const char __force *)src, count);
 	for (tmp = dst; *tmp && count > 0; tmp++, count--)
 		;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index ad4dd1c..8e564bb 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -333,4 +333,13 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
 
 void __init kmem_cache_init_late(void);
 
+/*
+ * slab_access_ok() checks whether ptr belongs to the slab cache and whether
+ * it fits in a single allocated area.
+ *
+ * Returns false only if ptr belongs to a slab cache and overflows allocated
+ * slab area.
+ */
+extern bool slab_access_ok(const void *ptr, unsigned long len);
+
 #endif	/* _LINUX_SLAB_H */
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 5ca0951..336223b 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -108,4 +108,5 @@ extern long __probe_kernel_read(void *dst, const void *src, size_t size);
 extern long notrace probe_kernel_write(void *dst, const void *src, size_t size);
 extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size);
 
+
 #endif		/* __LINUX_UACCESS_H__ */
diff --git a/mm/maccess.c b/mm/maccess.c
index 4cee182..0b8f3eb 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -3,6 +3,7 @@
  */
 #include <linux/module.h>
 #include <linux/mm.h>
+#include <linux/sched.h>
 #include <linux/uaccess.h>
 
 /**
@@ -60,3 +61,43 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
 	return ret ? -EFAULT : 0;
 }
 EXPORT_SYMBOL_GPL(probe_kernel_write);
+
+/*
+ * stack_access_ok() checks whether object is on the stack and
+ * whether it fits in a single stack frame (in case arch allows
+ * to learn this information).
+ *
+ * Returns true in cases:
+ * a) object is not a stack object at all
+ * b) object is located on the stack and fits in a single frame
+ *
+ * MUST be noinline not to confuse arch_check_object_on_stack_frame.
+ */
+bool noinline stack_access_ok(const void *obj, unsigned long len)
+{
+	const void * const stack = task_stack_page(current);
+	const void * const stackend = stack + THREAD_SIZE;
+	bool rc = false;
+
+	/* Does obj+len overflow vm space? */
+	if (unlikely(obj + len < obj))
+		goto exit;
+
+	/* Does [obj; obj+len) at least touch our stack? */
+	if (unlikely(obj + len <= stack || stackend <= obj)) {
+		rc = true;
+		goto exit;
+	}
+
+	/* Does [obj; obj+len) overflow/underflow the stack? */
+	if (unlikely(obj < stack || stackend < obj + len))
+		goto exit;
+
+	rc = arch_check_object_on_stack_frame(stack, stackend, obj, len);
+
+exit:
+	if (!rc)
+		pr_err("stack_access_ok failed (ptr = %p, len = %lu)\n", obj, len);
+	return rc;
+}
+EXPORT_SYMBOL(stack_access_ok);
diff --git a/mm/slab.c b/mm/slab.c
index d96e223..a10ae39 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3843,6 +3843,35 @@ unsigned int kmem_cache_size(struct kmem_cache *cachep)
 }
 EXPORT_SYMBOL(kmem_cache_size);
 
+bool slab_access_ok(const void *ptr, unsigned long len)
+{
+	struct page *page;
+	struct kmem_cache *cachep = NULL;
+	struct slab *slabp;
+	unsigned int objnr;
+	unsigned long offset;
+
+	if (!len)
+		return true;
+	if (!virt_addr_valid(ptr))
+		return true;
+	page = virt_to_head_page(ptr);
+	if (!PageSlab(page))
+		return true;
+
+	cachep = page_get_cache(page);
+	slabp = page_get_slab(page);
+	objnr = obj_to_index(cachep, slabp, (void *)ptr);
+	BUG_ON(objnr >= cachep->num);
+	offset = ptr - index_to_obj(cachep, slabp, objnr) - obj_offset(cachep);
+	if (offset <= obj_size(cachep) && len <= obj_size(cachep) - offset)
+		return true;
+
+	pr_err("slab_access_ok failed (addr %p, len %lu)\n", ptr, len);
+	return false;
+}
+EXPORT_SYMBOL(slab_access_ok);
+
 /*
  * This initializes kmem_list3 or resizes various caches for all nodes.
  */
diff --git a/mm/slob.c b/mm/slob.c
index 46e0aee..f02a43b 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -666,6 +666,64 @@ unsigned int kmem_cache_size(struct kmem_cache *c)
 }
 EXPORT_SYMBOL(kmem_cache_size);
 
+/*
+ * slab_access_ok() checks whether object is on the slab cache and whether
+ * it fits in a single allocated area.
+ *
+ * Returns true in cases:
+ * a) object is not a slab object
+ * b) object is located in the slab cache and fully fits into the allocated area
+ */
+bool slab_access_ok(const void *ptr, unsigned long len)
+{
+	struct slob_page *sp;
+	const slob_t *pfree;
+	const void *base;
+
+	if (!len)
+		return true;
+	if (!virt_addr_valid(ptr))
+		return true;
+	sp = slob_page(ptr);
+	if (!PageSlab((struct page *)sp))
+		return true;
+
+	if (sp->size) {
+		base = page_address(&sp->page);
+		if (base <= ptr && len <= sp->size - (ptr - base))
+			return true;
+		return false;
+	}
+
+	/* some tricky double walking to find the chunk */
+	base = (void *)((unsigned long)ptr & PAGE_MASK);
+	pfree = sp->free;
+
+	while (!slob_last(pfree) && (void *)free <= ptr) {
+		base = pfree + slob_units(free);
+		pfree = slob_next(free);
+	}
+
+	while (base < (void *)pfree) {
+		slobidx_t m = ((slob_t *)base)[0].units,
+			  align = ((slob_t *)base)[1].units;
+		int size = SLOB_UNIT * SLOB_UNITS(m + align);
+		int offset;
+
+		if (ptr < base + align)
+			return false;
+
+		offset = ptr - base - align;
+		if (offset < m) {
+			if (len <= m - offset)
+				return true;
+			return false;
+		}
+		base += size;
+	}
+}
+EXPORT_SYMBOL(slab_access_ok);
+
 int kmem_cache_shrink(struct kmem_cache *d)
 {
 	return 0;
diff --git a/mm/slub.c b/mm/slub.c
index 35f351f..78af08f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2623,6 +2623,38 @@ unsigned int kmem_cache_size(struct kmem_cache *s)
 }
 EXPORT_SYMBOL(kmem_cache_size);
 
+/*
+ * slab_access_ok() checks whether object is on the slab cache and whether
+ * it fits in a single allocated area.
+ *
+ * Returns true in cases:
+ * a) object is not a slab object
+ * b) object is located in the slab cache and fully fits into the allocated area
+ */
+bool slab_access_ok(const void *ptr, unsigned long n)
+{
+	struct page *page;
+	struct kmem_cache *s = NULL;
+	unsigned long offset;
+
+	if (!n)
+		return true;
+	if (!virt_addr_valid(ptr))
+		return true;
+	page = virt_to_head_page(ptr);
+	if (!PageSlab(page))
+		return true;
+
+	s = page->slab;
+	offset = (ptr - page_address(page)) % s->size;
+	if (offset <= s->objsize && n <= s->objsize - offset)
+		return true;
+
+	pr_err("slab_access_ok failed (addr %p, len %lu)\n", ptr, n);
+	return false;
+}
+EXPORT_SYMBOL(slab_access_ok);
+
 static void list_slab_objects(struct kmem_cache *s, struct page *page,
 							const char *text)
 {
---

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [kernel-hardening] PAX_USERCOPY testing
  2011-06-28 17:21 [kernel-hardening] PAX_USERCOPY Vasiliy Kulikov
@ 2011-07-14 16:13 ` Vasiliy Kulikov
  2011-07-18 18:46   ` [kernel-hardening] " Vasiliy Kulikov
  0 siblings, 1 reply; 3+ messages in thread
From: Vasiliy Kulikov @ 2011-07-14 16:13 UTC (permalink / raw)
  To: kernel-hardening

Hi,

This is a version of PAX_USERCOPY I want to send to LKML.  However, the
patch without performance testing would be incomplete.  I have some
syscalls measurements, the worst case is gethostname() with 6-7% penalty
on x86-64 (Intel Core 2 Duo 2Ghz).

If someone is able to test and measure the penalty, especially on x86-32
(I have no such machine available for testing now, unfortunately), it
would be great.  Any comments about what workloads would suffer
more/less than others are appreciated.

BTW, Tested-by: tag is a Good Thing (tm) when proposing a patch ;)

Changes since v1:
* moved all checks to CONFIG_DEBUG_RUNTIME_USER_COPY_CHECKS,
* added fast check if length and/or object length is known at the
  compile time, 
* renamed _nocheck -> _unchecked as there is already put_user_nocheck
  stuff,
* removed some odd tests in rare pathes or when an overflow is unlikely.

The patch is rebased against 3.0-rc7.

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 99ddd14..96bad6c 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -9,6 +9,7 @@
 #include <linux/string.h>
 #include <asm/asm.h>
 #include <asm/page.h>
+#include <linux/uaccess-check.h>
 
 #define VERIFY_READ 0
 #define VERIFY_WRITE 1
@@ -78,6 +79,54 @@
  */
 #define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
 
+#if defined(CONFIG_FRAME_POINTER)
+/*
+ * MUST be always_inline to correctly count stack frame numbers.
+ *
+ * low ----------------------------------------------> high
+ * [saved bp][saved ip][args][local vars][saved bp][saved ip]
+ *		       ^----------------^
+ *		  allow copies only within here
+*/
+#undef arch_check_object_on_stack_frame
+inline static __attribute__((always_inline))
+bool arch_check_object_on_stack_frame(const void *stack,
+	     const void *stackend, const void *obj, unsigned long len)
+{
+	const void *frame = NULL;
+	const void *oldframe;
+
+	/*
+	 * Get the kernel_access_ok() caller frame.
+	 * __builtin_frame_address(0) returns kernel_access_ok() frame
+	 * as arch_ and stack_ are inline and kernel_ is noinline.
+	 */
+	oldframe = __builtin_frame_address(0);
+	if (oldframe)
+		frame = __builtin_frame_address(1);
+
+	while (stack <= frame && frame < stackend) {
+		/*
+		 * If obj + len extends past the last frame, this
+		 * check won't pass and the next frame will be 0,
+		 * causing us to bail out and correctly report
+		 * the copy as invalid.
+		 */
+		if (obj + len <= frame) {
+			/* EBP + EIP */
+			int protected_regs_size = 2*sizeof(void *);
+
+			if (obj >= oldframe + protected_regs_size)
+				return true;
+			return false;
+		}
+		oldframe = frame;
+		frame = *(const void * const *)frame;
+	}
+	return false;
+}
+#endif
+
 /*
  * The exception table consists of pairs of addresses: the first is the
  * address of an instruction that is allowed to fault, and the second is
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 566e803..d48fa9c 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -82,6 +82,8 @@ static __always_inline unsigned long __must_check
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	might_fault();
+	if (!kernel_access_ok(from, n))
+		return n;
 	return __copy_to_user_inatomic(to, from, n);
 }
 
@@ -152,6 +154,8 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
 			return ret;
 		}
 	}
+	if (!kernel_access_ok(to, n))
+		return n;
 	return __copy_from_user_ll(to, from, n);
 }
 
@@ -205,11 +209,30 @@ static inline unsigned long __must_check copy_from_user(void *to,
 {
 	int sz = __compiletime_object_size(to);
 
-	if (likely(sz == -1 || sz >= n))
-		n = _copy_from_user(to, from, n);
-	else
+	if (likely(sz == -1 || sz >= n)) {
+		if (kernel_access_ok(to, n))
+			n = _copy_from_user(to, from, n);
+	} else {
 		copy_from_user_overflow();
+	}
+
+	return n;
+}
+
+#undef copy_from_user_uncheched
+static inline unsigned long __must_check copy_from_user_uncheched(void *to,
+					  const void __user *from,
+					  unsigned long n)
+{
+	return _copy_from_user(to, from, n);
+}
 
+#undef copy_to_user_uncheched
+static inline unsigned long copy_to_user_unchecked(void __user *to,
+     const void *from, unsigned long n)
+{
+	if (access_ok(VERIFY_WRITE, to, n))
+		n = __copy_to_user(to, from, n);
 	return n;
 }
 
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 1c66d30..10c5a0a 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -50,8 +50,10 @@ static inline unsigned long __must_check copy_from_user(void *to,
 	int sz = __compiletime_object_size(to);
 
 	might_fault();
-	if (likely(sz == -1 || sz >= n))
-		n = _copy_from_user(to, from, n);
+	if (likely(sz == -1 || sz >= n)) {
+		if (kernel_access_ok(to, n))
+			n = _copy_from_user(to, from, n);
+	}
 #ifdef CONFIG_DEBUG_VM
 	else
 		WARN(1, "Buffer overflow detected!\n");
@@ -59,11 +61,33 @@ static inline unsigned long __must_check copy_from_user(void *to,
 	return n;
 }
 
+#undef copy_from_user_unchecked
+static inline unsigned long __must_check copy_from_user_unchecked(void *to,
+					  const void __user *from,
+					  unsigned long n)
+{
+	might_fault();
+
+	return _copy_from_user(to, from, n);
+}
+
 static __always_inline __must_check
 int copy_to_user(void __user *dst, const void *src, unsigned size)
 {
 	might_fault();
 
+	if (!kernel_access_ok(src, size))
+		return size;
+
+	return _copy_to_user(dst, src, size);
+}
+
+#undef copy_to_user_unchecked
+static __always_inline __must_check
+int copy_to_user_unchecked(void __user *dst, const void *src, unsigned size)
+{
+	might_fault();
+
 	return _copy_to_user(dst, src, size);
 }
 
@@ -73,8 +97,12 @@ int __copy_from_user(void *dst, const void __user *src, unsigned size)
 	int ret = 0;
 
 	might_fault();
+	if (!kernel_access_ok(dst, size))
+		return size;
+
 	if (!__builtin_constant_p(size))
 		return copy_user_generic(dst, (__force void *)src, size);
+
 	switch (size) {
 	case 1:__get_user_asm(*(u8 *)dst, (u8 __user *)src,
 			      ret, "b", "b", "=q", 1);
@@ -117,8 +145,12 @@ int __copy_to_user(void __user *dst, const void *src, unsigned size)
 	int ret = 0;
 
 	might_fault();
+	if (!kernel_access_ok(dst, size))
+		return size;
+
 	if (!__builtin_constant_p(size))
 		return copy_user_generic((__force void *)dst, src, size);
+
 	switch (size) {
 	case 1:__put_user_asm(*(u8 *)src, (u8 __user *)dst,
 			      ret, "b", "b", "iq", 1);
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index e218d5d..e136309 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -851,7 +851,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
 unsigned long
 copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	if (access_ok(VERIFY_WRITE, to, n))
+	if (access_ok(VERIFY_WRITE, to, n) && kernel_access_ok(from, n))
 		n = __copy_to_user(to, from, n);
 	return n;
 }
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 8fc04b4..77c93d4 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -132,7 +132,7 @@ static ssize_t read_mem(struct file *file, char __user *buf,
 		if (!ptr)
 			return -EFAULT;
 
-		remaining = copy_to_user(buf, ptr, sz);
+		remaining = copy_to_user_unchecked(buf, ptr, sz);
 		unxlate_dev_mem_ptr(p, ptr);
 		if (remaining)
 			return -EFAULT;
@@ -190,7 +190,7 @@ static ssize_t write_mem(struct file *file, const char __user *buf,
 			return -EFAULT;
 		}
 
-		copied = copy_from_user(ptr, buf, sz);
+		copied = copy_from_user_unchecked(ptr, buf, sz);
 		unxlate_dev_mem_ptr(p, ptr);
 		if (copied) {
 			written += sz - copied;
@@ -428,7 +428,7 @@ static ssize_t read_kmem(struct file *file, char __user *buf,
 			 */
 			kbuf = xlate_dev_kmem_ptr((char *)p);
 
-			if (copy_to_user(buf, kbuf, sz))
+			if (copy_to_user_unchecked(buf, kbuf, sz))
 				return -EFAULT;
 			buf += sz;
 			p += sz;
@@ -498,7 +498,7 @@ static ssize_t do_write_kmem(unsigned long p, const char __user *buf,
 		 */
 		ptr = xlate_dev_kmem_ptr((char *)p);
 
-		copied = copy_from_user(ptr, buf, sz);
+		copied = copy_from_user_unchecked(ptr, buf, sz);
 		if (copied) {
 			written += sz - copied;
 			if (written)
diff --git a/include/linux/uaccess-check.h b/include/linux/uaccess-check.h
new file mode 100644
index 0000000..5592a3e
--- /dev/null
+++ b/include/linux/uaccess-check.h
@@ -0,0 +1,37 @@
+#ifndef __LINUX_UACCESS_CHECK_H__
+#define __LINUX_UACCESS_CHECK_H__
+
+#ifdef CONFIG_DEBUG_RUNTIME_USER_COPY_CHECKS
+extern bool __kernel_access_ok(const void *ptr, unsigned long len);
+static inline bool kernel_access_ok(const void *ptr, unsigned long len)
+{
+	size_t sz = __compiletime_object_size(ptr);
+
+	if (sz != (size_t)-1) {
+		if (sz >= len)
+			return true;
+		pr_err("kernel_access_ok(static) failed, ptr = %p, length = %lu\n",
+			ptr, len);
+		dump_stack();
+		return false;
+	}
+
+	/* We care about "len" overflows only. */
+	if (__builtin_constant_p(len))
+		return true;
+
+	return __kernel_access_ok(ptr, len);
+}
+#else
+static inline bool kernel_access_ok(const void *ptr, unsigned long len)
+{
+	return true;
+}
+#endif /* CONFIG_DEBUG_RUNTIME_USER_COPY_CHECKS */
+
+#define copy_to_user_unchecked copy_to_user
+#define copy_from_user_unchecked copy_from_user
+
+#define arch_check_object_on_stack_frame(s,se,o,len) true
+
+#endif		/* __LINUX_UACCESS_CHECK_H__ */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index dd373c8..cb4a62a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -679,6 +679,27 @@ config DEBUG_STACK_USAGE
 
 	  This option will slow down process creation somewhat.
 
+config DEBUG_RUNTIME_USER_COPY_CHECKS
+	bool "Runtime copy size checks"
+	default n
+	depends on DEBUG_KERNEL && X86
+	---help---
+	  Enabling this option adds additional runtime checks into copy_from_user()
+	  and similar functions.
+
+	  Specifically, this checking prevents information leaking from the kernel
+	  heap/stack during kernel to userland copies (if the kernel object is
+	  otherwise fully initialized, including padding bytes) and prevents kernel
+	  heap/stack overflows during userland to kernel copies.
+
+          If frame pointers are enabled on x86, this option will also restrict
+          copies into and out of the kernel stack to local variables within a
+          single frame.
+
+	  The option has a performance drawback (up to 7% on small syscalls like
+	  gethostname), so don't turn it on unless you care enough about possible
+	  exploitation of buffer overflows.
+
 config DEBUG_KOBJECT
 	bool "kobject debugging"
 	depends on DEBUG_KERNEL
diff --git a/mm/maccess.c b/mm/maccess.c
index 4cee182..af450b8 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -3,8 +3,11 @@
  */
 #include <linux/module.h>
 #include <linux/mm.h>
+#include <linux/sched.h>
 #include <linux/uaccess.h>
 
+extern bool slab_access_ok(const void *ptr, unsigned long len);
+
 /**
  * probe_kernel_read(): safely attempt to read from a location
  * @dst: pointer to the buffer that shall take the data
@@ -60,3 +63,56 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
 	return ret ? -EFAULT : 0;
 }
 EXPORT_SYMBOL_GPL(probe_kernel_write);
+
+#ifdef CONFIG_DEBUG_RUNTIME_USER_COPY_CHECKS
+/*
+ * stack_access_ok() checks whether object is on the stack and
+ * whether it fits in a single stack frame (in case arch allows
+ * to learn this information).
+ *
+ * Returns true in cases:
+ * a) object is not a stack object at all
+ * b) object is located on the stack and fits in a single frame
+ *
+ * MUST be inline not to confuse arch_check_object_on_stack_frame.
+ */
+inline static bool __attribute__((always_inline))
+stack_access_ok(const void *obj, unsigned long len)
+{
+	const void * const stack = task_stack_page(current);
+	const void * const stackend = stack + THREAD_SIZE;
+
+	/* Does obj+len overflow vm space? */
+	if (unlikely(obj + len < obj))
+		return false;
+
+	/* Does [obj; obj+len) at least touch our stack? */
+	if (unlikely(obj + len <= stack || stackend <= obj))
+		return true;
+
+	/* Does [obj; obj+len) overflow/underflow the stack? */
+	if (unlikely(obj < stack || stackend < obj + len))
+		return false;
+
+	return arch_check_object_on_stack_frame(stack, stackend, obj, len);
+}
+
+noinline bool __kernel_access_ok(const void *ptr, unsigned long len)
+{
+	if (!slab_access_ok(ptr, len)) {
+		pr_err("slab_access_ok failed, ptr = %p, length = %lu\n",
+			ptr, len);
+		dump_stack();
+		return false;
+	}
+	if (!stack_access_ok(ptr, len)) {
+		pr_err("stack_access_ok failed, ptr = %p, length = %lu\n",
+			ptr, len);
+		dump_stack();
+		return false;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL(__kernel_access_ok);
+#endif /* CONFIG_DEBUG_RUNTIME_USER_COPY_CHECKS */
diff --git a/mm/slab.c b/mm/slab.c
index d96e223..60e062c 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3844,6 +3844,40 @@ unsigned int kmem_cache_size(struct kmem_cache *cachep)
 EXPORT_SYMBOL(kmem_cache_size);
 
 /*
+ * Returns false if and only if [ptr; ptr+len) touches the slab,
+ * but breaks objects boundaries.  It doesn't check whether the
+ * accessed object is actually allocated.
+ */
+bool slab_access_ok(const void *ptr, unsigned long len)
+{
+	struct page *page;
+	struct kmem_cache *cachep = NULL;
+	struct slab *slabp;
+	unsigned int objnr;
+	unsigned long offset;
+
+	if (!len)
+		return true;
+	if (!virt_addr_valid(ptr))
+		return true;
+	page = virt_to_head_page(ptr);
+	if (!PageSlab(page))
+		return true;
+
+	cachep = page_get_cache(page);
+	slabp = page_get_slab(page);
+	objnr = obj_to_index(cachep, slabp, (void *)ptr);
+	BUG_ON(objnr >= cachep->num);
+	offset = (const char *)ptr - obj_offset(cachep) -
+	    (const char *)index_to_obj(cachep, slabp, objnr);
+	if (offset <= obj_size(cachep) && len <= obj_size(cachep) - offset)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(slab_access_ok);
+
+/*
  * This initializes kmem_list3 or resizes various caches for all nodes.
  */
 static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
diff --git a/mm/slob.c b/mm/slob.c
index 46e0aee..2d9bb2b 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -666,6 +666,16 @@ unsigned int kmem_cache_size(struct kmem_cache *c)
 }
 EXPORT_SYMBOL(kmem_cache_size);
 
+bool slab_access_ok(const void *ptr, unsigned long len)
+{
+	/*
+	 * TODO: is it worth checking?  We have to gain a lock and
+	 * walk through all chunks.
+	 */
+	return true;
+}
+EXPORT_SYMBOL(slab_access_ok);
+
 int kmem_cache_shrink(struct kmem_cache *d)
 {
 	return 0;
diff --git a/mm/slub.c b/mm/slub.c
index 35f351f..be64e77 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2623,6 +2623,34 @@ unsigned int kmem_cache_size(struct kmem_cache *s)
 }
 EXPORT_SYMBOL(kmem_cache_size);
 
+/*
+ * Returns false if and only if [ptr; ptr+len) touches the slab,
+ * but breaks objects boundaries.  It doesn't check whether the
+ * accessed object is actually allocated.
+ */
+bool slab_access_ok(const void *ptr, unsigned long len)
+{
+	struct page *page;
+	struct kmem_cache *s = NULL;
+	unsigned long offset;
+
+	if (len == 0)
+		return true;
+	if (!virt_addr_valid(ptr))
+		return true;
+	page = virt_to_head_page(ptr);
+	if (!PageSlab(page))
+		return true;
+
+	s = page->slab;
+	offset = ((const char *)ptr - (const char *)page_address(page)) % s->size;
+	if (offset <= s->objsize && len <= s->objsize - offset)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(slab_access_ok);
+
 static void list_slab_objects(struct kmem_cache *s, struct page *page,
 							const char *text)
 {
-- 
Vasiliy Kulikov
http://www.openwall.com - bringing security into open computing environments

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [kernel-hardening] Re: PAX_USERCOPY testing
  2011-07-14 16:13 ` [kernel-hardening] PAX_USERCOPY testing Vasiliy Kulikov
@ 2011-07-18 18:46   ` Vasiliy Kulikov
  0 siblings, 0 replies; 3+ messages in thread
From: Vasiliy Kulikov @ 2011-07-18 18:46 UTC (permalink / raw)
  To: kernel-hardening

Hi,

On Thu, Jul 14, 2011 at 20:13 +0400, Vasiliy Kulikov wrote:
> This is a version of PAX_USERCOPY I want to send to LKML.  However, the
> patch without performance testing would be incomplete.  I have some
> syscalls measurements, the worst case is gethostname() with 6-7% penalty
> on x86-64 (Intel Core 2 Duo 2Ghz).

The previous benchmarks are totally wrong.  I didn't disable background
daemons, which created significant error, and used on-demand CPU
governor, which finally smashed results.

Measurements with maximum governor and with minimum processes showed
great "boost":

The worst case of tiny syscalls, gethostname(), has 0,9% slowdown.

Real workflows - find /usr, git log -Sredirect, make in kernel tree,
files copying - are not affected by the feature at all (the slowdown is
so low that I cannot effectively measure it, so it is less than 0,1%).

So, I've sent RFCv2 to LKML as-is :-)

I suppose no sysctl to control the behaviour is needed - the permormance
is acceptable for real workflows IMO.

Thanks,

-- 
Vasiliy Kulikov
http://www.openwall.com - bringing security into open computing environments

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2011-07-18 18:46 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-06-28 17:21 [kernel-hardening] PAX_USERCOPY Vasiliy Kulikov
2011-07-14 16:13 ` [kernel-hardening] PAX_USERCOPY testing Vasiliy Kulikov
2011-07-18 18:46   ` [kernel-hardening] " Vasiliy Kulikov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox