stable.vger.kernel.org archive mirror
* [PATCH v3 1/3] x86/ldt: Make modify_ldt synchronous
       [not found] <cover.1437592883.git.luto@kernel.org>
@ 2015-07-22 19:23 ` Andy Lutomirski
  2015-07-22 22:20   ` Boris Ostrovsky
                     ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Andy Lutomirski @ 2015-07-22 19:23 UTC (permalink / raw)
  To: Peter Zijlstra, Steven Rostedt
  Cc: security@kernel.org, X86 ML, Borislav Petkov, Sasha Levin,
	linux-kernel, Konrad Rzeszutek Wilk, Boris Ostrovsky,
	Andrew Cooper, Jan Beulich, xen-devel, Andy Lutomirski, stable

modify_ldt has questionable locking and does not synchronize
threads.  Improve it: redesign the locking and synchronize all
threads' LDTs using an IPI on all modifications.

This will dramatically slow down modify_ldt in multithreaded
programs, but there shouldn't be any multithreaded programs that
care about modify_ldt's performance in the first place.

Cc: stable@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/include/asm/desc.h        |  15 ---
 arch/x86/include/asm/mmu.h         |   3 +-
 arch/x86/include/asm/mmu_context.h |  53 +++++++-
 arch/x86/kernel/cpu/common.c       |   4 +-
 arch/x86/kernel/cpu/perf_event.c   |  12 +-
 arch/x86/kernel/ldt.c              | 258 ++++++++++++++++++++-----------------
 arch/x86/kernel/process_64.c       |   4 +-
 arch/x86/kernel/step.c             |   6 +-
 arch/x86/power/cpu.c               |   3 +-
 9 files changed, 209 insertions(+), 149 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index a0bf89fd2647..4e10d73cf018 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -280,21 +280,6 @@ static inline void clear_LDT(void)
 	set_ldt(NULL, 0);
 }
 
-/*
- * load one particular LDT into the current CPU
- */
-static inline void load_LDT_nolock(mm_context_t *pc)
-{
-	set_ldt(pc->ldt, pc->size);
-}
-
-static inline void load_LDT(mm_context_t *pc)
-{
-	preempt_disable();
-	load_LDT_nolock(pc);
-	preempt_enable();
-}
-
 static inline unsigned long get_desc_base(const struct desc_struct *desc)
 {
 	return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 09b9620a73b4..364d27481a52 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -9,8 +9,7 @@
  * we put the segment information here.
  */
 typedef struct {
-	void *ldt;
-	int size;
+	struct ldt_struct *ldt;
 
 #ifdef CONFIG_X86_64
 	/* True if mm supports a task running in 32 bit compatibility mode. */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 804a3a6030ca..3fcff70c398e 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -34,6 +34,49 @@ static inline void load_mm_cr4(struct mm_struct *mm) {}
 #endif
 
 /*
+ * ldt_structs can be allocated, used, and freed, but they are never
+ * modified while live.
+ */
+struct ldt_struct {
+	/*
+	 * Xen requires page-aligned LDTs with special permissions.  This is
+	 * needed to prevent us from installing evil descriptors such as
+	 * call gates.  On native, we could merge the ldt_struct and LDT
+	 * allocations, but it's not worth trying to optimize.
+	 */
+	struct desc_struct *entries;
+	int size;
+};
+
+static inline void load_mm_ldt(struct mm_struct *mm)
+{
+	struct ldt_struct *ldt;
+	DEBUG_LOCKS_WARN_ON(!irqs_disabled());
+
+	/* lockless_dereference synchronizes with smp_store_release */
+	ldt = lockless_dereference(mm->context.ldt);
+
+	/*
+	 * Any change to mm->context.ldt is followed by an IPI to all
+	 * CPUs with the mm active.  The LDT will not be freed until
+	 * after the IPI is handled by all such CPUs.  This means that,
+	 * if the ldt_struct changes before we return, the values we see
+	 * will be safe, and the new values will be loaded before we run
+	 * any user code.
+	 *
+	 * NB: don't try to convert this to use RCU without extreme care.
+	 * We would still need IRQs off, because we don't want to change
+	 * the local LDT after an IPI loaded a newer value than the one
+	 * that we can see.
+	 */
+
+	if (unlikely(ldt))
+		set_ldt(ldt->entries, ldt->size);
+	else
+		clear_LDT();
+}
+
+/*
  * Used for LDT copy/destruction.
  */
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
@@ -78,12 +121,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		 * was called and then modify_ldt changed
 		 * prev->context.ldt but suppressed an IPI to this CPU.
 		 * In this case, prev->context.ldt != NULL, because we
-		 * never free an LDT while the mm still exists.  That
-		 * means that next->context.ldt != prev->context.ldt,
-		 * because mms never share an LDT.
+		 * never set context.ldt to NULL while the mm still
+		 * exists.  That means that next->context.ldt !=
+		 * prev->context.ldt, because mms never share an LDT.
 		 */
 		if (unlikely(prev->context.ldt != next->context.ldt))
-			load_LDT_nolock(&next->context);
+			load_mm_ldt(next);
 	}
 #ifdef CONFIG_SMP
 	  else {
@@ -106,7 +149,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			load_cr3(next->pgd);
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 			load_mm_cr4(next);
-			load_LDT_nolock(&next->context);
+			load_mm_ldt(next);
 		}
 	}
 #endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 922c5e0cea4c..cb9e5df42dd2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1410,7 +1410,7 @@ void cpu_init(void)
 	load_sp0(t, &current->thread);
 	set_tss_desc(cpu, t);
 	load_TR_desc();
-	load_LDT(&init_mm.context);
+	load_mm_ldt(&init_mm);
 
 	clear_all_debug_regs();
 	dbg_restore_debug_regs();
@@ -1459,7 +1459,7 @@ void cpu_init(void)
 	load_sp0(t, thread);
 	set_tss_desc(cpu, t);
 	load_TR_desc();
-	load_LDT(&init_mm.context);
+	load_mm_ldt(&init_mm);
 
 	t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3658de47900f..9469dfa55607 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2179,21 +2179,25 @@ static unsigned long get_segment_base(unsigned int segment)
 	int idx = segment >> 3;
 
 	if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
+		struct ldt_struct *ldt;
+
 		if (idx > LDT_ENTRIES)
 			return 0;
 
-		if (idx > current->active_mm->context.size)
+		/* IRQs are off, so this synchronizes with smp_store_release */
+		ldt = lockless_dereference(current->active_mm->context.ldt);
+		if (!ldt || idx > ldt->size)
 			return 0;
 
-		desc = current->active_mm->context.ldt;
+		desc = &ldt->entries[idx];
 	} else {
 		if (idx > GDT_ENTRIES)
 			return 0;
 
-		desc = raw_cpu_ptr(gdt_page.gdt);
+		desc = raw_cpu_ptr(gdt_page.gdt) + idx;
 	}
 
-	return get_desc_base(desc + idx);
+	return get_desc_base(desc);
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index c37886d759cc..3ae308029dee 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,6 +12,7 @@
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
+#include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
 
@@ -20,82 +21,83 @@
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
 
-#ifdef CONFIG_SMP
 static void flush_ldt(void *current_mm)
 {
-	if (current->active_mm == current_mm)
-		load_LDT(&current->active_mm->context);
+	if (current->active_mm == current_mm) {
+		/* context.lock is held for us, so we don't need any locking. */
+		mm_context_t *pc = &current->active_mm->context;
+		set_ldt(pc->ldt->entries, pc->ldt->size);
+	}
 }
-#endif
 
-static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
+/* The caller must call finalize_ldt_struct on the result. */
+static struct ldt_struct *alloc_ldt_struct(int size)
 {
-	void *oldldt, *newldt;
-	int oldsize;
+	struct ldt_struct *new_ldt;
+	int alloc_size;
 
-	if (mincount <= pc->size)
-		return 0;
-	oldsize = pc->size;
-	mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
-			(~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
-	if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
-		newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
+	if (size > LDT_ENTRIES)
+		return NULL;
+
+	new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
+	if (!new_ldt)
+		return NULL;
+
+	BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
+	alloc_size = size * LDT_ENTRY_SIZE;
+
+	if (alloc_size > PAGE_SIZE)
+		new_ldt->entries = vmalloc(alloc_size);
 	else
-		newldt = (void *)__get_free_page(GFP_KERNEL);
+		new_ldt->entries = (void *)__get_free_page(GFP_KERNEL);
+	if (!new_ldt->entries) {
+		kfree(new_ldt);
+		return NULL;
+	}
 
-	if (!newldt)
-		return -ENOMEM;
+	new_ldt->size = size;
+	return new_ldt;
+}
+
+/* After calling this, the LDT is immutable. */
+static void finalize_ldt_struct(struct ldt_struct *ldt)
+{
+	paravirt_alloc_ldt(ldt->entries, ldt->size);
+}
 
-	if (oldsize)
-		memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
-	oldldt = pc->ldt;
-	memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
-	       (mincount - oldsize) * LDT_ENTRY_SIZE);
+static void install_ldt(struct mm_struct *current_mm,
+			struct ldt_struct *ldt)
+{
+	/* context.lock is held */
+	preempt_disable();
 
-	paravirt_alloc_ldt(newldt, mincount);
+	/* Synchronizes with lockless_dereference in load_mm_ldt. */
+	smp_store_release(&current_mm->context.ldt, ldt);
 
-#ifdef CONFIG_X86_64
-	/* CHECKME: Do we really need this ? */
-	wmb();
-#endif
-	pc->ldt = newldt;
-	wmb();
-	pc->size = mincount;
-	wmb();
+	/* Activate for this CPU. */
+	flush_ldt(current->mm);
 
-	if (reload) {
 #ifdef CONFIG_SMP
-		preempt_disable();
-		load_LDT(pc);
-		if (!cpumask_equal(mm_cpumask(current->mm),
-				   cpumask_of(smp_processor_id())))
-			smp_call_function(flush_ldt, current->mm, 1);
-		preempt_enable();
-#else
-		load_LDT(pc);
+	/* Synchronize with other CPUs. */
+	if (!cpumask_equal(mm_cpumask(current_mm),
+			   cpumask_of(smp_processor_id())))
+		smp_call_function(flush_ldt, current_mm, 1);
 #endif
-	}
-	if (oldsize) {
-		paravirt_free_ldt(oldldt, oldsize);
-		if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
-			vfree(oldldt);
-		else
-			put_page(virt_to_page(oldldt));
-	}
-	return 0;
+	preempt_enable();
 }
 
-static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
+static void free_ldt_struct(struct ldt_struct *ldt)
 {
-	int err = alloc_ldt(new, old->size, 0);
-	int i;
-
-	if (err < 0)
-		return err;
-
-	for (i = 0; i < old->size; i++)
-		write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
-	return 0;
+	if (unlikely(ldt)) {
+		int alloc_size = sizeof(struct ldt_struct) +
+			ldt->size * LDT_ENTRY_SIZE;
+		paravirt_free_ldt(ldt->entries, ldt->size);
+		if (alloc_size > PAGE_SIZE)
+			vfree(ldt->entries);
+		else
+			put_page(virt_to_page(ldt->entries));
+		kfree(ldt);
+	}
 }
 
 /*
@@ -108,13 +110,32 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	int retval = 0;
 
 	mutex_init(&mm->context.lock);
-	mm->context.size = 0;
 	old_mm = current->mm;
-	if (old_mm && old_mm->context.size > 0) {
-		mutex_lock(&old_mm->context.lock);
-		retval = copy_ldt(&mm->context, &old_mm->context);
-		mutex_unlock(&old_mm->context.lock);
+	if (!old_mm) {
+		mm->context.ldt = NULL;
+		return 0;
+	}
+
+	mutex_lock(&old_mm->context.lock);
+	if (old_mm->context.ldt) {
+		struct ldt_struct *new_ldt =
+			alloc_ldt_struct(old_mm->context.ldt->size);
+		if (!new_ldt) {
+			retval = -ENOMEM;
+			goto out_unlock;
+		}
+
+		memcpy(new_ldt->entries, old_mm->context.ldt->entries,
+		       new_ldt->size * LDT_ENTRY_SIZE);
+		finalize_ldt_struct(new_ldt);
+
+		mm->context.ldt = new_ldt;
+	} else {
+		mm->context.ldt = NULL;
 	}
+
+out_unlock:
+	mutex_unlock(&old_mm->context.lock);
 	return retval;
 }
 
@@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
  */
 void destroy_context(struct mm_struct *mm)
 {
-	if (mm->context.size) {
-#ifdef CONFIG_X86_32
-		/* CHECKME: Can this ever happen ? */
-		if (mm == current->active_mm)
-			clear_LDT();
-#endif
-		paravirt_free_ldt(mm->context.ldt, mm->context.size);
-		if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
-			vfree(mm->context.ldt);
-		else
-			put_page(virt_to_page(mm->context.ldt));
-		mm->context.size = 0;
-	}
+	free_ldt_struct(mm->context.ldt);
+	mm->context.ldt = NULL;
 }
 
 static int read_ldt(void __user *ptr, unsigned long bytecount)
 {
-	int err;
+	int retval;
 	unsigned long size;
 	struct mm_struct *mm = current->mm;
 
-	if (!mm->context.size)
-		return 0;
+	mutex_lock(&mm->context.lock);
+
+	if (!mm->context.ldt) {
+		retval = 0;
+		goto out_unlock;
+	}
+
 	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
 		bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
 
-	mutex_lock(&mm->context.lock);
-	size = mm->context.size * LDT_ENTRY_SIZE;
+	size = mm->context.ldt->size * LDT_ENTRY_SIZE;
 	if (size > bytecount)
 		size = bytecount;
 
-	err = 0;
-	if (copy_to_user(ptr, mm->context.ldt, size))
-		err = -EFAULT;
-	mutex_unlock(&mm->context.lock);
-	if (err < 0)
-		goto error_return;
+	if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
+		retval = -EFAULT;
+		goto out_unlock;
+	}
+
 	if (size != bytecount) {
-		/* zero-fill the rest */
+		/* Zero-fill the rest and pretend we read bytecount bytes. */
 		if (clear_user(ptr + size, bytecount - size) != 0) {
-			err = -EFAULT;
-			goto error_return;
+			retval = -EFAULT;
+			goto out_unlock;
 		}
 	}
-	return bytecount;
-error_return:
-	return err;
+	retval = bytecount;
+
+out_unlock:
+	mutex_unlock(&mm->context.lock);
+	return retval;
 }
 
 static int read_default_ldt(void __user *ptr, unsigned long bytecount)
@@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 	struct desc_struct ldt;
 	int error;
 	struct user_desc ldt_info;
+	int oldsize, newsize;
+	struct ldt_struct *new_ldt, *old_ldt;
 
 	error = -EINVAL;
 	if (bytecount != sizeof(ldt_info))
@@ -213,34 +230,43 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 			goto out;
 	}
 
-	mutex_lock(&mm->context.lock);
-	if (ldt_info.entry_number >= mm->context.size) {
-		error = alloc_ldt(&current->mm->context,
-				  ldt_info.entry_number + 1, 1);
-		if (error < 0)
-			goto out_unlock;
-	}
-
-	/* Allow LDTs to be cleared by the user. */
-	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
-		if (oldmode || LDT_empty(&ldt_info)) {
-			memset(&ldt, 0, sizeof(ldt));
-			goto install;
+	if ((oldmode && ldt_info.base_addr == 0 && ldt_info.limit == 0) ||
+	    LDT_empty(&ldt_info)) {
+		/* The user wants to clear the entry. */
+		memset(&ldt, 0, sizeof(ldt));
+	} else {
+		if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
+			error = -EINVAL;
+			goto out;
 		}
+
+		fill_ldt(&ldt, &ldt_info);
+		if (oldmode)
+			ldt.avl = 0;
 	}
 
-	if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
-		error = -EINVAL;
+	mutex_lock(&mm->context.lock);
+
+	old_ldt = mm->context.ldt;
+	oldsize = old_ldt ? old_ldt->size : 0;
+	newsize = max((int)(ldt_info.entry_number + 1), oldsize);
+
+	error = -ENOMEM;
+	new_ldt = alloc_ldt_struct(newsize);
+	if (!new_ldt)
 		goto out_unlock;
-	}
 
-	fill_ldt(&ldt, &ldt_info);
-	if (oldmode)
-		ldt.avl = 0;
+	if (old_ldt) {
+		memcpy(new_ldt->entries, old_ldt->entries,
+		       oldsize * LDT_ENTRY_SIZE);
+	}
+	memset(new_ldt->entries + oldsize * LDT_ENTRY_SIZE, 0,
+	       (newsize - oldsize) * LDT_ENTRY_SIZE);
+	new_ldt->entries[ldt_info.entry_number] = ldt;
+	finalize_ldt_struct(new_ldt);
 
-	/* Install the new entry ...  */
-install:
-	write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt);
+	install_ldt(mm, new_ldt);
+	free_ldt_struct(old_ldt);
 	error = 0;
 
 out_unlock:
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 71d7849a07f7..f6b916387590 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -121,11 +121,11 @@ void __show_regs(struct pt_regs *regs, int all)
 void release_thread(struct task_struct *dead_task)
 {
 	if (dead_task->mm) {
-		if (dead_task->mm->context.size) {
+		if (dead_task->mm->context.ldt) {
 			pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
 				dead_task->comm,
 				dead_task->mm->context.ldt,
-				dead_task->mm->context.size);
+				dead_task->mm->context.ldt->size);
 			BUG();
 		}
 	}
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 9b4d51d0c0d0..6273324186ac 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -5,6 +5,7 @@
 #include <linux/mm.h>
 #include <linux/ptrace.h>
 #include <asm/desc.h>
+#include <asm/mmu_context.h>
 
 unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
 {
@@ -30,10 +31,11 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
 		seg &= ~7UL;
 
 		mutex_lock(&child->mm->context.lock);
-		if (unlikely((seg >> 3) >= child->mm->context.size))
+		if (unlikely(!child->mm->context.ldt ||
+			     (seg >> 3) >= child->mm->context.ldt->size))
 			addr = -1L; /* bogus selector, access would fault */
 		else {
-			desc = child->mm->context.ldt + seg;
+			desc = &child->mm->context.ldt->entries[seg];
 			base = get_desc_base(desc);
 
 			/* 16-bit code segment? */
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 0d7dd1f5ac36..9ab52791fed5 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -22,6 +22,7 @@
 #include <asm/fpu/internal.h>
 #include <asm/debugreg.h>
 #include <asm/cpu.h>
+#include <asm/mmu_context.h>
 
 #ifdef CONFIG_X86_32
 __visible unsigned long saved_context_ebx;
@@ -153,7 +154,7 @@ static void fix_processor_context(void)
 	syscall_init();				/* This sets MSR_*STAR and related */
 #endif
 	load_TR_desc();				/* This does ltr */
-	load_LDT(&current->active_mm->context);	/* This does lldt */
+	load_mm_ldt(current->active_mm);	/* This does lldt */
 
 	fpu__resume_cpu();
 }
-- 
2.4.3



* Re: [PATCH v3 1/3] x86/ldt: Make modify_ldt synchronous
  2015-07-22 19:23 ` [PATCH v3 1/3] x86/ldt: Make modify_ldt synchronous Andy Lutomirski
@ 2015-07-22 22:20   ` Boris Ostrovsky
  2015-07-25  4:13     ` Boris Ostrovsky
  2015-07-24  6:37   ` Borislav Petkov
  2015-07-24 15:29   ` Borislav Petkov
  2 siblings, 1 reply; 8+ messages in thread
From: Boris Ostrovsky @ 2015-07-22 22:20 UTC (permalink / raw)
  To: Andy Lutomirski, Peter Zijlstra, Steven Rostedt
  Cc: security@kernel.org, X86 ML, Borislav Petkov, Sasha Levin,
	linux-kernel, Konrad Rzeszutek Wilk, Andrew Cooper, Jan Beulich,
	xen-devel, stable

On 07/22/2015 03:23 PM, Andy Lutomirski wrote:
> modify_ldt has questionable locking and does not synchronize
> threads.  Improve it: redesign the locking and synchronize all
> threads' LDTs using an IPI on all modifications.
>
> This will dramatically slow down modify_ldt in multithreaded
> programs, but there shouldn't be any multithreaded programs that
> care about modify_ldt's performance in the first place.
>
> Cc: stable@vger.kernel.org
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
>   arch/x86/include/asm/desc.h        |  15 ---
>   arch/x86/include/asm/mmu.h         |   3 +-
>   arch/x86/include/asm/mmu_context.h |  53 +++++++-
>   arch/x86/kernel/cpu/common.c       |   4 +-
>   arch/x86/kernel/cpu/perf_event.c   |  12 +-
>   arch/x86/kernel/ldt.c              | 258 ++++++++++++++++++++-----------------
>   arch/x86/kernel/process_64.c       |   4 +-
>   arch/x86/kernel/step.c             |   6 +-
>   arch/x86/power/cpu.c               |   3 +-
>   9 files changed, 209 insertions(+), 149 deletions(-)
>
> diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
> index a0bf89fd2647..4e10d73cf018 100644
> --- a/arch/x86/include/asm/desc.h
> +++ b/arch/x86/include/asm/desc.h
> @@ -280,21 +280,6 @@ static inline void clear_LDT(void)
>   	set_ldt(NULL, 0);
>   }
>   
> -/*
> - * load one particular LDT into the current CPU
> - */
> -static inline void load_LDT_nolock(mm_context_t *pc)
> -{
> -	set_ldt(pc->ldt, pc->size);
> -}
> -
> -static inline void load_LDT(mm_context_t *pc)
> -{
> -	preempt_disable();
> -	load_LDT_nolock(pc);
> -	preempt_enable();
> -}
> -
>   static inline unsigned long get_desc_base(const struct desc_struct *desc)
>   {
>   	return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
> diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> index 09b9620a73b4..364d27481a52 100644
> --- a/arch/x86/include/asm/mmu.h
> +++ b/arch/x86/include/asm/mmu.h
> @@ -9,8 +9,7 @@
>    * we put the segment information here.
>    */
>   typedef struct {
> -	void *ldt;
> -	int size;
> +	struct ldt_struct *ldt;
>   
>   #ifdef CONFIG_X86_64
>   	/* True if mm supports a task running in 32 bit compatibility mode. */
> diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> index 804a3a6030ca..3fcff70c398e 100644
> --- a/arch/x86/include/asm/mmu_context.h
> +++ b/arch/x86/include/asm/mmu_context.h
> @@ -34,6 +34,49 @@ static inline void load_mm_cr4(struct mm_struct *mm) {}
>   #endif
>   
>   /*
> + * ldt_structs can be allocated, used, and freed, but they are never
> + * modified while live.
> + */
> +struct ldt_struct {
> +	/*
> +	 * Xen requires page-aligned LDTs with special permissions.  This is
> +	 * needed to prevent us from installing evil descriptors such as
> +	 * call gates.  On native, we could merge the ldt_struct and LDT
> +	 * allocations, but it's not worth trying to optimize.
> +	 */
> +	struct desc_struct *entries;
> +	int size;
> +};
> +
> +static inline void load_mm_ldt(struct mm_struct *mm)
> +{
> +	struct ldt_struct *ldt;
> +	DEBUG_LOCKS_WARN_ON(!irqs_disabled());
> +
> +	/* lockless_dereference synchronizes with smp_store_release */
> +	ldt = lockless_dereference(mm->context.ldt);
> +
> +	/*
> +	 * Any change to mm->context.ldt is followed by an IPI to all
> +	 * CPUs with the mm active.  The LDT will not be freed until
> +	 * after the IPI is handled by all such CPUs.  This means that,
> +	 * if the ldt_struct changes before we return, the values we see
> +	 * will be safe, and the new values will be loaded before we run
> +	 * any user code.
> +	 *
> +	 * NB: don't try to convert this to use RCU without extreme care.
> +	 * We would still need IRQs off, because we don't want to change
> +	 * the local LDT after an IPI loaded a newer value than the one
> +	 * that we can see.
> +	 */
> +
> +	if (unlikely(ldt))
> +		set_ldt(ldt->entries, ldt->size);
> +	else
> +		clear_LDT();
> +}
> +
> +/*
>    * Used for LDT copy/destruction.
>    */
>   int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
> @@ -78,12 +121,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
>   		 * was called and then modify_ldt changed
>   		 * prev->context.ldt but suppressed an IPI to this CPU.
>   		 * In this case, prev->context.ldt != NULL, because we
> -		 * never free an LDT while the mm still exists.  That
> -		 * means that next->context.ldt != prev->context.ldt,
> -		 * because mms never share an LDT.
> +		 * never set context.ldt to NULL while the mm still
> +		 * exists.  That means that next->context.ldt !=
> +		 * prev->context.ldt, because mms never share an LDT.
>   		 */
>   		if (unlikely(prev->context.ldt != next->context.ldt))
> -			load_LDT_nolock(&next->context);
> +			load_mm_ldt(next);
>   	}
>   #ifdef CONFIG_SMP
>   	  else {
> @@ -106,7 +149,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
>   			load_cr3(next->pgd);
>   			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
>   			load_mm_cr4(next);
> -			load_LDT_nolock(&next->context);
> +			load_mm_ldt(next);
>   		}
>   	}
>   #endif
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index 922c5e0cea4c..cb9e5df42dd2 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -1410,7 +1410,7 @@ void cpu_init(void)
>   	load_sp0(t, &current->thread);
>   	set_tss_desc(cpu, t);
>   	load_TR_desc();
> -	load_LDT(&init_mm.context);
> +	load_mm_ldt(&init_mm);
>   
>   	clear_all_debug_regs();
>   	dbg_restore_debug_regs();
> @@ -1459,7 +1459,7 @@ void cpu_init(void)
>   	load_sp0(t, thread);
>   	set_tss_desc(cpu, t);
>   	load_TR_desc();
> -	load_LDT(&init_mm.context);
> +	load_mm_ldt(&init_mm);
>   
>   	t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
>   
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index 3658de47900f..9469dfa55607 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -2179,21 +2179,25 @@ static unsigned long get_segment_base(unsigned int segment)
>   	int idx = segment >> 3;
>   
>   	if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
> +		struct ldt_struct *ldt;
> +
>   		if (idx > LDT_ENTRIES)
>   			return 0;
>   
> -		if (idx > current->active_mm->context.size)
> +		/* IRQs are off, so this synchronizes with smp_store_release */
> +		ldt = lockless_dereference(current->active_mm->context.ldt);
> +		if (!ldt || idx > ldt->size)
>   			return 0;
>   
> -		desc = current->active_mm->context.ldt;
> +		desc = &ldt->entries[idx];
>   	} else {
>   		if (idx > GDT_ENTRIES)
>   			return 0;
>   
> -		desc = raw_cpu_ptr(gdt_page.gdt);
> +		desc = raw_cpu_ptr(gdt_page.gdt) + idx;
>   	}
>   
> -	return get_desc_base(desc + idx);
> +	return get_desc_base(desc);
>   }
>   
>   #ifdef CONFIG_COMPAT
> diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
> index c37886d759cc..3ae308029dee 100644
> --- a/arch/x86/kernel/ldt.c
> +++ b/arch/x86/kernel/ldt.c
> @@ -12,6 +12,7 @@
>   #include <linux/string.h>
>   #include <linux/mm.h>
>   #include <linux/smp.h>
> +#include <linux/slab.h>
>   #include <linux/vmalloc.h>
>   #include <linux/uaccess.h>
>   
> @@ -20,82 +21,83 @@
>   #include <asm/mmu_context.h>
>   #include <asm/syscalls.h>
>   
> -#ifdef CONFIG_SMP
>   static void flush_ldt(void *current_mm)
>   {
> -	if (current->active_mm == current_mm)
> -		load_LDT(&current->active_mm->context);
> +	if (current->active_mm == current_mm) {
> +		/* context.lock is held for us, so we don't need any locking. */
> +		mm_context_t *pc = &current->active_mm->context;
> +		set_ldt(pc->ldt->entries, pc->ldt->size);
> +	}
>   }
> -#endif
>   
> -static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
> +/* The caller must call finalize_ldt_struct on the result. */
> +static struct ldt_struct *alloc_ldt_struct(int size)
>   {
> -	void *oldldt, *newldt;
> -	int oldsize;
> +	struct ldt_struct *new_ldt;
> +	int alloc_size;
>   
> -	if (mincount <= pc->size)
> -		return 0;
> -	oldsize = pc->size;
> -	mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
> -			(~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
> -	if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
> -		newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
> +	if (size > LDT_ENTRIES)
> +		return NULL;
> +
> +	new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
> +	if (!new_ldt)
> +		return NULL;
> +
> +	BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
> +	alloc_size = size * LDT_ENTRY_SIZE;
> +
> +	if (alloc_size > PAGE_SIZE)
> +		new_ldt->entries = vmalloc(alloc_size);
>   	else
> -		newldt = (void *)__get_free_page(GFP_KERNEL);
> +		new_ldt->entries = (void *)__get_free_page(GFP_KERNEL);
> +	if (!new_ldt->entries) {
> +		kfree(new_ldt);
> +		return NULL;
> +	}
>   
> -	if (!newldt)
> -		return -ENOMEM;
> +	new_ldt->size = size;
> +	return new_ldt;
> +}
> +
> +/* After calling this, the LDT is immutable. */
> +static void finalize_ldt_struct(struct ldt_struct *ldt)
> +{
> +	paravirt_alloc_ldt(ldt->entries, ldt->size);
> +}
>   
> -	if (oldsize)
> -		memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
> -	oldldt = pc->ldt;
> -	memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
> -	       (mincount - oldsize) * LDT_ENTRY_SIZE);
> +static void install_ldt(struct mm_struct *current_mm,
> +			struct ldt_struct *ldt)
> +{
> +	/* context.lock is held */
> +	preempt_disable();
>   
> -	paravirt_alloc_ldt(newldt, mincount);
> +	/* Synchronizes with lockless_dereference in load_mm_ldt. */
> +	smp_store_release(&current_mm->context.ldt, ldt);
>   
> -#ifdef CONFIG_X86_64
> -	/* CHECKME: Do we really need this ? */
> -	wmb();
> -#endif
> -	pc->ldt = newldt;
> -	wmb();
> -	pc->size = mincount;
> -	wmb();
> +	/* Activate for this CPU. */
> +	flush_ldt(current->mm);
>   
> -	if (reload) {
>   #ifdef CONFIG_SMP
> -		preempt_disable();
> -		load_LDT(pc);
> -		if (!cpumask_equal(mm_cpumask(current->mm),
> -				   cpumask_of(smp_processor_id())))
> -			smp_call_function(flush_ldt, current->mm, 1);
> -		preempt_enable();
> -#else
> -		load_LDT(pc);
> +	/* Synchronize with other CPUs. */
> +	if (!cpumask_equal(mm_cpumask(current_mm),
> +			   cpumask_of(smp_processor_id())))
> +		smp_call_function(flush_ldt, current_mm, 1);
>   #endif
> -	}
> -	if (oldsize) {
> -		paravirt_free_ldt(oldldt, oldsize);
> -		if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
> -			vfree(oldldt);
> -		else
> -			put_page(virt_to_page(oldldt));
> -	}
> -	return 0;
> +	preempt_enable();
>   }
>   
> -static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
> +static void free_ldt_struct(struct ldt_struct *ldt)
>   {
> -	int err = alloc_ldt(new, old->size, 0);
> -	int i;
> -
> -	if (err < 0)
> -		return err;
> -
> -	for (i = 0; i < old->size; i++)
> -		write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
> -	return 0;
> +	if (unlikely(ldt)) {
> +		int alloc_size = sizeof(struct ldt_struct) +
> +			ldt->size * LDT_ENTRY_SIZE;
> +		paravirt_free_ldt(ldt->entries, ldt->size);
> +		if (alloc_size > PAGE_SIZE)
> +			vfree(ldt->entries);
> +		else
> +			put_page(virt_to_page(ldt->entries));
> +		kfree(ldt);
> +	}
>   }
>   
>   /*
> @@ -108,13 +110,32 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
>   	int retval = 0;
>   
>   	mutex_init(&mm->context.lock);
> -	mm->context.size = 0;
>   	old_mm = current->mm;
> -	if (old_mm && old_mm->context.size > 0) {
> -		mutex_lock(&old_mm->context.lock);
> -		retval = copy_ldt(&mm->context, &old_mm->context);
> -		mutex_unlock(&old_mm->context.lock);
> +	if (!old_mm) {
> +		mm->context.ldt = NULL;
> +		return 0;
> +	}
> +
> +	mutex_lock(&old_mm->context.lock);
> +	if (old_mm->context.ldt) {
> +		struct ldt_struct *new_ldt =
> +			alloc_ldt_struct(old_mm->context.ldt->size);
> +		if (!new_ldt) {
> +			retval = -ENOMEM;
> +			goto out_unlock;
> +		}
> +
> +		memcpy(new_ldt->entries, old_mm->context.ldt->entries,
> +		       new_ldt->size * LDT_ENTRY_SIZE);
> +		finalize_ldt_struct(new_ldt);
> +
> +		mm->context.ldt = new_ldt;
> +	} else {
> +		mm->context.ldt = NULL;
>   	}
> +
> +out_unlock:
> +	mutex_unlock(&old_mm->context.lock);
>   	return retval;
>   }
>   
> @@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
>    */
>   void destroy_context(struct mm_struct *mm)
>   {
> -	if (mm->context.size) {
> -#ifdef CONFIG_X86_32
> -		/* CHECKME: Can this ever happen ? */
> -		if (mm == current->active_mm)
> -			clear_LDT();
> -#endif
> -		paravirt_free_ldt(mm->context.ldt, mm->context.size);
> -		if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
> -			vfree(mm->context.ldt);
> -		else
> -			put_page(virt_to_page(mm->context.ldt));
> -		mm->context.size = 0;
> -	}
> +	free_ldt_struct(mm->context.ldt);
> +	mm->context.ldt = NULL;
>   }
>   
>   static int read_ldt(void __user *ptr, unsigned long bytecount)
>   {
> -	int err;
> +	int retval;
>   	unsigned long size;
>   	struct mm_struct *mm = current->mm;
>   
> -	if (!mm->context.size)
> -		return 0;
> +	mutex_lock(&mm->context.lock);
> +
> +	if (!mm->context.ldt) {
> +		retval = 0;
> +		goto out_unlock;
> +	}
> +
>   	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
>   		bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
>   
> -	mutex_lock(&mm->context.lock);
> -	size = mm->context.size * LDT_ENTRY_SIZE;
> +	size = mm->context.ldt->size * LDT_ENTRY_SIZE;
>   	if (size > bytecount)
>   		size = bytecount;
>   
> -	err = 0;
> -	if (copy_to_user(ptr, mm->context.ldt, size))
> -		err = -EFAULT;
> -	mutex_unlock(&mm->context.lock);
> -	if (err < 0)
> -		goto error_return;
> +	if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
> +		retval = -EFAULT;
> +		goto out_unlock;
> +	}
> +
>   	if (size != bytecount) {
> -		/* zero-fill the rest */
> +		/* Zero-fill the rest and pretend we read bytecount bytes. */
>   		if (clear_user(ptr + size, bytecount - size) != 0) {
> -			err = -EFAULT;
> -			goto error_return;
> +			retval = -EFAULT;
> +			goto out_unlock;
>   		}
>   	}
> -	return bytecount;
> -error_return:
> -	return err;
> +	retval = bytecount;
> +
> +out_unlock:
> +	mutex_unlock(&mm->context.lock);
> +	return retval;
>   }
>   
>   static int read_default_ldt(void __user *ptr, unsigned long bytecount)
> @@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
>   	struct desc_struct ldt;
>   	int error;
>   	struct user_desc ldt_info;
> +	int oldsize, newsize;
> +	struct ldt_struct *new_ldt, *old_ldt;
>   
>   	error = -EINVAL;
>   	if (bytecount != sizeof(ldt_info))
> @@ -213,34 +230,43 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
>   			goto out;
>   	}
>   
> -	mutex_lock(&mm->context.lock);
> -	if (ldt_info.entry_number >= mm->context.size) {
> -		error = alloc_ldt(&current->mm->context,
> -				  ldt_info.entry_number + 1, 1);
> -		if (error < 0)
> -			goto out_unlock;
> -	}
> -
> -	/* Allow LDTs to be cleared by the user. */
> -	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
> -		if (oldmode || LDT_empty(&ldt_info)) {
> -			memset(&ldt, 0, sizeof(ldt));
> -			goto install;
> +	if ((oldmode && ldt_info.base_addr == 0 && ldt_info.limit == 0) ||
> +	    LDT_empty(&ldt_info)) {
> +		/* The user wants to clear the entry. */
> +		memset(&ldt, 0, sizeof(ldt));
> +	} else {
> +		if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
> +			error = -EINVAL;
> +			goto out;
>   		}
> +
> +		fill_ldt(&ldt, &ldt_info);
> +		if (oldmode)
> +			ldt.avl = 0;
>   	}
>   
> -	if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
> -		error = -EINVAL;
> +	mutex_lock(&mm->context.lock);
> +
> +	old_ldt = mm->context.ldt;
> +	oldsize = old_ldt ? old_ldt->size : 0;
> +	newsize = max((int)(ldt_info.entry_number + 1), oldsize);
> +
> +	error = -ENOMEM;
> +	new_ldt = alloc_ldt_struct(newsize);
> +	if (!new_ldt)
>   		goto out_unlock;
> -	}
>   
> -	fill_ldt(&ldt, &ldt_info);
> -	if (oldmode)
> -		ldt.avl = 0;
> +	if (old_ldt) {
> +		memcpy(new_ldt->entries, old_ldt->entries,
> +		       oldsize * LDT_ENTRY_SIZE);
> +	}
> +	memset(new_ldt->entries + oldsize * LDT_ENTRY_SIZE, 0,
> +	       (newsize - oldsize) * LDT_ENTRY_SIZE);

We need to zero out the full page (probably better done in alloc_ldt_struct()
with vzalloc()/__GFP_ZERO) --- Xen checks the whole page that is assigned to
the G/LDT and gets unhappy if an invalid descriptor is found there.

This fixes one problem. There is something else that Xen gets upset 
about, I haven't figured what it is yet (and I am out tomorrow so it may 
need to wait until Friday).


-boris


> +	new_ldt->entries[ldt_info.entry_number] = ldt;
> +	finalize_ldt_struct(new_ldt);
>   
> -	/* Install the new entry ...  */
> -install:
> -	write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt);
> +	install_ldt(mm, new_ldt);
> +	free_ldt_struct(old_ldt);
>   	error = 0;
>   
>   out_unlock:
> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
> index 71d7849a07f7..f6b916387590 100644
> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -121,11 +121,11 @@ void __show_regs(struct pt_regs *regs, int all)
>   void release_thread(struct task_struct *dead_task)
>   {
>   	if (dead_task->mm) {
> -		if (dead_task->mm->context.size) {
> +		if (dead_task->mm->context.ldt) {
>   			pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
>   				dead_task->comm,
>   				dead_task->mm->context.ldt,
> -				dead_task->mm->context.size);
> +				dead_task->mm->context.ldt->size);
>   			BUG();
>   		}
>   	}
> diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
> index 9b4d51d0c0d0..6273324186ac 100644
> --- a/arch/x86/kernel/step.c
> +++ b/arch/x86/kernel/step.c
> @@ -5,6 +5,7 @@
>   #include <linux/mm.h>
>   #include <linux/ptrace.h>
>   #include <asm/desc.h>
> +#include <asm/mmu_context.h>
>   
>   unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
>   {
> @@ -30,10 +31,11 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
>   		seg &= ~7UL;
>   
>   		mutex_lock(&child->mm->context.lock);
> -		if (unlikely((seg >> 3) >= child->mm->context.size))
> +		if (unlikely(!child->mm->context.ldt ||
> +			     (seg >> 3) >= child->mm->context.ldt->size))
>   			addr = -1L; /* bogus selector, access would fault */
>   		else {
> -			desc = child->mm->context.ldt + seg;
> +			desc = &child->mm->context.ldt->entries[seg];
>   			base = get_desc_base(desc);
>   
>   			/* 16-bit code segment? */
> diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
> index 0d7dd1f5ac36..9ab52791fed5 100644
> --- a/arch/x86/power/cpu.c
> +++ b/arch/x86/power/cpu.c
> @@ -22,6 +22,7 @@
>   #include <asm/fpu/internal.h>
>   #include <asm/debugreg.h>
>   #include <asm/cpu.h>
> +#include <asm/mmu_context.h>
>   
>   #ifdef CONFIG_X86_32
>   __visible unsigned long saved_context_ebx;
> @@ -153,7 +154,7 @@ static void fix_processor_context(void)
>   	syscall_init();				/* This sets MSR_*STAR and related */
>   #endif
>   	load_TR_desc();				/* This does ltr */
> -	load_LDT(&current->active_mm->context);	/* This does lldt */
> +	load_mm_ldt(current->active_mm);	/* This does lldt */
>   
>   	fpu__resume_cpu();
>   }



* Re: [PATCH v3 1/3] x86/ldt: Make modify_ldt synchronous
  2015-07-22 19:23 ` [PATCH v3 1/3] x86/ldt: Make modify_ldt synchronous Andy Lutomirski
  2015-07-22 22:20   ` Boris Ostrovsky
@ 2015-07-24  6:37   ` Borislav Petkov
  2015-07-24 15:29   ` Borislav Petkov
  2 siblings, 0 replies; 8+ messages in thread
From: Borislav Petkov @ 2015-07-24  6:37 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Peter Zijlstra, Steven Rostedt, security@kernel.org, X86 ML,
	Sasha Levin, linux-kernel, Konrad Rzeszutek Wilk, Boris Ostrovsky,
	Andrew Cooper, Jan Beulich, xen-devel, stable

On Wed, Jul 22, 2015 at 12:23:46PM -0700, Andy Lutomirski wrote:
> modify_ldt has questionable locking and does not synchronize
> threads.  Improve it: redesign the locking and synchronize all
> threads' LDTs using an IPI on all modifications.
> 
> This will dramatically slow down modify_ldt in multithreaded
> programs, but there shouldn't be any multithreaded programs that
> care about modify_ldt's performance in the first place.
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Andy Lutomirski <luto@kernel.org>

...

> +struct ldt_struct {
> +	/*
> +	 * Xen requires page-aligned LDTs with special permissions.  This is
> +	 * needed to prevent us from installing evil descriptors such as
> +	 * call gates.  On native, we could merge the ldt_struct and LDT
> +	 * allocations, but it's not worth trying to optimize.

I don't think baremetal should care about Xen and, frankly, this is
slowly getting ridiculous - baremetal has to wait for a potentially
critical security fix just because it breaks Xen. Dammit, this level of
intrusiveness into x86 should've never been allowed.

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--


* Re: [PATCH v3 1/3] x86/ldt: Make modify_ldt synchronous
  2015-07-22 19:23 ` [PATCH v3 1/3] x86/ldt: Make modify_ldt synchronous Andy Lutomirski
  2015-07-22 22:20   ` Boris Ostrovsky
  2015-07-24  6:37   ` Borislav Petkov
@ 2015-07-24 15:29   ` Borislav Petkov
  2015-07-25  4:52     ` Andy Lutomirski
  2 siblings, 1 reply; 8+ messages in thread
From: Borislav Petkov @ 2015-07-24 15:29 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Peter Zijlstra, Steven Rostedt, security@kernel.org, X86 ML,
	Sasha Levin, linux-kernel, Konrad Rzeszutek Wilk, Boris Ostrovsky,
	Andrew Cooper, Jan Beulich, xen-devel, stable

On Wed, Jul 22, 2015 at 12:23:46PM -0700, Andy Lutomirski wrote:
> modify_ldt has questionable locking and does not synchronize
> threads.  Improve it: redesign the locking and synchronize all
> threads' LDTs using an IPI on all modifications.
> 
> This will dramatically slow down modify_ldt in multithreaded
> programs, but there shouldn't be any multithreaded programs that
> care about modify_ldt's performance in the first place.
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Andy Lutomirski <luto@kernel.org>

Just minor stylistic nitpicks below. Otherwise looks ok to me.

...

> diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
> index c37886d759cc..3ae308029dee 100644
> --- a/arch/x86/kernel/ldt.c
> +++ b/arch/x86/kernel/ldt.c
> @@ -12,6 +12,7 @@
>  #include <linux/string.h>
>  #include <linux/mm.h>
>  #include <linux/smp.h>
> +#include <linux/slab.h>
>  #include <linux/vmalloc.h>
>  #include <linux/uaccess.h>
>  
> @@ -20,82 +21,83 @@
>  #include <asm/mmu_context.h>
>  #include <asm/syscalls.h>
>  
> -#ifdef CONFIG_SMP
>  static void flush_ldt(void *current_mm)
>  {
> -	if (current->active_mm == current_mm)
> -		load_LDT(&current->active_mm->context);
> +	if (current->active_mm == current_mm) {

Save indentation level:

	if (current->active_mm != current_mm)
		return;

> +		/* context.lock is held for us, so we don't need any locking. */

Stick that comment above the function name.

> +		mm_context_t *pc = &current->active_mm->context;
> +		set_ldt(pc->ldt->entries, pc->ldt->size);
> +	}
>  }
> -#endif
>  
> -static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
> +/* The caller must call finalize_ldt_struct on the result. */
> +static struct ldt_struct *alloc_ldt_struct(int size)
>  {
> -	void *oldldt, *newldt;
> -	int oldsize;
> +	struct ldt_struct *new_ldt;
> +	int alloc_size;
>  
> -	if (mincount <= pc->size)
> -		return 0;
> -	oldsize = pc->size;
> -	mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
> -			(~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
> -	if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
> -		newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
> +	if (size > LDT_ENTRIES)
> +		return NULL;
> +
> +	new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
> +	if (!new_ldt)
> +		return NULL;
> +
> +	BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
> +	alloc_size = size * LDT_ENTRY_SIZE;
> +
> +	if (alloc_size > PAGE_SIZE)
> +		new_ldt->entries = vmalloc(alloc_size);
>  	else
> -		newldt = (void *)__get_free_page(GFP_KERNEL);
> +		new_ldt->entries = (void *)__get_free_page(GFP_KERNEL);

newline here.

> +	if (!new_ldt->entries) {
> +		kfree(new_ldt);
> +		return NULL;
> +	}
>  
> -	if (!newldt)
> -		return -ENOMEM;
> +	new_ldt->size = size;
> +	return new_ldt;
> +}
> +
> +/* After calling this, the LDT is immutable. */
> +static void finalize_ldt_struct(struct ldt_struct *ldt)
> +{
> +	paravirt_alloc_ldt(ldt->entries, ldt->size);
> +}
>  
> -	if (oldsize)
> -		memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
> -	oldldt = pc->ldt;
> -	memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
> -	       (mincount - oldsize) * LDT_ENTRY_SIZE);
> +static void install_ldt(struct mm_struct *current_mm,
> +			struct ldt_struct *ldt)
> +{
> +	/* context.lock is held */
> +	preempt_disable();
>  
> -	paravirt_alloc_ldt(newldt, mincount);
> +	/* Synchronizes with lockless_dereference in load_mm_ldt. */

Good.

> +	smp_store_release(&current_mm->context.ldt, ldt);
>  
> -#ifdef CONFIG_X86_64
> -	/* CHECKME: Do we really need this ? */
> -	wmb();
> -#endif
> -	pc->ldt = newldt;
> -	wmb();
> -	pc->size = mincount;
> -	wmb();
> +	/* Activate for this CPU. */
> +	flush_ldt(current->mm);
>  
> -	if (reload) {
>  #ifdef CONFIG_SMP
> -		preempt_disable();
> -		load_LDT(pc);
> -		if (!cpumask_equal(mm_cpumask(current->mm),
> -				   cpumask_of(smp_processor_id())))
> -			smp_call_function(flush_ldt, current->mm, 1);
> -		preempt_enable();
> -#else
> -		load_LDT(pc);
> +	/* Synchronize with other CPUs. */
> +	if (!cpumask_equal(mm_cpumask(current_mm),
> +			   cpumask_of(smp_processor_id())))

Let it stick out:

	if (!cpumask_equal(mm_cpumask(current_mm), cpumask_of(smp_processor_id())))
		smp_call_function(flush_ldt, current_mm, 1);
>  #endif
> -	}
> -	if (oldsize) {
> -		paravirt_free_ldt(oldldt, oldsize);
> -		if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
> -			vfree(oldldt);
> -		else
> -			put_page(virt_to_page(oldldt));
> -	}
> -	return 0;
> +	preempt_enable();
>  }
>  
> -static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
> +static void free_ldt_struct(struct ldt_struct *ldt)
>  {
> -	int err = alloc_ldt(new, old->size, 0);
> -	int i;
> -
> -	if (err < 0)
> -		return err;
> -
> -	for (i = 0; i < old->size; i++)
> -		write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
> -	return 0;
> +	if (unlikely(ldt)) {

Save an indentation level:

	int alloc_size;

	if (!ldt)
		return;

	alloc_size = sizeof(struct ldt_struct) + ldt->size * LDT_ENTRY_SIZE;

	...

> +		paravirt_free_ldt(ldt->entries, ldt->size);
> +		if (alloc_size > PAGE_SIZE)
> +			vfree(ldt->entries);
> +		else
> +			put_page(virt_to_page(ldt->entries));
> +		kfree(ldt);
> +	}
>  }
>  
>  /*
> @@ -108,13 +110,32 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
>  	int retval = 0;
>  
>  	mutex_init(&mm->context.lock);
> -	mm->context.size = 0;
>  	old_mm = current->mm;
> -	if (old_mm && old_mm->context.size > 0) {
> -		mutex_lock(&old_mm->context.lock);
> -		retval = copy_ldt(&mm->context, &old_mm->context);
> -		mutex_unlock(&old_mm->context.lock);
> +	if (!old_mm) {
> +		mm->context.ldt = NULL;
> +		return 0;
> +	}
> +
> +	mutex_lock(&old_mm->context.lock);
> +	if (old_mm->context.ldt) {

Same here:

	if (!old_mm->context.ldt) {
		mm->context.ldt = NULL;
		goto out_unlock;
	}

	new_ldt = ...

> +		struct ldt_struct *new_ldt =
> +			alloc_ldt_struct(old_mm->context.ldt->size);
> +		if (!new_ldt) {
> +			retval = -ENOMEM;
> +			goto out_unlock;
> +		}
> +
> +		memcpy(new_ldt->entries, old_mm->context.ldt->entries,
> +		       new_ldt->size * LDT_ENTRY_SIZE);
> +		finalize_ldt_struct(new_ldt);
> +
> +		mm->context.ldt = new_ldt;
> +	} else {
> +		mm->context.ldt = NULL;
>  	}
> +
> +out_unlock:
> +	mutex_unlock(&old_mm->context.lock);
>  	return retval;
>  }
>  
> @@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
>   */
>  void destroy_context(struct mm_struct *mm)
>  {
> -	if (mm->context.size) {
> -#ifdef CONFIG_X86_32
> -		/* CHECKME: Can this ever happen ? */
> -		if (mm == current->active_mm)
> -			clear_LDT();
> -#endif
> -		paravirt_free_ldt(mm->context.ldt, mm->context.size);
> -		if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
> -			vfree(mm->context.ldt);
> -		else
> -			put_page(virt_to_page(mm->context.ldt));
> -		mm->context.size = 0;
> -	}
> +	free_ldt_struct(mm->context.ldt);
> +	mm->context.ldt = NULL;
>  }
>  
>  static int read_ldt(void __user *ptr, unsigned long bytecount)
>  {
> -	int err;
> +	int retval;
>  	unsigned long size;
>  	struct mm_struct *mm = current->mm;
>  
> -	if (!mm->context.size)
> -		return 0;
> +	mutex_lock(&mm->context.lock);
> +
> +	if (!mm->context.ldt) {
> +		retval = 0;
> +		goto out_unlock;
> +	}
> +
>  	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
>  		bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
>  
> -	mutex_lock(&mm->context.lock);
> -	size = mm->context.size * LDT_ENTRY_SIZE;
> +	size = mm->context.ldt->size * LDT_ENTRY_SIZE;
>  	if (size > bytecount)
>  		size = bytecount;
>  
> -	err = 0;
> -	if (copy_to_user(ptr, mm->context.ldt, size))
> -		err = -EFAULT;
> -	mutex_unlock(&mm->context.lock);
> -	if (err < 0)
> -		goto error_return;
> +	if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
> +		retval = -EFAULT;
> +		goto out_unlock;
> +	}
> +
>  	if (size != bytecount) {
> -		/* zero-fill the rest */
> +		/* Zero-fill the rest and pretend we read bytecount bytes. */
>  		if (clear_user(ptr + size, bytecount - size) != 0) {

Make that:

		if (clear_user(ptr + size, bytecount - size))

> -			err = -EFAULT;
> -			goto error_return;
> +			retval = -EFAULT;
> +			goto out_unlock;
>  		}
>  	}
> -	return bytecount;
> -error_return:
> -	return err;
> +	retval = bytecount;
> +
> +out_unlock:
> +	mutex_unlock(&mm->context.lock);
> +	return retval;
>  }
>  
>  static int read_default_ldt(void __user *ptr, unsigned long bytecount)
> @@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
>  	struct desc_struct ldt;
>  	int error;
>  	struct user_desc ldt_info;
> +	int oldsize, newsize;
> +	struct ldt_struct *new_ldt, *old_ldt;
>  
>  	error = -EINVAL;
>  	if (bytecount != sizeof(ldt_info))
> @@ -213,34 +230,43 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
>  			goto out;
>  	}
>  
> -	mutex_lock(&mm->context.lock);
> -	if (ldt_info.entry_number >= mm->context.size) {
> -		error = alloc_ldt(&current->mm->context,
> -				  ldt_info.entry_number + 1, 1);
> -		if (error < 0)
> -			goto out_unlock;
> -	}
> -
> -	/* Allow LDTs to be cleared by the user. */
> -	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
> -		if (oldmode || LDT_empty(&ldt_info)) {
> -			memset(&ldt, 0, sizeof(ldt));
> -			goto install;
> +	if ((oldmode && ldt_info.base_addr == 0 && ldt_info.limit == 0) ||

Shorten:

	if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||

> +	    LDT_empty(&ldt_info)) {
> +		/* The user wants to clear the entry. */
> +		memset(&ldt, 0, sizeof(ldt));
> +	} else {
> +		if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
> +			error = -EINVAL;
> +			goto out;
>  		}
> +
> +		fill_ldt(&ldt, &ldt_info);
> +		if (oldmode)
> +			ldt.avl = 0;
>  	}
>  
> -	if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
> -		error = -EINVAL;
> +	mutex_lock(&mm->context.lock);
> +
> +	old_ldt = mm->context.ldt;
> +	oldsize = old_ldt ? old_ldt->size : 0;
> +	newsize = max((int)(ldt_info.entry_number + 1), oldsize);
> +
> +	error = -ENOMEM;
> +	new_ldt = alloc_ldt_struct(newsize);
> +	if (!new_ldt)
>  		goto out_unlock;
> -	}
>  
> -	fill_ldt(&ldt, &ldt_info);
> -	if (oldmode)
> -		ldt.avl = 0;
> +	if (old_ldt) {
> +		memcpy(new_ldt->entries, old_ldt->entries,
> +		       oldsize * LDT_ENTRY_SIZE);
> +	}

A single if-statement doesn't need {}, and you don't absolutely need to
keep to 80 cols. Just let it stick out.

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--


* Re: [PATCH v3 1/3] x86/ldt: Make modify_ldt synchronous
  2015-07-22 22:20   ` Boris Ostrovsky
@ 2015-07-25  4:13     ` Boris Ostrovsky
  2015-07-25  4:58       ` Andy Lutomirski
  0 siblings, 1 reply; 8+ messages in thread
From: Boris Ostrovsky @ 2015-07-25  4:13 UTC (permalink / raw)
  To: Andy Lutomirski, Peter Zijlstra, Steven Rostedt
  Cc: security@kernel.org, X86 ML, Borislav Petkov, Sasha Levin,
	linux-kernel, Konrad Rzeszutek Wilk, Andrew Cooper, Jan Beulich,
	xen-devel, stable



On 07/22/2015 06:20 PM, Boris Ostrovsky wrote:
> On 07/22/2015 03:23 PM, Andy Lutomirski wrote:
>>
>> +    error = -ENOMEM;
>> +    new_ldt = alloc_ldt_struct(newsize);
>> +    if (!new_ldt)
>>           goto out_unlock;
>> -    }
>>   -    fill_ldt(&ldt, &ldt_info);
>> -    if (oldmode)
>> -        ldt.avl = 0;
>> +    if (old_ldt) {
>> +        memcpy(new_ldt->entries, old_ldt->entries,
>> +               oldsize * LDT_ENTRY_SIZE);
>> +    }
>> +    memset(new_ldt->entries + oldsize * LDT_ENTRY_SIZE, 0,
>> +           (newsize - oldsize) * LDT_ENTRY_SIZE);
>
> We need to zero out full page (probably better in alloc_ldt_struct() 
> with vmzalloc/__GFP_ZERO) --- Xen checks whole page that is assigned 
> to  G/LDT and gets unhappy if an invalid descriptor is found there.
>
> This fixes one problem. There is something else that Xen gets upset 
> about, I haven't figured what it is yet (and I am out tomorrow so it 
> may need to wait until Friday).
>


What I thought was another problem turned out not to be one, so both 64-
and 32-bit tests passed on 64-bit PV (when the allocated LDT is zeroed out).

However, on a 32-bit kernel the test fails the multicpu test; I don't
know yet what it is.

-boris




* Re: [PATCH v3 1/3] x86/ldt: Make modify_ldt synchronous
  2015-07-24 15:29   ` Borislav Petkov
@ 2015-07-25  4:52     ` Andy Lutomirski
  2015-07-25  8:37       ` Borislav Petkov
  0 siblings, 1 reply; 8+ messages in thread
From: Andy Lutomirski @ 2015-07-25  4:52 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andy Lutomirski, Peter Zijlstra, Steven Rostedt,
	security@kernel.org, X86 ML, Sasha Levin,
	linux-kernel@vger.kernel.org, Konrad Rzeszutek Wilk,
	Boris Ostrovsky, Andrew Cooper, Jan Beulich, xen-devel, stable

On Fri, Jul 24, 2015 at 8:29 AM, Borislav Petkov <bp@alien8.de> wrote:
> Let it stick out:
>
>         if (!cpumask_equal(mm_cpumask(current_mm), cpumask_of(smp_processor_id())))
>                 smp_call_function(flush_ldt, current_mm, 1);

I see your wide terminal and raise you a complete rewrite of that
function.  Sigh, why did I assume the old code was the right way to do
it?
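
For illustration, one possibility (untested sketch, just to show the
direction) is to let on_each_cpu_mask() cover both the local reload and
the cross-CPU IPIs:

	static void install_ldt(struct mm_struct *current_mm,
				struct ldt_struct *ldt)
	{
		/* Synchronizes with lockless_dereference in load_mm_ldt. */
		smp_store_release(&current_mm->context.ldt, ldt);

		/* Activate the new LDT on every CPU currently running this mm. */
		on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
	}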

>>  #endif
>> -     }
>> -     if (oldsize) {
>> -             paravirt_free_ldt(oldldt, oldsize);
>> -             if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
>> -                     vfree(oldldt);
>> -             else
>> -                     put_page(virt_to_page(oldldt));
>> -     }
>> -     return 0;
>> +     preempt_enable();
>>  }
>>
>> -static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
>> +static void free_ldt_struct(struct ldt_struct *ldt)
>>  {
>> -     int err = alloc_ldt(new, old->size, 0);
>> -     int i;
>> -
>> -     if (err < 0)
>> -             return err;
>> -
>> -     for (i = 0; i < old->size; i++)
>> -             write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
>> -     return 0;
>> +     if (unlikely(ldt)) {
>
> Save an indentation level:
>
>         int alloc_size;
>
>         if (!ldt)
>                 return;
>
>         alloc_size = sizeof(struct ldt_struct) + ldt->size * LDT_ENTRY_SIZE;
>

Hah, we both missed it.  This is wrong.  (Fix your backport!)

>         ...
>
>> +             paravirt_free_ldt(ldt->entries, ldt->size);
>> +             if (alloc_size > PAGE_SIZE)
>> +                     vfree(ldt->entries);
>> +             else
>> +                     put_page(virt_to_page(ldt->entries));

I'm not sure this is correct, so I changed it to something obviously
correct (kmalloc/kfree).
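
I.e. roughly this for the free path (sketch; the sub-page case is then
allocated with kmalloc() instead of __get_free_page()):

	static void free_ldt_struct(struct ldt_struct *ldt)
	{
		if (likely(!ldt))
			return;

		paravirt_free_ldt(ldt->entries, ldt->size);
		if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
			vfree(ldt->entries);
		else
			kfree(ldt->entries);	/* entries now come from kmalloc() */
		kfree(ldt);
	}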

>>
>> -     fill_ldt(&ldt, &ldt_info);
>> -     if (oldmode)
>> -             ldt.avl = 0;
>> +     if (old_ldt) {
>> +             memcpy(new_ldt->entries, old_ldt->entries,
>> +                    oldsize * LDT_ENTRY_SIZE);
>> +     }
>
> A single if-statement doesn't need {}, and you don't absolutely need to
> keep to 80 cols. Just let it stick out.

You read too many of Linus' comments about using wider terminals :)

--Andy


* Re: [PATCH v3 1/3] x86/ldt: Make modify_ldt synchronous
  2015-07-25  4:13     ` Boris Ostrovsky
@ 2015-07-25  4:58       ` Andy Lutomirski
  0 siblings, 0 replies; 8+ messages in thread
From: Andy Lutomirski @ 2015-07-25  4:58 UTC (permalink / raw)
  To: Boris Ostrovsky
  Cc: Andy Lutomirski, Peter Zijlstra, Steven Rostedt,
	security@kernel.org, X86 ML, Borislav Petkov, Sasha Levin,
	linux-kernel@vger.kernel.org, Konrad Rzeszutek Wilk,
	Andrew Cooper, Jan Beulich, xen-devel, stable

On Fri, Jul 24, 2015 at 9:13 PM, Boris Ostrovsky
<boris.ostrovsky@oracle.com> wrote:
>
>
> On 07/22/2015 06:20 PM, Boris Ostrovsky wrote:
>>
>> On 07/22/2015 03:23 PM, Andy Lutomirski wrote:
>>>
>>>
>>> +    error = -ENOMEM;
>>> +    new_ldt = alloc_ldt_struct(newsize);
>>> +    if (!new_ldt)
>>>           goto out_unlock;
>>> -    }
>>>   -    fill_ldt(&ldt, &ldt_info);
>>> -    if (oldmode)
>>> -        ldt.avl = 0;
>>> +    if (old_ldt) {
>>> +        memcpy(new_ldt->entries, old_ldt->entries,
>>> +               oldsize * LDT_ENTRY_SIZE);
>>> +    }
>>> +    memset(new_ldt->entries + oldsize * LDT_ENTRY_SIZE, 0,
>>> +           (newsize - oldsize) * LDT_ENTRY_SIZE);
>>
>>
>> We need to zero out the full page (probably better done in alloc_ldt_struct()
>> with vzalloc()/__GFP_ZERO) --- Xen checks the whole page that is assigned to
>> the G/LDT and gets unhappy if an invalid descriptor is found there.
>>
>> This fixes one problem. There is something else that Xen gets upset about,
>> I haven't figured what it is yet (and I am out tomorrow so it may need to
>> wait until Friday).
>>
>
>
> What I thought was another problem turned out not to be one, so both 64- and
> 32-bit tests passed on 64-bit PV (when the allocated LDT is zeroed out).
>
> However, on a 32-bit kernel the test fails the multicpu test; I don't know
> yet what it is.

Test case bug or unrelated kernel bug depending on your point of view.
I forgot that x86_32 and x86_64 have very different handling of IRET
faults.  Wait for v2 :)

--Andy


* Re: [PATCH v3 1/3] x86/ldt: Make modify_ldt synchronous
  2015-07-25  4:52     ` Andy Lutomirski
@ 2015-07-25  8:37       ` Borislav Petkov
  0 siblings, 0 replies; 8+ messages in thread
From: Borislav Petkov @ 2015-07-25  8:37 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andy Lutomirski, Peter Zijlstra, Steven Rostedt,
	security@kernel.org, X86 ML, Sasha Levin,
	linux-kernel@vger.kernel.org, Konrad Rzeszutek Wilk,
	Boris Ostrovsky, Andrew Cooper, Jan Beulich, xen-devel, stable

On Fri, Jul 24, 2015 at 09:52:01PM -0700, Andy Lutomirski wrote:
> I see your wide terminal and raise you a complete rewrite of that
> function.  Sigh, why did I assume the old code was the right way to do
> it?

That's a mostly wrong assumption, as experience proves.

> Hah¸ we both missed it.  This is wrong.  (Fix your backport!)

Yikes:

	alloc_size = size * LDT_ENTRY_SIZE;

But hey, I made you spot it, still! :-)

Done.

> I'm not sure this is correct, so I changed it to something obviously
> correct (kmalloc/kfree).

Someone thought she wouldn't get contiguous memory from kmalloc(). But how
big can alloc_size be for it to fail...
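
(Worst case is LDT_ENTRIES * LDT_ENTRY_SIZE = 8192 * 8 bytes = 64 KiB,
i.e. an order-4 allocation with 4K pages.)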

> You read too many of Linus' comments about using wider terminals :)

Nah, I'm just trying to put back some sanity into that 80-cols rule, which
even you think is a hard one. And I say, keep 80 cols, but sanity can
override it if what 80 cols produces is crap.

I trust you're sane enough to apply that and not think C++ or java
wankery. Woahahahah...

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--


end of thread (newest message: 2015-07-25  8:37 UTC)

Thread overview: 8+ messages:
     [not found] <cover.1437592883.git.luto@kernel.org>
2015-07-22 19:23 ` [PATCH v3 1/3] x86/ldt: Make modify_ldt synchronous Andy Lutomirski
2015-07-22 22:20   ` Boris Ostrovsky
2015-07-25  4:13     ` Boris Ostrovsky
2015-07-25  4:58       ` Andy Lutomirski
2015-07-24  6:37   ` Borislav Petkov
2015-07-24 15:29   ` Borislav Petkov
2015-07-25  4:52     ` Andy Lutomirski
2015-07-25  8:37       ` Borislav Petkov
