linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v5 1/3] x86/mm: Adapt MODULES_END based on Fixmap section size
@ 2017-03-06 22:03 Thomas Garnier
  2017-03-06 22:03 ` [PATCH v5 2/3] x86: Remap GDT tables in the Fixmap section Thomas Garnier
  2017-03-06 22:03 ` [PATCH v5 3/3] x86: Make the GDT remapping read-only on 64-bit Thomas Garnier
  0 siblings, 2 replies; 14+ messages in thread
From: Thomas Garnier @ 2017-03-06 22:03 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Jonathan Corbet,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Thomas Garnier, Kees Cook, Juergen Gross, Lorenzo Stoakes,
	Andy Lutomirski, Paul Gortmaker, Andrew Morton, Michal Hocko,
	zijun_hu, Chris Wilson, Joonsoo Kim, Andy Lutomirski,
	Rafael J . Wysocki, Len Brown, Pavel Machek, Jiri Kosina,
	Matt Fleming, Ard Biesheuvel, Boris Ostrovsky, Rusty Russell,
	Paolo Bonzini, Borislav Petkov, Christian Borntraeger,
	Luis R . Rodriguez, Fenghua Yu, He Chen, Brian Gerst,
	Frederic Weisbecker, Stanislaw Gruszka, Arnd Bergmann,
	Peter Zijlstra, Dave Hansen, Josh Poimboeuf, Vitaly Kuznetsov,
	David Vrabel, Tim Chen, Andi Kleen, Jiri Olsa, Prarit Bhargava,
	Michael Ellerman, Joerg Roedel, Radim Krčmář
  Cc: x86, linux-kernel, linux-doc, kasan-dev, linux-mm, linux-pm,
	linux-efi, xen-devel, lguest, kvm, kernel-hardening

This patch aligns MODULES_END to the beginning of the Fixmap section.
It optimizes the space available for both sections. The address is
pre-computed based on the number of pages required by the Fixmap
section.

It will allow GDT remapping in the Fixmap section. The current static
MODULES_END address does not leave enough room in the Fixmap section
for the kernel to support a large number of processors.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170306
---
 Documentation/x86/x86_64/mm.txt         | 5 ++++-
 arch/x86/include/asm/pgtable_64_types.h | 3 ++-
 arch/x86/kernel/module.c                | 1 +
 arch/x86/mm/dump_pagetables.c           | 1 +
 arch/x86/mm/kasan_init_64.c             | 1 +
 mm/vmalloc.c                            | 1 +
 6 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 5724092db811..ee3f9c30957c 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -19,7 +19,7 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
+ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
 ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
@@ -39,6 +39,9 @@ memory window (this size is arbitrary, it can be raised later if needed).
 The mappings are not part of any other kernel PGD and are only available
 during EFI runtime calls.
 
+The module mapping space size changes based on the CONFIG requirements for the
+following fixmap section.
+
 Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
 physical memory, vmalloc/ioremap space and virtual memory map are randomized.
 Their order is preserved but their base will be offset early at boot time.
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 3a264200c62f..bb05e21cf3c7 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -67,7 +67,8 @@ typedef struct { pteval_t pte; } pte_t;
 #endif /* CONFIG_RANDOMIZE_MEMORY */
 #define VMALLOC_END	(VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
 #define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
-#define MODULES_END      _AC(0xffffffffff000000, UL)
+/* The module sections ends with the start of the fixmap */
+#define MODULES_END   __fix_to_virt(__end_of_fixed_addresses + 1)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
 #define ESPFIX_PGD_ENTRY _AC(-2, UL)
 #define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 477ae806c2fa..fad61caac75e 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -35,6 +35,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
+#include <asm/fixmap.h>
 
 #if 0
 #define DEBUGP(fmt, ...)				\
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 58b5bee7ea27..75efeecc85eb 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -20,6 +20,7 @@
 
 #include <asm/kasan.h>
 #include <asm/pgtable.h>
+#include <asm/fixmap.h>
 
 /*
  * The dumper groups pagetable entries of the same type into one, and for
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 8d63d7a104c3..1bde19ef86bd 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -9,6 +9,7 @@
 
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
+#include <asm/fixmap.h>
 
 extern pgd_t early_level4_pgt[PTRS_PER_PGD];
 extern struct range pfn_mapped[E820_X_MAX];
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index b4024d688f38..91b0d5e19af6 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -35,6 +35,7 @@
 #include <linux/uaccess.h>
 #include <asm/tlbflush.h>
 #include <asm/shmparam.h>
+#include <asm/fixmap.h>
 
 #include "internal.h"
 
-- 
2.12.0.rc1.440.g5b76565f74-goog

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v5 2/3] x86: Remap GDT tables in the Fixmap section
  2017-03-06 22:03 [PATCH v5 1/3] x86/mm: Adapt MODULES_END based on Fixmap section size Thomas Garnier
@ 2017-03-06 22:03 ` Thomas Garnier
  2017-03-09 21:32   ` Andy Lutomirski
  2017-03-06 22:03 ` [PATCH v5 3/3] x86: Make the GDT remapping read-only on 64-bit Thomas Garnier
  1 sibling, 1 reply; 14+ messages in thread
From: Thomas Garnier @ 2017-03-06 22:03 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Jonathan Corbet,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Thomas Garnier, Kees Cook, Juergen Gross, Lorenzo Stoakes,
	Andy Lutomirski, Paul Gortmaker, Andrew Morton, Michal Hocko,
	zijun_hu, Chris Wilson, Joonsoo Kim, Andy Lutomirski,
	Rafael J . Wysocki, Len Brown, Pavel Machek, Jiri Kosina,
	Matt Fleming, Ard Biesheuvel, Boris Ostrovsky, Rusty Russell,
	Paolo Bonzini, Borislav Petkov, Christian Borntraeger,
	Luis R . Rodriguez, Fenghua Yu, He Chen, Brian Gerst,
	Frederic Weisbecker, Stanislaw Gruszka, Arnd Bergmann,
	Peter Zijlstra, Dave Hansen, Josh Poimboeuf, Vitaly Kuznetsov,
	David Vrabel, Tim Chen, Andi Kleen, Jiri Olsa, Prarit Bhargava,
	Michael Ellerman, Joerg Roedel, Radim Krčmář
  Cc: x86, linux-kernel, linux-doc, kasan-dev, linux-mm, linux-pm,
	linux-efi, xen-devel, lguest, kvm, kernel-hardening

Each processor holds a GDT in its per-cpu structure. The sgdt
instruction gives the base address of the current GDT. This address can
be used to bypass KASLR memory randomization. With another bug, an
attacker could target other per-cpu structures or deduce the base of
the main memory section (PAGE_OFFSET).

This patch relocates the GDT table for each processor inside the
Fixmap section. The space is reserved based on the number of supported
processors.

For consistency, the remapping is done by default on 32 and 64-bit.

Each processor switches to its remapped GDT at the end of
initialization. For hibernation, the main processor returns with the
original GDT and switches back to the remapping at completion.

This patch was tested on both architectures. Hibernation and KVM were
both tested specifically for their usage of the GDT.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170306
---
 arch/x86/entry/vdso/vma.c             |  2 +-
 arch/x86/include/asm/desc.h           | 57 ++++++++++++++++++++++++++++++++---
 arch/x86/include/asm/fixmap.h         |  4 +++
 arch/x86/include/asm/processor.h      |  1 +
 arch/x86/include/asm/stackprotector.h |  2 +-
 arch/x86/kernel/acpi/sleep.c          |  2 +-
 arch/x86/kernel/apm_32.c              |  6 ++--
 arch/x86/kernel/cpu/common.c          | 26 ++++++++++++++--
 arch/x86/kernel/setup_percpu.c        |  2 +-
 arch/x86/kernel/smpboot.c             |  2 +-
 arch/x86/platform/efi/efi_32.c        |  4 +--
 arch/x86/power/cpu.c                  |  7 +++--
 arch/x86/xen/enlighten.c              |  2 +-
 arch/x86/xen/smp.c                    |  2 +-
 drivers/lguest/x86/core.c             |  6 ++--
 drivers/pnp/pnpbios/bioscalls.c       | 10 +++---
 16 files changed, 106 insertions(+), 29 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 226ca70dc6bd..5c5d4d7618e6 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -354,7 +354,7 @@ static void vgetcpu_cpu_init(void *arg)
 	d.p = 1;		/* Present */
 	d.d = 1;		/* 32-bit */
 
-	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+	write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
 }
 
 static int vgetcpu_online(unsigned int cpu)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 1548ca92ad3f..549393ae93a0 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -4,6 +4,7 @@
 #include <asm/desc_defs.h>
 #include <asm/ldt.h>
 #include <asm/mmu.h>
+#include <asm/fixmap.h>
 
 #include <linux/smp.h>
 #include <linux/percpu.h>
@@ -45,11 +46,57 @@ struct gdt_page {
 
 DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
 
-static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+/* Provide the original GDT */
+static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
 {
 	return per_cpu(gdt_page, cpu).gdt;
 }
 
+static inline unsigned long get_cpu_gdt_rw_vaddr(unsigned int cpu)
+{
+	return (unsigned long)get_cpu_gdt_rw(cpu);
+}
+
+/* Provide the current original GDT */
+static inline struct desc_struct *get_current_gdt_rw(void)
+{
+	return this_cpu_ptr(&gdt_page)->gdt;
+}
+
+static inline unsigned long get_current_gdt_rw_vaddr(void)
+{
+	return (unsigned long)get_current_gdt_rw();
+}
+
+/* Get the fixmap index for a specific processor */
+static inline unsigned int get_cpu_gdt_ro_index(int cpu)
+{
+	return FIX_GDT_REMAP_BEGIN + cpu;
+}
+
+/* Provide the fixmap address of the remapped GDT */
+static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
+{
+	unsigned int idx = get_cpu_gdt_ro_index(cpu);
+	return (struct desc_struct *)__fix_to_virt(idx);
+}
+
+static inline unsigned long get_cpu_gdt_ro_vaddr(int cpu)
+{
+	return (unsigned long)get_cpu_gdt_ro(cpu);
+}
+
+/* Provide the current read-only GDT */
+static inline struct desc_struct *get_current_gdt_ro(void)
+{
+	return get_cpu_gdt_ro(smp_processor_id());
+}
+
+static inline unsigned long get_current_gdt_ro_vaddr(void)
+{
+	return (unsigned long)get_current_gdt_ro();
+}
+
 #ifdef CONFIG_X86_64
 
 static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
@@ -174,7 +221,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned t
 
 static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
 {
-	struct desc_struct *d = get_cpu_gdt_table(cpu);
+	struct desc_struct *d = get_cpu_gdt_rw(cpu);
 	tss_desc tss;
 
 	set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
@@ -194,7 +241,7 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
 
 		set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
 				      entries * LDT_ENTRY_SIZE - 1);
-		write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
+		write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_LDT,
 				&ldt, DESC_LDT);
 		asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
 	}
@@ -209,7 +256,7 @@ DECLARE_PER_CPU(bool, __tss_limit_invalid);
 
 static inline void force_reload_TR(void)
 {
-	struct desc_struct *d = get_cpu_gdt_table(smp_processor_id());
+	struct desc_struct *d = get_current_gdt_rw();
 	tss_desc tss;
 
 	memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc));
@@ -288,7 +335,7 @@ static inline unsigned long native_store_tr(void)
 
 static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
 {
-	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+	struct desc_struct *gdt = get_cpu_gdt_rw(cpu);
 	unsigned int i;
 
 	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 8554f960e21b..b65155cc3760 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -100,6 +100,10 @@ enum fixed_addresses {
 #ifdef	CONFIG_X86_INTEL_MID
 	FIX_LNW_VRTC,
 #endif
+	/* Fixmap entries to remap the GDTs, one per processor. */
+	FIX_GDT_REMAP_BEGIN,
+	FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
+
 	__end_of_permanent_fixed_addresses,
 
 	/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f385eca5407a..2ec4d2dc559b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -716,6 +716,7 @@ extern struct desc_ptr		early_gdt_descr;
 
 extern void cpu_set_gdt(int);
 extern void switch_to_new_gdt(int);
+extern void load_fixmap_gdt(int);
 extern void load_percpu_segment(int);
 extern void cpu_init(void);
 
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 58505f01962f..dcbd9bcce714 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -87,7 +87,7 @@ static inline void setup_stack_canary_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
 	unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
-	struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
+	struct desc_struct *gdt_table = get_cpu_gdt_rw(cpu);
 	struct desc_struct desc;
 
 	desc = gdt_table[GDT_ENTRY_STACK_CANARY];
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 48587335ede8..ed014814ea35 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,7 +101,7 @@ int x86_acpi_suspend_lowlevel(void)
 #ifdef CONFIG_SMP
 	initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
 	early_gdt_descr.address =
-			(unsigned long)get_cpu_gdt_table(smp_processor_id());
+			(unsigned long)get_cpu_gdt_rw(smp_processor_id());
 	initial_gs = per_cpu_offset(smp_processor_id());
 #endif
 	initial_code = (unsigned long)wakeup_long64;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 5a414545e8a3..446b0d3d4932 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -609,7 +609,7 @@ static long __apm_bios_call(void *_call)
 
 	cpu = get_cpu();
 	BUG_ON(cpu != 0);
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_gdt_rw(cpu);
 	save_desc_40 = gdt[0x40 / 8];
 	gdt[0x40 / 8] = bad_bios_desc;
 
@@ -685,7 +685,7 @@ static long __apm_bios_call_simple(void *_call)
 
 	cpu = get_cpu();
 	BUG_ON(cpu != 0);
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_gdt_rw(cpu);
 	save_desc_40 = gdt[0x40 / 8];
 	gdt[0x40 / 8] = bad_bios_desc;
 
@@ -2352,7 +2352,7 @@ static int __init apm_init(void)
 	 * Note we only set APM segments on CPU zero, since we pin the APM
 	 * code to that CPU.
 	 */
-	gdt = get_cpu_gdt_table(0);
+	gdt = get_cpu_gdt_rw(0);
 	set_desc_base(&gdt[APM_CS >> 3],
 		 (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4));
 	set_desc_base(&gdt[APM_CS_16 >> 3],
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 58094a1f9e9d..a9e847da014a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -448,6 +448,23 @@ void load_percpu_segment(int cpu)
 	load_stack_canary_segment();
 }
 
+/* Setup the fixmap mapping only once per-processor */
+static inline void setup_fixmap_gdt(int cpu)
+{
+	__set_fixmap(get_cpu_gdt_ro_index(cpu),
+		     __pa(get_cpu_gdt_rw(cpu)), PAGE_KERNEL);
+}
+
+/* Load a fixmap remapping of the per-cpu GDT */
+void load_fixmap_gdt(int cpu)
+{
+	struct desc_ptr gdt_descr;
+
+	gdt_descr.address = (long)get_cpu_gdt_ro(cpu);
+	gdt_descr.size = GDT_SIZE - 1;
+	load_gdt(&gdt_descr);
+}
+
 /*
  * Current gdt points %fs at the "master" per-cpu area: after this,
  * it's on the real one.
@@ -456,11 +473,10 @@ void switch_to_new_gdt(int cpu)
 {
 	struct desc_ptr gdt_descr;
 
-	gdt_descr.address = (long)get_cpu_gdt_table(cpu);
+	gdt_descr.address = (long)get_cpu_gdt_rw(cpu);
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 	/* Reload the per-cpu base */
-
 	load_percpu_segment(cpu);
 }
 
@@ -1526,6 +1542,9 @@ void cpu_init(void)
 
 	if (is_uv_system())
 		uv_cpu_init();
+
+	setup_fixmap_gdt(cpu);
+	load_fixmap_gdt(cpu);
 }
 
 #else
@@ -1581,6 +1600,9 @@ void cpu_init(void)
 	dbg_restore_debug_regs();
 
 	fpu__init_cpu();
+
+	setup_fixmap_gdt(cpu);
+	load_fixmap_gdt(cpu);
 }
 #endif
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 9820d6d977c6..11338b0b3ad2 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -160,7 +160,7 @@ static inline void setup_percpu_segment(int cpu)
 	pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
 			0x2 | DESCTYPE_S, 0x8);
 	gdt.s = 1;
-	write_gdt_entry(get_cpu_gdt_table(cpu),
+	write_gdt_entry(get_cpu_gdt_rw(cpu),
 			GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
 #endif
 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index bd1f1ad35284..f04479a8f74f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -983,7 +983,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	unsigned long timeout;
 
 	idle->thread.sp = (unsigned long)task_pt_regs(idle);
-	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+	early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
 	initial_code = (unsigned long)start_secondary;
 	initial_stack  = idle->thread.sp;
 
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index cef39b097649..950071171436 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -68,7 +68,7 @@ pgd_t * __init efi_call_phys_prolog(void)
 	load_cr3(initial_page_table);
 	__flush_tlb_all();
 
-	gdt_descr.address = __pa(get_cpu_gdt_table(0));
+	gdt_descr.address = __pa(get_cpu_gdt_rw(0));
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 
@@ -79,7 +79,7 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
 	struct desc_ptr gdt_descr;
 
-	gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
+	gdt_descr.address = (unsigned long)get_cpu_gdt_rw(0);
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 66ade16c7693..6b05a9219ea2 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -95,7 +95,7 @@ static void __save_processor_state(struct saved_context *ctxt)
 	 * 'pmode_gdt' in wakeup_start.
 	 */
 	ctxt->gdt_desc.size = GDT_SIZE - 1;
-	ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_table(smp_processor_id());
+	ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_rw(smp_processor_id());
 
 	store_tr(ctxt->tr);
 
@@ -162,7 +162,7 @@ static void fix_processor_context(void)
 	int cpu = smp_processor_id();
 	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 #ifdef CONFIG_X86_64
-	struct desc_struct *desc = get_cpu_gdt_table(cpu);
+	struct desc_struct *desc = get_cpu_gdt_rw(cpu);
 	tss_desc tss;
 #endif
 	set_tss_desc(cpu, t);	/*
@@ -183,6 +183,9 @@ static void fix_processor_context(void)
 	load_mm_ldt(current->active_mm);	/* This does lldt */
 
 	fpu__resume_cpu();
+
+	/* The processor is back on the direct GDT, load back the fixmap */
+	load_fixmap_gdt(cpu);
 }
 
 /**
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index ec1d5c46e58f..4951fcf95143 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -710,7 +710,7 @@ static void load_TLS_descriptor(struct thread_struct *t,
 
 	*shadow = t->tls_array[i];
 
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_gdt_rw(cpu);
 	maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
 	mc = __xen_mc_entry(0);
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 7ff2f1bfb7ec..eaa36162ed4a 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -392,7 +392,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	if (ctxt == NULL)
 		return -ENOMEM;
 
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_gdt_rw(cpu);
 
 #ifdef CONFIG_X86_32
 	ctxt->user_regs.fs = __KERNEL_PERCPU;
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index d71f6323ac00..b4f79b923aea 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -504,7 +504,7 @@ void __init lguest_arch_host_init(void)
 		 * byte, not the size, hence the "-1").
 		 */
 		state->host_gdt_desc.size = GDT_SIZE-1;
-		state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
+		state->host_gdt_desc.address = (long)get_cpu_gdt_rw(i);
 
 		/*
 		 * All CPUs on the Host use the same Interrupt Descriptor
@@ -554,8 +554,8 @@ void __init lguest_arch_host_init(void)
 		 * The Host needs to be able to use the LGUEST segments on this
 		 * CPU, too, so put them in the Host GDT.
 		 */
-		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
-		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
+		get_cpu_gdt_rw(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
+		get_cpu_gdt_rw(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
 	}
 
 	/*
diff --git a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c
index 438d4c72c7b3..ff563db025b3 100644
--- a/drivers/pnp/pnpbios/bioscalls.c
+++ b/drivers/pnp/pnpbios/bioscalls.c
@@ -54,7 +54,7 @@ __asm__(".text			\n"
 
 #define Q2_SET_SEL(cpu, selname, address, size) \
 do { \
-	struct desc_struct *gdt = get_cpu_gdt_table((cpu)); \
+	struct desc_struct *gdt = get_cpu_gdt_rw((cpu)); \
 	set_desc_base(&gdt[(selname) >> 3], (u32)(address)); \
 	set_desc_limit(&gdt[(selname) >> 3], (size) - 1); \
 } while(0)
@@ -95,8 +95,8 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
 		return PNP_FUNCTION_NOT_SUPPORTED;
 
 	cpu = get_cpu();
-	save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
-	get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
+	save_desc_40 = get_cpu_gdt_rw(cpu)[0x40 / 8];
+	get_cpu_gdt_rw(cpu)[0x40 / 8] = bad_bios_desc;
 
 	/* On some boxes IRQ's during PnP BIOS calls are deadly.  */
 	spin_lock_irqsave(&pnp_bios_lock, flags);
@@ -134,7 +134,7 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
 			     :"memory");
 	spin_unlock_irqrestore(&pnp_bios_lock, flags);
 
-	get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
+	get_cpu_gdt_rw(cpu)[0x40 / 8] = save_desc_40;
 	put_cpu();
 
 	/* If we get here and this is set then the PnP BIOS faulted on us. */
@@ -477,7 +477,7 @@ void pnpbios_calls_init(union pnp_bios_install_struct *header)
 	pnp_bios_callpoint.segment = PNP_CS16;
 
 	for_each_possible_cpu(i) {
-		struct desc_struct *gdt = get_cpu_gdt_table(i);
+		struct desc_struct *gdt = get_cpu_gdt_rw(i);
 		if (!gdt)
 			continue;
 		set_desc_base(&gdt[GDT_ENTRY_PNPBIOS_CS32],
-- 
2.12.0.rc1.440.g5b76565f74-goog

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v5 3/3] x86: Make the GDT remapping read-only on 64-bit
  2017-03-06 22:03 [PATCH v5 1/3] x86/mm: Adapt MODULES_END based on Fixmap section size Thomas Garnier
  2017-03-06 22:03 ` [PATCH v5 2/3] x86: Remap GDT tables in the Fixmap section Thomas Garnier
@ 2017-03-06 22:03 ` Thomas Garnier
  2017-03-09 21:35   ` Andy Lutomirski
  1 sibling, 1 reply; 14+ messages in thread
From: Thomas Garnier @ 2017-03-06 22:03 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Jonathan Corbet,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Thomas Garnier, Kees Cook, Juergen Gross, Lorenzo Stoakes,
	Andy Lutomirski, Paul Gortmaker, Andrew Morton, Michal Hocko,
	zijun_hu, Chris Wilson, Joonsoo Kim, Andy Lutomirski,
	Rafael J . Wysocki, Len Brown, Pavel Machek, Jiri Kosina,
	Matt Fleming, Ard Biesheuvel, Boris Ostrovsky, Rusty Russell,
	Paolo Bonzini, Borislav Petkov, Christian Borntraeger,
	Luis R . Rodriguez, Fenghua Yu, He Chen, Brian Gerst,
	Frederic Weisbecker, Stanislaw Gruszka, Arnd Bergmann,
	Peter Zijlstra, Dave Hansen, Josh Poimboeuf, Vitaly Kuznetsov,
	David Vrabel, Tim Chen, Andi Kleen, Jiri Olsa, Prarit Bhargava,
	Michael Ellerman, Joerg Roedel, Radim Krčmář
  Cc: x86, linux-kernel, linux-doc, kasan-dev, linux-mm, linux-pm,
	linux-efi, xen-devel, lguest, kvm, kernel-hardening

This patch makes the GDT remapped pages read-only to prevent corruption.
This change is done only on 64-bit.

The native_load_tr_desc function was adapted to correctly handle a
read-only GDT. The LTR instruction always writes to the GDT TSS entry.
This generates a page fault if the GDT is read-only. This change checks
if the current GDT is a remap and swaps GDTs as needed. This function was
tested by booting multiple machines and checking hibernation works
properly.

KVM SVM and VMX were adapted to use the writeable GDT. On VMX, the
per-cpu variable was removed in favor of functions that fetch the
original GDT. Instead of reloading the previous GDT, VMX will reload
the fixmap GDT as expected. For testing, VMs were started and restored
on multiple configurations.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170306
---
 arch/x86/include/asm/desc.h      | 106 +++++++++++++++++++++++++--------------
 arch/x86/include/asm/processor.h |   1 +
 arch/x86/kernel/cpu/common.c     |  28 ++++++++---
 arch/x86/kvm/svm.c               |   4 +-
 arch/x86/kvm/vmx.c               |  11 ++--
 5 files changed, 96 insertions(+), 54 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 549393ae93a0..9b7fda6a2d73 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -247,9 +247,77 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
 	}
 }
 
+static inline void native_load_gdt(const struct desc_ptr *dtr)
+{
+	asm volatile("lgdt %0"::"m" (*dtr));
+}
+
+static inline void native_load_idt(const struct desc_ptr *dtr)
+{
+	asm volatile("lidt %0"::"m" (*dtr));
+}
+
+static inline void native_store_gdt(struct desc_ptr *dtr)
+{
+	asm volatile("sgdt %0":"=m" (*dtr));
+}
+
+static inline void native_store_idt(struct desc_ptr *dtr)
+{
+	asm volatile("sidt %0":"=m" (*dtr));
+}
+
+/*
+ * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
+ * a read-only remapping. To prevent a page fault, the GDT is switched to the
+ * original writeable version when needed.
+ */
+#ifdef CONFIG_X86_64
 static inline void native_load_tr_desc(void)
 {
+	struct desc_ptr gdt;
+	int cpu = raw_smp_processor_id();
+	bool restore = 0;
+	struct desc_struct *fixmap_gdt;
+
+	native_store_gdt(&gdt);
+	fixmap_gdt = get_cpu_gdt_ro(cpu);
+
+	/*
+	 * If the current GDT is the read-only fixmap, swap to the original
+	 * writeable version. Swap back at the end.
+	 */
+	if (gdt.address == (unsigned long)fixmap_gdt) {
+		load_direct_gdt(cpu);
+		restore = 1;
+	}
 	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+	if (restore)
+		load_fixmap_gdt(cpu);
+}
+#else
+static inline void native_load_tr_desc(void)
+{
+	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+}
+#endif
+
+static inline unsigned long native_store_tr(void)
+{
+	unsigned long tr;
+
+	asm volatile("str %0":"=r" (tr));
+
+	return tr;
+}
+
+static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
+{
+	struct desc_struct *gdt = get_cpu_gdt_rw(cpu);
+	unsigned int i;
+
+	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
+		gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
 }
 
 DECLARE_PER_CPU(bool, __tss_limit_invalid);
@@ -304,44 +372,6 @@ static inline void invalidate_tss_limit(void)
 		this_cpu_write(__tss_limit_invalid, true);
 }
 
-static inline void native_load_gdt(const struct desc_ptr *dtr)
-{
-	asm volatile("lgdt %0"::"m" (*dtr));
-}
-
-static inline void native_load_idt(const struct desc_ptr *dtr)
-{
-	asm volatile("lidt %0"::"m" (*dtr));
-}
-
-static inline void native_store_gdt(struct desc_ptr *dtr)
-{
-	asm volatile("sgdt %0":"=m" (*dtr));
-}
-
-static inline void native_store_idt(struct desc_ptr *dtr)
-{
-	asm volatile("sidt %0":"=m" (*dtr));
-}
-
-static inline unsigned long native_store_tr(void)
-{
-	unsigned long tr;
-
-	asm volatile("str %0":"=r" (tr));
-
-	return tr;
-}
-
-static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
-{
-	struct desc_struct *gdt = get_cpu_gdt_rw(cpu);
-	unsigned int i;
-
-	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
-		gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
-}
-
 /* This intentionally ignores lm, since 32-bit apps don't have that field. */
 #define LDT_empty(info)					\
 	((info)->base_addr		== 0	&&	\
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2ec4d2dc559b..28828f1f99a4 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -716,6 +716,7 @@ extern struct desc_ptr		early_gdt_descr;
 
 extern void cpu_set_gdt(int);
 extern void switch_to_new_gdt(int);
+extern void load_direct_gdt(int);
 extern void load_fixmap_gdt(int);
 extern void load_percpu_segment(int);
 extern void cpu_init(void);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a9e847da014a..bff2f8bb13b5 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -448,13 +448,31 @@ void load_percpu_segment(int cpu)
 	load_stack_canary_segment();
 }
 
+/* On 64-bit the GDT remapping is read-only */
+#ifdef CONFIG_X86_64
+#define PAGE_FIXMAP_GDT PAGE_KERNEL_RO
+#else
+#define PAGE_FIXMAP_GDT PAGE_KERNEL
+#endif
+
 /* Setup the fixmap mapping only once per-processor */
 static inline void setup_fixmap_gdt(int cpu)
 {
 	__set_fixmap(get_cpu_gdt_ro_index(cpu),
-		     __pa(get_cpu_gdt_rw(cpu)), PAGE_KERNEL);
+		     __pa(get_cpu_gdt_rw(cpu)), PAGE_FIXMAP_GDT);
 }
 
+/* Load the original GDT from the per-cpu structure */
+void load_direct_gdt(int cpu)
+{
+	struct desc_ptr gdt_descr;
+
+	gdt_descr.address = (long)get_cpu_gdt_rw(cpu);
+	gdt_descr.size = GDT_SIZE - 1;
+	load_gdt(&gdt_descr);
+}
+EXPORT_SYMBOL_GPL(load_direct_gdt);
+
 /* Load a fixmap remapping of the per-cpu GDT */
 void load_fixmap_gdt(int cpu)
 {
@@ -464,6 +482,7 @@ void load_fixmap_gdt(int cpu)
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 }
+EXPORT_SYMBOL_GPL(load_fixmap_gdt);
 
 /*
  * Current gdt points %fs at the "master" per-cpu area: after this,
@@ -471,11 +490,8 @@ void load_fixmap_gdt(int cpu)
  */
 void switch_to_new_gdt(int cpu)
 {
-	struct desc_ptr gdt_descr;
-
-	gdt_descr.address = (long)get_cpu_gdt_rw(cpu);
-	gdt_descr.size = GDT_SIZE - 1;
-	load_gdt(&gdt_descr);
+	/* Load the original GDT */
+	load_direct_gdt(cpu);
 	/* Reload the per-cpu base */
 	load_percpu_segment(cpu);
 }
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d1efe2c62b3f..c02b9af2056a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -741,7 +741,6 @@ static int svm_hardware_enable(void)
 
 	struct svm_cpu_data *sd;
 	uint64_t efer;
-	struct desc_ptr gdt_descr;
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
 
@@ -763,8 +762,7 @@ static int svm_hardware_enable(void)
 	sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
 	sd->next_asid = sd->max_asid + 1;
 
-	native_store_gdt(&gdt_descr);
-	gdt = (struct desc_struct *)gdt_descr.address;
+	gdt = get_current_gdt_rw();
 	sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
 	wrmsrl(MSR_EFER, efer | EFER_SVME);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 283aa8601833..440ba96e4dfe 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -935,7 +935,6 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
  * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
  */
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
-static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
 /*
  * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
@@ -2059,7 +2058,7 @@ static unsigned long segment_base(u16 selector)
 	if (!(selector & ~SEGMENT_RPL_MASK))
 		return 0;
 
-	table = (struct desc_struct *)gdt->address;
+	table = get_current_gdt_ro();
 
 	if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
 		u16 ldt_selector = kvm_read_ldt();
@@ -2164,7 +2163,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 #endif
 	if (vmx->host_state.msr_host_bndcfgs)
 		wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
-	load_gdt(this_cpu_ptr(&host_gdt));
+	load_fixmap_gdt(raw_smp_processor_id());
 }
 
 static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -2266,7 +2265,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	}
 
 	if (!already_loaded) {
-		struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+		unsigned long gdt = get_current_gdt_ro_vaddr();
 		unsigned long sysenter_esp;
 
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
@@ -2277,7 +2276,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		 */
 		vmcs_writel(HOST_TR_BASE,
 			    (unsigned long)this_cpu_ptr(&cpu_tss));
-		vmcs_writel(HOST_GDTR_BASE, gdt->address);
+		vmcs_writel(HOST_GDTR_BASE, gdt);   /* 22.2.4 */
 
 		/*
 		 * VM exits change the host TR limit to 0x67 after a VM
@@ -3465,8 +3464,6 @@ static int hardware_enable(void)
 		ept_sync_global();
 	}
 
-	native_store_gdt(this_cpu_ptr(&host_gdt));
-
 	return 0;
 }
 
-- 
2.12.0.rc1.440.g5b76565f74-goog

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v5 2/3] x86: Remap GDT tables in the Fixmap section
  2017-03-06 22:03 ` [PATCH v5 2/3] x86: Remap GDT tables in the Fixmap section Thomas Garnier
@ 2017-03-09 21:32   ` Andy Lutomirski
  2017-03-09 21:43     ` [Xen-devel] " Andrew Cooper
  0 siblings, 1 reply; 14+ messages in thread
From: Andy Lutomirski @ 2017-03-09 21:32 UTC (permalink / raw)
  To: Thomas Garnier, Boris Ostrovsky
  Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Jonathan Corbet,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov, Kees Cook,
	Juergen Gross, Lorenzo Stoakes, Andy Lutomirski, Paul Gortmaker,
	Andrew Morton, Michal Hocko, zijun_hu, Chris Wilson, Joonsoo Kim,
	Rafael J . Wysocki, Len Brown, Pavel Machek, Jiri Kosina,
	Matt Fleming, Ard Biesheuvel, Rusty Russell, Paolo Bonzini,
	Borislav Petkov, Christian Borntraeger, Luis R . Rodriguez,
	Fenghua Yu, He Chen, Brian Gerst, Frederic Weisbecker,
	Stanislaw Gruszka, Arnd Bergmann, Peter Zijlstra, Dave Hansen,
	Josh Poimboeuf, Vitaly Kuznetsov, David Vrabel, Tim Chen,
	Andi Kleen, Jiri Olsa, Prarit Bhargava, Michael Ellerman,
	Joerg Roedel, Radim Krčmář, X86 ML,
	linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
	kasan-dev, linux-mm@kvack.org, linux-pm@vger.kernel.org,
	linux-efi@vger.kernel.org, xen-devel@lists.xenproject.org, lguest,
	kvm list, kernel-hardening@lists.openwall.com

On Mon, Mar 6, 2017 at 2:03 PM, Thomas Garnier <thgarnie@google.com> wrote:
> Each processor holds a GDT in its per-cpu structure. The sgdt
> instruction gives the base address of the current GDT. This address can
> be used to bypass KASLR memory randomization. With another bug, an
> attacker could target other per-cpu structures or deduce the base of
> the main memory section (PAGE_OFFSET).
>
> This patch relocates the GDT table for each processor inside the
> Fixmap section. The space is reserved based on number of supported
> processors.
>
> For consistency, the remapping is done by default on 32 and 64-bit.
>
> Each processor switches to its remapped GDT at the end of
> initialization. For hibernation, the main processor returns with the
> original GDT and switches back to the remapping at completion.
>
> This patch was tested on both architectures. Hibernation and KVM were
> both tested specially for their usage of the GDT.

Looks good with minor nitpicks.  Also, have you tested on Xen PV?

(If you aren't set up for it, virtme can do this test quite easily.  I
could run it for you if you like, too.)

> +static inline unsigned long get_current_gdt_rw_vaddr(void)
> +{
> +       return (unsigned long)get_current_gdt_rw();
> +}

This has no callers, so let's remove it.

> +static inline unsigned long get_cpu_gdt_ro_vaddr(int cpu)
> +{
> +       return (unsigned long)get_cpu_gdt_ro(cpu);
> +}

Ditto.

> +static inline unsigned long get_current_gdt_ro_vaddr(void)
> +{
> +       return (unsigned long)get_current_gdt_ro();
> +}

Ditto.

> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -710,7 +710,7 @@ static void load_TLS_descriptor(struct thread_struct *t,
>
>         *shadow = t->tls_array[i];
>
> -       gdt = get_cpu_gdt_table(cpu);
> +       gdt = get_cpu_gdt_rw(cpu);
>         maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
>         mc = __xen_mc_entry(0);

Boris, is this right?  I don't see why it wouldn't be, but Xen is special.

> @@ -504,7 +504,7 @@ void __init lguest_arch_host_init(void)
>                  * byte, not the size, hence the "-1").
>                  */
>                 state->host_gdt_desc.size = GDT_SIZE-1;
> -               state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
> +               state->host_gdt_desc.address = (long)get_cpu_gdt_rw(i);

I suspect this should be get_cpu_gdt_ro(), but I don't know too much
about lguest.  Hmm, maybe the right thing to do is to give lguest a
nice farewell and retire it.  The last time I tried to test it, I gave
up.


--Andy

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v5 3/3] x86: Make the GDT remapping read-only on 64-bit
  2017-03-06 22:03 ` [PATCH v5 3/3] x86: Make the GDT remapping read-only on 64-bit Thomas Garnier
@ 2017-03-09 21:35   ` Andy Lutomirski
  0 siblings, 0 replies; 14+ messages in thread
From: Andy Lutomirski @ 2017-03-09 21:35 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Jonathan Corbet,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov, Kees Cook,
	Juergen Gross, Lorenzo Stoakes, Andy Lutomirski, Paul Gortmaker,
	Andrew Morton, Michal Hocko, zijun_hu, Chris Wilson, Joonsoo Kim,
	Rafael J . Wysocki, Len Brown, Pavel Machek, Jiri Kosina,
	Matt Fleming, Ard Biesheuvel, Boris Ostrovsky, Rusty Russell,
	Paolo Bonzini, Borislav Petkov, Christian Borntraeger,
	Luis R . Rodriguez, Fenghua Yu, He Chen, Brian Gerst,
	Frederic Weisbecker, Stanislaw Gruszka, Arnd Bergmann,
	Peter Zijlstra, Dave Hansen, Josh Poimboeuf, Vitaly Kuznetsov,
	David Vrabel, Tim Chen, Andi Kleen, Jiri Olsa, Prarit Bhargava,
	Michael Ellerman, Joerg Roedel, Radim Krčmář,
	X86 ML, linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
	kasan-dev, linux-mm@kvack.org, linux-pm@vger.kernel.org,
	linux-efi@vger.kernel.org, xen-devel@lists.xenproject.org, lguest,
	kvm list, kernel-hardening@lists.openwall.com

On Mon, Mar 6, 2017 at 2:03 PM, Thomas Garnier <thgarnie@google.com> wrote:
> This patch makes the GDT remapped pages read-only to prevent corruption.
> This change is done only on 64-bit.
>
> The native_load_tr_desc function was adapted to correctly handle a
> read-only GDT. The LTR instruction always writes to the GDT TSS entry.
> This generates a page fault if the GDT is read-only. This change checks
> if the current GDT is a remap and swap GDTs as needed. This function was
> tested by booting multiple machines and checking hibernation works
> properly.
>
> KVM SVM and VMX were adapted to use the writeable GDT. On VMX, the
> per-cpu variable was removed for functions to fetch the original GDT.
> Instead of reloading the previous GDT, VMX will reload the fixmap GDT as
> expected. For testing, VMs were started and restored on multiple
> configurations.

I like this patch.

> +
> +/*
> + * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
> + * a read-only remapping. To prevent a page fault, the GDT is switched to the
> + * original writeable version when needed.
> + */
> +#ifdef CONFIG_X86_64
>  static inline void native_load_tr_desc(void)
>  {
> +       struct desc_ptr gdt;
> +       int cpu = raw_smp_processor_id();
> +       bool restore = 0;
> +       struct desc_struct *fixmap_gdt;
> +
> +       native_store_gdt(&gdt);

This part will slow this function down considerably, but with the
recent KVM improvements, I think that there are no callers left that
care about performance, so this should be fine.

--Andy

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Xen-devel] [PATCH v5 2/3] x86: Remap GDT tables in the Fixmap section
  2017-03-09 21:32   ` Andy Lutomirski
@ 2017-03-09 21:43     ` Andrew Cooper
  2017-03-09 21:46       ` Andy Lutomirski
  0 siblings, 1 reply; 14+ messages in thread
From: Andrew Cooper @ 2017-03-09 21:43 UTC (permalink / raw)
  To: Andy Lutomirski, Thomas Garnier, Boris Ostrovsky
  Cc: Michal Hocko, Stanislaw Gruszka, linux-doc@vger.kernel.org,
	kvm list, Fenghua Yu, Matt Fleming, Frederic Weisbecker, X86 ML,
	Chris Wilson, linux-mm@kvack.org, Paul Gortmaker,
	Radim Krčmář, linux-efi@vger.kernel.org,
	Alexander Potapenko, Pavel Machek, H . Peter Anvin,
	kernel-hardening@lists.openwall.com, Jiri Olsa, zijun_hu,
	Dave Hansen, Andi Kleen, xen-devel@lists.xenproject.org,
	Jonathan Corbet, Michael Ellerman, Joerg Roedel, Prarit Bhargava,
	kasan-dev, Vitaly Kuznetsov, Christian Borntraeger, Ingo Molnar,
	Andrey Ryabinin, Borislav Petkov, Len Brown, Rusty Russell,
	Kees Cook, Arnd Bergmann, He Chen, Brian Gerst, Jiri Kosina,
	lguest, Andy Lutomirski, Josh Poimboeuf, Thomas Gleixner,
	Andrew Morton, Dmitry Vyukov, Juergen Gross, Peter Zijlstra,
	Lorenzo Stoakes, Ard Biesheuvel, linux-pm@vger.kernel.org,
	Rafael J . Wysocki, linux-kernel@vger.kernel.org,
	Luis R . Rodriguez, David Vrabel, Paolo Bonzini, Joonsoo Kim,
	Tim Chen

On 09/03/2017 21:32, Andy Lutomirski wrote:
> On Mon, Mar 6, 2017 at 2:03 PM, Thomas Garnier <thgarnie@google.com> wrote:
>
>> --- a/arch/x86/xen/enlighten.c
>> +++ b/arch/x86/xen/enlighten.c
>> @@ -710,7 +710,7 @@ static void load_TLS_descriptor(struct thread_struct *t,
>>
>>         *shadow = t->tls_array[i];
>>
>> -       gdt = get_cpu_gdt_table(cpu);
>> +       gdt = get_cpu_gdt_rw(cpu);
>>         maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
>>         mc = __xen_mc_entry(0);
> Boris, is this right?  I don't see why it wouldn't be, but Xen is special.

Under Xen PV, the GDT is already read-only at this point.  (It is not
safe to let the guest have writeable access to system tables, so the
guest must relinquish write access to the frames wishing to be used as
LDTs or GDTs.)

The hypercall acts on the frame, not a virtual address, so either alias
should be fine here.

Under this new scheme, there will be two read-only aliases.  I guess
this is easier to maintain the split consistently across Linux, than to
special case Xen PV because it doesn't need the second alias.

~Andrew

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Xen-devel] [PATCH v5 2/3] x86: Remap GDT tables in the Fixmap section
  2017-03-09 21:43     ` [Xen-devel] " Andrew Cooper
@ 2017-03-09 21:46       ` Andy Lutomirski
  2017-03-09 21:54         ` Thomas Garnier
  0 siblings, 1 reply; 14+ messages in thread
From: Andy Lutomirski @ 2017-03-09 21:46 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Thomas Garnier, Boris Ostrovsky, Michal Hocko, Stanislaw Gruszka,
	linux-doc@vger.kernel.org, kvm list, Fenghua Yu, Matt Fleming,
	Frederic Weisbecker, X86 ML, Chris Wilson, linux-mm@kvack.org,
	Paul Gortmaker, Radim Krčmář,
	linux-efi@vger.kernel.org, Alexander Potapenko, Pavel Machek,
	H . Peter Anvin, kernel-hardening@lists.openwall.com, Jiri Olsa,
	zijun_hu, Dave Hansen, Andi Kleen, xen-devel@lists.xenproject.org,
	Jonathan Corbet, Michael Ellerman, Joerg Roedel, Prarit Bhargava,
	kasan-dev, Vitaly Kuznetsov, Christian Borntraeger, Ingo Molnar,
	Andrey Ryabinin, Borislav Petkov, Len Brown, Rusty Russell,
	Kees Cook, Arnd Bergmann, He Chen, Brian Gerst, Jiri Kosina,
	lguest, Andy Lutomirski, Josh Poimboeuf, Thomas Gleixner,
	Andrew Morton, Dmitry Vyukov, Juergen Gross, Peter Zijlstra,
	Lorenzo Stoakes, Ard Biesheuvel, linux-pm@vger.kernel.org,
	Rafael J . Wysocki, linux-kernel@vger.kernel.org,
	Luis R . Rodriguez, David Vrabel, Paolo Bonzini, Joonsoo Kim,
	Tim Chen

On Thu, Mar 9, 2017 at 1:43 PM, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 09/03/2017 21:32, Andy Lutomirski wrote:
>> On Mon, Mar 6, 2017 at 2:03 PM, Thomas Garnier <thgarnie@google.com> wrote:
>>
>>> --- a/arch/x86/xen/enlighten.c
>>> +++ b/arch/x86/xen/enlighten.c
>>> @@ -710,7 +710,7 @@ static void load_TLS_descriptor(struct thread_struct *t,
>>>
>>>         *shadow = t->tls_array[i];
>>>
>>> -       gdt = get_cpu_gdt_table(cpu);
>>> +       gdt = get_cpu_gdt_rw(cpu);
>>>         maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
>>>         mc = __xen_mc_entry(0);
>> Boris, is this right?  I don't see why it wouldn't be, but Xen is special.
>
> Under Xen PV, the GDT is already read-only at this point.  (It is not
> safe to let the guest have writeable access to system tables, so the
> guest must relinquish write access to the frames wishing to be used as
> LDTs or GDTs.)
>
> The hypercall acts on the frame, not a virtual address, so either alias
> should be fine here.
>
> Under this new scheme, there will be two read-only aliases.  I guess
> this is easier to maintain the split consistently across Linux, than to
> special case Xen PV because it doesn't need the second alias.
>

I think we would gain nothing at all by special-casing Xen PV -- Linux
allocates the fixmap vaddrs at compile time, so we'd still allocate
them even if we rejigger all the helpers to avoid using them.

--Andy

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Xen-devel] [PATCH v5 2/3] x86: Remap GDT tables in the Fixmap section
  2017-03-09 21:46       ` Andy Lutomirski
@ 2017-03-09 21:54         ` Thomas Garnier
  2017-03-09 21:56           ` Boris Ostrovsky
  0 siblings, 1 reply; 14+ messages in thread
From: Thomas Garnier @ 2017-03-09 21:54 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andrew Cooper, Boris Ostrovsky, Michal Hocko, Stanislaw Gruszka,
	linux-doc@vger.kernel.org, kvm list, Fenghua Yu, Matt Fleming,
	Frederic Weisbecker, X86 ML, Chris Wilson, linux-mm@kvack.org,
	Paul Gortmaker, Radim Krčmář,
	linux-efi@vger.kernel.org, Alexander Potapenko, Pavel Machek,
	H . Peter Anvin, kernel-hardening@lists.openwall.com, Jiri Olsa,
	zijun_hu, Dave Hansen, Andi Kleen, xen-devel@lists.xenproject.org,
	Jonathan Corbet, Michael Ellerman, Joerg Roedel, Prarit Bhargava,
	kasan-dev, Vitaly Kuznetsov, Christian Borntraeger, Ingo Molnar,
	Andrey Ryabinin, Borislav Petkov, Len Brown, Rusty Russell,
	Kees Cook, Arnd Bergmann, He Chen, Brian Gerst, Jiri Kosina,
	lguest, Andy Lutomirski, Josh Poimboeuf, Thomas Gleixner,
	Andrew Morton, Dmitry Vyukov, Juergen Gross, Peter Zijlstra,
	Lorenzo Stoakes, Ard Biesheuvel, linux-pm@vger.kernel.org,
	Rafael J . Wysocki, linux-kernel@vger.kernel.org,
	Luis R . Rodriguez, David Vrabel, Paolo Bonzini, Joonsoo Kim,
	Tim Chen

On Thu, Mar 9, 2017 at 1:46 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Thu, Mar 9, 2017 at 1:43 PM, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 09/03/2017 21:32, Andy Lutomirski wrote:
>>> On Mon, Mar 6, 2017 at 2:03 PM, Thomas Garnier <thgarnie@google.com> wrote:
>>>
>>>> --- a/arch/x86/xen/enlighten.c
>>>> +++ b/arch/x86/xen/enlighten.c
>>>> @@ -710,7 +710,7 @@ static void load_TLS_descriptor(struct thread_struct *t,
>>>>
>>>>         *shadow = t->tls_array[i];
>>>>
>>>> -       gdt = get_cpu_gdt_table(cpu);
>>>> +       gdt = get_cpu_gdt_rw(cpu);
>>>>         maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
>>>>         mc = __xen_mc_entry(0);
>>> Boris, is this right?  I don't see why it wouldn't be, but Xen is special.
>>
>> Under Xen PV, the GDT is already read-only at this point.  (It is not
>> safe to let the guest have writeable access to system tables, so the
>> guest must relinquish write access to the frames wishing to be used as
>> LDTs or GDTs.)
>>
>> The hypercall acts on the frame, not a virtual address, so either alias
>> should be fine here.
>>
>> Under this new scheme, there will be two read-only aliases.  I guess
>> this is easier to maintain the split consistently across Linux, than to
>> special case Xen PV because it doesn't need the second alias.
>>
>
> I think we would gain nothing at all by special-casing Xen PV -- Linux
> allocates the fixmap vaddrs at compile time, so we'd still allocate
> them even if we rejigger all the helpers to avoid using them.
>

I don't have any experience with Xen so it would be great if virtme can test it.

I can remove the unused functions, I just thought they were useful
shortcuts given some of them are already used.

> --Andy



-- 
Thomas

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Xen-devel] [PATCH v5 2/3] x86: Remap GDT tables in the Fixmap section
  2017-03-09 21:54         ` Thomas Garnier
@ 2017-03-09 21:56           ` Boris Ostrovsky
  2017-03-09 22:13             ` Boris Ostrovsky
  0 siblings, 1 reply; 14+ messages in thread
From: Boris Ostrovsky @ 2017-03-09 21:56 UTC (permalink / raw)
  To: Thomas Garnier, Andy Lutomirski
  Cc: Andrew Cooper, Michal Hocko, Stanislaw Gruszka,
	linux-doc@vger.kernel.org, kvm list, Fenghua Yu, Matt Fleming,
	Frederic Weisbecker, X86 ML, Chris Wilson, linux-mm@kvack.org,
	Paul Gortmaker, Radim Krčmář,
	linux-efi@vger.kernel.org, Alexander Potapenko, Pavel Machek,
	H . Peter Anvin, kernel-hardening@lists.openwall.com, Jiri Olsa,
	zijun_hu, Dave Hansen, Andi Kleen, xen-devel@lists.xenproject.org,
	Jonathan Corbet, Michael Ellerman, Joerg Roedel, Prarit Bhargava,
	kasan-dev, Vitaly Kuznetsov, Christian Borntraeger, Ingo Molnar,
	Andrey Ryabinin, Borislav Petkov, Len Brown, Rusty Russell,
	Kees Cook, Arnd Bergmann, He Chen, Brian Gerst, Jiri Kosina,
	lguest, Andy Lutomirski, Josh Poimboeuf, Thomas Gleixner,
	Andrew Morton, Dmitry Vyukov, Juergen Gross, Peter Zijlstra,
	Lorenzo Stoakes, Ard Biesheuvel, linux-pm@vger.kernel.org,
	Rafael J . Wysocki, linux-kernel@vger.kernel.org,
	Luis R . Rodriguez, David Vrabel, Paolo Bonzini, Joonsoo Kim,
	Tim Chen

On 03/09/2017 04:54 PM, Thomas Garnier wrote:
> On Thu, Mar 9, 2017 at 1:46 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>> On Thu, Mar 9, 2017 at 1:43 PM, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>>> On 09/03/2017 21:32, Andy Lutomirski wrote:
>>>> On Mon, Mar 6, 2017 at 2:03 PM, Thomas Garnier <thgarnie@google.com> wrote:
>>>>
>>>>> --- a/arch/x86/xen/enlighten.c
>>>>> +++ b/arch/x86/xen/enlighten.c
>>>>> @@ -710,7 +710,7 @@ static void load_TLS_descriptor(struct thread_struct *t,
>>>>>
>>>>>         *shadow = t->tls_array[i];
>>>>>
>>>>> -       gdt = get_cpu_gdt_table(cpu);
>>>>> +       gdt = get_cpu_gdt_rw(cpu);
>>>>>         maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
>>>>>         mc = __xen_mc_entry(0);
>>>> Boris, is this right?  I don't see why it wouldn't be, but Xen is special.
>>> Under Xen PV, the GDT is already read-only at this point.  (It is not
>>> safe to let the guest have writeable access to system tables, so the
>>> guest must relinquish write access to the frames wishing to be used as
>>> LDTs or GDTs.)
>>>
>>> The hypercall acts on the frame, not a virtual address, so either alias
>>> should be fine here.
>>>
>>> Under this new scheme, there will be two read-only aliases.  I guess
>>> this is easier to maintain the split consistently across Linux, than to
>>> special case Xen PV because it doesn't need the second alias.
>>>
>> I think we would gain nothing at all by special-casing Xen PV -- Linux
>> allocates the fixmap vaddrs at compile time, so we'd still allocate
>> them even if we rejigger all the helpers to avoid using them.
>>
> I don't have any experience with Xen so it would be great if virtme can test it.


I am pretty sure I tested this series at some point but I'll test it again.

-boris


>
> I can remove the unused functions, I just thought they were useful
> shortcuts given some of them are already used.
>
>> --Andy
>
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Xen-devel] [PATCH v5 2/3] x86: Remap GDT tables in the Fixmap section
  2017-03-09 21:56           ` Boris Ostrovsky
@ 2017-03-09 22:13             ` Boris Ostrovsky
  2017-03-09 22:31               ` Thomas Garnier
  0 siblings, 1 reply; 14+ messages in thread
From: Boris Ostrovsky @ 2017-03-09 22:13 UTC (permalink / raw)
  To: Thomas Garnier, Andy Lutomirski
  Cc: Andrew Cooper, Michal Hocko, Stanislaw Gruszka,
	linux-doc@vger.kernel.org, kvm list, Fenghua Yu, Matt Fleming,
	Frederic Weisbecker, X86 ML, Chris Wilson, linux-mm@kvack.org,
	Paul Gortmaker, Radim Krčmář,
	linux-efi@vger.kernel.org, Alexander Potapenko, Pavel Machek,
	H . Peter Anvin, kernel-hardening@lists.openwall.com, Jiri Olsa,
	zijun_hu, Dave Hansen, Andi Kleen, xen-devel@lists.xenproject.org,
	Jonathan Corbet, Michael Ellerman, Joerg Roedel, Prarit Bhargava,
	kasan-dev, Vitaly Kuznetsov, Christian Borntraeger, Ingo Molnar,
	Andrey Ryabinin, Borislav Petkov, Len Brown, Rusty Russell,
	Kees Cook, Arnd Bergmann, He Chen, Brian Gerst, Jiri Kosina,
	lguest, Andy Lutomirski, Josh Poimboeuf, Thomas Gleixner,
	Andrew Morton, Dmitry Vyukov, Juergen Gross, Peter Zijlstra,
	Lorenzo Stoakes, Ard Biesheuvel, linux-pm@vger.kernel.org,
	Rafael J . Wysocki, linux-kernel@vger.kernel.org,
	Luis R . Rodriguez, Paolo Bonzini, Joonsoo Kim, Tim Chen


>> I don't have any experience with Xen so it would be great if virtme can test it.
>
> I am pretty sure I tested this series at some point but I'll test it again.
>


Fails 32-bit build:


/home/build/linux-boris/arch/x86/kvm/vmx.c: In function ‘segment_base’:
/home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: ‘host_gdt’
undeclared (first use in this function)
/home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: (Each undeclared
identifier is reported only once
/home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: for each
function it appears in.)
/home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: type defaults to
‘int’ in declaration of ‘type name’
/home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: type defaults to
‘int’ in declaration of ‘type name’
/home/build/linux-boris/arch/x86/kvm/vmx.c:2054: warning: initialization
from incompatible pointer type
/home/build/linux-boris/arch/x86/kvm/vmx.c:2054: warning: unused
variable ‘gdt’


-boris

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Xen-devel] [PATCH v5 2/3] x86: Remap GDT tables in the Fixmap section
  2017-03-09 22:13             ` Boris Ostrovsky
@ 2017-03-09 22:31               ` Thomas Garnier
  2017-03-09 23:17                 ` Boris Ostrovsky
  0 siblings, 1 reply; 14+ messages in thread
From: Thomas Garnier @ 2017-03-09 22:31 UTC (permalink / raw)
  To: Boris Ostrovsky
  Cc: Andy Lutomirski, Andrew Cooper, Michal Hocko, Stanislaw Gruszka,
	linux-doc@vger.kernel.org, kvm list, Fenghua Yu, Matt Fleming,
	Frederic Weisbecker, X86 ML, Chris Wilson, linux-mm@kvack.org,
	Paul Gortmaker, Radim Krčmář,
	linux-efi@vger.kernel.org, Alexander Potapenko, Pavel Machek,
	H . Peter Anvin, kernel-hardening@lists.openwall.com, Jiri Olsa,
	zijun_hu, Dave Hansen, Andi Kleen, xen-devel@lists.xenproject.org,
	Jonathan Corbet, Michael Ellerman, Joerg Roedel, Prarit Bhargava,
	kasan-dev, Vitaly Kuznetsov, Christian Borntraeger, Ingo Molnar,
	Andrey Ryabinin, Borislav Petkov, Len Brown, Rusty Russell,
	Kees Cook, Arnd Bergmann, He Chen, Brian Gerst, Jiri Kosina,
	lguest, Andy Lutomirski, Josh Poimboeuf, Thomas Gleixner,
	Andrew Morton, Dmitry Vyukov, Juergen Gross, Peter Zijlstra,
	Lorenzo Stoakes, Ard Biesheuvel, linux-pm@vger.kernel.org,
	Rafael J . Wysocki, linux-kernel@vger.kernel.org,
	Luis R . Rodriguez, Paolo Bonzini, Joonsoo Kim, Tim Chen

On Thu, Mar 9, 2017 at 2:13 PM, Boris Ostrovsky
<boris.ostrovsky@oracle.com> wrote:
>
>>> I don't have any experience with Xen so it would be great if virtme can test it.
>>
>> I am pretty sure I tested this series at some point but I'll test it again.
>>
>
>
> Fails 32-bit build:
>
>
> /home/build/linux-boris/arch/x86/kvm/vmx.c: In function ‘segment_base’:
> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: ‘host_gdt’
> undeclared (first use in this function)
> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: (Each undeclared
> identifier is reported only once
> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: for each
> function it appears in.)
> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: type defaults to
> ‘int’ in declaration of ‘type name’
> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: type defaults to
> ‘int’ in declaration of ‘type name’
> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: warning: initialization
> from incompatible pointer type
> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: warning: unused
> variable ‘gdt’
>
>
> -boris

It seems that I forgot to remove line 2054 on the rebase. My 32-bit
build comes clean but I assume it is not good enough compared to the
full version I build for 64-bit KVM testing.

Remove just this line and it should build fine, I will fix this on the
next iteration.

Thanks for testing,

-- 
Thomas

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Xen-devel] [PATCH v5 2/3] x86: Remap GDT tables in the Fixmap section
  2017-03-09 22:31               ` Thomas Garnier
@ 2017-03-09 23:17                 ` Boris Ostrovsky
  2017-03-13 18:32                   ` Boris Ostrovsky
  0 siblings, 1 reply; 14+ messages in thread
From: Boris Ostrovsky @ 2017-03-09 23:17 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Michal Hocko, Stanislaw Gruszka, kvm list,
	linux-doc@vger.kernel.org, Matt Fleming, Frederic Weisbecker,
	Josh Poimboeuf, Chris Wilson, linux-mm@kvack.org, Dave Hansen,
	Radim Krčmář, linux-efi@vger.kernel.org,
	Alexander Potapenko, Pavel Machek, H . Peter Anvin,
	kernel-hardening@lists.openwall.com, Jiri Olsa, zijun_hu,
	Prarit Bhargava, Andi Kleen, Len Brown, Jonathan Corbet,
	Michael Ellerman, Joerg Roedel, X86 ML, Luis R . Rodriguez,
	kasan-dev, Christian Borntraeger, Ingo Molnar,
	xen-devel@lists.xenproject.org, Borislav Petkov, Fenghua Yu,
	Jiri Kosina, Kees Cook, Arnd Bergmann, He Chen, Brian Gerst,
	Rusty Russell, Joonsoo Kim, lguest, Andy Lutomirski,
	Andrey Ryabinin, Thomas Gleixner, Andrew Morton, Dmitry Vyukov,
	Juergen Gross, Lorenzo Stoakes, Paul Gortmaker, Andrew Cooper,
	linux-pm@vger.kernel.org, Ard Biesheuvel, Rafael J . Wysocki,
	linux-kernel@vger.kernel.org, Andy Lutomirski, Peter Zijlstra,
	Paolo Bonzini, Vitaly Kuznetsov, Tim Chen

On 03/09/2017 05:31 PM, Thomas Garnier wrote:
> On Thu, Mar 9, 2017 at 2:13 PM, Boris Ostrovsky
> <boris.ostrovsky@oracle.com> wrote:
>>>> I don't have any experience with Xen so it would be great if virtme can test it.
>>> I am pretty sure I tested this series at some point but I'll test it again.
>>>
>>
>> Fails 32-bit build:
>>
>>
>> /home/build/linux-boris/arch/x86/kvm/vmx.c: In function ‘segment_base’:
>> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: ‘host_gdt’
>> undeclared (first use in this function)
>> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: (Each undeclared
>> identifier is reported only once
>> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: for each
>> function it appears in.)
>> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: type defaults to
>> ‘int’ in declaration of ‘type name’
>> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: type defaults to
>> ‘int’ in declaration of ‘type name’
>> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: warning: initialization
>> from incompatible pointer type
>> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: warning: unused
>> variable ‘gdt’
>>
>>
>> -boris
> It seems that I forgot to remove line 2054 on the rebase. My 32-bit
> build comes clean but I assume it is not good enough compared to the
> full version I build for 64-bit KVM testing.
>
> Remove just this line and it should build fine, I will fix this on the
> next iteration.
>
> Thanks for testing,
>


So this, in fact, does break Xen in that the hypercall to set GDT fails.

I will have to look at this tomorrow but I definitely at least built
with v3 of this series. And I don't see why I wouldn't have tested it
once I built it.

-boris

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Xen-devel] [PATCH v5 2/3] x86: Remap GDT tables in the Fixmap section
  2017-03-09 23:17                 ` Boris Ostrovsky
@ 2017-03-13 18:32                   ` Boris Ostrovsky
  2017-03-13 19:24                     ` Thomas Garnier
  0 siblings, 1 reply; 14+ messages in thread
From: Boris Ostrovsky @ 2017-03-13 18:32 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Michal Hocko, Stanislaw Gruszka, kvm list,
	linux-doc@vger.kernel.org, Matt Fleming, Frederic Weisbecker,
	Josh Poimboeuf, Chris Wilson, linux-mm@kvack.org, Dave Hansen,
	Radim Krčmář, linux-efi@vger.kernel.org,
	Alexander Potapenko, Pavel Machek, H . Peter Anvin,
	kernel-hardening@lists.openwall.com, Jiri Olsa, zijun_hu,
	Prarit Bhargava, Andi Kleen, Len Brown, Jonathan Corbet,
	Michael Ellerman, Joerg Roedel, X86 ML, Luis R . Rodriguez,
	kasan-dev, Christian Borntraeger, Ingo Molnar,
	xen-devel@lists.xenproject.org, Borislav Petkov, Fenghua Yu,
	Jiri Kosina, Kees Cook, Arnd Bergmann, He Chen, Brian Gerst,
	Rusty Russell, Joonsoo Kim, lguest, Andy Lutomirski,
	Andrey Ryabinin, Thomas Gleixner, Andrew Morton, Dmitry Vyukov,
	Juergen Gross, Lorenzo Stoakes, Paul Gortmaker, Andrew Cooper,
	linux-pm@vger.kernel.org, Ard Biesheuvel, Rafael J . Wysocki,
	linux-kernel@vger.kernel.org, Andy Lutomirski, Peter Zijlstra,
	Paolo Bonzini, Vitaly Kuznetsov, Tim Chen

On 03/09/2017 06:17 PM, Boris Ostrovsky wrote:
> On 03/09/2017 05:31 PM, Thomas Garnier wrote:
>> On Thu, Mar 9, 2017 at 2:13 PM, Boris Ostrovsky
>> <boris.ostrovsky@oracle.com> wrote:
>>>>> I don't have any experience with Xen so it would be great if virtme can test it.
>>>> I am pretty sure I tested this series at some point but I'll test it again.
>>>>
>>>
>>> Fails 32-bit build:
>>>
>>>
>>> /home/build/linux-boris/arch/x86/kvm/vmx.c: In function ‘segment_base’:
>>> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: ‘host_gdt’
>>> undeclared (first use in this function)
>>> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: (Each undeclared
>>> identifier is reported only once
>>> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: for each
>>> function it appears in.)
>>> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: type defaults to
>>> ‘int’ in declaration of ‘type name’
>>> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: error: type defaults to
>>> ‘int’ in declaration of ‘type name’
>>> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: warning: initialization
>>> from incompatible pointer type
>>> /home/build/linux-boris/arch/x86/kvm/vmx.c:2054: warning: unused
>>> variable ‘gdt’
>>>
>>>
>>> -boris
>> It seems that I forgot to remove line 2054 on the rebase. My 32-bit
>> build comes clean but I assume it is not good enough compared to the
>> full version I build for 64-bit KVM testing.
>>
>> Remove just this line and it should build fine, I will fix this on the
>> next iteration.
>>
>> Thanks for testing,
>>
> 
> 
> So this, in fact, does break Xen in that the hypercall to set GDT fails.
> 
> I will have to look at this tomorrow but I definitely at least built
> with v3 of this series. And I don't see why I wouldn't have tested it
> once I built it.


There are a couple of problems for Xen PV guests that need to be addressed:
1. Xen's set_fixmap op needs non-default handling for
FIX_GDT_REMAP_BEGIN range
2. GDT remapping for PV guests needs to be RO for both 64 and 32-bit guests.

I don't know how you prefer to deal with (2), patch below is one
suggestion. With it all my boot tests (Xen and bare-metal) passed.

One problem with applying it directly is that kernel becomes
not-bisectable (Xen-wise) between patches 2 and 3 so perhaps you might
pull some of the changes from patch 3 to patch 2.


-boris


diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 9b7fda6..ec05f9c 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -39,6 +39,7 @@ extern struct desc_ptr idt_descr;
 extern gate_desc idt_table[];
 extern const struct desc_ptr debug_idt_descr;
 extern gate_desc debug_idt_table[];
+extern pgprot_t pg_fixmap_gdt_flags;

 struct gdt_page {
        struct desc_struct gdt[GDT_ENTRIES];
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index bff2f8b..2682355 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -450,16 +450,16 @@ void load_percpu_segment(int cpu)

 /* On 64-bit the GDT remapping is read-only */
 #ifdef CONFIG_X86_64
-#define PAGE_FIXMAP_GDT PAGE_KERNEL_RO
+pgprot_t pg_fixmap_gdt_flags = PAGE_KERNEL_RO;
 #else
-#define PAGE_FIXMAP_GDT PAGE_KERNEL
+pgprot_t pg_fixmap_gdt_flags = PAGE_KERNEL;
 #endif

 /* Setup the fixmap mapping only once per-processor */
 static inline void setup_fixmap_gdt(int cpu)
 {
        __set_fixmap(get_cpu_gdt_ro_index(cpu),
-                    __pa(get_cpu_gdt_rw(cpu)), PAGE_FIXMAP_GDT);
+                    __pa(get_cpu_gdt_rw(cpu)), pg_fixmap_gdt_flags);
 }

 /* Load the original GDT from the per-cpu structure */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f46d47b..8871bcd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2051,7 +2051,7 @@ static bool update_transition_efer(struct vcpu_vmx
*vmx, int efer_offset)
  */
 static unsigned long segment_base(u16 selector)
 {
-       struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+       //struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
        struct desc_struct *table;
        unsigned long v;

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 4951fcf..2dc5f97 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1545,6 +1545,9 @@ asmlinkage __visible void __init
xen_start_kernel(void)
         */
        xen_initial_gdt = &per_cpu(gdt_page, 0);

+       /* GDT can only be remapped RO. */
+       pg_fixmap_gdt_flags = PAGE_KERNEL_RO;
+
        xen_smp_init();

 #ifdef CONFIG_ACPI_NUMA
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 37cb5aa..ebbfe00 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2326,6 +2326,7 @@ static void xen_set_fixmap(unsigned idx,
phys_addr_t phys, pgprot_t prot)
 #endif
        case FIX_TEXT_POKE0:
        case FIX_TEXT_POKE1:
+       case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
                /* All local page mappings */
                pte = pfn_pte(phys, prot);
                break;


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [Xen-devel] [PATCH v5 2/3] x86: Remap GDT tables in the Fixmap section
  2017-03-13 18:32                   ` Boris Ostrovsky
@ 2017-03-13 19:24                     ` Thomas Garnier
  0 siblings, 0 replies; 14+ messages in thread
From: Thomas Garnier @ 2017-03-13 19:24 UTC (permalink / raw)
  To: Boris Ostrovsky
  Cc: Michal Hocko, Stanislaw Gruszka, kvm list,
	linux-doc@vger.kernel.org, Matt Fleming, Frederic Weisbecker,
	Josh Poimboeuf, Chris Wilson, linux-mm@kvack.org, Dave Hansen,
	Radim Krčmář, linux-efi@vger.kernel.org,
	Alexander Potapenko, Pavel Machek, H . Peter Anvin,
	kernel-hardening@lists.openwall.com, Jiri Olsa, zijun_hu,
	Prarit Bhargava, Andi Kleen, Len Brown, Jonathan Corbet,
	Michael Ellerman, Joerg Roedel, X86 ML, Luis R . Rodriguez,
	kasan-dev, Christian Borntraeger, Ingo Molnar,
	xen-devel@lists.xenproject.org, Borislav Petkov, Fenghua Yu,
	Jiri Kosina, Kees Cook, Arnd Bergmann, He Chen, Brian Gerst,
	Rusty Russell, Joonsoo Kim, lguest, Andy Lutomirski,
	Andrey Ryabinin, Thomas Gleixner, Andrew Morton, Dmitry Vyukov,
	Juergen Gross, Lorenzo Stoakes, Paul Gortmaker, Andrew Cooper,
	linux-pm@vger.kernel.org, Ard Biesheuvel, Rafael J . Wysocki,
	linux-kernel@vger.kernel.org, Andy Lutomirski, Peter Zijlstra,
	Paolo Bonzini, Vitaly Kuznetsov, Tim Chen

On Mon, Mar 13, 2017 at 11:32 AM, Boris Ostrovsky
<boris.ostrovsky@oracle.com> wrote:
> There are a couple of problems for Xen PV guests that need to be addressed:
> 1. Xen's set_fixmap op needs non-default handling for
> FIX_GDT_REMAP_BEGIN range
> 2. GDT remapping for PV guests needs to be RO for both 64 and 32-bit guests.
>
> I don't know how you prefer to deal with (2), patch below is one
> suggestion. With it all my boot tests (Xen and bare-metal) passed.
>

Good suggestion, I think I will use most of it. Thanks!

> One problem with applying it directly is that kernel becomes
> not-bisectable (Xen-wise) between patches 2 and 3 so perhaps you might
> pull some of the changes from patch 3 to patch 2.
>

Yes, that makes sense, I will have to add the global variable on patch 2
and rebase 3 correctly.

-- 
Thomas

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-03-13 19:24 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-06 22:03 [PATCH v5 1/3] x86/mm: Adapt MODULES_END based on Fixmap section size Thomas Garnier
2017-03-06 22:03 ` [PATCH v5 2/3] x86: Remap GDT tables in the Fixmap section Thomas Garnier
2017-03-09 21:32   ` Andy Lutomirski
2017-03-09 21:43     ` [Xen-devel] " Andrew Cooper
2017-03-09 21:46       ` Andy Lutomirski
2017-03-09 21:54         ` Thomas Garnier
2017-03-09 21:56           ` Boris Ostrovsky
2017-03-09 22:13             ` Boris Ostrovsky
2017-03-09 22:31               ` Thomas Garnier
2017-03-09 23:17                 ` Boris Ostrovsky
2017-03-13 18:32                   ` Boris Ostrovsky
2017-03-13 19:24                     ` Thomas Garnier
2017-03-06 22:03 ` [PATCH v5 3/3] x86: Make the GDT remapping read-only on 64-bit Thomas Garnier
2017-03-09 21:35   ` Andy Lutomirski

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).