xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] x86/xpti: Hide almost all of .text and all .data/.rodata/.bss mappings
@ 2018-02-13 19:45 Andrew Cooper
  2018-02-14  7:54 ` Juergen Gross
  2018-02-14  9:14 ` Jan Beulich
  0 siblings, 2 replies; 13+ messages in thread
From: Andrew Cooper @ 2018-02-13 19:45 UTC (permalink / raw)
  To: Xen-devel; +Cc: Juergen Gross, Andrew Cooper, Wei Liu, Jan Beulich

The current XPTI implementation isolates the directmap (and therefore a lot of
guest data), but a large quantity of CPU0's state (including its stack)
remains visible.

Furthermore, an attacker able to read .text is in a vastly superior position
to normal when it comes to fingerprinting Xen for known vulnerabilities, or
scanning for ROP/Spectre gadgets.

Collect together the entrypoints in .text.entry (currently 3x4k frames, but
can almost certainly be slimmed down), and create a common mapping which is
inserted into each per-cpu shadow.  The stubs are also inserted into this
mapping by pointing at the in-use L2.  This allows stubs allocated later (SMP
boot, or CPU hotplug) to work without further changes to the common mappings.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Wei Liu <wei.liu2@citrix.com>
CC: Juergen Gross <jgross@suse.com>

RFC, because I don't think the stubs handling is particularly sensible.

We allocate 4k of virtual address space per CPU, but squash loads of CPUs
together onto a single MFN.  The stubs ought to be isolated as well (as they
leak the virtual addresses of each stack), which can be done by allocating an
MFN per CPU (and simplifies cpu_smpboot_alloc() somewhat).  At this point, we
can't use a common set of mappings, and will have to clone the single stub and
.entry.text into each PCPUs copy of the pagetables.

Also, my plan to cause .text.entry to straddle a 512TB boundary (and therefore
avoid any further pagetable allocations) has come a little unstuck because of
CONFIG_BIGMEM.  I'm still working out whether there is a sensible way to
rearrange the virtual layout for this plan to work.
---
 xen/arch/x86/smpboot.c             | 37 ++++++++++++++++++++++++++++++++-----
 xen/arch/x86/x86_64/compat/entry.S |  2 ++
 xen/arch/x86/x86_64/entry.S        |  4 +++-
 xen/arch/x86/xen.lds.S             |  7 +++++++
 4 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 2ebef03..2519141 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -622,6 +622,9 @@ unsigned long alloc_stub_page(unsigned int cpu, unsigned long *mfn)
         unmap_domain_page(memset(__map_domain_page(pg), 0xcc, PAGE_SIZE));
     }
 
+    /* Confirm that all stubs fit in a single L2 pagetable. */
+    BUILD_BUG_ON(NR_CPUS * PAGE_SIZE > (1u << L2_PAGETABLE_SHIFT));
+
     stub_va = XEN_VIRT_END - (cpu + 1) * PAGE_SIZE;
     if ( map_pages_to_xen(stub_va, mfn_x(page_to_mfn(pg)), 1,
                           PAGE_HYPERVISOR_RX | MAP_SMALL_PAGES) )
@@ -651,9 +654,6 @@ static int clone_mapping(const void *ptr, root_pgentry_t *rpt)
     l2_pgentry_t *pl2e;
     l1_pgentry_t *pl1e;
 
-    if ( linear < DIRECTMAP_VIRT_START )
-        return 0;
-
     flags = l3e_get_flags(*pl3e);
     ASSERT(flags & _PAGE_PRESENT);
     if ( flags & _PAGE_PSE )
@@ -744,6 +744,9 @@ static __read_mostly int8_t opt_xpti = -1;
 boolean_param("xpti", opt_xpti);
 DEFINE_PER_CPU(root_pgentry_t *, root_pgt);
 
+static root_pgentry_t common_pgt;
+extern char _stextentry[], _etextentry[];
+
 static int setup_cpu_root_pgt(unsigned int cpu)
 {
     root_pgentry_t *rpt;
@@ -764,8 +767,32 @@ static int setup_cpu_root_pgt(unsigned int cpu)
         idle_pg_table[root_table_offset(RO_MPT_VIRT_START)];
     /* SH_LINEAR_PT inserted together with guest mappings. */
     /* PERDOMAIN inserted during context switch. */
-    rpt[root_table_offset(XEN_VIRT_START)] =
-        idle_pg_table[root_table_offset(XEN_VIRT_START)];
+
+    /* One-time setup of common_pgt, which maps .text.entry and the stubs. */
+    if ( unlikely(!root_get_intpte(common_pgt)) )
+    {
+        unsigned long stubs_linear = XEN_VIRT_END - 1;
+        l3_pgentry_t *stubs_main, *stubs_shadow;
+        char *ptr;
+
+        for ( rc = 0, ptr = _stextentry;
+              !rc && ptr < _etextentry; ptr += PAGE_SIZE )
+            rc = clone_mapping(ptr, rpt);
+
+        if ( rc )
+            return rc;
+
+        stubs_main = l4e_to_l3e(idle_pg_table[l4_table_offset(stubs_linear)]);
+        stubs_shadow = l4e_to_l3e(rpt[l4_table_offset(stubs_linear)]);
+
+        /* Splice into the regular L2 mapping the stubs. */
+        stubs_shadow[l3_table_offset(stubs_linear)] =
+            stubs_main[l3_table_offset(stubs_linear)];
+
+        common_pgt = rpt[root_table_offset(XEN_VIRT_START)];
+    }
+
+    rpt[root_table_offset(XEN_VIRT_START)] = common_pgt;
 
     /* Install direct map page table entries for stack, IDT, and TSS. */
     for ( off = rc = 0; !rc && off < STACK_SIZE; off += PAGE_SIZE )
diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index 707c746..b001e79 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -13,6 +13,8 @@
 #include <public/xen.h>
 #include <irq_vectors.h>
 
+        .section .text.entry, "ax", @progbits
+
 ENTRY(entry_int82)
         ASM_CLAC
         pushq $0
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 58f652d..5c5310d 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -14,6 +14,8 @@
 #include <public/xen.h>
 #include <irq_vectors.h>
 
+        .section .text.entry, "ax", @progbits
+
 /* %rbx: struct vcpu */
 ENTRY(switch_to_kernel)
         leaq  VCPU_trap_bounce(%rbx),%rdx
@@ -854,7 +856,7 @@ GLOBAL(autogen_entrypoints)
         .popsection
         .endm
 
-        .text
+        .previous
 autogen_stubs: /* Automatically generated stubs. */
 
         vec = 0
diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
index 0952980..25c6cbc 100644
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -67,6 +67,13 @@ SECTIONS
        *(.text)
        *(.text.__x86_indirect_thunk_*)
        *(.text.page_aligned)
+
+       . = ALIGN(PAGE_SIZE);
+       _stextentry = .;
+       *(.text.entry)
+       . = ALIGN(PAGE_SIZE);
+       _etextentry = .;
+
        *(.text.cold)
        *(.text.unlikely)
        *(.fixup)
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread
* [PATCH] x86/xpti: Hide almost all of .text and all .data/.rodata/.bss mappings
@ 2018-02-14 16:23 Andrew Cooper
  2018-02-15 10:18 ` Jan Beulich
  0 siblings, 1 reply; 13+ messages in thread
From: Andrew Cooper @ 2018-02-14 16:23 UTC (permalink / raw)
  To: Xen-devel; +Cc: Juergen Gross, Andrew Cooper, Wei Liu, Jan Beulich

The current XPTI implementation isolates the directmap (and therefore a lot of
guest data), but a large quantity of CPU0's state (including its stack)
remains visible.

Furthermore, an attacker able to read .text is in a vastly superior position
to normal when it comes to fingerprinting Xen for known vulnerabilities, or
scanning for ROP/Spectre gadgets.

Collect together the entrypoints in .text.entry (currently 3x4k frames, but
can almost certainly be slimmed down), and create a common mapping which is
inserted into each per-cpu shadow.  The stubs are also inserted into this
mapping by pointing at the in-use L2.  This allows stubs allocated later (SMP
boot, or CPU hotplug) to work without further changes to the common mappings.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Wei Liu <wei.liu2@citrix.com>
CC: Juergen Gross <jgross@suse.com>

v2:
 * Drop "incomplete" warning from the docs.  This is about as complete as it
   can reasonably get.
 * Correct BUILD_BUG_ON() calculation, duplicate in the common_pgt code
 * scope/const improvements
 * Use .push/.popsection in preference to .previous
 * Exclude {compat_}create_bounce_frame from .text.entry
 * Extend the sanity checking of linear in clone_mapping().  There is a latent
   bug where a bad linear parameter would cause Xen to fall over a NULL pointer.
---
 docs/misc/xen-command-line.markdown |  3 --
 xen/arch/x86/smpboot.c              | 56 +++++++++++++++++++++++++++++++++----
 xen/arch/x86/x86_64/compat/entry.S  |  5 ++++
 xen/arch/x86/x86_64/entry.S         | 11 ++++++--
 xen/arch/x86/xen.lds.S              |  7 +++++
 5 files changed, 71 insertions(+), 11 deletions(-)

diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index 79feba6..8317639 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1935,9 +1935,6 @@ mode.
 Override default selection of whether to isolate 64-bit PV guest page
 tables.
 
-** WARNING: Not yet a complete isolation implementation, but better than
-nothing. **
-
 ### xsave
 > `= <boolean>`
 
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 2ebef03..10bf2f3 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -622,6 +622,9 @@ unsigned long alloc_stub_page(unsigned int cpu, unsigned long *mfn)
         unmap_domain_page(memset(__map_domain_page(pg), 0xcc, PAGE_SIZE));
     }
 
+    /* Confirm that all stubs fit in a single L2 pagetable. */
+    BUILD_BUG_ON(NR_CPUS * PAGE_SIZE > (1u << L3_PAGETABLE_SHIFT));
+
     stub_va = XEN_VIRT_END - (cpu + 1) * PAGE_SIZE;
     if ( map_pages_to_xen(stub_va, mfn_x(page_to_mfn(pg)), 1,
                           PAGE_HYPERVISOR_RX | MAP_SMALL_PAGES) )
@@ -646,13 +649,23 @@ static int clone_mapping(const void *ptr, root_pgentry_t *rpt)
 {
     unsigned long linear = (unsigned long)ptr, pfn;
     unsigned int flags;
-    l3_pgentry_t *pl3e = l4e_to_l3e(idle_pg_table[root_table_offset(linear)]) +
-                         l3_table_offset(linear);
+    l3_pgentry_t *pl3e;
     l2_pgentry_t *pl2e;
     l1_pgentry_t *pl1e;
 
-    if ( linear < DIRECTMAP_VIRT_START )
-        return 0;
+    /*
+     * Sanity check 'linear'.  We only allow cloning from the Xen virtual
+     * range, and in particular, only from the directmap and .text ranges.
+     */
+    if ( root_table_offset(linear) > ROOT_PAGETABLE_LAST_XEN_SLOT ||
+         root_table_offset(linear) < ROOT_PAGETABLE_FIRST_XEN_SLOT )
+        return -EINVAL;
+    if ( !(linear >= DIRECTMAP_VIRT_START ||
+           (linear >= XEN_VIRT_START && linear < XEN_VIRT_END)) )
+        return -EINVAL;
+
+    pl3e = l4e_to_l3e(idle_pg_table[root_table_offset(linear)]) +
+        l3_table_offset(linear);
 
     flags = l3e_get_flags(*pl3e);
     ASSERT(flags & _PAGE_PRESENT);
@@ -744,8 +757,12 @@ static __read_mostly int8_t opt_xpti = -1;
 boolean_param("xpti", opt_xpti);
 DEFINE_PER_CPU(root_pgentry_t *, root_pgt);
 
+extern const char _stextentry[], _etextentry[];
+
 static int setup_cpu_root_pgt(unsigned int cpu)
 {
+    static root_pgentry_t common_pgt;
+
     root_pgentry_t *rpt;
     unsigned int off;
     int rc;
@@ -764,8 +781,35 @@ static int setup_cpu_root_pgt(unsigned int cpu)
         idle_pg_table[root_table_offset(RO_MPT_VIRT_START)];
     /* SH_LINEAR_PT inserted together with guest mappings. */
     /* PERDOMAIN inserted during context switch. */
-    rpt[root_table_offset(XEN_VIRT_START)] =
-        idle_pg_table[root_table_offset(XEN_VIRT_START)];
+
+    /* One-time setup of common_pgt, which maps .text.entry and the stubs. */
+    if ( unlikely(!root_get_intpte(common_pgt)) )
+    {
+        unsigned long stubs_linear = XEN_VIRT_END - 1;
+        l3_pgentry_t *stubs_main, *stubs_shadow;
+        const char *ptr;
+
+        for ( rc = 0, ptr = _stextentry;
+              !rc && ptr < _etextentry; ptr += PAGE_SIZE )
+            rc = clone_mapping(ptr, rpt);
+
+        if ( rc )
+            return rc;
+
+        /* Confirm that all stubs fit in a single L2 pagetable. */
+        BUILD_BUG_ON(NR_CPUS * PAGE_SIZE > (1u << L3_PAGETABLE_SHIFT));
+
+        stubs_main = l4e_to_l3e(idle_pg_table[l4_table_offset(stubs_linear)]);
+        stubs_shadow = l4e_to_l3e(rpt[l4_table_offset(stubs_linear)]);
+
+        /* Splice into the regular L2 mapping the stubs. */
+        stubs_shadow[l3_table_offset(stubs_linear)] =
+            stubs_main[l3_table_offset(stubs_linear)];
+
+        common_pgt = rpt[root_table_offset(XEN_VIRT_START)];
+    }
+
+    rpt[root_table_offset(XEN_VIRT_START)] = common_pgt;
 
     /* Install direct map page table entries for stack, IDT, and TSS. */
     for ( off = rc = 0; !rc && off < STACK_SIZE; off += PAGE_SIZE )
diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index 707c746..458d810 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -13,6 +13,8 @@
 #include <public/xen.h>
 #include <irq_vectors.h>
 
+        .section .text.entry, "ax", @progbits
+
 ENTRY(entry_int82)
         ASM_CLAC
         pushq $0
@@ -270,6 +272,9 @@ ENTRY(compat_int80_direct_trap)
         call  compat_create_bounce_frame
         jmp   compat_test_all_events
 
+        /* compat_create_bounce_frame & helpers don't need to be in .text.entry */
+        .text
+
 /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK:            */
 /*   {[ERRCODE,] EIP, CS, EFLAGS, [ESP, SS]}                             */
 /* %rdx: trap_bounce, %rbx: struct vcpu                                  */
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 58f652d..941f06f 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -14,6 +14,8 @@
 #include <public/xen.h>
 #include <irq_vectors.h>
 
+        .section .text.entry, "ax", @progbits
+
 /* %rbx: struct vcpu */
 ENTRY(switch_to_kernel)
         leaq  VCPU_trap_bounce(%rbx),%rdx
@@ -357,6 +359,9 @@ int80_slow_path:
         subq  $2,UREGS_rip(%rsp)
         jmp   handle_exception_saved
 
+        /* create_bounce_frame & helpers don't need to be in .text.entry */
+        .text
+
 /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS STACK:                     */
 /*   { RCX, R11, [ERRCODE,] RIP, CS, RFLAGS, RSP, SS }                   */
 /* %rdx: trap_bounce, %rbx: struct vcpu                                  */
@@ -487,6 +492,8 @@ ENTRY(dom_crash_sync_extable)
         jmp   asm_domain_crash_synchronous /* Does not return */
         .popsection
 
+        .section .text.entry, "ax", @progbits
+
 ENTRY(common_interrupt)
         SAVE_ALL CLAC
 
@@ -845,7 +852,7 @@ GLOBAL(trap_nop)
         iretq
 
 /* Table of automatically generated entry points.  One per vector. */
-        .section .init.rodata, "a", @progbits
+        .pushsection .init.rodata, "a", @progbits
 GLOBAL(autogen_entrypoints)
         /* pop into the .init.rodata section and record an entry point. */
         .macro entrypoint ent
@@ -854,7 +861,7 @@ GLOBAL(autogen_entrypoints)
         .popsection
         .endm
 
-        .text
+        .popsection
 autogen_stubs: /* Automatically generated stubs. */
 
         vec = 0
diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
index 0952980..25c6cbc 100644
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -67,6 +67,13 @@ SECTIONS
        *(.text)
        *(.text.__x86_indirect_thunk_*)
        *(.text.page_aligned)
+
+       . = ALIGN(PAGE_SIZE);
+       _stextentry = .;
+       *(.text.entry)
+       . = ALIGN(PAGE_SIZE);
+       _etextentry = .;
+
        *(.text.cold)
        *(.text.unlikely)
        *(.fixup)
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2018-02-15 10:18 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-02-13 19:45 [PATCH] x86/xpti: Hide almost all of .text and all .data/.rodata/.bss mappings Andrew Cooper
2018-02-14  7:54 ` Juergen Gross
2018-02-14 11:48   ` Andrew Cooper
2018-02-14 12:03     ` Juergen Gross
2018-02-14 12:15       ` Juergen Gross
2018-02-14 12:19         ` Andrew Cooper
2018-02-14 12:27           ` Juergen Gross
2018-02-14 12:36             ` Andrew Cooper
2018-02-14  9:14 ` Jan Beulich
2018-02-14 12:34   ` Andrew Cooper
2018-02-14 12:58     ` Jan Beulich
  -- strict thread matches above, loose matches on Subject: below --
2018-02-14 16:23 Andrew Cooper
2018-02-15 10:18 ` Jan Beulich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).