From: Juergen Gross <jgross@suse.com>
To: xen-devel@lists.xenproject.org
Cc: Juergen Gross <jgross@suse.com>,
andrew.cooper3@citrix.com, dfaggioli@suse.com, jbeulich@suse.com
Subject: [PATCH v3 17/17] x86: hide most hypervisor mappings in XPTI shadow page tables
Date: Fri, 9 Feb 2018 15:01:51 +0100
Message-ID: <20180209140151.24714-18-jgross@suse.com>
In-Reply-To: <20180209140151.24714-1-jgross@suse.com>
Hide all but the absolutely necessary hypervisor mappings in the XPTI
shadow page tables. The following mappings are needed:
- guest-accessible areas, e.g. the RO M2P table
- IDT, TSS, GDT
- interrupt entry stacks
- interrupt handling code
For some of those mappings we need to set up lower-level page tables
with just a few entries populated.
Signed-off-by: Juergen Gross <jgross@suse.com>
---
xen/arch/x86/pv/xpti.c | 229 ++++++++++++++++++++++++++++++++++++-
xen/arch/x86/traps.c | 2 +-
xen/arch/x86/x86_64/compat/entry.S | 4 +
xen/arch/x86/x86_64/entry.S | 4 +
xen/include/asm-x86/pv/mm.h | 5 +
5 files changed, 241 insertions(+), 3 deletions(-)
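As a reference for the L4 slot numbers used below: each L4 slot maps
2^39 bytes (512 GiB), so the slot of a virtual address is simply bits
39..47. A minimal standalone sketch (l4_slot() is a stand-in for Xen's
l4_table_offset(); the example addresses follow from the standard
64-bit Xen virtual memory layout, so treat them as illustrative):

    #include <stdint.h>
    #include <stdio.h>

    /* Slot numbers match the cases in xpti_update_l4_entry() below. */
    static unsigned int l4_slot(uint64_t va)
    {
        return (va >> 39) & 0x1ff;      /* 9 bits, entries 0..511 */
    }

    int main(void)
    {
        printf("%u\n", l4_slot(0xffff800000000000ULL)); /* 256: RO M2P, kept */
        printf("%u\n", l4_slot(0xffff820000000000ULL)); /* 260: per-domain, shadowed */
        printf("%u\n", l4_slot(0xffff828000000000ULL)); /* 261: Xen text/data, filtered */
        return 0;
    }

The superpage handling in xpti_add_mapping() below is plain pfn
masking: for a 1 GiB (or 2 MiB) mapping, the low 2 * PAGETABLE_ORDER
(or PAGETABLE_ORDER) pfn bits come from the address and the upper bits
from the page table entry. A sketch of the 1 GiB case, assuming
PAGETABLE_ORDER == 9 and 4K pages:

    /* mfn of the 4K page backing va inside a 1 GiB superpage whose
     * L3 entry holds base pfn l3e_pfn. */
    static uint64_t mfn_in_1g(uint64_t l3e_pfn, uint64_t va)
    {
        const uint64_t mask = (1ULL << (2 * 9)) - 1;    /* 0x3ffff */
        return (l3e_pfn & ~mask) | ((va >> 12) & mask);
    }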
diff --git a/xen/arch/x86/pv/xpti.c b/xen/arch/x86/pv/xpti.c
index e08aa782bf..dea34322d7 100644
--- a/xen/arch/x86/pv/xpti.c
+++ b/xen/arch/x86/pv/xpti.c
@@ -19,13 +19,16 @@
* along with this program; If not, see <http://www.gnu.org/licenses/>.
*/
+#include <xen/cpu.h>
#include <xen/domain_page.h>
#include <xen/errno.h>
#include <xen/init.h>
#include <xen/keyhandler.h>
#include <xen/lib.h>
+#include <xen/notifier.h>
#include <xen/sched.h>
#include <asm/bitops.h>
+#include <asm/pv/mm.h>
/*
* For each L4 page table of the guest we need a shadow for the hypervisor.
@@ -118,6 +121,7 @@ struct xpti_domain {
unsigned int unused_first; /* List of unused slots */
spinlock_t lock; /* Protects all shadow lists */
struct domain *domain;
+ struct page_info *l3_shadow;
struct tasklet tasklet;
l1_pgentry_t **perdom_l1tab;
#ifdef XPTI_DEBUG
@@ -140,6 +144,9 @@ static __read_mostly enum {
XPTI_NODOM0
} opt_xpti = XPTI_DEFAULT;
+static bool xpti_l3_shadow = false;
+static l3_pgentry_t *xpti_l3_shadows[11];
+
static int parse_xpti(const char *s)
{
int rc = 0;
@@ -357,6 +364,34 @@ static unsigned int xpti_shadow_getforce(struct xpti_domain *xd)
return idx;
}
+static void xpti_update_l4_entry(struct xpti_domain *xd, l4_pgentry_t *dest,
+ l4_pgentry_t entry, unsigned int slot)
+{
+ l3_pgentry_t *l3pg;
+
+ switch ( slot )
+ {
+ case 257: /* ioremap area. */
+ case 258: /* linear page table (guest table). */
+ case 259: /* linear page table (shadow table). */
+ dest[slot] = l4e_empty();
+ break;
+ case 260: /* per-domain mappings. */
+ dest[slot] = l4e_from_page(xd->l3_shadow, __PAGE_HYPERVISOR);
+ break;
+ case 261 ... 271: /* hypervisor text and data, direct phys mapping. */
+ l3pg = xpti_l3_shadows[slot - 261];
+ dest[slot] = l3pg
+ ? l4e_from_mfn(_mfn(virt_to_mfn(l3pg)), __PAGE_HYPERVISOR)
+ : l4e_empty();
+ break;
+ case 256: /* read-only guest accessible m2p table. */
+ default:
+ dest[slot] = entry;
+ break;
+ }
+}
+
static void xpti_init_xen_l4(struct xpti_domain *xd, struct xpti_l4pg *l4pg)
{
unsigned int i;
@@ -365,7 +400,7 @@ static void xpti_init_xen_l4(struct xpti_domain *xd, struct xpti_l4pg *l4pg)
src = map_domain_page(_mfn(l4pg->guest_mfn));
dest = mfn_to_virt(l4pg->xen_mfn);
for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
- dest[i] = src[i];
+ xpti_update_l4_entry(xd, dest, src[i], i);
unmap_domain_page(src);
}
@@ -432,7 +467,7 @@ void xpti_update_l4(const struct domain *d, unsigned long mfn,
if ( idx != L4_INVALID )
{
l4 = mfn_to_virt(xd->l4pg[idx].xen_mfn);
- l4[slot] = e;
+ xpti_update_l4_entry(xd, l4, e, slot);
}
spin_unlock_irqrestore(&xd->lock, flags);
@@ -550,6 +585,8 @@ void xpti_domain_destroy(struct domain *d)
free_xenheap_page(xpti_shadow_free(xd, idx));
}
+ if ( xd->l3_shadow )
+ free_domheap_page(xd->l3_shadow);
xfree(xd->perdom_l1tab);
xfree(xd->l4pg);
xfree(xd->l4ref);
@@ -642,6 +679,125 @@ static int xpti_vcpu_init(struct vcpu *v)
return rc;
}
+static int xpti_add_mapping(unsigned long addr)
+{
+ unsigned int slot, flags, mapflags;
+ unsigned long mfn;
+ l3_pgentry_t *pl3e;
+ l2_pgentry_t *pl2e;
+ l1_pgentry_t *pl1e;
+
+ slot = l4_table_offset(addr);
+ pl3e = l4e_to_l3e(idle_pg_table[slot]);
+
+ slot = l3_table_offset(addr);
+ mapflags = l3e_get_flags(pl3e[slot]);
+ ASSERT(mapflags & _PAGE_PRESENT);
+ if ( mapflags & _PAGE_PSE )
+ {
+ mapflags &= ~_PAGE_PSE;
+ mfn = l3e_get_pfn(pl3e[slot]) & ~((1UL << (2 * PAGETABLE_ORDER)) - 1);
+ mfn |= PFN_DOWN(addr) & ((1UL << (2 * PAGETABLE_ORDER)) - 1);
+ }
+ else
+ {
+ pl2e = l3e_to_l2e(pl3e[slot]);
+ slot = l2_table_offset(addr);
+ mapflags = l2e_get_flags(pl2e[slot]);
+ ASSERT(mapflags & _PAGE_PRESENT);
+ if ( mapflags & _PAGE_PSE )
+ {
+ mapflags &= ~_PAGE_PSE;
+ mfn = l2e_get_pfn(pl2e[slot]) & ~((1UL << PAGETABLE_ORDER) - 1);
+ mfn |= PFN_DOWN(addr) & ((1UL << PAGETABLE_ORDER) - 1);
+ }
+ else
+ {
+ pl1e = l2e_to_l1e(pl2e[slot]);
+ slot = l1_table_offset(addr);
+ mapflags = l1e_get_flags(pl1e[slot]);
+ ASSERT(mapflags & _PAGE_PRESENT);
+ mfn = l1e_get_pfn(pl1e[slot]);
+ }
+ }
+
+ slot = l4_table_offset(addr);
+ ASSERT(slot >= 261 && slot <= 271);
+ pl3e = xpti_l3_shadows[slot - 261];
+ if ( !pl3e )
+ {
+ pl3e = alloc_xen_pagetable();
+ if ( !pl3e )
+ return -ENOMEM;
+ clear_page(pl3e);
+ xpti_l3_shadows[slot - 261] = pl3e;
+ }
+
+ slot = l3_table_offset(addr);
+ flags = l3e_get_flags(pl3e[slot]);
+ if ( !(flags & _PAGE_PRESENT) )
+ {
+ pl2e = alloc_xen_pagetable();
+ if ( !pl2e )
+ return -ENOMEM;
+ clear_page(pl2e);
+ pl3e[slot] = l3e_from_mfn(_mfn(virt_to_mfn(pl2e)), __PAGE_HYPERVISOR);
+ }
+ else
+ {
+ pl2e = l3e_to_l2e(pl3e[slot]);
+ }
+
+ slot = l2_table_offset(addr);
+ flags = l2e_get_flags(pl2e[slot]);
+ if ( !(flags & _PAGE_PRESENT) )
+ {
+ pl1e = alloc_xen_pagetable();
+ if ( !pl1e )
+ return -ENOMEM;
+ clear_page(pl1e);
+ pl2e[slot] = l2e_from_mfn(_mfn(virt_to_mfn(pl1e)), __PAGE_HYPERVISOR);
+ }
+ else
+ {
+ pl1e = l2e_to_l1e(pl2e[slot]);
+ }
+
+ slot = l1_table_offset(addr);
+ pl1e[slot] = l1e_from_mfn(_mfn(mfn), mapflags);
+
+ return 0;
+}
+
+static void xpti_rm_mapping(unsigned long addr)
+{
+ unsigned int slot, flags;
+ l3_pgentry_t *pl3e;
+ l2_pgentry_t *pl2e;
+ l1_pgentry_t *pl1e;
+
+ slot = l4_table_offset(addr);
+ ASSERT(slot >= 261 && slot <= 271);
+ pl3e = xpti_l3_shadows[slot - 261];
+ if ( !pl3e )
+ return;
+
+ slot = l3_table_offset(addr);
+ flags = l3e_get_flags(pl3e[slot]);
+ if ( !(flags & _PAGE_PRESENT) )
+ return;
+
+ pl2e = l3e_to_l2e(pl3e[slot]);
+ slot = l2_table_offset(addr);
+ flags = l2e_get_flags(pl2e[slot]);
+ if ( !(flags & _PAGE_PRESENT) )
+ return;
+
+ pl1e = l2e_to_l1e(pl2e[slot]);
+ slot = l1_table_offset(addr);
+ pl1e[slot] = l1e_empty();
+}
+
int xpti_domain_init(struct domain *d)
{
bool xpti = false;
@@ -649,7 +805,9 @@ int xpti_domain_init(struct domain *d)
struct vcpu *v;
struct xpti_domain *xd;
void *virt;
+ unsigned long addr;
unsigned int i, new;
+ l3_pgentry_t *l3tab, *l3shadow;
if ( !is_pv_domain(d) || is_pv_32bit_domain(d) )
return 0;
@@ -683,6 +841,27 @@ int xpti_domain_init(struct domain *d)
xd->lru_last = L4_INVALID;
xd->free_first = L4_INVALID;
+ if ( !xpti_l3_shadow )
+ {
+ xpti_l3_shadow = true;
+
+ for_each_online_cpu ( i )
+ if ( xpti_add_mapping((unsigned long)idt_tables[i]) )
+ goto done;
+
+ for ( addr = round_pgdown((unsigned long)&xpti_map_start);
+ addr <= round_pgdown((unsigned long)&xpti_map_end - 1);
+ addr += PAGE_SIZE )
+ if ( xpti_add_mapping(addr) )
+ goto done;
+
+ for ( addr = round_pgdown((unsigned long)&xpti_map_start_compat);
+ addr <= round_pgdown((unsigned long)&xpti_map_end_compat - 1);
+ addr += PAGE_SIZE )
+ if ( xpti_add_mapping(addr) )
+ goto done;
+ }
+
spin_lock_init(&xd->lock);
tasklet_init(&xd->tasklet, xpti_tasklet, (unsigned long)xd);
@@ -725,6 +904,16 @@ int xpti_domain_init(struct domain *d)
goto done;
}
+ xd->l3_shadow = alloc_domheap_page(d, MEMF_no_owner);
+ if ( !xd->l3_shadow )
+ goto done;
+ l3tab = __map_domain_page(d->arch.perdomain_l3_pg);
+ l3shadow = __map_domain_page(xd->l3_shadow);
+ clear_page(l3shadow);
+ l3shadow[0] = l3tab[0]; /* GDT/LDT shadow mapping. */
+ l3shadow[3] = l3tab[3]; /* XPTI mappings. */
+ unmap_domain_page(l3shadow);
+ unmap_domain_page(l3tab);
ret = 0;
printk("Enabling Xen Pagetable protection (XPTI) for Domain %d\n",
@@ -801,3 +990,39 @@ static int __init xpti_key_init(void)
return 0;
}
__initcall(xpti_key_init);
+
+static int xpti_cpu_callback(struct notifier_block *nfb, unsigned long action,
+ void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+ int rc = 0;
+
+ if ( !xpti_l3_shadow )
+ return NOTIFY_DONE;
+
+ switch ( action )
+ {
+ case CPU_DOWN_FAILED:
+ case CPU_ONLINE:
+ rc = xpti_add_mapping((unsigned long)idt_tables[cpu]);
+ break;
+ case CPU_DOWN_PREPARE:
+ xpti_rm_mapping((unsigned long)idt_tables[cpu]);
+ break;
+ default:
+ break;
+ }
+
+ return !rc ? NOTIFY_DONE : notifier_from_errno(rc);
+}
+
+static struct notifier_block xpti_cpu_nfb = {
+ .notifier_call = xpti_cpu_callback
+};
+
+static int __init xpti_presmp_init(void)
+{
+ register_cpu_notifier(&xpti_cpu_nfb);
+ return 0;
+}
+presmp_initcall(xpti_presmp_init);
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 93b228dced..00cc7cd9d7 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -102,7 +102,7 @@ DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, gdt_table);
DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, compat_gdt_table);
/* Master table, used by CPU0. */
-idt_entry_t idt_table[IDT_ENTRIES];
+idt_entry_t idt_table[IDT_ENTRIES] __aligned(PAGE_SIZE);
/* Pointer to the IDT of every CPU. */
idt_entry_t *idt_tables[NR_CPUS] __read_mostly;
diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index 206bc9a05a..575a3e5d8e 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -13,6 +13,8 @@
#include <public/xen.h>
#include <irq_vectors.h>
+ENTRY(xpti_map_start_compat)
+
ENTRY(entry_int82)
ASM_CLAC
pushq $0
@@ -367,3 +369,5 @@ compat_crash_page_fault:
jmp .Lft14
.previous
_ASM_EXTABLE(.Lft14, .Lfx14)
+
+ENTRY(xpti_map_end_compat)
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 909f6eea66..d1cb355044 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -14,6 +14,8 @@
#include <public/xen.h>
#include <irq_vectors.h>
+ENTRY(xpti_map_start)
+
/* %rbx: struct vcpu, %r12: user_regs */
ENTRY(switch_to_kernel)
leaq VCPU_trap_bounce(%rbx),%rdx
@@ -735,6 +737,8 @@ ENTRY(enable_nmis)
GLOBAL(trap_nop)
iretq
+ENTRY(xpti_map_end)
+
/* Table of automatically generated entry points. One per vector. */
.section .init.rodata, "a", @progbits
GLOBAL(autogen_entrypoints)
diff --git a/xen/include/asm-x86/pv/mm.h b/xen/include/asm-x86/pv/mm.h
index 8a90af1084..36e1856b8d 100644
--- a/xen/include/asm-x86/pv/mm.h
+++ b/xen/include/asm-x86/pv/mm.h
@@ -23,6 +23,11 @@
#ifdef CONFIG_PV
+extern void *xpti_map_start;
+extern void *xpti_map_end;
+extern void *xpti_map_start_compat;
+extern void *xpti_map_end_compat;
+
int pv_ro_page_fault(unsigned long addr, struct cpu_user_regs *regs);
long pv_set_gdt(struct vcpu *v, unsigned long *frames, unsigned int entries);
--
2.13.6