From: Christoph Egger <Christoph.Egger@amd.com>
To: "xen-devel@lists.xensource.com" <xen-devel@lists.xensource.com>
Cc: Tim Deegan <Tim.Deegan@citrix.com>
Subject: Re: [PATCH 12/12] Nested Virtualization: hap-on-hap
Date: Tue, 5 Apr 2011 17:48:10 +0200 [thread overview]
Message-ID: <4D9B39BA.8090807@amd.com> (raw)
In-Reply-To: <201103311725.13871.Christoph.Egger@amd.com>
[-- Attachment #1: Type: text/plain, Size: 502 bytes --]
On 03/31/11 17:25, Christoph Egger wrote:
>
> This is the new version. I fixed the open items from Tim's last review.
Sorry, I mistakenly resent an older version and noticed it just now.
This time this is the latest version.
Christoph
--
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85689 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632
[-- Attachment #2: xen_nh12_haphap.diff --]
[-- Type: text/plain, Size: 66809 bytes --]
# HG changeset patch
# User cegger
# Date 1302011049 -7200
Implement Nested-on-Nested.
This allows an L1 guest to run a nested (L2) guest with HAP enabled.
Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
diff -r cfde4384be14 -r 28809c365861 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1170,21 +1170,50 @@ void hvm_inject_exception(unsigned int t
hvm_funcs.inject_exception(trapnr, errcode, cr2);
}
-bool_t hvm_hap_nested_page_fault(unsigned long gpa,
- bool_t gla_valid,
- unsigned long gla,
- bool_t access_valid,
- bool_t access_r,
- bool_t access_w,
- bool_t access_x)
+int hvm_hap_nested_page_fault(unsigned long gpa,
+ bool_t gla_valid,
+ unsigned long gla,
+ bool_t access_valid,
+ bool_t access_r,
+ bool_t access_w,
+ bool_t access_x)
{
unsigned long gfn = gpa >> PAGE_SHIFT;
p2m_type_t p2mt;
p2m_access_t p2ma;
mfn_t mfn;
struct vcpu *v = current;
- struct p2m_domain *p2m = p2m_get_hostp2m(v->domain);
-
+ struct p2m_domain *p2m = NULL;
+
+ /* On Nested Virtualization, walk the guest page table.
+ * If this succeeds, all is fine.
+ * If this fails, inject a nested page fault into the guest.
+ */
+ if ( nestedhvm_enabled(v->domain)
+ && nestedhvm_vcpu_in_guestmode(v)
+ && nestedhvm_paging_mode_hap(v) )
+ {
+ int rv;
+
+ /* The vcpu is in guest mode and the l1 guest
+ * uses hap. That means 'gpa' is in l2 guest
+ * physical address space.
+ * Fix the nested p2m or inject nested page fault
+ * into l1 guest if not fixable. The algorithm is
+ * the same as for shadow paging.
+ */
+ rv = nestedhvm_hap_nested_page_fault(v, gpa);
+ switch (rv) {
+ case NESTEDHVM_PAGEFAULT_DONE:
+ return 1;
+ case NESTEDHVM_PAGEFAULT_ERROR:
+ return 0;
+ case NESTEDHVM_PAGEFAULT_INJECT:
+ return -1;
+ }
+ }
+
+ p2m = p2m_get_hostp2m(v->domain);
mfn = gfn_to_mfn_type_current(p2m, gfn, &p2mt, &p2ma, p2m_guest);
/* Check access permissions first, then handle faults */
@@ -1328,6 +1357,15 @@ int hvm_set_efer(uint64_t value)
return X86EMUL_EXCEPTION;
}
+ if ( nestedhvm_enabled(v->domain) && cpu_has_svm &&
+ ((value & EFER_SVME) == 0 ) &&
+ ((value ^ v->arch.hvm_vcpu.guest_efer) & EFER_SVME) )
+ {
+ /* Cleared EFER.SVME: Flush all nestedp2m tables */
+ p2m_flush_nestedp2m(v->domain);
+ nestedhvm_vcpu_reset(v);
+ }
+
value |= v->arch.hvm_vcpu.guest_efer & EFER_LMA;
v->arch.hvm_vcpu.guest_efer = value;
hvm_update_guest_efer(v);
@@ -1478,8 +1516,12 @@ int hvm_set_cr0(unsigned long value)
v->arch.hvm_vcpu.guest_cr[0] = value;
hvm_update_guest_cr(v, 0);
- if ( (value ^ old_value) & X86_CR0_PG )
- paging_update_paging_modes(v);
+ if ( (value ^ old_value) & X86_CR0_PG ) {
+ if ( !nestedhvm_vmswitch_in_progress(v) && nestedhvm_vcpu_in_guestmode(v) )
+ paging_update_nestedmode(v);
+ else
+ paging_update_paging_modes(v);
+ }
return X86EMUL_OKAY;
@@ -1546,8 +1588,12 @@ int hvm_set_cr4(unsigned long value)
hvm_update_guest_cr(v, 4);
/* Modifying CR4.{PSE,PAE,PGE} invalidates all TLB entries, inc. Global. */
- if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
- paging_update_paging_modes(v);
+ if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) ) {
+ if ( !nestedhvm_vmswitch_in_progress(v) && nestedhvm_vcpu_in_guestmode(v) )
+ paging_update_nestedmode(v);
+ else
+ paging_update_paging_modes(v);
+ }
return X86EMUL_OKAY;
@@ -2060,7 +2106,7 @@ static enum hvm_copy_result __hvm_copy(
void *buf, paddr_t addr, int size, unsigned int flags, uint32_t pfec)
{
struct vcpu *curr = current;
- struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain);
+ struct p2m_domain *p2m;
unsigned long gfn, mfn;
p2m_type_t p2mt;
char *p;
@@ -2082,6 +2128,8 @@ static enum hvm_copy_result __hvm_copy(
return HVMCOPY_unhandleable;
#endif
+ p2m = p2m_get_hostp2m(curr->domain);
+
while ( todo > 0 )
{
count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo);
diff -r cfde4384be14 -r 28809c365861 xen/arch/x86/hvm/nestedhvm.c
--- a/xen/arch/x86/hvm/nestedhvm.c
+++ b/xen/arch/x86/hvm/nestedhvm.c
@@ -20,6 +20,7 @@
#include <asm/msr.h>
#include <asm/hvm/support.h> /* for HVM_DELIVER_NO_ERROR_CODE */
#include <asm/hvm/hvm.h>
+#include <asm/p2m.h> /* for struct p2m_domain */
#include <asm/hvm/nestedhvm.h>
#include <asm/event.h> /* for local_event_delivery_(en|dis)able */
#include <asm/paging.h> /* for paging_mode_hap() */
@@ -96,6 +97,54 @@ nestedhvm_vcpu_destroy(struct vcpu *v)
return nhvm_vcpu_destroy(v);
}
+/*
+ * IPI callback: 'info' carries the target domain.  If the vcpu currently
+ * running on this cpu belongs to that domain, flush this core's ASIDs and
+ * reset the vcpu's nv_p2m pointer to NULL; otherwise do nothing.
+ */
+static void
+nestedhvm_flushtlb_ipi(void *info)
+{
+    struct domain *target = info;
+    struct vcpu *curr = current;
+
+    ASSERT(target != NULL);
+
+    /* Only act when this cpu is running a vcpu of the target domain. */
+    if (curr->domain == target) {
+        /* Flushing the ASID (or requesting a new one) is cheaper than
+         * flush_tlb_local() and has the same desired effect. */
+        hvm_asid_flush_core();
+        vcpu_nestedhvm(curr).nv_p2m = NULL;
+    }
+}
+
+/*
+ * Run nestedhvm_flushtlb_ipi() on the cpus recorded in the given p2m's
+ * p2m_dirty_cpumask, passing the p2m's owning domain as the callback
+ * argument, and then clear that mask.
+ * NOTE(review): the trailing 1 is presumably the "wait for completion"
+ * flag of on_selected_cpus() -- confirm.
+ */
+void
+nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m)
+{
+ on_selected_cpus(&p2m->p2m_dirty_cpumask, nestedhvm_flushtlb_ipi,
+ p2m->domain, 1);
+ cpus_clear(p2m->p2m_dirty_cpumask);
+}
+
+/*
+ * Run nestedhvm_flushtlb_ipi() on the cpus in the domain's
+ * domain_dirty_cpumask, passing the domain as the callback argument.
+ * Unlike nestedhvm_vmcx_flushtlb(), no cpumask is cleared here.
+ */
+void
+nestedhvm_vmcx_flushtlbdomain(struct domain *d)
+{
+ on_selected_cpus(&d->domain_dirty_cpumask, nestedhvm_flushtlb_ipi, d, 1);
+}
+
+/*
+ * Returns 1 only when nested HVM is enabled for the vcpu's domain, no
+ * vmswitch is in progress, the nested paging mode is hap and the vcpu is
+ * in guest mode; returns 0 in every other case.
+ */
+bool_t
+nestedhvm_is_n2(struct vcpu *v)
+{
+    if (!nestedhvm_enabled(v->domain))
+        return 0;
+
+    if (nestedhvm_vmswitch_in_progress(v))
+        return 0;
+
+    if (!nestedhvm_paging_mode_hap(v))
+        return 0;
+
+    return nestedhvm_vcpu_in_guestmode(v) ? 1 : 0;
+}
+
/* Common shadow IO Permission bitmap */
/* There four global patterns of io bitmap each guest can
diff -r cfde4384be14 -r 28809c365861 xen/arch/x86/hvm/svm/nestedsvm.c
--- a/xen/arch/x86/hvm/svm/nestedsvm.c
+++ b/xen/arch/x86/hvm/svm/nestedsvm.c
@@ -26,6 +26,7 @@
#include <asm/hvm/svm/svmdebug.h>
#include <asm/paging.h> /* paging_mode_hap */
#include <asm/event.h> /* for local_event_delivery_(en|dis)able */
+#include <asm/p2m.h> /* p2m_get_pagetable, p2m_get_nestedp2m */
static void
nestedsvm_vcpu_clgi(struct vcpu *v)
@@ -320,6 +321,18 @@ static int nsvm_vmrun_permissionmap(stru
return 0;
}
+/*
+ * Look up the nested p2m that p2m_get_nestedp2m() returns for the h_cr3
+ * stored in the virtual vmcb and store the physical address of its page
+ * table in the n2 vmcb's h_cr3.
+ */
+static void nestedsvm_vmcb_set_nestedp2m(struct vcpu *v,
+    struct vmcb_struct *vvmcb, struct vmcb_struct *n2vmcb)
+{
+    struct p2m_domain *nested;
+
+    ASSERT(v != NULL);
+    ASSERT(vvmcb != NULL);
+    ASSERT(n2vmcb != NULL);
+
+    nested = p2m_get_nestedp2m(v, vvmcb->_h_cr3);
+    n2vmcb->_h_cr3 = pagetable_get_paddr(p2m_get_pagetable(nested));
+}
+
static int nsvm_vmcb_prepare4vmrun(struct vcpu *v, struct cpu_user_regs *regs)
{
struct nestedvcpu *nv = &vcpu_nestedhvm(v);
@@ -475,6 +488,9 @@ static int nsvm_vmcb_prepare4vmrun(struc
/* Nested paging mode */
if (nestedhvm_paging_mode_hap(v)) {
/* host nested paging + guest nested paging. */
+ n2vmcb->_np_enable = 1;
+
+ nestedsvm_vmcb_set_nestedp2m(v, ns_vmcb, n2vmcb);
/* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
rc = hvm_set_cr3(ns_vmcb->_cr3);
@@ -1318,8 +1334,20 @@ asmlinkage void nsvm_vcpu_switch(struct
ret = nsvm_vcpu_vmrun(v, regs);
if (ret < 0)
goto vmexit;
+
+ ASSERT(nestedhvm_vcpu_in_guestmode(v));
nv->nv_vmentry_pending = 0;
- return;
+ }
+
+ if (nestedhvm_vcpu_in_guestmode(v)
+ && nestedhvm_paging_mode_hap(v))
+ {
+ /* If we left the l2 guest due to a physical interrupt (e.g. IPI)
+ * that is not for the l1 guest, we continue running the l2 guest
+ * but check whether the nestedp2m is still valid.
+ */
+ if (nv->nv_p2m == NULL)
+ nestedsvm_vmcb_set_nestedp2m(v, nv->nv_vvmcx, nv->nv_n2vmcx);
}
}
diff -r cfde4384be14 -r 28809c365861 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1014,14 +1014,16 @@ struct hvm_function_table * __init start
return &svm_function_table;
}
-static void svm_do_nested_pgfault(paddr_t gpa)
+static void svm_do_nested_pgfault(struct vcpu *v,
+ struct cpu_user_regs *regs, paddr_t gpa)
{
+ int ret;
unsigned long gfn = gpa >> PAGE_SHIFT;
mfn_t mfn;
p2m_type_t p2mt;
- struct p2m_domain *p2m;
+ struct p2m_domain *p2m = NULL;
- p2m = p2m_get_hostp2m(current->domain);
+ ret = hvm_hap_nested_page_fault(gpa, 0, ~0ul, 0, 0, 0, 0);
if ( tb_init_done )
{
@@ -1032,6 +1034,7 @@ static void svm_do_nested_pgfault(paddr_
uint32_t p2mt;
} _d;
+ p2m = p2m_get_p2m(v);
_d.gpa = gpa;
_d.qualification = 0;
_d.mfn = mfn_x(gfn_to_mfn_query(p2m, gfn, &_d.p2mt));
@@ -1039,14 +1042,26 @@ static void svm_do_nested_pgfault(paddr_
__trace_var(TRC_HVM_NPF, 0, sizeof(_d), &_d);
}
- if ( hvm_hap_nested_page_fault(gpa, 0, ~0ul, 0, 0, 0, 0) )
+ switch (ret) {
+ case 0:
+ break;
+ case 1:
return;
+ case -1:
+ ASSERT(nestedhvm_enabled(v->domain) && nestedhvm_vcpu_in_guestmode(v));
+ /* inject #VMEXIT(NPF) into guest. */
+ nestedsvm_vmexit_defer(v, VMEXIT_NPF, regs->error_code, gpa);
+ return;
+ }
+ if ( p2m == NULL )
+ p2m = p2m_get_p2m(v);
/* Everything else is an error. */
mfn = gfn_to_mfn_guest(p2m, gfn, &p2mt);
- gdprintk(XENLOG_ERR, "SVM violation gpa %#"PRIpaddr", mfn %#lx, type %i\n",
- gpa, mfn_x(mfn), p2mt);
- domain_crash(current->domain);
+ gdprintk(XENLOG_ERR,
+ "SVM violation gpa %#"PRIpaddr", mfn %#lx, type %i\n",
+ gpa, mfn_x(mfn), p2mt);
+ domain_crash(v->domain);
}
static void svm_fpu_dirty_intercept(void)
@@ -1659,6 +1674,8 @@ asmlinkage void svm_vmexit_handler(struc
struct vmcb_struct *ns_vmcb = nv->nv_vvmcx;
uint64_t exitinfo1, exitinfo2;
+ paging_update_nestedmode(v);
+
/* Write real exitinfo1 back into virtual vmcb.
* nestedsvm_check_intercepts() expects to have the correct
* exitinfo1 value there.
@@ -1948,7 +1965,7 @@ asmlinkage void svm_vmexit_handler(struc
case VMEXIT_NPF:
perfc_incra(svmexits, VMEXIT_NPF_PERFC);
regs->error_code = vmcb->exitinfo1;
- svm_do_nested_pgfault(vmcb->exitinfo2);
+ svm_do_nested_pgfault(v, regs, vmcb->exitinfo2);
break;
case VMEXIT_IRET: {
diff -r cfde4384be14 -r 28809c365861 xen/arch/x86/mm/hap/Makefile
--- a/xen/arch/x86/mm/hap/Makefile
+++ b/xen/arch/x86/mm/hap/Makefile
@@ -3,6 +3,7 @@ obj-y += guest_walk_2level.o
obj-y += guest_walk_3level.o
obj-y += guest_walk_4level.o
obj-y += p2m-ept.o
+obj-y += nested_hap.o
guest_levels = $(subst level,,$(filter %level,$(subst ., ,$(subst _, ,$(1)))))
guest_walk_defns = -DGUEST_PAGING_LEVELS=$(call guest_levels,$(1))
diff -r cfde4384be14 -r 28809c365861 xen/arch/x86/mm/hap/guest_walk.c
--- a/xen/arch/x86/mm/hap/guest_walk.c
+++ b/xen/arch/x86/mm/hap/guest_walk.c
@@ -29,24 +29,32 @@
#define _hap_gva_to_gfn(levels) hap_gva_to_gfn_##levels##_levels
#define hap_gva_to_gfn(levels) _hap_gva_to_gfn(levels)
+#define _hap_p2m_ga_to_gfn(levels) hap_p2m_ga_to_gfn_##levels##_levels
+#define hap_p2m_ga_to_gfn(levels) _hap_p2m_ga_to_gfn(levels)
+
#if GUEST_PAGING_LEVELS <= CONFIG_PAGING_LEVELS
#include <asm/guest_pt.h>
#include <asm/p2m.h>
unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
- struct vcpu *v, unsigned long gva, uint32_t *pfec)
+ struct vcpu *v, struct p2m_domain *p2m, unsigned long gva, uint32_t *pfec)
{
- unsigned long cr3;
+ unsigned long cr3 = v->arch.hvm_vcpu.guest_cr[3];
+ return hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)(v, p2m, cr3, gva, pfec);
+}
+
+unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)(
+ struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3,
+ paddr_t ga, uint32_t *pfec)
+{
uint32_t missing;
mfn_t top_mfn;
void *top_map;
p2m_type_t p2mt;
walk_t gw;
- struct p2m_domain *p2m = p2m_get_hostp2m(v->domain);
/* Get the top-level table's MFN */
- cr3 = v->arch.hvm_vcpu.guest_cr[3];
top_mfn = gfn_to_mfn_unshare(p2m, cr3 >> PAGE_SHIFT, &p2mt, 0);
if ( p2m_is_paging(p2mt) )
{
@@ -72,7 +80,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
#if GUEST_PAGING_LEVELS == 3
top_map += (cr3 & ~(PAGE_MASK | 31));
#endif
- missing = guest_walk_tables(v, p2m, gva, &gw, pfec[0], top_mfn, top_map);
+ missing = guest_walk_tables(v, p2m, ga, &gw, pfec[0], top_mfn, top_map);
unmap_domain_page(top_map);
/* Interpret the answer */
@@ -122,6 +130,15 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
return INVALID_GFN;
}
+/*
+ * Stub variant of hap_p2m_ga_to_gfn(): logs an error, crashes the vcpu's
+ * domain and returns INVALID_GFN without walking anything.
+ * NOTE(review): presumably this sits in the #else arm of the
+ * GUEST_PAGING_LEVELS <= CONFIG_PAGING_LEVELS test; the directive itself
+ * is not visible in this hunk -- confirm.
+ */
+unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)(
+ struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3,
+ paddr_t ga, uint32_t *pfec)
+{
+ gdprintk(XENLOG_ERR,
+ "Guest paging level is greater than host paging level!\n");
+ domain_crash(v->domain);
+ return INVALID_GFN;
+}
#endif
diff -r cfde4384be14 -r 28809c365861 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -40,6 +40,7 @@
#include <asm/p2m.h>
#include <asm/domain.h>
#include <xen/numa.h>
+#include <asm/hvm/nestedhvm.h>
#include "private.h"
@@ -582,6 +583,7 @@ void hap_domain_init(struct domain *d)
int hap_enable(struct domain *d, u32 mode)
{
unsigned int old_pages;
+ uint8_t i;
int rv = 0;
domain_pause(d);
@@ -620,6 +622,12 @@ int hap_enable(struct domain *d, u32 mod
goto out;
}
+ for (i = 0; i < MAX_NESTEDP2M; i++) {
+ rv = p2m_alloc_table(d->arch.nested_p2m[i]);
+ if ( rv != 0 )
+ goto out;
+ }
+
/* Now let other users see the new mode */
d->arch.paging.mode = mode | PG_HAP_enable;
@@ -630,6 +638,13 @@ int hap_enable(struct domain *d, u32 mod
void hap_final_teardown(struct domain *d)
{
+ uint8_t i;
+
+ /* Destroy nestedp2m's first */
+ for (i = 0; i < MAX_NESTEDP2M; i++) {
+ p2m_teardown(d->arch.nested_p2m[i]);
+ }
+
if ( d->arch.paging.hap.total_pages != 0 )
hap_teardown(d);
@@ -657,7 +672,7 @@ void hap_teardown(struct domain *d)
/* release the monitor table held by each vcpu */
for_each_vcpu ( d, v )
{
- if ( v->arch.paging.mode && paging_mode_external(d) )
+ if ( paging_get_hostmode(v) && paging_mode_external(d) )
{
mfn = pagetable_get_mfn(v->arch.monitor_table);
if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
@@ -725,6 +740,7 @@ static const struct paging_mode hap_pagi
void hap_vcpu_init(struct vcpu *v)
{
v->arch.paging.mode = &hap_paging_real_mode;
+ v->arch.paging.nestedmode = &hap_paging_real_mode;
}
/************************************************/
@@ -751,6 +767,15 @@ static int hap_page_fault(struct vcpu *v
*/
static int hap_invlpg(struct vcpu *v, unsigned long va)
{
+ if (nestedhvm_enabled(v->domain)) {
+ /* Emulate INVLPGA:
+ * The flush must be performed right now; otherwise another vcpu
+ * may use it when the next VMRUN is emulated.
+ */
+ p2m_flush(v, vcpu_nestedhvm(v).nv_p2m);
+ return 1;
+ }
+
HAP_ERROR("Intercepted a guest INVLPG (%u:%u) with HAP enabled.\n",
v->domain->domain_id, v->vcpu_id);
domain_crash(v->domain);
@@ -763,17 +788,22 @@ static void hap_update_cr3(struct vcpu *
hvm_update_guest_cr(v, 3);
}
+/*
+ * Select the HAP paging-mode descriptor for the vcpu: real mode when
+ * paging is disabled, otherwise long mode, otherwise PAE, otherwise
+ * plain protected mode.
+ */
+const struct paging_mode *
+hap_paging_get_mode(struct vcpu *v)
+{
+    if ( !hvm_paging_enabled(v) )
+        return &hap_paging_real_mode;
+
+    if ( hvm_long_mode_enabled(v) )
+        return &hap_paging_long_mode;
+
+    if ( hvm_pae_enabled(v) )
+        return &hap_paging_pae_mode;
+
+    return &hap_paging_protected_mode;
+}
+
static void hap_update_paging_modes(struct vcpu *v)
{
struct domain *d = v->domain;
hap_lock(d);
- v->arch.paging.mode =
- !hvm_paging_enabled(v) ? &hap_paging_real_mode :
- hvm_long_mode_enabled(v) ? &hap_paging_long_mode :
- hvm_pae_enabled(v) ? &hap_paging_pae_mode :
- &hap_paging_protected_mode;
+ v->arch.paging.mode = hap_paging_get_mode(v);
if ( pagetable_is_null(v->arch.monitor_table) )
{
@@ -834,38 +864,70 @@ static void
hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p,
mfn_t table_mfn, l1_pgentry_t new, unsigned int level)
{
+ struct domain *d = v->domain;
uint32_t old_flags;
+ bool_t flush_nestedp2m = 0;
- hap_lock(v->domain);
+ /* We now always use the host p2m here, regardless of whether the vcpu
+ * is in host or guest mode. The vcpu can be in guest mode when this is
+ * reached via a hypercall which passes a domain and mostly chooses the
+ * first vcpu.
+ * XXX This is the reason why this function cannot be re-used
+ * for updating the nestedp2m. Otherwise, hypercalls would randomly
+ * operate on host p2m and nested p2m.
+ */
+ hap_lock(d);
old_flags = l1e_get_flags(*p);
+
+ if ( nestedhvm_enabled(d) && (old_flags & _PAGE_PRESENT) ) {
+ /* We are replacing a valid entry so we need to flush nested p2ms,
+ * unless the only change is an increase in access rights. */
+ mfn_t omfn = _mfn(l1e_get_pfn(*p));
+ mfn_t nmfn = _mfn(l1e_get_pfn(new));
+ flush_nestedp2m = !( mfn_x(omfn) == mfn_x(nmfn)
+ && perms_strictly_increased(old_flags, l1e_get_flags(new)) );
+ }
+
safe_write_pte(p, new);
if ( (old_flags & _PAGE_PRESENT)
&& (level == 1 || (level == 2 && (old_flags & _PAGE_PSE))) )
- flush_tlb_mask(&v->domain->domain_dirty_cpumask);
+ flush_tlb_mask(&d->domain_dirty_cpumask);
#if CONFIG_PAGING_LEVELS == 3
/* install P2M in monitor table for PAE Xen */
if ( level == 3 )
/* We have written to the p2m l3: need to sync the per-vcpu
* copies of it in the monitor tables */
- p2m_install_entry_in_monitors(v->domain, (l3_pgentry_t *)p);
+ p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p);
#endif
- hap_unlock(v->domain);
+ hap_unlock(d);
+
+ if ( flush_nestedp2m )
+ p2m_flush_nestedp2m(d);
}
static unsigned long hap_gva_to_gfn_real_mode(
- struct vcpu *v, unsigned long gva, uint32_t *pfec)
+ struct vcpu *v, struct p2m_domain *p2m, unsigned long gva, uint32_t *pfec)
{
return ((paddr_t)gva >> PAGE_SHIFT);
}
+/* Real-mode variant of p2m_ga_to_gfn: no table walk is done, the result is
+ * simply ga >> PAGE_SHIFT.  v, p2m, cr3 and pfec are not used. */
+static unsigned long hap_p2m_ga_to_gfn_real_mode(
+ struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3,
+ paddr_t ga, uint32_t *pfec)
+{
+ return (ga >> PAGE_SHIFT);
+}
+
+
/* Entry points into this mode of the hap code. */
static const struct paging_mode hap_paging_real_mode = {
.page_fault = hap_page_fault,
.invlpg = hap_invlpg,
.gva_to_gfn = hap_gva_to_gfn_real_mode,
+ .p2m_ga_to_gfn = hap_p2m_ga_to_gfn_real_mode,
.update_cr3 = hap_update_cr3,
.update_paging_modes = hap_update_paging_modes,
.write_p2m_entry = hap_write_p2m_entry,
@@ -876,6 +938,7 @@ static const struct paging_mode hap_pagi
.page_fault = hap_page_fault,
.invlpg = hap_invlpg,
.gva_to_gfn = hap_gva_to_gfn_2_levels,
+ .p2m_ga_to_gfn = hap_p2m_ga_to_gfn_2_levels,
.update_cr3 = hap_update_cr3,
.update_paging_modes = hap_update_paging_modes,
.write_p2m_entry = hap_write_p2m_entry,
@@ -886,6 +949,7 @@ static const struct paging_mode hap_pagi
.page_fault = hap_page_fault,
.invlpg = hap_invlpg,
.gva_to_gfn = hap_gva_to_gfn_3_levels,
+ .p2m_ga_to_gfn = hap_p2m_ga_to_gfn_3_levels,
.update_cr3 = hap_update_cr3,
.update_paging_modes = hap_update_paging_modes,
.write_p2m_entry = hap_write_p2m_entry,
@@ -896,6 +960,7 @@ static const struct paging_mode hap_pagi
.page_fault = hap_page_fault,
.invlpg = hap_invlpg,
.gva_to_gfn = hap_gva_to_gfn_4_levels,
+ .p2m_ga_to_gfn = hap_p2m_ga_to_gfn_4_levels,
.update_cr3 = hap_update_cr3,
.update_paging_modes = hap_update_paging_modes,
.write_p2m_entry = hap_write_p2m_entry,
diff -r cfde4384be14 -r 28809c365861 xen/arch/x86/mm/hap/nested_hap.c
--- /dev/null
+++ b/xen/arch/x86/mm/hap/nested_hap.c
@@ -0,0 +1,236 @@
+/******************************************************************************
+ * arch/x86/mm/hap/nested_hap.c
+ *
+ * Code for Nested Virtualization
+ * Copyright (c) 2011 Advanced Micro Devices
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <asm/domain.h>
+#include <asm/page.h>
+#include <asm/paging.h>
+#include <asm/p2m.h>
+#include <asm/mem_event.h>
+#include <public/mem_event.h>
+#include <asm/mem_sharing.h>
+#include <xen/event.h>
+#include <asm/hap.h>
+#include <asm/hvm/support.h>
+
+#include <asm/hvm/nestedhvm.h>
+
+#include "private.h"
+
+/* ALGORITHM for NESTED PAGE FAULT
+ *
+ * NOTATION
+ * Levels: L0, L1, L2
+ * Guests: L1 guest, L2 guest
+ * Hypervisor: L0 hypervisor
+ * Addresses: L2-GVA, L2-GPA, L1-GVA, L1-GPA, MPA
+ *
+ * On L0, when #NPF happens, the handler function should do:
+ * hap_page_fault(GPA)
+ * {
+ * 1. If #NPF is from L1 guest, then we crash the guest VM (same as old
+ * code)
+ * 2. If #NPF is from L2 guest, then we continue from (3)
+ * 3. Get h_cr3 from L1 guest. Map h_cr3 into L0 hypervisor address space.
+ * 4. Walk the h_cr3 page table
+ * 5. - if not present, then we inject #NPF back to L1 guest and
+ * re-launch L1 guest (L1 guest will either treat this #NPF as MMIO,
+ * or fix its p2m table for L2 guest)
+ * 6. - if present, then we will get a new translated value L1-GPA
+ * (points to L1 machine memory)
+ * 7. * Use L1-GPA to walk L0 P2M table
+ * 8. - if not present, then crash the guest (should not happen)
+ * 9. - if present, then we get a new translated value MPA
+ * (points to real machine memory)
+ * 10. * Finally, use GPA and MPA to walk nested_p2m
+ * and fix the bits.
+ * }
+ *
+ */
+
+
+/********************************************/
+/* NESTED VIRT P2M FUNCTIONS */
+/********************************************/
+/* Override macros from asm/page.h to make them work with mfn_t */
+#undef mfn_valid
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
+#undef page_to_mfn
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
+
+/* Write one nested-p2m entry under the hap lock; if the entry it replaces was
+ * present, call nestedhvm_vmcx_flushtlb(p2m).  gfn/table_mfn/level unused. */
+void
+nestedp2m_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
+    l1_pgentry_t *p, mfn_t table_mfn, l1_pgentry_t new, unsigned int level)
+{
+    struct domain *d = p2m->domain;
+    bool_t was_present;
+
+    hap_lock(d);
+    was_present = !!(l1e_get_flags(*p) & _PAGE_PRESENT);
+    safe_write_pte(p, new);
+    if ( was_present )
+        nestedhvm_vmcx_flushtlb(p2m);
+    hap_unlock(d);
+}
+
+/********************************************/
+/* NESTED VIRT FUNCTIONS */
+/********************************************/
+/* Install a 4K mapping L2_gpa -> L0_gpa in the nested p2m.  If set_entry()
+ * fails (returns 0), crash only the owning domain instead of the host. */
+static void
+nestedhap_fix_p2m(struct p2m_domain *p2m, paddr_t L2_gpa, paddr_t L0_gpa,
+                  p2m_type_t p2mt, p2m_access_t p2ma)
+{
+    ASSERT(p2m);
+    ASSERT(p2m->set_entry);
+
+    if ( p2m->set_entry(p2m, L2_gpa >> PAGE_SHIFT,
+                        page_to_mfn(maddr_to_page(L0_gpa)),
+                        0 /*4K*/, p2mt, p2ma) == 0 ) {
+        gdprintk(XENLOG_ERR,
+                 "failed to set entry for 0x%"PRIx64" -> 0x%"PRIx64"\n",
+                 L2_gpa, L0_gpa);
+        domain_crash(p2m->domain);
+    }
+}
+
+/* Translate L1_gpa through the L0 p2m using gfn_to_mfn_query().  Returns
+ * NESTEDHVM_PAGEFAULT_DONE with the translated address in *L0_gpa, or
+ * NESTEDHVM_PAGEFAULT_ERROR (leaving *L0_gpa untouched) when the page is
+ * paging, shared or not RAM, or when the mfn is not valid. */
+static int
+nestedhap_walk_L0_p2m(struct p2m_domain *p2m, paddr_t L1_gpa, paddr_t *L0_gpa)
+{
+    mfn_t mfn;
+    p2m_type_t p2mt;
+
+    /* we use gfn_to_mfn_query() function to walk L0 P2M table */
+    mfn = gfn_to_mfn_query(p2m, L1_gpa >> PAGE_SHIFT, &p2mt);
+
+    if ( p2m_is_paging(p2mt) || p2m_is_shared(p2mt) || !p2m_is_ram(p2mt) )
+        return NESTEDHVM_PAGEFAULT_ERROR;
+
+    if ( !mfn_valid(mfn) )
+        return NESTEDHVM_PAGEFAULT_ERROR;
+    /* Widen first: mfn_x() may be narrower than paddr_t (32-bit PAE). */
+    *L0_gpa = ((paddr_t)mfn_x(mfn) << PAGE_SHIFT) + (L1_gpa & ~PAGE_MASK);
+    return NESTEDHVM_PAGEFAULT_DONE;
+}
+
+/* Walk the L1 guest's nested page table, rooted at nhvm_vcpu_hostcr3(v), to
+ * translate L2_gpa.  Returns NESTEDHVM_PAGEFAULT_DONE with the result in
+ * *L1_gpa, or NESTEDHVM_PAGEFAULT_INJECT if the walk yields INVALID_GFN.
+ */
+static int
+nestedhap_walk_L1_p2m(struct vcpu *v, struct p2m_domain *p2m,
+                      paddr_t L2_gpa, paddr_t *L1_gpa)
+{
+    /* The hap_p2m_ga_to_gfn walkers read pfec[0], so it must be initialised.
+     * NOTE(review): user-mode access assumed for NPT walks -- confirm. */
+    uint32_t pfec = PFEC_user_mode | PFEC_page_present;
+    unsigned long nested_cr3, gfn;
+    const struct paging_mode *mode = paging_get_hostmode(v);
+
+    nested_cr3 = nhvm_vcpu_hostcr3(v);
+    gfn = paging_p2m_ga_to_gfn(v, p2m, mode, nested_cr3, L2_gpa, &pfec);
+    if ( gfn == INVALID_GFN )
+        return NESTEDHVM_PAGEFAULT_INJECT;
+
+    /* Widen before shifting so 32-bit PAE builds keep the high bits. */
+    *L1_gpa = ((paddr_t)gfn << PAGE_SHIFT) + (L2_gpa & ~PAGE_MASK);
+    return NESTEDHVM_PAGEFAULT_DONE;
+}
+
+/*
+ * nestedhvm_hap_nested_page_fault() implements steps (3)--(10) above.
+ * Returns NESTEDHVM_PAGEFAULT_DONE once the nested p2m is fixed up, else
+ * the NESTEDHVM_PAGEFAULT_INJECT/_ERROR code of the walk that failed.
+ */
+int
+nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t L2_gpa)
+{
+ int rv;
+ paddr_t L1_gpa, L0_gpa;
+ struct domain *d = v->domain;
+ struct p2m_domain *p2m, *nested_p2m;
+
+ p2m = p2m_get_hostp2m(d); /* L0 p2m */
+ nested_p2m = p2m_get_nestedp2m(v, nhvm_vcpu_hostcr3(v));
+
+ /* Walk the L1 P2M table.  Note: we have to pass p2m and
+ * not nested_p2m here, otherwise the walk would fail
+ * forever. */
+ rv = nestedhap_walk_L1_p2m(v, p2m, L2_gpa, &L1_gpa);
+
+ /* let the caller handle the INJECT and ERROR cases */
+ switch (rv) {
+ case NESTEDHVM_PAGEFAULT_INJECT:
+ return rv;
+ case NESTEDHVM_PAGEFAULT_ERROR:
+ return rv;
+ case NESTEDHVM_PAGEFAULT_DONE:
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ /* ==> L1 walk succeeded: now translate L1_gpa through the L0 P2M */
+ rv = nestedhap_walk_L0_p2m(p2m, L1_gpa, &L0_gpa);
+
+ /* let the upper-level caller handle the INJECT and ERROR cases */
+ switch (rv) {
+ case NESTEDHVM_PAGEFAULT_INJECT:
+ return rv;
+ case NESTEDHVM_PAGEFAULT_ERROR:
+ return rv;
+ case NESTEDHVM_PAGEFAULT_DONE:
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ nestedp2m_lock(d);
+ /* fix p2m_get_pagetable(nested_p2m): map L2_gpa -> L0_gpa in it */
+ nestedhap_fix_p2m(nested_p2m, L2_gpa, L0_gpa,
+ p2m_ram_rw,
+ p2m_access_rwx /* FIXME: Should use same permission as l1 guest */);
+ nestedp2m_unlock(d);
+
+ return NESTEDHVM_PAGEFAULT_DONE;
+}
+
+/********************************************/
+/* NESTED VIRT INITIALIZATION FUNCS */
+/********************************************/
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r cfde4384be14 -r 28809c365861 xen/arch/x86/mm/hap/private.h
--- a/xen/arch/x86/mm/hap/private.h
+++ b/xen/arch/x86/mm/hap/private.h
@@ -23,11 +23,27 @@
/********************************************/
/* GUEST TRANSLATION FUNCS */
/********************************************/
-unsigned long hap_gva_to_gfn_2_levels(struct vcpu *v, unsigned long gva,
+unsigned long hap_gva_to_gfn_2_levels(struct vcpu *v,
+ struct p2m_domain *p2m,
+ unsigned long gva,
uint32_t *pfec);
-unsigned long hap_gva_to_gfn_3_levels(struct vcpu *v, unsigned long gva,
+unsigned long hap_gva_to_gfn_3_levels(struct vcpu *v,
+ struct p2m_domain *p2m,
+ unsigned long gva,
uint32_t *pfec);
-unsigned long hap_gva_to_gfn_4_levels(struct vcpu *v, unsigned long gva,
+unsigned long hap_gva_to_gfn_4_levels(struct vcpu *v,
+ struct p2m_domain *p2m,
+ unsigned long gva,
uint32_t *pfec);
+unsigned long hap_p2m_ga_to_gfn_2_levels(struct vcpu *v,
+ struct p2m_domain *p2m, unsigned long cr3,
+ paddr_t ga, uint32_t *pfec);
+unsigned long hap_p2m_ga_to_gfn_3_levels(struct vcpu *v,
+ struct p2m_domain *p2m, unsigned long cr3,
+ paddr_t ga, uint32_t *pfec);
+unsigned long hap_p2m_ga_to_gfn_4_levels(struct vcpu *v,
+ struct p2m_domain *p2m, unsigned long cr3,
+ paddr_t ga, uint32_t *pfec);
+
#endif /* __HAP_PRIVATE_H__ */
diff -r cfde4384be14 -r 28809c365861 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -34,6 +34,7 @@
#include <public/mem_event.h>
#include <asm/mem_sharing.h>
#include <xen/event.h>
+#include <asm/hvm/nestedhvm.h>
/* Debugging and auditing of the P2M code? */
#define P2M_AUDIT 0
@@ -75,7 +76,7 @@ boolean_param("hap_2mb", opt_hap_2mb);
#define SUPERPAGE_PAGES (1UL << 9)
#define superpage_aligned(_x) (((_x)&(SUPERPAGE_PAGES-1))==0)
-static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn)
+unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn)
{
unsigned long flags;
#ifdef __x86_64__
@@ -121,9 +122,9 @@ static void audit_p2m(struct p2m_domain
// Find the next level's P2M entry, checking for out-of-range gfn's...
// Returns NULL on error.
//
-static l1_pgentry_t *
+l1_pgentry_t *
p2m_find_entry(void *table, unsigned long *gfn_remainder,
- unsigned long gfn, u32 shift, u32 max)
+ unsigned long gfn, uint32_t shift, uint32_t max)
{
u32 index;
@@ -224,20 +225,17 @@ p2m_next_level(struct p2m_domain *p2m, m
switch ( type ) {
case PGT_l3_page_table:
- paging_write_p2m_entry(p2m->domain, gfn,
- p2m_entry, *table_mfn, new_entry, 4);
+ p2m->write_p2m_entry(p2m, gfn, p2m_entry, *table_mfn, new_entry, 4);
break;
case PGT_l2_page_table:
#if CONFIG_PAGING_LEVELS == 3
/* for PAE mode, PDPE only has PCD/PWT/P bits available */
new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), _PAGE_PRESENT);
#endif
- paging_write_p2m_entry(p2m->domain, gfn,
- p2m_entry, *table_mfn, new_entry, 3);
+ p2m->write_p2m_entry(p2m, gfn, p2m_entry, *table_mfn, new_entry, 3);
break;
case PGT_l1_page_table:
- paging_write_p2m_entry(p2m->domain, gfn,
- p2m_entry, *table_mfn, new_entry, 2);
+ p2m->write_p2m_entry(p2m, gfn, p2m_entry, *table_mfn, new_entry, 2);
break;
default:
BUG();
@@ -264,14 +262,13 @@ p2m_next_level(struct p2m_domain *p2m, m
for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
{
new_entry = l1e_from_pfn(pfn + (i * L1_PAGETABLE_ENTRIES), flags);
- paging_write_p2m_entry(p2m->domain, gfn,
- l1_entry+i, *table_mfn, new_entry, 2);
+ p2m->write_p2m_entry(p2m, gfn,
+ l1_entry+i, *table_mfn, new_entry, 2);
}
unmap_domain_page(l1_entry);
new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
__PAGE_HYPERVISOR|_PAGE_USER); //disable PSE
- paging_write_p2m_entry(p2m->domain, gfn,
- p2m_entry, *table_mfn, new_entry, 3);
+ p2m->write_p2m_entry(p2m, gfn, p2m_entry, *table_mfn, new_entry, 3);
}
@@ -298,15 +295,15 @@ p2m_next_level(struct p2m_domain *p2m, m
for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
{
new_entry = l1e_from_pfn(pfn + i, flags);
- paging_write_p2m_entry(p2m->domain, gfn,
- l1_entry+i, *table_mfn, new_entry, 1);
+ p2m->write_p2m_entry(p2m, gfn,
+ l1_entry+i, *table_mfn, new_entry, 1);
}
unmap_domain_page(l1_entry);
new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
__PAGE_HYPERVISOR|_PAGE_USER);
- paging_write_p2m_entry(p2m->domain, gfn,
- p2m_entry, *table_mfn, new_entry, 2);
+ p2m->write_p2m_entry(p2m, gfn,
+ p2m_entry, *table_mfn, new_entry, 2);
}
*table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
@@ -1369,8 +1366,7 @@ p2m_set_entry(struct p2m_domain *p2m, un
p2m_type_to_flags(p2mt, mfn) | _PAGE_PSE)
: l3e_empty();
entry_content.l1 = l3e_content.l3;
- paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
- table_mfn, entry_content, 3);
+ p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 3);
/* NB: paging_write_p2m_entry() handles tlb flushes properly */
/* Free old intermediate tables if necessary */
@@ -1410,8 +1406,7 @@ p2m_set_entry(struct p2m_domain *p2m, un
entry_content = l1e_empty();
/* level 1 entry */
- paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
- table_mfn, entry_content, 1);
+ p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 1);
/* NB: paging_write_p2m_entry() handles tlb flushes properly */
}
else if ( page_order == 9 )
@@ -1440,8 +1435,7 @@ p2m_set_entry(struct p2m_domain *p2m, un
l2e_content = l2e_empty();
entry_content.l1 = l2e_content.l2;
- paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
- table_mfn, entry_content, 2);
+ p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 2);
/* NB: paging_write_p2m_entry() handles tlb flushes properly */
/* Free old intermediate tables if necessary */
@@ -1806,10 +1800,13 @@ static void p2m_initialise(struct domain
p2m->domain = d;
p2m->default_access = p2m_access_rwx;
+ p2m->cr3 = CR3_EADDR;
p2m->set_entry = p2m_set_entry;
p2m->get_entry = p2m_gfn_to_mfn;
p2m->get_entry_current = p2m_gfn_to_mfn_current;
p2m->change_entry_type_global = p2m_change_type_global;
+ p2m->write_p2m_entry = paging_write_p2m_entry;
+ cpus_clear(p2m->p2m_dirty_cpumask);
if ( hap_enabled(d) && (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) )
ept_p2m_init(d);
@@ -1817,6 +1814,25 @@ static void p2m_initialise(struct domain
return;
}
+/* Allocate and initialise every nested p2m slot of domain d.
+ * Returns 0 on success, or -ENOMEM as soon as one allocation fails
+ * (slots filled so far stay in d->arch.nested_p2m[]).
+ */
+static int
+p2m_init_nestedp2m(struct domain *d)
+{
+    unsigned int slot;
+
+    nestedp2m_lock_init(d);
+
+    for (slot = 0; slot < MAX_NESTEDP2M; slot++) {
+        struct p2m_domain *np2m = xmalloc(struct p2m_domain);
+
+        /* Store the result first, so a failed slot reads as NULL. */
+        d->arch.nested_p2m[slot] = np2m;
+        if (!np2m)
+            return -ENOMEM;
+
+        p2m_initialise(d, np2m);
+        /* Replace two of the hooks installed by p2m_initialise(). */
+        np2m->get_entry_current = np2m->get_entry;
+        np2m->write_p2m_entry = nestedp2m_write_p2m_entry;
+    }
+
+    return 0;
+}
+
int p2m_init(struct domain *d)
{
struct p2m_domain *p2m;
@@ -1825,8 +1841,12 @@ int p2m_init(struct domain *d)
if ( p2m == NULL )
return -ENOMEM;
p2m_initialise(d, p2m);
-
- return 0;
+
+ /* Must initialise nestedp2m unconditionally
+ * since nestedhvm_enabled(d) returns false here.
+ * (p2m_init runs too early for HVM_PARAM_* options)
+ */
+ return p2m_init_nestedp2m(d);
}
void p2m_change_entry_type_global(struct p2m_domain *p2m,
@@ -1919,6 +1939,9 @@ int p2m_alloc_table(struct p2m_domain *p
p2m_invalid, p2m->default_access) )
goto error;
+ if (p2m_is_nestedp2m(p2m))
+ goto nesteddone;
+
/* Copy all existing mappings from the page list and m2p */
spin_lock(&p2m->domain->page_alloc_lock);
page_list_for_each(page, &p2m->domain->page_list)
@@ -1940,6 +1963,7 @@ int p2m_alloc_table(struct p2m_domain *p
}
spin_unlock(&p2m->domain->page_alloc_lock);
+ nesteddone:
P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
p2m_unlock(p2m);
return 0;
@@ -1966,6 +1990,9 @@ void p2m_teardown(struct p2m_domain *p2m
mfn_t mfn;
#endif
+ if (p2m == NULL)
+ return;
+
p2m_lock(p2m);
#ifdef __x86_64__
@@ -1984,11 +2011,26 @@ void p2m_teardown(struct p2m_domain *p2m
p2m_unlock(p2m);
}
+/* Free every nested p2m structure of domain d and clear its slot. */
+static void p2m_teardown_nestedp2m(struct domain *d)
+{
+    unsigned int slot = 0;
+
+    while (slot < MAX_NESTEDP2M) {
+        struct p2m_domain **entry = &d->arch.nested_p2m[slot++];
+
+        xfree(*entry);
+        *entry = NULL;
+    }
+}
+
void p2m_final_teardown(struct domain *d)
{
/* Iterate over all p2m tables per domain */
xfree(d->arch.p2m);
d->arch.p2m = NULL;
+
+ /* We must teardown unconditionally because
+ * we initialise them unconditionally.
+ */
+ p2m_teardown_nestedp2m(d);
}
#if P2M_AUDIT
@@ -2573,9 +2615,9 @@ void p2m_change_type_global(struct p2m_d
gfn = get_gpfn_from_mfn(mfn);
flags = p2m_type_to_flags(nt, _mfn(mfn));
l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
- paging_write_p2m_entry(p2m->domain, gfn,
- (l1_pgentry_t *)&l3e[i3],
- l3mfn, l1e_content, 3);
+ p2m->write_p2m_entry(p2m, gfn,
+ (l1_pgentry_t *)&l3e[i3],
+ l3mfn, l1e_content, 3);
continue;
}
@@ -2604,9 +2646,9 @@ void p2m_change_type_global(struct p2m_d
* L2_PAGETABLE_ENTRIES) * L1_PAGETABLE_ENTRIES;
flags = p2m_type_to_flags(nt, _mfn(mfn));
l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
- paging_write_p2m_entry(p2m->domain, gfn,
- (l1_pgentry_t *)&l2e[i2],
- l2mfn, l1e_content, 2);
+ p2m->write_p2m_entry(p2m, gfn,
+ (l1_pgentry_t *)&l2e[i2],
+ l2mfn, l1e_content, 2);
continue;
}
@@ -2628,8 +2670,8 @@ void p2m_change_type_global(struct p2m_d
/* create a new 1le entry with the new type */
flags = p2m_type_to_flags(nt, _mfn(mfn));
l1e_content = l1e_from_pfn(mfn, flags);
- paging_write_p2m_entry(p2m->domain, gfn, &l1e[i1],
- l1mfn, l1e_content, 1);
+ p2m->write_p2m_entry(p2m, gfn, &l1e[i1],
+ l1mfn, l1e_content, 1);
}
unmap_domain_page(l1e);
}
@@ -3048,6 +3090,182 @@ void p2m_mem_access_resume(struct p2m_do
}
#endif /* __x86_64__ */
+/* Move one nested p2m to slot 0 of d->arch.nested_p2m[], shifting the
+ * entries that were in front of it one slot towards the end.
+ * With p2m == NULL the entry in the last slot is the one that is moved.
+ * Returns the p2m that now sits in slot 0.
+ */
+static struct p2m_domain *
+p2m_getlru_nestedp2m(struct domain *d, struct p2m_domain *p2m)
+{
+    struct p2m_domain *chosen = p2m;
+    int pos = -1, i;
+
+    if (p2m != NULL) {
+        /* Locate the slot currently holding the requested p2m. */
+        for (i = 0; i < MAX_NESTEDP2M && pos < 0; i++)
+            if (d->arch.nested_p2m[i] == p2m)
+                pos = i;
+    } else {
+        pos = MAX_NESTEDP2M - 1;
+        chosen = d->arch.nested_p2m[pos];
+    }
+
+    ASSERT(pos >= 0);
+
+    if (pos != 0) {
+        /* Shift slots 0..pos-1 one place towards the end ... */
+        for (i = pos; i > 0; i--)
+            d->arch.nested_p2m[i] = d->arch.nested_p2m[i - 1];
+
+        /* ... and put the chosen entry in front. */
+        d->arch.nested_p2m[0] = chosen;
+    }
+
+    return chosen;
+}
+
+/* Tear down a nested p2m and set it up again with a fresh table.
+ * The callers in this file invoke it with the nestedp2m lock held.
+ * Returns 0 if the p2m was already empty, otherwise the result of
+ * p2m_alloc_table().
+ */
+static int
+p2m_flush_locked(struct p2m_domain *p2m)
+{
+    ASSERT(p2m);
+
+    if (p2m->cr3 != CR3_EADDR) {
+        p2m_teardown(p2m);
+        p2m_initialise(p2m->domain, p2m);
+        /* Re-apply the nested-specific hooks p2m_initialise() reset. */
+        p2m->get_entry_current = p2m->get_entry;
+        p2m->write_p2m_entry = nestedp2m_write_p2m_entry;
+        return p2m_alloc_table(p2m);
+    }
+
+    /* Microoptimisation: p2m is already empty.
+     * => about 0.3% speedup of overall system performance.
+     */
+    return 0;
+}
+
+/* Flush one nested p2m on behalf of vcpu v.
+ * v must belong to the domain that owns p2m (asserted below).
+ * A failure to rebuild the p2m is fatal (BUG_ON).
+ */
+void
+p2m_flush(struct vcpu *v, struct p2m_domain *p2m)
+{
+ struct domain *d = p2m->domain;
+
+ ASSERT(v->domain == d);
+ /* Forget the vcpu's current nested p2m; note this is done before
+ * the nestedp2m lock is taken.
+ */
+ vcpu_nestedhvm(v).nv_p2m = NULL;
+ nestedp2m_lock(d);
+ BUG_ON(p2m_flush_locked(p2m) != 0);
+ /* The table was rebuilt: flush the vcpu's ASID and the nested TLB state. */
+ hvm_asid_flush_vcpu(v);
+ nestedhvm_vmcx_flushtlb(p2m);
+ nestedp2m_unlock(d);
+}
+
+/* Flush every nested p2m of domain d under the nestedp2m lock, clear
+ * each one's dirty cpumask, then flush the nested TLB state of the
+ * whole domain.  A failure to rebuild any p2m is fatal (BUG_ON).
+ */
+void
+p2m_flush_nestedp2m(struct domain *d)
+{
+    struct p2m_domain **slot;
+
+    nestedp2m_lock(d);
+
+    for (slot = &d->arch.nested_p2m[0];
+         slot < &d->arch.nested_p2m[MAX_NESTEDP2M];
+         slot++) {
+        struct p2m_domain *np2m = *slot;
+
+        BUG_ON(p2m_flush_locked(np2m) != 0);
+        cpus_clear(np2m->p2m_dirty_cpumask);
+    }
+
+    nestedhvm_vmcx_flushtlbdomain(d);
+    nestedp2m_unlock(d);
+}
+
+/* Return the nested p2m that vcpu v should use for the given cr3.
+ * A cr3 of 0 or CR3_EADDR selects v->arch.hvm_vcpu.guest_cr[3] instead.
+ * The vcpu's current nested p2m (nv_p2m) is reused when its cr3 matches
+ * or when it is still empty (cr3 == CR3_EADDR); otherwise the p2m in the
+ * last slot of d->arch.nested_p2m[] is flushed and taken over.
+ * The nestedp2m lock is taken and released inside this function.
+ */
+struct p2m_domain *
+p2m_get_nestedp2m(struct vcpu *v, uint64_t cr3)
+{
+ /* Use volatile to prevent gcc from caching nv->nv_p2m in a cpu register,
+ * as it may be changed within the loop by another (v)cpu.
+ */
+ volatile struct nestedvcpu *nv = &vcpu_nestedhvm(v);
+ struct domain *d;
+ struct p2m_domain *p2m;
+ int i, rv;
+
+ if (cr3 == 0 || cr3 == CR3_EADDR)
+ cr3 = v->arch.hvm_vcpu.guest_cr[3];
+
+ /* A pending flush request drops the vcpu's current p2m, so the
+ * lookup loop below cannot match it.
+ */
+ if (nv->nv_flushp2m && nv->nv_p2m) {
+ nv->nv_p2m = NULL;
+ }
+
+ d = v->domain;
+ nestedp2m_lock(d);
+ for (i = 0; i < MAX_NESTEDP2M; i++) {
+ p2m = d->arch.nested_p2m[i];
+ /* Only the slot holding the vcpu's current p2m qualifies, and only
+ * if its cr3 matches or it is still empty (CR3_EADDR).
+ */
+ if ((p2m->cr3 != cr3 && p2m->cr3 != CR3_EADDR) || (p2m != nv->nv_p2m))
+ continue;
+
+ nv->nv_flushp2m = 0;
+ /* Move the p2m to the front of the array. */
+ p2m_getlru_nestedp2m(d, p2m);
+ nv->nv_p2m = p2m;
+ /* An empty p2m is being put to use: flush the vcpu's ASID. */
+ if (p2m->cr3 == CR3_EADDR)
+ hvm_asid_flush_vcpu(v);
+ p2m->cr3 = cr3;
+ cpu_set(v->processor, p2m->p2m_dirty_cpumask);
+ nestedp2m_unlock(d);
+ return p2m;
+ }
+
+ /* All p2m's are or were in use. Take the least recently used one,
+ * flush it and reuse it.
+ */
+ /* NOTE(review): if p2m_flush_locked() fails for every candidate, the
+ * loop simply ends and the last p2m is used anyway; the other callers
+ * in this file BUG_ON() such a failure - confirm this is intended.
+ */
+ for (i = 0; i < MAX_NESTEDP2M; i++) {
+ p2m = p2m_getlru_nestedp2m(d, NULL);
+ rv = p2m_flush_locked(p2m);
+ if (rv == 0)
+ break;
+ }
+ nv->nv_p2m = p2m;
+ p2m->cr3 = cr3;
+ nv->nv_flushp2m = 0;
+ hvm_asid_flush_vcpu(v);
+ nestedhvm_vmcx_flushtlb(nv->nv_p2m);
+ cpu_set(v->processor, p2m->p2m_dirty_cpumask);
+ nestedp2m_unlock(d);
+
+ return p2m;
+}
+
+/* Pick the p2m for vcpu v: the nested p2m for the vcpu's nested host
+ * cr3 when nestedhvm_is_n2(v) holds, the domain's host p2m otherwise.
+ */
+struct p2m_domain *
+p2m_get_p2m(struct vcpu *v)
+{
+    if (nestedhvm_is_n2(v))
+        return p2m_get_nestedp2m(v, nhvm_vcpu_hostcr3(v));
+
+    return p2m_get_hostp2m(v->domain);
+}
+
+/* Translate guest virtual address va of vcpu v into a frame number.
+ *
+ * For an HVM vcpu with HAP that is running a nested (l2) guest the
+ * translation has two stages: the nested paging mode turns va into an
+ * l2 guest gfn, then the host paging mode translates that l2 guest
+ * address into an l1 guest gfn.  In every other case the host paging
+ * mode translates va directly.
+ *
+ * Returns INVALID_GFN if a stage fails; pfec is passed through to the
+ * paging-mode hooks, which report the fault code through it.
+ */
+unsigned long paging_gva_to_gfn(struct vcpu *v,
+                                unsigned long va,
+                                uint32_t *pfec)
+{
+    struct p2m_domain *hostp2m = p2m_get_hostp2m(v->domain);
+    const struct paging_mode *hostmode = paging_get_hostmode(v);
+
+    if ( is_hvm_domain(v->domain)
+         && paging_mode_hap(v->domain)
+         && nestedhvm_is_n2(v) )
+    {
+        unsigned long gfn;
+        struct p2m_domain *p2m;
+        const struct paging_mode *mode;
+        uint64_t ncr3 = nhvm_vcpu_hostcr3(v);
+
+        /* translate l2 guest va into l2 guest gfn */
+        p2m = p2m_get_nestedp2m(v, ncr3);
+        mode = paging_get_nestedmode(v);
+        gfn = mode->gva_to_gfn(v, p2m, va, pfec);
+
+        /* The first stage failed: do not shift the error value into a
+         * bogus address and walk it, report the failure right away. */
+        if ( gfn == INVALID_GFN )
+            return INVALID_GFN;
+
+        /* translate l2 guest gfn into l1 guest gfn.
+         * Widen to paddr_t before shifting so the address cannot
+         * overflow an unsigned long on 32bit PAE builds. */
+        return hostmode->p2m_ga_to_gfn(v, hostp2m, ncr3,
+                                       (paddr_t)gfn << PAGE_SHIFT, pfec);
+    }
+
+    return hostmode->gva_to_gfn(v, hostp2m, va, pfec);
+}
+
/*
* Local variables:
* mode: C
diff -r cfde4384be14 -r 28809c365861 xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -26,6 +26,7 @@
#include <asm/p2m.h>
#include <asm/hap.h>
#include <asm/guest_access.h>
+#include <asm/hvm/nestedhvm.h>
#include <xen/numa.h>
#include <xsm/xsm.h>
@@ -851,21 +852,58 @@ void paging_dump_vcpu_info(struct vcpu *
printk(" paging assistance: ");
if ( paging_mode_shadow(v->domain) )
{
- if ( v->arch.paging.mode )
+ if ( paging_get_hostmode(v) )
printk("shadowed %u-on-%u\n",
- v->arch.paging.mode->guest_levels,
- v->arch.paging.mode->shadow.shadow_levels);
+ paging_get_hostmode(v)->guest_levels,
+ paging_get_hostmode(v)->shadow.shadow_levels);
else
printk("not shadowed\n");
}
- else if ( paging_mode_hap(v->domain) && v->arch.paging.mode )
+ else if ( paging_mode_hap(v->domain) && paging_get_hostmode(v) )
printk("hap, %u levels\n",
- v->arch.paging.mode->guest_levels);
+ paging_get_hostmode(v)->guest_levels);
else
printk("none\n");
}
}
+/* Return the paging mode in effect for vcpu v: the nested mode when
+ * nestedhvm_is_n2(v) holds, the host mode otherwise.
+ */
+const struct paging_mode *paging_get_mode(struct vcpu *v)
+{
+    if (nestedhvm_is_n2(v))
+        return paging_get_nestedmode(v);
+
+    return paging_get_hostmode(v);
+}
+
+extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
+
+/* Recompute v->arch.paging.nestedmode.  Only nested-on-nested (HAP for
+ * the nested guest) is supported; any other case leaves the mode NULL.
+ */
+void paging_update_nestedmode(struct vcpu *v)
+{
+    /* TODO: shadow-on-shadow */
+    const struct paging_mode *mode = NULL;
+
+    ASSERT(nestedhvm_enabled(v->domain));
+
+    if (nestedhvm_paging_mode_hap(v)) {
+        /* nested-on-nested */
+        mode = hap_paging_get_mode(v);
+    }
+
+    v->arch.paging.nestedmode = mode;
+}
+
+/* Write entry 'new' into the p2m table slot p of p2m's domain.
+ *
+ * gfn, table_mfn and level are handed on to the paging mode's
+ * write_p2m_entry hook.  The hook is called on the current vcpu when it
+ * belongs to the p2m's domain, otherwise on that domain's vcpu 0.  If
+ * there is no such vcpu, paging is not enabled or no host paging mode
+ * is set, the entry is written directly with safe_write_pte().
+ */
+void paging_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
+                            l1_pgentry_t *p, mfn_t table_mfn,
+                            l1_pgentry_t new, unsigned int level)
+{
+    struct domain *d = p2m->domain;
+    struct vcpu *v = current;
+
+    if ( v->domain != d )
+        v = d->vcpu ? d->vcpu[0] : NULL;
+
+    /* The hook returns void, so call it as a statement: 'return expr;'
+     * is not valid ISO C in a function returning void. */
+    if ( likely(v && paging_mode_enabled(d) && paging_get_hostmode(v) != NULL) )
+        paging_get_hostmode(v)->write_p2m_entry(v, gfn, p, table_mfn,
+                                                new, level);
+    else
+        safe_write_pte(p, new);
+}
/*
* Local variables:
diff -r cfde4384be14 -r 28809c365861 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -837,22 +837,6 @@ shadow_write_entries(void *d, void *s, i
if ( map != NULL ) sh_unmap_domain_page(map);
}
-static inline int
-perms_strictly_increased(u32 old_flags, u32 new_flags)
-/* Given the flags of two entries, are the new flags a strict
- * increase in rights over the old ones? */
-{
- u32 of = old_flags & (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT);
- u32 nf = new_flags & (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT);
- /* Flip the NX bit, since it's the only one that decreases rights;
- * we calculate as if it were an "X" bit. */
- of ^= _PAGE_NX_BIT;
- nf ^= _PAGE_NX_BIT;
- /* If the changed bits are all set in the new flags, then rights strictly
- * increased between old and new. */
- return ((of | (of ^ nf)) == nf);
-}
-
/* type is only used to distinguish grant map pages from ordinary RAM
* i.e. non-p2m_is_grant() pages are treated as p2m_ram_rw. */
static int inline
@@ -3768,7 +3752,8 @@ sh_invlpg(struct vcpu *v, unsigned long
static unsigned long
-sh_gva_to_gfn(struct vcpu *v, unsigned long va, uint32_t *pfec)
+sh_gva_to_gfn(struct vcpu *v, struct p2m_domain *p2m,
+ unsigned long va, uint32_t *pfec)
/* Called to translate a guest virtual address to what the *guest*
* pagetables would map it to. */
{
@@ -4820,7 +4805,7 @@ static mfn_t emulate_gva_to_mfn(struct v
struct p2m_domain *p2m = p2m_get_hostp2m(v->domain);
/* Translate the VA to a GFN */
- gfn = sh_gva_to_gfn(v, vaddr, &pfec);
+ gfn = sh_gva_to_gfn(v, p2m, vaddr, &pfec);
if ( gfn == INVALID_GFN )
{
if ( is_hvm_vcpu(v) )
diff -r cfde4384be14 -r 28809c365861 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -210,6 +210,8 @@ struct paging_domain {
struct paging_vcpu {
/* Pointers to mode-specific entry points. */
const struct paging_mode *mode;
+ /* Nested Virtualization: paging mode of nested guest */
+ const struct paging_mode *nestedmode;
/* HVM guest: last emulate was to a pagetable */
unsigned int last_write_was_pt:1;
/* HVM guest: last write emulation succeeds */
@@ -225,6 +227,7 @@ struct paging_vcpu {
#define MAX_CPUID_INPUT 40
typedef xen_domctl_cpuid_t cpuid_input_t;
+#define MAX_NESTEDP2M 10
struct p2m_domain;
struct time_scale {
int shift;
@@ -258,6 +261,12 @@ struct arch_domain
struct paging_domain paging;
struct p2m_domain *p2m;
+ /* nestedhvm: translate l2 guest physical to host physical */
+ struct p2m_domain *nested_p2m[MAX_NESTEDP2M];
+ spinlock_t nested_p2m_lock;
+ int nested_p2m_locker;
+ const char *nested_p2m_function;
+
/* NB. protected by d->event_lock and by irq_desc[irq].lock */
int *irq_pirq;
int *pirq_irq;
diff -r cfde4384be14 -r 28809c365861 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -374,12 +374,12 @@ static inline void hvm_set_info_guest(st
int hvm_debug_op(struct vcpu *v, int32_t op);
-bool_t hvm_hap_nested_page_fault(unsigned long gpa,
- bool_t gla_valid, unsigned long gla,
- bool_t access_valid,
- bool_t access_r,
- bool_t access_w,
- bool_t access_x);
+int hvm_hap_nested_page_fault(unsigned long gpa,
+ bool_t gla_valid, unsigned long gla,
+ bool_t access_valid,
+ bool_t access_r,
+ bool_t access_w,
+ bool_t access_x);
#define hvm_msr_tsc_aux(v) ({ \
struct domain *__d = (v)->domain; \
diff -r cfde4384be14 -r 28809c365861 xen/include/asm-x86/hvm/nestedhvm.h
--- a/xen/include/asm-x86/hvm/nestedhvm.h
+++ b/xen/include/asm-x86/hvm/nestedhvm.h
@@ -60,4 +60,9 @@ unsigned long *nestedhvm_vcpu_iomap_get(
#define nestedhvm_vmswitch_in_progress(v) \
(!!vcpu_nestedhvm((v)).nv_vmswitch_in_progress)
+void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
+void nestedhvm_vmcx_flushtlbdomain(struct domain *d);
+
+bool_t nestedhvm_is_n2(struct vcpu *v);
+
#endif /* _HVM_NESTEDHVM_H */
diff -r cfde4384be14 -r 28809c365861 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -199,7 +199,15 @@ struct p2m_domain {
/* Shadow translated domain: p2m mapping */
pagetable_t phys_table;
+ /* Same as domain_dirty_cpumask but limited to
+ * this p2m and those physical cpus whose vcpu's are in
+ * guestmode.
+ */
+ cpumask_t p2m_dirty_cpumask;
+
struct domain *domain; /* back pointer to domain */
+#define CR3_EADDR (~0ULL)
+ uint64_t cr3; /* to identify this p2m for re-use */
/* Pages used to construct the p2m */
struct page_list_head pages;
@@ -223,6 +231,11 @@ struct p2m_domain {
p2m_type_t ot,
p2m_type_t nt);
+ void (*write_p2m_entry)(struct p2m_domain *p2m,
+ unsigned long gfn, l1_pgentry_t *p,
+ mfn_t table_mfn, l1_pgentry_t new,
+ unsigned int level);
+
/* Default P2M access type for each page in the the domain: new pages,
* swapped in pages, cleared pages, and pages that are ambiquously
* retyped get this access type. See definition of p2m_access_t. */
@@ -264,8 +277,26 @@ struct p2m_domain {
/* get host p2m table */
#define p2m_get_hostp2m(d) ((d)->arch.p2m)
+/* Get p2m table (re)usable for specified cr3.
+ * Automatically destroys and re-initializes a p2m if none found.
+ * If cr3 == 0 or cr3 == CR3_EADDR then v->arch.hvm_vcpu.guest_cr[3] is used.
+ */
+struct p2m_domain *p2m_get_nestedp2m(struct vcpu *v, uint64_t cr3);
+
+/* If vcpu is in host mode then behaviour matches p2m_get_hostp2m().
+ * If vcpu is in guest mode then behaviour matches p2m_get_nestedp2m().
+ */
+struct p2m_domain *p2m_get_p2m(struct vcpu *v);
+
+/* True if p2m is not the host p2m of the domain it points back to.
+ * The argument is parenthesized at every use so that any pointer
+ * expression can be passed safely.
+ */
+#define p2m_is_nestedp2m(p2m)  ((p2m) != p2m_get_hostp2m((p2m)->domain))
+
#define p2m_get_pagetable(p2m) ((p2m)->phys_table)
+/* Flushes specified p2m table */
+void p2m_flush(struct vcpu *v, struct p2m_domain *p2m);
+/* Flushes all nested p2m tables */
+void p2m_flush_nestedp2m(struct domain *d);
+
/*
* The P2M lock. This protects all updates to the p2m table.
* Updates are expected to be safe against concurrent reads,
@@ -307,6 +338,38 @@ struct p2m_domain {
(current->processor == (_p2m)->locker)
+/*
+ * The nested p2m lock: one spinlock per domain, kept in
+ * arch.nested_p2m_lock. Next to the spinlock the macros record which
+ * processor holds the lock (arch.nested_p2m_locker, -1 when free) and
+ * the name of the function that took it (arch.nested_p2m_function).
+ */
+
+/* Initialise the lock and mark it as held by nobody. */
+#define nestedp2m_lock_init(_domain) \
+ do { \
+ spin_lock_init(&(_domain)->arch.nested_p2m_lock); \
+ (_domain)->arch.nested_p2m_locker = -1; \
+ (_domain)->arch.nested_p2m_function = "nobody"; \
+ } while (0)
+
+/* True if the recorded lock holder is the current processor. */
+#define nestedp2m_locked_by_me(_domain) \
+ (current->processor == (_domain)->arch.nested_p2m_locker)
+
+/* Take the lock. Taking it again on the processor that already holds
+ * it is treated as a bug: the recorded holder function is printed and
+ * BUG() is raised.
+ */
+#define nestedp2m_lock(_domain) \
+ do { \
+ if ( nestedp2m_locked_by_me(_domain) ) \
+ { \
+ printk("Error: p2m lock held by %s\n", \
+ (_domain)->arch.nested_p2m_function); \
+ BUG(); \
+ } \
+ spin_lock(&(_domain)->arch.nested_p2m_lock); \
+ ASSERT((_domain)->arch.nested_p2m_locker == -1); \
+ (_domain)->arch.nested_p2m_locker = current->processor; \
+ (_domain)->arch.nested_p2m_function = __func__; \
+ } while (0)
+
+/* Release the lock; asserts that the current processor is the holder
+ * and resets the holder bookkeeping before dropping the spinlock.
+ */
+#define nestedp2m_unlock(_domain) \
+ do { \
+ ASSERT(nestedp2m_locked_by_me(_domain)); \
+ (_domain)->arch.nested_p2m_locker = -1; \
+ (_domain)->arch.nested_p2m_function = "nobody"; \
+ spin_unlock(&(_domain)->arch.nested_p2m_lock); \
+ } while (0)
+
/* Extract the type from the PTE flags that store it */
static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
{
@@ -424,11 +487,21 @@ static inline unsigned long mfn_to_gfn(s
/* Init the datastructures for later use by the p2m code */
int p2m_init(struct domain *d);
+/* PTE flags for various types of p2m entry */
+unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn);
+
/* Allocate a new p2m table for a domain.
*
* Returns 0 for success or -errno. */
int p2m_alloc_table(struct p2m_domain *p2m);
+/* Find the next level's P2M entry, checking for out-of-range gfn's...
+ * Returns NULL on error.
+ */
+l1_pgentry_t *
+p2m_find_entry(void *table, unsigned long *gfn_remainder,
+ unsigned long gfn, uint32_t shift, uint32_t max);
+
/* Return all the p2m resources to Xen. */
void p2m_teardown(struct p2m_domain *p2m);
void p2m_final_teardown(struct domain *d);
@@ -502,6 +575,8 @@ p2m_type_t p2m_change_type(struct p2m_do
int set_mmio_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn);
int clear_mmio_p2m_entry(struct p2m_domain *p2m, unsigned long gfn);
+void nestedp2m_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
+ l1_pgentry_t *p, mfn_t table_mfn, l1_pgentry_t new, unsigned int level);
#ifdef __x86_64__
/* Modify p2m table for shared gfn */
diff -r cfde4384be14 -r 28809c365861 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -391,6 +391,23 @@ static inline uint32_t cacheattr_to_pte_
return ((cacheattr & 4) << 5) | ((cacheattr & 3) << 3);
}
+/* Given the flags of two entries, are the new flags a strict
+ * increase in rights over the old ones?  Returns non-zero if so.
+ */
+static inline bool_t
+perms_strictly_increased(uint32_t old_flags, uint32_t new_flags)
+{
+    /* Only these bits take part in the comparison. */
+    const uint32_t rights = _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT;
+    /* NX is the only bit that decreases rights, so flip it and
+     * calculate as if it were an "X" bit. */
+    uint32_t before = (old_flags & rights) ^ _PAGE_NX_BIT;
+    uint32_t after = (new_flags & rights) ^ _PAGE_NX_BIT;
+
+    /* Rights increased if every changed bit is set in the new flags. */
+    return ((before | (before ^ after)) == after);
+}
+
#endif /* !__ASSEMBLY__ */
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)
diff -r cfde4384be14 -r 28809c365861 xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -108,8 +108,14 @@ struct paging_mode {
int (*page_fault )(struct vcpu *v, unsigned long va,
struct cpu_user_regs *regs);
int (*invlpg )(struct vcpu *v, unsigned long va);
- unsigned long (*gva_to_gfn )(struct vcpu *v, unsigned long va,
+ unsigned long (*gva_to_gfn )(struct vcpu *v,
+ struct p2m_domain *p2m,
+ unsigned long va,
uint32_t *pfec);
+ unsigned long (*p2m_ga_to_gfn )(struct vcpu *v,
+ struct p2m_domain *p2m,
+ unsigned long cr3,
+ paddr_t ga, uint32_t *pfec);
void (*update_cr3 )(struct vcpu *v, int do_locking);
void (*update_paging_modes )(struct vcpu *v);
void (*write_p2m_entry )(struct vcpu *v, unsigned long gfn,
@@ -219,6 +225,10 @@ void paging_final_teardown(struct domain
* creation. */
int paging_enable(struct domain *d, u32 mode);
+#define paging_get_hostmode(v) ((v)->arch.paging.mode)
+#define paging_get_nestedmode(v) ((v)->arch.paging.nestedmode)
+const struct paging_mode *paging_get_mode(struct vcpu *v);
+void paging_update_nestedmode(struct vcpu *v);
/* Page fault handler
* Called from pagefault handler in Xen, and from the HVM trap handlers
@@ -233,7 +243,7 @@ static inline int
paging_fault(unsigned long va, struct cpu_user_regs *regs)
{
struct vcpu *v = current;
- return v->arch.paging.mode->page_fault(v, va, regs);
+ return paging_get_hostmode(v)->page_fault(v, va, regs);
}
/* Handle invlpg requests on vcpus.
@@ -241,7 +251,7 @@ paging_fault(unsigned long va, struct cp
* or 0 if it's safe not to do so. */
static inline int paging_invlpg(struct vcpu *v, unsigned long va)
{
- return v->arch.paging.mode->invlpg(v, va);
+ return paging_get_hostmode(v)->invlpg(v, va);
}
/* Translate a guest virtual address to the frame number that the
@@ -251,11 +261,30 @@ static inline int paging_invlpg(struct v
* walking the tables. The caller should set the PFEC_page_present bit
* in pfec[0]; in the failure case, that bit will be cleared if appropriate. */
#define INVALID_GFN (-1UL)
-static inline unsigned long paging_gva_to_gfn(struct vcpu *v,
- unsigned long va,
- uint32_t *pfec)
+unsigned long paging_gva_to_gfn(struct vcpu *v,
+ unsigned long va,
+ uint32_t *pfec);
+
+/* Translates a guest virtual address to guest physical address
+ * where the specified cr3 is translated to host physical address
+ * using the specified p2m table.
+ * This allows to do page walks in the guest or even in the nested guest.
+ * It returns the guest's gfn or the nested guest's gfn.
+ * Use 'paddr_t' for the guest address so it won't overflow when
+ * guest or nested guest is in 32bit PAE mode.
+ */
+static inline unsigned long paging_p2m_ga_to_gfn(struct vcpu *v,
+ struct p2m_domain *p2m,
+ const struct paging_mode *mode,
+ unsigned long cr3,
+ paddr_t ga,
+ uint32_t *pfec)
{
- return v->arch.paging.mode->gva_to_gfn(v, va, pfec);
+ if ( is_hvm_domain(v->domain) && paging_mode_hap(v->domain) )
+ return mode->p2m_ga_to_gfn(v, p2m, cr3, ga, pfec);
+
+ /* shadow paging */
+ return paging_gva_to_gfn(v, ga, pfec);
}
/* Update all the things that are derived from the guest's CR3.
@@ -263,7 +292,7 @@ static inline unsigned long paging_gva_t
* as the value to load into the host CR3 to schedule this vcpu */
static inline void paging_update_cr3(struct vcpu *v)
{
- v->arch.paging.mode->update_cr3(v, 1);
+ paging_get_hostmode(v)->update_cr3(v, 1);
}
/* Update all the things that are derived from the guest's CR0/CR3/CR4.
@@ -271,7 +300,7 @@ static inline void paging_update_cr3(str
* has changed, and when bringing up a VCPU for the first time. */
static inline void paging_update_paging_modes(struct vcpu *v)
{
- v->arch.paging.mode->update_paging_modes(v);
+ paging_get_hostmode(v)->update_paging_modes(v);
}
@@ -283,7 +312,7 @@ static inline int paging_write_guest_ent
{
if ( unlikely(paging_mode_enabled(v->domain)
&& v->arch.paging.mode != NULL) )
- return v->arch.paging.mode->write_guest_entry(v, p, new, gmfn);
+ return paging_get_hostmode(v)->write_guest_entry(v, p, new, gmfn);
else
return (!__copy_to_user(p, &new, sizeof(new)));
}
@@ -299,7 +328,7 @@ static inline int paging_cmpxchg_guest_e
{
if ( unlikely(paging_mode_enabled(v->domain)
&& v->arch.paging.mode != NULL) )
- return v->arch.paging.mode->cmpxchg_guest_entry(v, p, old, new, gmfn);
+ return paging_get_hostmode(v)->cmpxchg_guest_entry(v, p, old, new, gmfn);
else
return (!cmpxchg_user(p, *old, new));
}
@@ -327,21 +356,11 @@ static inline void safe_write_pte(l1_pge
* a pointer to the entry to be written, the MFN in which the entry resides,
* the new contents of the entry, and the level in the p2m tree at which
* we are writing. */
-static inline void paging_write_p2m_entry(struct domain *d, unsigned long gfn,
- l1_pgentry_t *p, mfn_t table_mfn,
- l1_pgentry_t new, unsigned int level)
-{
- struct vcpu *v = current;
- if ( v->domain != d )
- v = d->vcpu ? d->vcpu[0] : NULL;
- if ( likely(v && paging_mode_enabled(d) && v->arch.paging.mode != NULL) )
- {
- return v->arch.paging.mode->write_p2m_entry(v, gfn, p, table_mfn,
- new, level);
- }
- else
- safe_write_pte(p, new);
-}
+struct p2m_domain;
+
+void paging_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
+ l1_pgentry_t *p, mfn_t table_mfn,
+ l1_pgentry_t new, unsigned int level);
/* Called from the guest to indicate that the a process is being
* torn down and its pagetables will soon be discarded */
@@ -362,7 +381,7 @@ guest_map_l1e(struct vcpu *v, unsigned l
l2_pgentry_t l2e;
if ( unlikely(paging_mode_translate(v->domain)) )
- return v->arch.paging.mode->guest_map_l1e(v, addr, gl1mfn);
+ return paging_get_hostmode(v)->guest_map_l1e(v, addr, gl1mfn);
/* Find this l1e and its enclosing l1mfn in the linear map */
if ( __copy_from_user(&l2e,
@@ -398,7 +417,7 @@ guest_get_eff_l1e(struct vcpu *v, unsign
return;
}
- v->arch.paging.mode->guest_get_eff_l1e(v, addr, eff_l1e);
+ paging_get_hostmode(v)->guest_get_eff_l1e(v, addr, eff_l1e);
}
/* Read the guest's l1e that maps this address, from the kernel-mode
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
next prev parent reply other threads:[~2011-04-05 15:48 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-03-09 14:31 [PATCH 12/12] Nested Virtualization: hap-on-hap Christoph Egger
2011-03-22 14:59 ` Tim Deegan
2011-03-31 15:25 ` Christoph Egger
2011-04-05 15:48 ` Christoph Egger [this message]
2011-04-06 10:29 ` Tim Deegan
2011-04-06 14:42 ` Christoph Egger
2011-04-29 9:03 ` Jan Beulich
2011-04-29 9:09 ` Christoph Egger
2011-04-29 9:19 ` Jan Beulich
-- strict thread matches above, loose matches on Subject: below --
2010-12-20 16:13 Christoph Egger
2011-01-07 15:55 ` Tim Deegan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4D9B39BA.8090807@amd.com \
--to=christoph.egger@amd.com \
--cc=Tim.Deegan@citrix.com \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.