xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Christoph Egger <Christoph.Egger@amd.com>
To: xen-devel@lists.xensource.com
Subject: [PATCH 10/14] Nested Virtualization: svm specific implementation
Date: Thu, 5 Aug 2010 17:04:02 +0200	[thread overview]
Message-ID: <201008051704.03074.Christoph.Egger@amd.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 322 bytes --]


Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>

-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

[-- Attachment #2: xen_nh10_svm.diff --]
[-- Type: text/x-diff, Size: 64186 bytes --]

# HG changeset patch
# User cegger
# Date 1281004712 -7200
Implement SVM specific part for Nested Virtualization

diff -r 1a1837336ffd -r c6a5143c1c33 xen/arch/x86/hvm/svm/Makefile
--- a/xen/arch/x86/hvm/svm/Makefile
+++ b/xen/arch/x86/hvm/svm/Makefile
@@ -3,5 +3,6 @@ obj-y += emulate.o
 obj-y += entry.o
 obj-y += intr.o
 obj-y += svm.o
+obj-y += svmdebug.o
 obj-y += vmcb.o
 obj-y += vpmu.o
diff -r 1a1837336ffd -r c6a5143c1c33 xen/arch/x86/hvm/svm/emulate.c
--- a/xen/arch/x86/hvm/svm/emulate.c
+++ b/xen/arch/x86/hvm/svm/emulate.c
@@ -100,6 +100,11 @@ MAKE_INSTR(VMCALL, 3, 0x0f, 0x01, 0xd9);
 MAKE_INSTR(HLT,    1, 0xf4);
 MAKE_INSTR(INT3,   1, 0xcc);
 MAKE_INSTR(RDTSC,  2, 0x0f, 0x31);
+MAKE_INSTR(VMRUN,  3, 0x0f, 0x01, 0xd8);
+MAKE_INSTR(VMLOAD, 3, 0x0f, 0x01, 0xda);
+MAKE_INSTR(VMSAVE, 3, 0x0f, 0x01, 0xdb);
+MAKE_INSTR(STGI,   3, 0x0f, 0x01, 0xdc);
+MAKE_INSTR(CLGI,   3, 0x0f, 0x01, 0xdd);
 
 static const u8 *opc_bytes[INSTR_MAX_COUNT] = 
 {
@@ -111,7 +116,12 @@ static const u8 *opc_bytes[INSTR_MAX_COU
     [INSTR_VMCALL] = OPCODE_VMCALL,
     [INSTR_HLT]    = OPCODE_HLT,
     [INSTR_INT3]   = OPCODE_INT3,
-    [INSTR_RDTSC]  = OPCODE_RDTSC
+    [INSTR_RDTSC]  = OPCODE_RDTSC,
+    [INSTR_VMRUN]  = OPCODE_VMRUN,
+    [INSTR_VMLOAD] = OPCODE_VMLOAD,
+    [INSTR_VMSAVE] = OPCODE_VMSAVE,
+    [INSTR_STGI]   = OPCODE_STGI,
+    [INSTR_CLGI]   = OPCODE_CLGI,
 };
 
 static int fetch(struct vcpu *v, u8 *buf, unsigned long addr, int len)
diff -r 1a1837336ffd -r c6a5143c1c33 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -49,6 +49,8 @@
 #include <asm/hvm/svm/vmcb.h>
 #include <asm/hvm/svm/emulate.h>
 #include <asm/hvm/svm/intr.h>
+#include <asm/hvm/svm/svmdebug.h>
+#include <asm/hvm/nestedhvm.h>
 #include <asm/x86_emulate.h>
 #include <public/sched.h>
 #include <asm/hvm/vpt.h>
@@ -108,6 +110,44 @@ static void svm_cpu_down(void)
     write_efer(read_efer() & ~EFER_SVME);
 }
 
+static unsigned long *
+svm_msrbit(unsigned long *msr_bitmap, uint32_t msr)
+{
+    /*
+     * Return the start of the permission-map range that covers 'msr', or
+     * NULL when 'msr' lies outside the three ranges handled below.
+     * See AMD64 Programmers Manual, Vol 2, Section 15.10 (MSR-Bitmap Address).
+     */
+    if ( msr <= 0x1fff )
+        return msr_bitmap + 0x0000 / BYTES_PER_LONG;
+    if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+        return msr_bitmap + 0x0800 / BYTES_PER_LONG;
+    if ( (msr >= 0xc0010000) && (msr <= 0xc0011fff) )
+        return msr_bitmap + 0x1000 / BYTES_PER_LONG;
+
+    return NULL;
+}
+
+void svm_intercept_msr(struct vcpu *v, uint32_t msr, int enable)
+{
+    /* Start of the permission-map range covering this MSR. */
+    unsigned long *range = svm_msrbit(v->arch.hvm_svm.msrpm, msr);
+    uint32_t first_bit;
+
+    /* An MSR outside the ranges known to svm_msrbit() is a caller bug. */
+    BUG_ON(range == NULL);
+    /* Two consecutive bits per MSR, indexed by the MSR's low 13 bits. */
+    first_bit = (msr & 0x1fff) * 2;
+    if ( !enable )
+    {
+        __clear_bit(first_bit, range);
+        __clear_bit(first_bit + 1, range);
+        return;
+    }
+
+    __set_bit(first_bit, range);
+    __set_bit(first_bit + 1, range);
+}
+
 static enum handler_return
 long_mode_do_msr_write(unsigned int msr, uint64_t msr_content)
 {
@@ -325,7 +365,7 @@ static int svm_load_vmcb_ctxt(struct vcp
 {
     svm_load_cpu_state(v, ctxt);
     if (svm_vmcb_restore(v, ctxt)) {
-        printk("svm_vmcb restore failed!\n");
+        gdprintk(XENLOG_ERR, "svm_vmcb restore failed!\n");
         domain_crash(v->domain);
         return -EINVAL;
     }
@@ -692,8 +732,10 @@ static void svm_ctxt_switch_to(struct vc
 static void svm_do_resume(struct vcpu *v) 
 {
     bool_t debug_state = v->domain->debugger_attached;
-
-    if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
+    bool_t guestmode = nestedhvm_vcpu_in_guestmode(v);
+
+    if ( !guestmode &&
+        unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
     {
         uint32_t mask = (1U << TRAP_debug) | (1U << TRAP_int3);
         v->arch.hvm_vcpu.debug_state_latch = debug_state;
@@ -712,11 +754,14 @@ static void svm_do_resume(struct vcpu *v
         hvm_asid_flush_vcpu(v);
     }
 
-    /* Reflect the vlapic's TPR in the hardware vtpr */
-    v->arch.hvm_svm.vmcb->vintr.fields.tpr = 
-        (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
-
-    hvm_do_resume(v);
+    if ( !guestmode )
+    {
+        /* Reflect the vlapic's TPR in the hardware vtpr */
+        v->arch.hvm_svm.vmcb->vintr.fields.tpr = 
+            (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
+
+        hvm_do_resume(v);
+    }
     reset_stack_and_jump(svm_asm_do_resume);
 }
 
@@ -861,6 +906,986 @@ static void svm_init_erratum_383(struct 
     }
 }
 
+/*
+ * Nested SVM
+ */
+static int nsvm_vcpu_destroy(struct vcpu *v);
+
+static int nsvm_vcpu_initialise(struct vcpu *v)
+{
+    void *msrpm;
+    struct nestedhvm *hvm = &VCPU_NESTEDHVM(v);
+
+    ASSERT(hvm->nh_hostsave == NULL);
+    hvm->nh_hostsave = alloc_vmcb();
+    if (hvm->nh_hostsave == NULL)
+        goto err;
+
+    msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0);
+    hvm->nh_cached_msrpm = msrpm;
+    if (msrpm == NULL)
+        goto err;
+    memset(msrpm, 0x0, MSRPM_SIZE);
+    hvm->nh_cached_msrpm_size = MSRPM_SIZE;
+
+    msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0);
+    hvm->nh_merged_msrpm = msrpm;
+    if (msrpm == NULL)
+        goto err;
+    memset(msrpm, 0x0, MSRPM_SIZE);
+    hvm->nh_merged_msrpm_size = MSRPM_SIZE;
+
+    hvm->nh_vm = alloc_vmcb();
+    hvm->nh_vmsize = sizeof(struct vmcb_struct);
+    if (hvm->nh_vm == NULL)
+        goto err;
+
+    hvm->nh_arch = xmalloc_bytes(sizeof(struct nestedsvm));
+    if (hvm->nh_arch == NULL)
+        goto err;
+    hvm->nh_arch_size = sizeof(struct nestedsvm);
+    memset(hvm->nh_arch, 0x0, hvm->nh_arch_size);
+
+    return 0;
+
+err:
+    nsvm_vcpu_destroy(v);
+    return -ENOMEM;
+}
+
+static int nsvm_vcpu_destroy(struct vcpu *v)
+{
+    struct nestedhvm *hvm = &VCPU_NESTEDHVM(v);
+
+    if (hvm->nh_vm) {
+        free_vmcb(hvm->nh_vm);
+        hvm->nh_vm = NULL;
+    }
+    if (hvm->nh_hostsave) {
+        free_vmcb(hvm->nh_hostsave);
+        hvm->nh_hostsave = NULL;
+    }
+    if (hvm->nh_cached_msrpm) {
+        ASSERT(hvm->nh_cached_msrpm_size > 0);
+        free_xenheap_pages(hvm->nh_cached_msrpm,
+                           get_order_from_bytes(hvm->nh_cached_msrpm_size));
+        hvm->nh_cached_msrpm = NULL;
+        hvm->nh_cached_msrpm_size = 0;
+    }
+    if (hvm->nh_merged_msrpm) {
+        ASSERT(hvm->nh_merged_msrpm_size > 0);
+        free_xenheap_pages(hvm->nh_merged_msrpm,
+                           get_order_from_bytes(hvm->nh_merged_msrpm_size));
+        hvm->nh_merged_msrpm = NULL;
+        hvm->nh_merged_msrpm_size = 0;
+    }
+    if (hvm->nh_arch) {
+        xfree(hvm->nh_arch);
+        hvm->nh_arch = NULL;
+        hvm->nh_arch_size = 0;
+    }
+
+    return 0;
+}
+
+static int nsvm_vcpu_reset(struct vcpu *v)
+{
+    struct nestedhvm *hvm = &VCPU_NESTEDHVM(v);
+    struct vmcb_struct *vmcb = hvm->nh_vm;
+
+    hvm->nh_vmmaxaddr = 0xfd00000000ULL;
+    vmcb->np_enable = 0;
+    vmcb->g_pat = MSR_IA32_CR_PAT_RESET;
+    return 0;
+}
+
+static int nsvm_vcpu_features(struct vcpu *v,
+    uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
+{
+    /* We handle SVM features via cpuid by tools/libxc.
+     * So nothing to do here.
+     */
+    return 0;
+}
+
+static void nsvm_vmcb_loadsave(struct vmcb_struct *from,
+				struct vmcb_struct *to)
+{
+    to->fs = from->fs;
+    to->gs = from->gs;
+    to->tr = from->tr;
+    to->ldtr = from->ldtr;
+    to->kerngsbase = from->kerngsbase;
+    to->star = from->star;
+    to->lstar = from->lstar;
+    to->cstar = from->cstar;
+    to->sfmask = from->sfmask;
+    to->sysenter_cs = from->sysenter_cs;
+    to->sysenter_esp = from->sysenter_esp;
+    to->sysenter_eip = from->sysenter_eip;
+}
+
+static int nsvm_vcpu_hostsave(struct vcpu *v, unsigned int inst_len)
+{
+    struct nestedhvm *hvm = &VCPU_NESTEDHVM(v);
+    struct vmcb_struct *hsave = hvm->nh_hostsave;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    /* Snapshot the current VMCB into nh_hostsave, rip advanced by inst_len. */
+    memcpy(hsave, vmcb, sizeof(struct vmcb_struct));
+    hsave->rip += inst_len;
+    /* Remember the host interrupt flag */
+    hvm->nh_hostflags.fields.rflagsif = (hsave->rflags & X86_EFLAGS_IF) ? 1 : 0;
+
+    /* Paging mode selects the saved cr3; h_cr3 only matters for nested hap. */
+    if (nestedhvm_paging_mode_hap(v)) {
+        hsave->cr3 = vmcb->cr3;
+        hsave->h_cr3 = vmcb->h_cr3;
+    } else if (paging_mode_hap(v->domain)) {
+        hsave->cr3 = vmcb->cr3;
+    } else {
+        hsave->cr3 = v->arch.hvm_vcpu.guest_cr[3];
+    }
+
+    /* Use the values tracked in hvm_vcpu rather than the raw VMCB copies. */
+    hsave->efer = v->arch.hvm_vcpu.guest_efer;
+    hsave->cr0 = v->arch.hvm_vcpu.guest_cr[0];
+    hsave->cr2 = v->arch.hvm_vcpu.guest_cr[2];
+    hsave->cr4 = v->arch.hvm_vcpu.guest_cr[4];
+
+    return 0;
+}
+
+static int nsvm_vcpu_hostrestore(struct vcpu *v, struct cpu_user_regs *regs)
+{
+    struct nestedhvm *hvm = &VCPU_NESTEDHVM(v);
+    struct vmcb_struct *hsave, *vmcb;
+    int rc;
+
+    hsave = hvm->nh_hostsave;
+    vmcb = v->arch.hvm_svm.vmcb;
+
+    /* Must keep register values handled by VMSAVE/VMLOAD */
+    nsvm_vmcb_loadsave(vmcb, hsave);
+    memcpy(vmcb, hsave, sizeof(struct vmcb_struct));
+
+    /* EFER */
+    v->arch.hvm_vcpu.guest_efer = vmcb->efer;
+    rc = hvm_set_efer(vmcb->efer);
+    if (rc != X86EMUL_OKAY)
+        gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);
+
+    /* CR4 */
+    v->arch.hvm_vcpu.guest_cr[4] = vmcb->cr4;
+    rc = hvm_set_cr4(vmcb->cr4);
+    if (rc != X86EMUL_OKAY)
+        gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);
+
+    /* CR0 */
+    v->arch.hvm_vcpu.guest_cr[0] = vmcb->cr0 | X86_CR0_PE;
+    vmcb->rflags &= ~X86_EFLAGS_VM;
+    rc = hvm_set_cr0(vmcb->cr0 | X86_CR0_PE);
+    if (rc != X86EMUL_OKAY)
+        gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);
+
+    /* CR2 */
+    v->arch.hvm_vcpu.guest_cr[2] = vmcb->cr2;
+    hvm_update_guest_cr(v, 2);
+
+    /* CR3 */
+    /* Nested paging mode */
+    if (nestedhvm_paging_mode_hap(v)) {
+        /* host nested paging + guest nested paging. */
+        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+    } else if (paging_mode_hap(v->domain)) {
+        /* host nested paging + guest shadow paging. */
+        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+    } else {
+        /* host shadow paging + guest shadow paging. */
+
+        /* Reset MMU context  -- XXX (hostrestore) not yet working*/
+        if (!pagetable_is_null(v->arch.guest_table))
+            put_page(pagetable_get_page(v->arch.guest_table));
+        v->arch.guest_table = pagetable_null();
+        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+    }
+    rc = hvm_set_cr3(vmcb->cr3);
+    if (rc != X86EMUL_OKAY)
+        gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
+
+    regs->eax = vmcb->rax;
+    regs->esp = vmcb->rsp;
+    regs->eip = vmcb->rip;
+    regs->eflags = vmcb->rflags;
+    vmcb->dr7 = 0; /* disable all breakpoints */
+    vmcb->cpl = 0;
+
+    /* Clear exitintinfo to prevent a fault loop of re-injecting
+     * exceptions forever.
+     */
+    vmcb->exitintinfo.bytes = 0;
+
+    hvm_asid_flush_vcpu(v);
+
+    return 0;
+}
+
+static int
+nsvm_vcpu_vmrun(struct vcpu *v)
+{
+    struct nestedhvm *hvm = &VCPU_NESTEDHVM(v);
+    struct vmcb_struct *ns_vmcb;
+
+    ns_vmcb = hvm->nh_vm;
+
+    /* Save values for later use. Needed for Nested-on-Nested and
+     * Shadow-on-Shadow paging.
+     */
+    hvm->nh_vmcb_cr3 = ns_vmcb->cr3;
+    hvm->nh_vmcb_hcr3 = ns_vmcb->h_cr3;
+
+    hvm->nh_flushp2m = (ns_vmcb->tlb_control
+        || (hvm->nh_guest_asid != ns_vmcb->guest_asid));
+    hvm->nh_guest_asid = ns_vmcb->guest_asid;
+
+    /* nested paging for the guest */
+    hvm->nh_hap_enabled = (ns_vmcb->np_enable) ? 1 : 0;
+
+    /* Remember the V_INTR_MASK in hostflags */
+    hvm->nh_hostflags.fields.vintrmask =
+        (ns_vmcb->vintr.fields.intr_masking) ? 1 : 0;
+
+    return 0;
+}
+
+static uint64_t
+nsvm_vmcb_exitcode_generic2native(enum nestedhvm_intercepts exitcode)
+{
+    switch (exitcode) {
+    case NESTEDHVM_INTERCEPT_INVALID:
+        return VMEXIT_INVALID;
+    case NESTEDHVM_INTERCEPT_SHUTDOWN:
+        return VMEXIT_SHUTDOWN;
+    case NESTEDHVM_INTERCEPT_INTR:
+        return VMEXIT_INTR;
+    case NESTEDHVM_INTERCEPT_NMI:
+        return VMEXIT_NMI;
+    case NESTEDHVM_INTERCEPT_MCE:
+        return VMEXIT_EXCEPTION_MC;
+    case NESTEDHVM_INTERCEPT_MSR_READ:
+    case NESTEDHVM_INTERCEPT_MSR_WRITE:
+        return VMEXIT_MSR;
+    case NESTEDHVM_INTERCEPT_IOIO:
+        return VMEXIT_IOIO;
+    case NESTEDHVM_INTERCEPT_NPF:
+        return VMEXIT_NPF;
+    case NESTEDHVM_INTERCEPT_PF:
+        return VMEXIT_EXCEPTION_PF;
+    case NESTEDHVM_INTERCEPT_DE:
+        return VMEXIT_EXCEPTION_DE;
+    case NESTEDHVM_INTERCEPT_OF:
+        return VMEXIT_EXCEPTION_OF;
+    case NESTEDHVM_INTERCEPT_BR:
+        return VMEXIT_EXCEPTION_BR;
+    case NESTEDHVM_INTERCEPT_UD:
+        return VMEXIT_EXCEPTION_UD;
+    case NESTEDHVM_INTERCEPT_NM:
+        return VMEXIT_EXCEPTION_NM;
+    case NESTEDHVM_INTERCEPT_DF:
+        return VMEXIT_EXCEPTION_DF;
+    case NESTEDHVM_INTERCEPT_09:
+        return VMEXIT_EXCEPTION_09;
+    case NESTEDHVM_INTERCEPT_XF:
+        return VMEXIT_EXCEPTION_XF;
+    case NESTEDHVM_INTERCEPT_DB:
+        return VMEXIT_EXCEPTION_DB;
+    case NESTEDHVM_INTERCEPT_BP:
+        return VMEXIT_EXCEPTION_BP;
+    case NESTEDHVM_INTERCEPT_TS:
+        return VMEXIT_EXCEPTION_TS;
+    case NESTEDHVM_INTERCEPT_NP:
+        return VMEXIT_EXCEPTION_NP;
+    case NESTEDHVM_INTERCEPT_SS:
+        return VMEXIT_EXCEPTION_SS;
+    case NESTEDHVM_INTERCEPT_GP:
+        return VMEXIT_EXCEPTION_GP;
+    case NESTEDHVM_INTERCEPT_15:
+        return VMEXIT_EXCEPTION_15;
+    case NESTEDHVM_INTERCEPT_MF:
+        return VMEXIT_EXCEPTION_MF;
+    case NESTEDHVM_INTERCEPT_AC:
+        return VMEXIT_EXCEPTION_AC;
+
+    case NESTEDHVM_INTERCEPT_LAST:
+        gdprintk(XENLOG_ERR, "generic to native exitcode mapping failed %u\n",
+            exitcode);
+        BUG();
+        return NESTEDHVM_INTERCEPT_LAST;
+    }
+
+    return NESTEDHVM_INTERCEPT_LAST;
+}
+
+static int
+nsvm_vcpu_vmexit(struct vcpu *v, struct cpu_user_regs *regs, uint64_t exitcode)
+{
+    struct nestedhvm *hvm = &VCPU_NESTEDHVM(v);
+    struct vmcb_struct *ns_vmcb;
+
+    ns_vmcb = hvm->nh_vm;
+    /* Record the exit code in the nested VMCB and clear its eventinj field. */
+    ns_vmcb->exitcode = exitcode;
+    ns_vmcb->eventinj.bytes = 0;
+
+    if (hvm->nh_hostflags.fields.forcevmexit) {
+        enum nestedhvm_intercepts nh_exitcode;
+
+        nh_exitcode = hvm->nh_forcevmexit.exitcode;
+        /* Forced #VMEXIT: exit info comes from nh_forcevmexit, not the VMCB. */
+        switch (nh_exitcode) {
+        case NESTEDHVM_INTERCEPT_INVALID:
+        case NESTEDHVM_INTERCEPT_SHUTDOWN:
+            break;
+        case NESTEDHVM_INTERCEPT_INTR:
+        case NESTEDHVM_INTERCEPT_NMI:
+            break;
+        case NESTEDHVM_INTERCEPT_PF:
+            ns_vmcb->cr2 = hvm->nh_forcevmexit.exitinfo2; /* fault address */
+            /* fall through */
+        case NESTEDHVM_INTERCEPT_NPF:
+            ns_vmcb->exitinfo1 = hvm->nh_forcevmexit.exitinfo1; /* error code */
+            ns_vmcb->exitinfo2 = hvm->nh_forcevmexit.exitinfo2; /* fault address */
+            break;
+        case NESTEDHVM_INTERCEPT_MCE:
+            break;
+        case NESTEDHVM_INTERCEPT_MSR_READ:
+            ns_vmcb->exitinfo1 = 0;
+            regs->ecx = hvm->nh_forcevmexit.exitinfo1;
+            break;
+        case NESTEDHVM_INTERCEPT_MSR_WRITE:
+            ns_vmcb->exitinfo1 = 1;
+            regs->ecx = hvm->nh_forcevmexit.exitinfo1;
+            regs->eax = (uint32_t)hvm->nh_forcevmexit.exitinfo2;
+            regs->edx = (uint32_t)(hvm->nh_forcevmexit.exitinfo2 >> 32);
+            break;
+        case NESTEDHVM_INTERCEPT_IOIO:
+            /* Not implemented, the path shouldn't run anyway. */
+            BUG();
+            break;
+
+        case NESTEDHVM_INTERCEPT_DE:
+        case NESTEDHVM_INTERCEPT_OF:
+        case NESTEDHVM_INTERCEPT_BR:
+        case NESTEDHVM_INTERCEPT_UD:
+        case NESTEDHVM_INTERCEPT_NM:
+        case NESTEDHVM_INTERCEPT_DF:
+        case NESTEDHVM_INTERCEPT_09:
+        case NESTEDHVM_INTERCEPT_XF:
+            break;
+        case NESTEDHVM_INTERCEPT_DB:
+        case NESTEDHVM_INTERCEPT_BP:
+        case NESTEDHVM_INTERCEPT_TS:
+            break;
+        case NESTEDHVM_INTERCEPT_NP:
+        case NESTEDHVM_INTERCEPT_SS:
+        case NESTEDHVM_INTERCEPT_GP:
+        case NESTEDHVM_INTERCEPT_15:
+        case NESTEDHVM_INTERCEPT_MF:
+        case NESTEDHVM_INTERCEPT_AC:
+            ns_vmcb->exitinfo1 = hvm->nh_forcevmexit.exitinfo1;
+            break;
+
+        case NESTEDHVM_INTERCEPT_LAST:
+            BUG();
+            break;
+        }
+        exitcode = nsvm_vmcb_exitcode_generic2native(nh_exitcode);
+        ns_vmcb->exitcode = exitcode;
+    }
+
+    return 0;
+}
+
+static uint64_t
+nsvm_vmcb_exitcode_native2generic(struct vcpu *v, struct cpu_user_regs *regs,
+    uint64_t exitcode, uint64_t *info1, uint64_t *info2)
+{
+    struct vmcb_struct *ns_vmcb = VCPU_NESTEDHVM(v).nh_vm;
+
+    *info1 = *info2 = 0;
+    switch (exitcode) {
+    case VMEXIT_INVALID:
+        return NESTEDHVM_INTERCEPT_INVALID;
+    case VMEXIT_SHUTDOWN:
+        return NESTEDHVM_INTERCEPT_SHUTDOWN;
+    case VMEXIT_INTR:
+        return NESTEDHVM_INTERCEPT_INTR;
+    case VMEXIT_NMI:
+        return NESTEDHVM_INTERCEPT_NMI;
+    case VMEXIT_NPF:
+        *info1 = ns_vmcb->exitinfo1; /* #PF error code */
+        *info2 = ns_vmcb->exitinfo2; /* #PF guest physical address */
+        return NESTEDHVM_INTERCEPT_NPF;
+    case VMEXIT_EXCEPTION_PF:
+        *info1 = ns_vmcb->exitinfo1; /* #PF error code */
+        *info2 = ns_vmcb->exitinfo2; /* #PF virtual address */
+        return NESTEDHVM_INTERCEPT_PF;
+    case VMEXIT_EXCEPTION_MC:
+        return NESTEDHVM_INTERCEPT_MCE;
+    case VMEXIT_MSR:
+        *info1 = regs->ecx;
+        *info2 = ((uint64_t)regs->edx << 32) | regs->eax;
+        return (ns_vmcb->exitinfo1 == 0) ?
+            NESTEDHVM_INTERCEPT_MSR_READ : NESTEDHVM_INTERCEPT_MSR_WRITE;
+    case VMEXIT_IOIO:
+        return NESTEDHVM_INTERCEPT_IOIO;
+
+    case VMEXIT_EXCEPTION_DE:
+        return NESTEDHVM_INTERCEPT_DE;
+    case VMEXIT_EXCEPTION_OF:
+        return NESTEDHVM_INTERCEPT_OF;
+    case VMEXIT_EXCEPTION_BR:
+        return NESTEDHVM_INTERCEPT_BR;
+    case VMEXIT_EXCEPTION_UD:
+        return NESTEDHVM_INTERCEPT_UD;
+    case VMEXIT_EXCEPTION_NM:
+        return NESTEDHVM_INTERCEPT_NM;
+    case VMEXIT_EXCEPTION_DF:
+        return NESTEDHVM_INTERCEPT_DF;
+    case VMEXIT_EXCEPTION_09:
+        return NESTEDHVM_INTERCEPT_09;
+    case VMEXIT_EXCEPTION_XF:
+        return NESTEDHVM_INTERCEPT_XF;
+
+    case VMEXIT_EXCEPTION_DB:
+        *info1 = ns_vmcb->cs.attr.bytes;
+        *info2 = ns_vmcb->rip;
+        return NESTEDHVM_INTERCEPT_DB;
+    case VMEXIT_EXCEPTION_BP:
+        *info1 = ns_vmcb->cs.attr.bytes;
+        *info2 = ns_vmcb->rip;
+        return NESTEDHVM_INTERCEPT_BP;
+
+    case VMEXIT_EXCEPTION_TS:
+        return NESTEDHVM_INTERCEPT_TS;
+
+    case VMEXIT_EXCEPTION_NP:
+        *info1 = ns_vmcb->exitinfo1;
+        return NESTEDHVM_INTERCEPT_NP;
+    case VMEXIT_EXCEPTION_SS:
+        *info1 = ns_vmcb->exitinfo1;
+        return NESTEDHVM_INTERCEPT_SS;
+    case VMEXIT_EXCEPTION_GP:
+        *info1 = ns_vmcb->exitinfo1;
+        return NESTEDHVM_INTERCEPT_GP;
+    case VMEXIT_EXCEPTION_15:
+        *info1 = ns_vmcb->exitinfo1;
+        return NESTEDHVM_INTERCEPT_15;
+    case VMEXIT_EXCEPTION_MF:
+        *info1 = ns_vmcb->exitinfo1;
+        return NESTEDHVM_INTERCEPT_MF;
+    case VMEXIT_EXCEPTION_AC:
+        *info1 = ns_vmcb->exitinfo1;
+        return NESTEDHVM_INTERCEPT_AC;
+    }
+
+    return NESTEDHVM_INTERCEPT_LAST;
+}
+
+static int
+nsvm_vmcb_intercepted_by_guest(struct vcpu *v, uint64_t exitcode)
+{
+    uint64_t exit_bits;
+    struct nestedhvm *hvm = &VCPU_NESTEDHVM(v);
+    struct nestedsvm *svm = hvm->nh_arch;
+
+    if (hvm->nh_hostflags.fields.forcevmexit)
+        exitcode = nsvm_vmcb_exitcode_generic2native(hvm->nh_forcevmexit.exitcode);
+
+    switch (exitcode) {
+    case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
+    case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
+        exit_bits = 1ULL << (exitcode - VMEXIT_CR0_READ);
+        if (svm->ns_cr_intercepts & exit_bits)
+            break;
+        return 0;
+
+    case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
+    case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
+        exit_bits = 1ULL << (exitcode - VMEXIT_DR0_READ);
+        if (svm->ns_dr_intercepts & exit_bits)
+            break;
+        return 0;
+
+    case VMEXIT_EXCEPTION_DE ... VMEXIT_EXCEPTION_XF:
+        exit_bits = 1ULL << (exitcode - VMEXIT_EXCEPTION_DE);
+        if (svm->ns_exception_intercepts & exit_bits)
+            break;
+        return 0;
+
+    case VMEXIT_INTR ... VMEXIT_SHUTDOWN:
+        exit_bits = 1ULL << (exitcode - VMEXIT_INTR);
+        if (svm->ns_general1_intercepts & exit_bits)
+            break;
+        return 0;
+
+    /* case VMEXIT_VMRUN ... VMEXIT_MWAIT_CONDITIONAL: */
+    default:
+        exit_bits = 1ULL << (exitcode - VMEXIT_VMRUN);
+        if (svm->ns_general2_intercepts & exit_bits)
+            break;
+        return 0;
+    }
+
+    return 1;
+}
+
+static int nsvm_vmrun_permissionmap(struct vcpu *v)
+{
+    struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
+    struct nestedhvm *hvm = &VCPU_NESTEDHVM(v);
+    struct vmcb_struct *ns_vmcb = hvm->nh_vm;
+    struct vmcb_struct *host_vmcb = arch_svm->vmcb;
+    unsigned long *ns_msrpm_ptr;
+    unsigned int i;
+    enum hvm_copy_result ret;
+
+    ns_msrpm_ptr = (unsigned long *)hvm->nh_cached_msrpm;
+
+    ret = hvm_copy_from_guest_phys(hvm->nh_cached_msrpm,
+                                   ns_vmcb->msrpm_base_pa,
+                                   hvm->nh_cached_msrpm_size);
+    if (ret != HVMCOPY_okay) {
+        gdprintk(XENLOG_ERR, "hvm_copy_from_guest_phys msrpm %u\n", ret);
+        return 1;
+    }
+
+    /* Skip io bitmap merge since hvm_io_bitmap has all bits set but
+     * 0x80 and 0xed.
+     */
+
+    /* v->arch.hvm_svm.msrpm has type unsigned long, thus
+     * BYTES_PER_LONG.
+     */
+    for (i = 0; i < MSRPM_SIZE / BYTES_PER_LONG; i++)
+        hvm->nh_merged_msrpm[i] = arch_svm->msrpm[i] | ns_msrpm_ptr[i];
+
+    host_vmcb->iopm_base_pa =
+        (uint64_t)virt_to_maddr(hvm_io_bitmap);
+    host_vmcb->msrpm_base_pa =
+        (uint64_t)virt_to_maddr(hvm->nh_merged_msrpm);
+
+    return 0;
+}
+
+static int nsvm_vmcb_prepare4vmrun(struct vcpu *v, struct cpu_user_regs *regs)
+{
+    struct nestedhvm *hvm = &VCPU_NESTEDHVM(v);
+    struct nestedsvm *svm = hvm->nh_arch;
+    struct vmcb_struct *ns_vmcb = hvm->nh_vm;
+    struct vmcb_struct *host_vmcb = v->arch.hvm_svm.vmcb;
+    int rc;
+
+    /* Enable nested guest intercepts */
+    svm->ns_cr_intercepts = ns_vmcb->cr_intercepts;
+    svm->ns_dr_intercepts = ns_vmcb->dr_intercepts;
+    svm->ns_exception_intercepts = ns_vmcb->exception_intercepts;
+    svm->ns_general1_intercepts = ns_vmcb->general1_intercepts;
+    svm->ns_general2_intercepts = ns_vmcb->general2_intercepts;
+
+    host_vmcb->cr_intercepts |= ns_vmcb->cr_intercepts;
+    host_vmcb->dr_intercepts |= ns_vmcb->dr_intercepts;
+    host_vmcb->exception_intercepts |= ns_vmcb->exception_intercepts;
+    host_vmcb->general1_intercepts |= ns_vmcb->general1_intercepts;
+    host_vmcb->general2_intercepts |= ns_vmcb->general2_intercepts;
+
+    /* Nested Pause Filter */
+    host_vmcb->pause_filter_count = ns_vmcb->pause_filter_count;
+
+    /* Nested IO permission bitmaps */
+    rc = nsvm_vmrun_permissionmap(v);
+    if (rc)
+        return rc;
+
+    /* TSC offset */
+    hvm_set_guest_tsc(v, host_vmcb->tsc_offset + ns_vmcb->tsc_offset);
+
+    /* ASID */
+    hvm_asid_flush_vcpu(v);
+    /* host_vmcb->guest_asid = ns_vmcb->guest_asid; */
+
+    /* TLB control */
+    host_vmcb->tlb_control |= ns_vmcb->tlb_control;
+
+    /* Virtual Interrupts */
+    host_vmcb->vintr = ns_vmcb->vintr;
+    host_vmcb->vintr.fields.intr_masking = 1;
+
+    /* Shadow Mode */
+    host_vmcb->interrupt_shadow = ns_vmcb->interrupt_shadow;
+
+    /* Exit codes */
+    host_vmcb->exitcode = ns_vmcb->exitcode;
+    host_vmcb->exitinfo1 = ns_vmcb->exitinfo1;
+    host_vmcb->exitinfo2 = ns_vmcb->exitinfo2;
+    host_vmcb->exitintinfo = ns_vmcb->exitintinfo;
+
+    /* Pending Interrupts */
+    host_vmcb->eventinj = ns_vmcb->eventinj;
+
+    /* LBR virtualization */
+    svm->ns_lbr_control = ns_vmcb->lbr_control;
+    host_vmcb->lbr_control.bytes |= ns_vmcb->lbr_control.bytes;
+
+    /* NextRIP */
+    host_vmcb->nextrip = ns_vmcb->nextrip;
+
+    /*
+     * VMCB Save State Area
+     */
+
+    /* Segments */
+    host_vmcb->es = ns_vmcb->es;
+    host_vmcb->cs = ns_vmcb->cs;
+    host_vmcb->ss = ns_vmcb->ss;
+    host_vmcb->ds = ns_vmcb->ds;
+    host_vmcb->gdtr = ns_vmcb->gdtr;
+    host_vmcb->idtr = ns_vmcb->idtr;
+
+    /* CPL */
+    host_vmcb->cpl = ns_vmcb->cpl;
+
+    /* EFER */
+    v->arch.hvm_vcpu.guest_efer = ns_vmcb->efer;
+    rc = hvm_set_efer(ns_vmcb->efer);
+    if (rc != X86EMUL_OKAY)
+	gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);
+
+    /* CR4 */
+    v->arch.hvm_vcpu.guest_cr[4] = ns_vmcb->cr4;
+    rc = hvm_set_cr4(ns_vmcb->cr4);
+    if (rc != X86EMUL_OKAY)
+        gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);
+
+    /* CR0 */
+    v->arch.hvm_vcpu.guest_cr[0] = ns_vmcb->cr0;
+    rc = hvm_set_cr0(ns_vmcb->cr0);
+    if (rc != X86EMUL_OKAY)
+        gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);
+
+    /* CR2 */
+    v->arch.hvm_vcpu.guest_cr[2] = ns_vmcb->cr2;
+    hvm_update_guest_cr(v, 2);
+
+    /* Nested paging mode */
+    if (nestedhvm_paging_mode_hap(v)) {
+        /* host nested paging + guest nested paging. */
+
+        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+        rc = hvm_set_cr3(ns_vmcb->cr3);
+        if (rc != X86EMUL_OKAY)
+            gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
+    } else if (paging_mode_hap(v->domain)) {
+        /* host nested paging + guest shadow paging. */
+        host_vmcb->np_enable = 1;
+        /* Keep h_cr3 as it is. */
+        /* Guest shadow paging: Must intercept pagefaults. */
+        host_vmcb->exception_intercepts |= (1U << TRAP_page_fault);
+        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+        rc = hvm_set_cr3(ns_vmcb->cr3);
+        if (rc != X86EMUL_OKAY)
+            gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
+    } else {
+        /* host shadow paging + guest shadow paging. */
+        host_vmcb->np_enable = 0;
+        host_vmcb->h_cr3 = 0x0;
+
+#if 0
+        host_vmcb->cr3 = v->shadow_shadow_table;
+
+        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+        rc = hvm_set_cr3(ns_vmcb->cr3);
+        if (rc != X86EMUL_OKAY)
+            gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
+#endif
+    }
+
+    /* DRn */
+    host_vmcb->dr7 = ns_vmcb->dr7;
+    host_vmcb->dr6 = ns_vmcb->dr6;
+
+    /* RFLAGS */
+    host_vmcb->rflags = ns_vmcb->rflags;
+
+    /* RIP */
+    host_vmcb->rip = ns_vmcb->rip;
+
+    /* RSP */
+    host_vmcb->rsp = ns_vmcb->rsp;
+
+    /* RAX */
+    host_vmcb->rax = ns_vmcb->rax;
+
+    /* Keep the host values of the fs, gs, ldtr, tr, kerngsbase,
+     * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
+     * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
+     */
+
+    /* Page tables */
+    host_vmcb->pdpe0 = ns_vmcb->pdpe0;
+    host_vmcb->pdpe1 = ns_vmcb->pdpe1;
+    host_vmcb->pdpe2 = ns_vmcb->pdpe2;
+    host_vmcb->pdpe3 = ns_vmcb->pdpe3;
+
+    /* PAT */
+    host_vmcb->g_pat = ns_vmcb->g_pat;
+
+    /* Debug Control MSR */
+    host_vmcb->debugctlmsr = ns_vmcb->debugctlmsr;
+
+    /* LBR MSRs */
+    host_vmcb->lastbranchfromip = ns_vmcb->lastbranchfromip;
+    host_vmcb->lastbranchtoip = ns_vmcb->lastbranchtoip;
+    host_vmcb->lastintfromip = ns_vmcb->lastintfromip;
+    host_vmcb->lastinttoip = ns_vmcb->lastinttoip;
+
+    rc = svm_vmcb_isvalid(__func__, ns_vmcb, 1);
+    if (rc) {
+        gdprintk(XENLOG_ERR, "nested vmcb invalid\n");
+        return rc;
+    }
+
+    rc = svm_vmcb_isvalid(__func__, host_vmcb, 1);
+    if (rc) {
+        gdprintk(XENLOG_ERR, "host vmcb invalid\n");
+        return rc;
+    }
+
+    /* Switch guest registers to nested guest */
+    regs->eax = ns_vmcb->rax;
+    regs->eip = ns_vmcb->rip;
+    regs->esp = ns_vmcb->rsp;
+    regs->eflags = ns_vmcb->rflags;
+
+    return 0;
+}
+
+static int nsvm_vmcb_prepare4vmexit(struct vcpu *v)
+{
+    struct nestedhvm *hvm = &VCPU_NESTEDHVM(v);
+    struct nestedsvm *svm = hvm->nh_arch;
+    struct vmcb_struct *vmcb, *ns_vmcb;
+
+    vmcb = v->arch.hvm_svm.vmcb;
+    ns_vmcb = hvm->nh_vm;
+
+    svm_vmsave(vmcb);
+
+    /* Intercepts */
+    /* Copy cached intercepts since they are the guest's original
+     * intercepts.
+     */
+    ns_vmcb->cr_intercepts = svm->ns_cr_intercepts;
+    ns_vmcb->dr_intercepts = svm->ns_dr_intercepts;
+    ns_vmcb->exception_intercepts = svm->ns_exception_intercepts;
+    ns_vmcb->general1_intercepts = svm->ns_general1_intercepts;
+    ns_vmcb->general2_intercepts = svm->ns_general2_intercepts;
+
+    /* Nested Pause Filter */
+    ns_vmcb->pause_filter_count = vmcb->pause_filter_count;
+
+    /* Nested IO permission bitmap */
+    /* Just keep the iopm_base_pa and msrpm_base_pa values.
+     * The guest must not see the virtualized values.
+     */
+
+    /* TSC offset */
+    ns_vmcb->tsc_offset = vmcb->tsc_offset;
+
+    /* ASID */
+    /* ns_vmcb->guest_asid = vmcb->guest_asid; */
+
+    /* TLB control */
+    ns_vmcb->tlb_control = 0;
+
+    /* Virtual Interrupts */
+    ns_vmcb->vintr = vmcb->vintr;
+    if (!(hvm->nh_hostflags.fields.vintrmask))
+        ns_vmcb->vintr.fields.intr_masking = 0;
+
+    /* Shadow mode */
+    ns_vmcb->interrupt_shadow = vmcb->interrupt_shadow;
+
+    /* Exit codes */
+    ns_vmcb->exitcode = vmcb->exitcode;
+    ns_vmcb->exitinfo1 = vmcb->exitinfo1;
+    ns_vmcb->exitinfo2 = vmcb->exitinfo2;
+    ns_vmcb->exitintinfo = vmcb->exitintinfo;
+
+    /* Interrupts */
+    /* If we emulate a VMRUN/#VMEXIT in the same host #VMEXIT cycle we have
+     * to make sure that we do not lose injected events. So check eventinj
+     * here and copy it to exitintinfo if it is valid.
+     * exitintinfo and eventinj can't be both valid because the case below
+     * only happens on a VMRUN instruction intercept which has no valid
+     * exitintinfo set.
+     */
+    if ( unlikely(vmcb->eventinj.fields.v) &&
+         hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
+                                     vmcb->eventinj.fields.vector) )
+    {
+        ns_vmcb->exitintinfo = vmcb->eventinj;
+    }
+
+    ns_vmcb->eventinj.bytes = 0;
+
+    /* Nested paging mode */
+    if (nestedhvm_paging_mode_hap(v)) {
+        /* host nested paging + guest nested paging. */
+        ns_vmcb->np_enable = vmcb->np_enable;
+        ns_vmcb->cr3 = vmcb->cr3;
+        /* The vmcb->h_cr3 is the shadowed h_cr3. The original
+         * unshadowed guest h_cr3 is kept in ns_vmcb->h_cr3,
+         * hence we keep the ns_vmcb->h_cr3 value. */
+    } else if (paging_mode_hap(v->domain)) {
+        /* host nested paging + guest shadow paging. */
+        ns_vmcb->np_enable = 0;
+        /* Throw h_cr3 away. Guest is not allowed to set it or
+         * it can break out, otherwise (security hole!) */
+        ns_vmcb->h_cr3 = 0x0;
+        /* Stop intercepting #PF (already done above
+         * by restoring cached intercepts). */
+        ns_vmcb->cr3 = vmcb->cr3;
+    } else {
+        /* host shadow paging + guest shadow paging. */
+        ns_vmcb->np_enable = 0;
+        ns_vmcb->h_cr3 = 0x0;
+        /* The vmcb->cr3 is the shadowed cr3. The original
+         * unshadowed guest cr3 is kept in ns_vmcb->cr3,
+         * hence we keep the ns_vmcb->cr3 value. */
+    }
+
+    /* LBR virtualization */
+    ns_vmcb->lbr_control = svm->ns_lbr_control;
+
+    /* NextRIP */
+    ns_vmcb->nextrip = vmcb->nextrip;
+
+    /*
+     * VMCB Save State Area
+     */
+
+    /* Segments */
+    ns_vmcb->es = vmcb->es;
+    ns_vmcb->cs = vmcb->cs;
+    ns_vmcb->ss = vmcb->ss;
+    ns_vmcb->ds = vmcb->ds;
+    ns_vmcb->gdtr = vmcb->gdtr;
+    ns_vmcb->idtr = vmcb->idtr;
+
+    /* CPL */
+    ns_vmcb->cpl = vmcb->cpl;
+
+    /* EFER */
+    ns_vmcb->efer = vmcb->efer;
+
+    /* CRn */
+    ns_vmcb->cr4 = vmcb->cr4;
+    ns_vmcb->cr0 = vmcb->cr0;
+
+    /* DRn */
+    ns_vmcb->dr7 = vmcb->dr7;
+    ns_vmcb->dr6 = vmcb->dr6;
+
+    /* RFLAGS */
+    ns_vmcb->rflags = vmcb->rflags;
+
+    /* RIP */
+    ns_vmcb->rip = vmcb->rip;
+
+    /* RSP */
+    ns_vmcb->rsp = vmcb->rsp;
+
+    /* RAX */
+    ns_vmcb->rax = vmcb->rax;
+
+    /* Keep the nested guest values of the fs, gs, ldtr, tr, kerngsbase,
+     * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
+     * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
+     */
+
+    /* CR2 */
+    ns_vmcb->cr2 = vmcb->cr2;
+
+    /* Page tables */
+    ns_vmcb->pdpe0 = vmcb->pdpe0;
+    ns_vmcb->pdpe1 = vmcb->pdpe1;
+    ns_vmcb->pdpe2 = vmcb->pdpe2;
+    ns_vmcb->pdpe3 = vmcb->pdpe3;
+
+    /* PAT */
+    ns_vmcb->g_pat = vmcb->g_pat;
+
+    /* Debug Control MSR */
+    ns_vmcb->debugctlmsr = vmcb->debugctlmsr;
+
+    /* LBR MSRs */
+    ns_vmcb->lastbranchfromip = vmcb->lastbranchfromip;
+    ns_vmcb->lastbranchtoip = vmcb->lastbranchtoip;
+    ns_vmcb->lastintfromip = vmcb->lastintfromip;
+    ns_vmcb->lastinttoip = vmcb->lastinttoip;
+
+    return 0;
+}
+
+static int nsvm_rdmsr(struct vcpu *v, unsigned int msr, uint64_t *msr_content)
+{
+    struct nestedsvm *svm = VCPU_NESTEDHVM(v).nh_arch;
+    int ret = 1;
+
+    *msr_content = 0;
+    /* Returns 1 when the MSR is handled here, 0 when it is not. */
+    switch (msr) {
+    case MSR_K8_VM_CR:
+        break;
+    case MSR_K8_VM_HSAVE_PA:
+        *msr_content = svm->ns_msr_hsavepa;
+        break;
+    default:
+        ret = 0;
+        break;
+    }
+
+    return ret;
+}
+
+static int nsvm_wrmsr(struct vcpu *v, unsigned int msr, uint64_t msr_content)
+{
+    int ret = 1;
+    struct nestedhvm *hvm = &VCPU_NESTEDHVM(v);
+    struct nestedsvm *svm = hvm->nh_arch;
+    /* Returns 1 when handled here, 0 when not, -1 to request a #GP. */
+    switch (msr) {
+    case MSR_K8_VM_CR:
+        /* ignore write. handle all bits as read-only. */
+        break;
+    case MSR_K8_VM_HSAVE_PA:
+        if (!nestedhvm_vmaddr_isvalid(hvm, msr_content)) {
+            gdprintk(XENLOG_ERR,
+                "MSR_K8_VM_HSAVE_PA value invalid 0x%"PRIx64"\n", msr_content);
+            ret = -1; /* inject #GP */
+            break;
+        }
+        svm->ns_msr_hsavepa = msr_content;
+        break;
+    default:
+        ret = 0;
+        break;
+    }
+
+    return ret;
+}
+
 static int svm_cpu_up(void)
 {
     uint64_t msr_content;
@@ -952,8 +1977,8 @@ static void svm_do_nested_pgfault(paddr_
         struct {
             uint64_t gpa;
             uint64_t mfn;
-            u32 qualification;
-            u32 p2mt;
+            uint32_t qualification;
+            uint32_t p2mt;
         } _d;
 
         _d.gpa = gpa;
@@ -994,11 +2019,24 @@ static void svm_cpuid_intercept(
 
     hvm_cpuid(input, eax, ebx, ecx, edx);
 
-    if ( input == 0x80000001 )
-    {
+    switch (input) {
+    case 0x80000001:
         /* Fix up VLAPIC details. */
         if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
             __clear_bit(X86_FEATURE_APIC & 31, edx);
+        break;
+    case 0x8000000a:
+        /* We require the host to use nested paging as
+         * hap-on-shadow is not supported.
+         * The tools have no way to check this case and
+         * thus always enable it. So we mask hap unless
+         * we use hap-on-hap. 
+         */
+        if ( cpu_has_svm_npt && !paging_mode_hap(v->domain) )
+            *edx &= ~(1U << SVM_FEATURE_NPT);
+        break;
+    default:
+        break;
     }
 
     HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
@@ -1034,6 +2072,7 @@ static void svm_dr_access(struct vcpu *v
 
 static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
 {
+    int ret;
     struct vcpu *v = current;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
@@ -1071,9 +2110,6 @@ static int svm_msr_read_intercept(unsign
         *msr_content = 0;
         break;
 
-    case MSR_K8_VM_HSAVE_PA:
-        goto gpf;
-
     case MSR_IA32_DEBUGCTLMSR:
         *msr_content = vmcb->debugctlmsr;
         break;
@@ -1106,6 +2142,11 @@ static int svm_msr_read_intercept(unsign
         break;
 
     default:
+        ret = nsvm_rdmsr(v, msr, msr_content);
+        if ( ret < 0 )
+            goto gpf;
+        else if ( ret )
+            break;
 
         if ( rdmsr_viridian_regs(msr, msr_content) ||
              rdmsr_hypervisor_regs(msr, msr_content) )
@@ -1128,14 +2169,12 @@ static int svm_msr_read_intercept(unsign
 
 static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
 {
+    int ret;
     struct vcpu *v = current;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
     switch ( msr )
     {
-    case MSR_K8_VM_HSAVE_PA:
-        goto gpf;
-
     case MSR_IA32_SYSENTER_CS:
         v->arch.hvm_svm.guest_sysenter_cs = msr_content;
         break;
@@ -1186,6 +2225,12 @@ static int svm_msr_write_intercept(unsig
         break;
 
     default:
+        ret = nsvm_wrmsr(v, msr, msr_content);
+        if ( ret < 0 )
+            goto gpf;
+        else if ( ret )
+            break;
+
         if ( wrmsr_viridian_regs(msr, msr_content) )
             break;
 
@@ -1258,6 +2303,166 @@ static void svm_vmexit_do_rdtsc(struct c
     hvm_rdtsc_intercept(regs);
 }
 
+/* Emulate VMRUN: hand the guest VMCB address to the nested-HVM entry path. */
+static void svm_vmexit_do_vmrun(struct cpu_user_regs *regs,
+                                struct vcpu *v, uint64_t vmcbaddr)
+{
+    unsigned int inst_len;
+
+    inst_len = __get_instruction_length(v, INSTR_VMRUN);
+    if ( inst_len == 0 )
+        return;
+
+    /* Nothing to do with the result: on failure nestedhvm_vcpu_vmentry()
+     * has already injected an exception (most likely #GP or #UD).
+     * NOTE(review): presumably it also consumes inst_len on success.
+     */
+    (void)nestedhvm_vcpu_vmentry(v, regs, vmcbaddr, inst_len);
+}
+
+/* Emulate VMLOAD: copy the guest VMCB at vmcbaddr and load state from it. */
+static void
+svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
+                     struct cpu_user_regs *regs,
+                     struct vcpu *v, uint64_t vmcbaddr)
+{
+    int ret;
+    unsigned int inst_len;
+    struct vmcb_struct *tmp_vmcb;
+
+    if ( (inst_len = __get_instruction_length(v, INSTR_VMLOAD)) == 0 )
+        return;
+
+    /* tmp_vmcb can't be a local variable on the stack because
+     * the machine stops with a sudden freeze.
+     */
+    tmp_vmcb = xmalloc(struct vmcb_struct);
+    if (tmp_vmcb == NULL)
+        return;
+
+    ret = nestedhvm_vcpu_state_validate(v, vmcbaddr);
+    if (ret) {
+        gdprintk(XENLOG_ERR,
+            "nestedhvm_vcpu_state_validate failed, injecting 0x%x\n", ret);
+        goto inject;
+    }
+
+    if (hvm_copy_from_guest_phys(tmp_vmcb, vmcbaddr,
+                                 sizeof(struct vmcb_struct))) {
+        ret = TRAP_invalid_op; /* copy status is not an exception vector */
+        gdprintk(XENLOG_ERR,
+            "hvm_copy_from_guest_phys failed, injecting 0x%x\n", ret);
+        goto inject;
+    }
+
+    nsvm_vmcb_loadsave(tmp_vmcb, vmcb);
+    svm_vmload(vmcb);
+
+    __update_guest_eip(regs, inst_len);
+    xfree(tmp_vmcb);
+    return;
+
+ inject:
+    xfree(tmp_vmcb);
+    hvm_inject_exception(ret, HVM_DELIVER_NO_ERROR_CODE, 0);
+}
+
+/* Emulate VMSAVE: merge the saved state into the guest VMCB at vmcbaddr. */
+static void
+svm_vmexit_do_vmsave(struct vmcb_struct *vmcb,
+                     struct cpu_user_regs *regs,
+                     struct vcpu *v, uint64_t vmcbaddr)
+{
+    int ret;
+    unsigned int inst_len;
+    struct vmcb_struct *tmp_vmcb;
+
+    if ( (inst_len = __get_instruction_length(v, INSTR_VMSAVE)) == 0 )
+        return;
+
+    /* tmp_vmcb can't be a local variable on the stack because
+     * the machine stops with a sudden freeze.
+     */
+    tmp_vmcb = xmalloc(struct vmcb_struct);
+    if (tmp_vmcb == NULL)
+        return;
+
+    ret = nestedhvm_vcpu_state_validate(v, vmcbaddr);
+    if (ret) {
+        gdprintk(XENLOG_ERR,
+            "nestedhvm_vcpu_state_validate failed, injecting 0x%x\n", ret);
+        goto inject;
+    }
+
+    if (hvm_copy_from_guest_phys(tmp_vmcb, vmcbaddr,
+                                 sizeof(struct vmcb_struct))) {
+        ret = TRAP_invalid_op; /* copy status is not an exception vector */
+        gdprintk(XENLOG_ERR,
+            "hvm_copy_from_guest_phys failed, injecting 0x%x\n", ret);
+        goto inject;
+    }
+
+    svm_vmsave(vmcb);
+    nsvm_vmcb_loadsave(vmcb, tmp_vmcb);
+
+    if (hvm_copy_to_guest_phys(vmcbaddr, tmp_vmcb,
+                               sizeof(struct vmcb_struct))) {
+        ret = TRAP_invalid_op; /* copy status is not an exception vector */
+        gdprintk(XENLOG_ERR,
+            "hvm_copy_to_guest_phys failed, injecting 0x%x\n", ret);
+        goto inject;
+    }
+
+    __update_guest_eip(regs, inst_len);
+    xfree(tmp_vmcb);
+    return;
+
+ inject:
+    xfree(tmp_vmcb);
+    hvm_inject_exception(ret, HVM_DELIVER_NO_ERROR_CODE, 0);
+}
+
+/* Emulate CLGI; on success also clear vintr.irq and the VINTR intercept. */
+static void svm_vmexit_do_clgi(struct cpu_user_regs *regs, struct vcpu *v)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    unsigned int inst_len = __get_instruction_length(v, INSTR_CLGI);
+
+    if ( inst_len == 0 )
+        return;
+
+    /* On failure, nestedsvm_vcpu_clgi() has already injected an
+     * exception (most likely #GP or #UD), so there is nothing left to do.
+     */
+    if ( nestedsvm_vcpu_clgi(v) != 0 )
+        return;
+
+    /* No interrupts should arrive after a CLGI: withdraw the virtual
+     * interrupt request and stop intercepting VINTR.
+     */
+    vmcb->vintr.fields.irq = 0;
+    vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
+    __update_guest_eip(regs, inst_len);
+}
+
+/* Emulate STGI; calls __update_guest_eip() only when it succeeds. */
+static void svm_vmexit_do_stgi(struct cpu_user_regs *regs, struct vcpu *v)
+{
+    unsigned int inst_len = __get_instruction_length(v, INSTR_STGI);
+
+    if ( inst_len == 0 )
+        return;
+
+    /* On failure, nestedsvm_vcpu_stgi() has already injected an
+     * exception (most likely #GP or #UD): leave the guest instruction
+     * pointer alone in that case.
+     */
+    if ( nestedsvm_vcpu_stgi(v) != 0 )
+        return;
+
+    __update_guest_eip(regs, inst_len);
+}
+
 static void svm_vmexit_ud_intercept(struct cpu_user_regs *regs)
 {
     struct hvm_emulate_ctxt ctxt;
@@ -1405,20 +2610,37 @@ static struct hvm_function_table __read_
     .msr_read_intercept   = svm_msr_read_intercept,
     .msr_write_intercept  = svm_msr_write_intercept,
     .invlpg_intercept     = svm_invlpg_intercept,
-    .set_rdtsc_exiting    = svm_set_rdtsc_exiting
+    .set_rdtsc_exiting    = svm_set_rdtsc_exiting,
+
+    .nestedhvm_vcpu_initialise = nsvm_vcpu_initialise,
+    .nestedhvm_vcpu_destroy = nsvm_vcpu_destroy,
+    .nestedhvm_vcpu_reset = nsvm_vcpu_reset,
+    .nestedhvm_vcpu_features = nsvm_vcpu_features,
+    .nestedhvm_vcpu_hostsave = nsvm_vcpu_hostsave,
+    .nestedhvm_vcpu_hostrestore = nsvm_vcpu_hostrestore,
+    .nestedhvm_vcpu_vmentry = nsvm_vcpu_vmrun,
+    .nestedhvm_vcpu_vmexit = nsvm_vcpu_vmexit,
+    .nestedhvm_vm_exitcode_native2generic = nsvm_vmcb_exitcode_native2generic,
+    .nestedhvm_vm_intercepted_by_guest = nsvm_vmcb_intercepted_by_guest,
+    .nestedhvm_vm_prepare4vmentry = nsvm_vmcb_prepare4vmrun,
+    .nestedhvm_vm_prepare4vmexit = nsvm_vmcb_prepare4vmexit,
 };
 
 asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
 {
-    unsigned int exit_reason;
+    uint64_t exit_reason;
     struct vcpu *v = current;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     eventinj_t eventinj;
     int inst_len, rc;
+    bool_t vcpu_guestmode = 0;
 
     if ( paging_mode_hap(v->domain) )
         v->arch.hvm_vcpu.guest_cr[3] = v->arch.hvm_vcpu.hw_cr[3] = vmcb->cr3;
 
+    if ( nestedhvm_enabled(v->domain) && nestedhvm_vcpu_in_guestmode(v) )
+        vcpu_guestmode = 1;
+
     /*
      * Before doing anything else, we need to sync up the VLAPIC's TPR with
      * SVM's vTPR. It's OK if the guest doesn't touch CR8 (e.g. 32-bit Windows)
@@ -1426,12 +2648,39 @@ asmlinkage void svm_vmexit_handler(struc
      * NB. We need to preserve the low bits of the TPR to make checked builds
      * of Windows work, even though they don't actually do anything.
      */
-    vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
-                   ((vmcb->vintr.fields.tpr & 0x0F) << 4) |
-                   (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0x0F));
+    if ( !vcpu_guestmode ) {
+        vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
+                       ((vmcb->vintr.fields.tpr & 0x0F) << 4) |
+                       (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0x0F));
+    }
 
     exit_reason = vmcb->exitcode;
 
+    if ( vcpu_guestmode ) {
+        enum nestedhvm_vmexits nsret;
+
+        nsret = nestedhvm_vcpu_vmexit(v, regs, exit_reason);
+        VCPU_NESTEDHVM(v).nh_hostflags.fields.forcevmexit = 0;
+        switch (nsret) {
+        case NESTEDHVM_VMEXIT_DONE:
+            goto out;
+        case NESTEDHVM_VMEXIT_ERROR:
+            gdprintk(XENLOG_ERR,
+		"nestedhvm_vcpu_vmexit() returned NESTEDHVM_VMEXIT_ERROR\n");
+            goto out;
+        case NESTEDHVM_VMEXIT_HOST:
+        case NESTEDHVM_VMEXIT_CONTINUE:
+            break;
+        case NESTEDHVM_VMEXIT_FATALERROR:
+            gdprintk(XENLOG_ERR, "unexpected nestedhvm error\n");
+            goto exit_and_crash;
+        default:
+            gdprintk(XENLOG_INFO, "nestedhvm_vcpu_vmexit returned %i\n",
+                nsret);
+            goto exit_and_crash;
+        }
+    }
+
     if ( hvm_long_mode_enabled(v) )
         HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason,
                     (uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32),
@@ -1443,7 +2692,7 @@ asmlinkage void svm_vmexit_handler(struc
 
     if ( unlikely(exit_reason == VMEXIT_INVALID) )
     {
-        svm_dump_vmcb(__func__, vmcb);
+        svm_vmcb_dump(__func__, vmcb);
         goto exit_and_crash;
     }
 
@@ -1517,7 +2766,26 @@ asmlinkage void svm_vmexit_handler(struc
             break;
         }
 
-        hvm_inject_exception(TRAP_page_fault, regs->error_code, va);
+        if ( vcpu_guestmode && !nestedhvm_paging_mode_hap(v) ) {
+            int ret;
+
+            /* We assume that the guest intercepts #PF, so it will
+             * get a VMEXIT(#PF).
+             */
+            ret = hvm_inject_exception(TRAP_page_fault, regs->error_code, va);
+            if ( ret == 1 )
+                goto out;
+            else
+                /* VMEXIT(#PF) injection failed or nested guest got #PF.
+                 * Either case is unexpected.
+                 */
+                goto exit_and_crash;
+        }
+
+        /* Don't use hvm_inject_exception() here because we *always*
+         * want to inject the exception into the (nested) guest directly.
+         */
+        svm_inject_exception(TRAP_page_fault, regs->error_code, va);
         break;
     }
 
@@ -1591,6 +2859,10 @@ asmlinkage void svm_vmexit_handler(struc
     case VMEXIT_VMMCALL:
         if ( (inst_len = __get_instruction_length(v, INSTR_VMCALL)) == 0 )
             break;
+        if ( vcpu_guestmode ) {
+            hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+            break;
+        }
         HVMTRACE_1D(VMMCALL, regs->eax);
         rc = hvm_do_hypercall(regs);
         if ( rc != HVM_HCALL_preempted )
@@ -1623,11 +2895,25 @@ asmlinkage void svm_vmexit_handler(struc
 
     case VMEXIT_MONITOR:
     case VMEXIT_MWAIT:
+        hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+        break;
+
     case VMEXIT_VMRUN:
+        svm_vmexit_do_vmrun(regs, v,
+                            regs->eax);
+        break;
     case VMEXIT_VMLOAD:
+        svm_vmexit_do_vmload(vmcb, regs, v, regs->eax);
+        break;
     case VMEXIT_VMSAVE:
+        svm_vmexit_do_vmsave(vmcb, regs, v, regs->eax);
+        break;
     case VMEXIT_STGI:
+        svm_vmexit_do_stgi(regs, v);
+        break;
     case VMEXIT_CLGI:
+        svm_vmexit_do_clgi(regs, v);
+        break;
     case VMEXIT_SKINIT:
         hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
         break;
@@ -1662,7 +2948,7 @@ asmlinkage void svm_vmexit_handler(struc
 
     default:
     exit_and_crash:
-        gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
+        gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%"PRIx64", "
                  "exitinfo1 = %"PRIx64", exitinfo2 = %"PRIx64"\n",
                  exit_reason, 
                  (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
@@ -1670,6 +2956,11 @@ asmlinkage void svm_vmexit_handler(struc
         break;
     }
 
+  out:
+    if ( vcpu_guestmode )
+        /* Don't clobber TPR of the nested guest. */
+        return;
+
     /* The exit may have updated the TPR: reflect this in the hardware vtpr */
     vmcb->vintr.fields.tpr = 
         (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
diff -r 1a1837336ffd -r c6a5143c1c33 xen/arch/x86/hvm/svm/svmdebug.c
--- /dev/null
+++ b/xen/arch/x86/hvm/svm/svmdebug.c
@@ -0,0 +1,185 @@
+/*
+ * svmdebug.c: debug functions
+ * Copyright (c) 2010, Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/msr-index.h>
+#include <asm/hvm/svm/svmdebug.h>
+
+/* Print one segment register: selector, attributes, limit and base. */
+static void svm_dump_sel(const char *name, const svm_segment_register_t *s)
+{
+    printk("%s: sel=0x%04x, attr=0x%04x, limit=0x%08x, base=0x%016llx\n",
+           name, s->sel, s->attr.bytes, s->limit, (unsigned long long)s->base);
+}
+
+/* Print the fields of @vmcb via printk(); @from names the calling site. */
+void svm_vmcb_dump(const char *from, struct vmcb_struct *vmcb)
+{
+    printk("Dumping guest's current state at %s...\n", from);
+    printk("Size of VMCB = %d, address = %p\n",
+            (int) sizeof(struct vmcb_struct), vmcb);
+
+    printk("cr_intercepts = 0x%08x dr_intercepts = 0x%08x "
+           "exception_intercepts = 0x%08x\n",
+           vmcb->cr_intercepts, vmcb->dr_intercepts,
+           vmcb->exception_intercepts);
+    printk("general1_intercepts = 0x%08x general2_intercepts = 0x%08x\n",
+           vmcb->general1_intercepts, vmcb->general2_intercepts);
+    printk("iopm_base_pa = 0x%016llx msrpm_base_pa = 0x%016llx tsc_offset = "
+            "0x%016llx\n",
+           (unsigned long long) vmcb->iopm_base_pa,
+           (unsigned long long) vmcb->msrpm_base_pa,
+           (unsigned long long) vmcb->tsc_offset);
+    printk("tlb_control = 0x%08x vintr = 0x%016llx interrupt_shadow = "
+            "0x%016llx\n", vmcb->tlb_control,
+           (unsigned long long) vmcb->vintr.bytes,
+           (unsigned long long) vmcb->interrupt_shadow);
+    printk("exitcode = 0x%016llx exitintinfo = 0x%016llx\n",
+           (unsigned long long) vmcb->exitcode,
+           (unsigned long long) vmcb->exitintinfo.bytes);
+    printk("exitinfo1 = 0x%016llx exitinfo2 = 0x%016llx \n",
+           (unsigned long long) vmcb->exitinfo1,
+           (unsigned long long) vmcb->exitinfo2);
+    printk("np_enable = 0x%016llx guest_asid = 0x%03x\n",
+           (unsigned long long) vmcb->np_enable, vmcb->guest_asid);
+    printk("cpl = %d efer = 0x%016llx star = 0x%016llx lstar = 0x%016llx\n",
+           vmcb->cpl, (unsigned long long) vmcb->efer,
+           (unsigned long long) vmcb->star, (unsigned long long) vmcb->lstar);
+    printk("CR0 = 0x%016llx CR2 = 0x%016llx\n",
+           (unsigned long long) vmcb->cr0, (unsigned long long) vmcb->cr2);
+    printk("CR3 = 0x%016llx CR4 = 0x%016llx\n",
+           (unsigned long long) vmcb->cr3, (unsigned long long) vmcb->cr4);
+    printk("RSP = 0x%016llx  RIP = 0x%016llx\n",
+           (unsigned long long) vmcb->rsp, (unsigned long long) vmcb->rip);
+    printk("RAX = 0x%016llx  RFLAGS=0x%016llx\n",
+           (unsigned long long) vmcb->rax, (unsigned long long) vmcb->rflags);
+    printk("DR6 = 0x%016llx, DR7 = 0x%016llx\n",
+           (unsigned long long) vmcb->dr6, (unsigned long long) vmcb->dr7);
+    printk("CSTAR = 0x%016llx SFMask = 0x%016llx\n",
+           (unsigned long long)vmcb->cstar, (unsigned long long)vmcb->sfmask);
+    printk("KernGSBase = 0x%016llx PAT = 0x%016llx \n",
+           (unsigned long long) vmcb->kerngsbase,
+           (unsigned long long) vmcb->g_pat);
+    printk("H_CR3 = 0x%016llx\n", (unsigned long long)vmcb->h_cr3);
+
+    /* Segment and descriptor-table registers, one line each. */
+    svm_dump_sel("CS", &vmcb->cs);
+    svm_dump_sel("DS", &vmcb->ds);
+    svm_dump_sel("SS", &vmcb->ss);
+    svm_dump_sel("ES", &vmcb->es);
+    svm_dump_sel("FS", &vmcb->fs);
+    svm_dump_sel("GS", &vmcb->gs);
+    svm_dump_sel("GDTR", &vmcb->gdtr);
+    svm_dump_sel("LDTR", &vmcb->ldtr);
+    svm_dump_sel("IDTR", &vmcb->idtr);
+    svm_dump_sel("TR", &vmcb->tr);
+}
+
+bool_t
+svm_vmcb_isvalid(const char *from, struct vmcb_struct *vmcb,
+                 bool_t verbose)
+{
+    bool_t ret = 0; /* NB: 0 = every check passed, 1 = some check failed */
+    /* PRINTF: if verbose, log the failure and go on; else return 1 at once. */
+#define PRINTF(...) \
+    if (verbose) { ret = 1; printk("%s: ", from); printk(__VA_ARGS__); \
+    } else return 1;
+
+    if ((vmcb->efer & EFER_SVME) == 0) {
+        PRINTF("EFER: SVME bit not set (0x%"PRIx64")\n", vmcb->efer);
+    }
+
+    if ((vmcb->cr0 & X86_CR0_CD) == 0 && (vmcb->cr0 & X86_CR0_NW) != 0) {
+        PRINTF("CR0: CD bit is zero and NW bit set (0x%"PRIx64")\n",
+                vmcb->cr0);
+    }
+
+    if ((vmcb->cr0 >> 32U) != 0) {
+        PRINTF("CR0: bits [63:32] are not zero (0x%"PRIx64")\n",
+                vmcb->cr0);
+    }
+
+    if ((vmcb->cr3 & 0x7) != 0) {
+        PRINTF("CR3: MBZ bits are set (0x%"PRIx64")\n", vmcb->cr3);
+    }
+    if ((vmcb->efer & EFER_LMA) && (vmcb->cr3 & 0xfe) != 0) {
+        PRINTF("CR3: MBZ bits are set (0x%"PRIx64")\n", vmcb->cr3);
+    }
+
+    if ((vmcb->cr4 >> 11U) != 0) {
+        PRINTF("CR4: bits [63:11] are not zero (0x%"PRIx64")\n",
+                vmcb->cr4);
+    }
+
+    if ((vmcb->dr6 >> 32U) != 0) {
+        PRINTF("DR6: bits [63:32] are not zero (0x%"PRIx64")\n",
+                vmcb->dr6);
+    }
+
+    if ((vmcb->dr7 >> 32U) != 0) {
+        PRINTF("DR7: bits [63:32] are not zero (0x%"PRIx64")\n",
+                vmcb->dr7);
+    }
+
+    if ((vmcb->efer >> 15U) != 0) {
+        PRINTF("EFER: bits [63:15] are not zero (0x%"PRIx64")\n",
+                vmcb->efer);
+    }
+
+    if ((vmcb->efer & EFER_LME) != 0 && ((vmcb->cr0 & X86_CR0_PG) != 0)) {
+        if ((vmcb->cr4 & X86_CR4_PAE) == 0) {
+            PRINTF("EFER_LME and CR0.PG are both set and CR4.PAE is zero.\n");
+        }
+        if ((vmcb->cr0 & X86_CR0_PE) == 0) {
+            PRINTF("EFER_LME and CR0.PG are both set and CR0.PE is zero.\n");
+        }
+    }
+
+    if ((vmcb->efer & EFER_LME) != 0
+        && (vmcb->cr0 & X86_CR0_PG) != 0
+        && (vmcb->cr4 & X86_CR4_PAE) != 0
+        && (vmcb->cs.attr.fields.l != 0)
+        && (vmcb->cs.attr.fields.db != 0))
+    {
+        PRINTF("EFER_LME, CR0.PG, CR4.PAE, CS.L and CS.D are all non-zero.\n");
+    }
+
+    if ((vmcb->general2_intercepts & GENERAL2_INTERCEPT_VMRUN) == 0) {
+        PRINTF("GENERAL2_INTERCEPT: VMRUN intercept bit is clear (0x%"PRIx32")\n",
+            vmcb->general2_intercepts);
+    }
+
+    if (vmcb->eventinj.fields.resvd1 != 0) {
+        PRINTF("eventinj: MBZ bits are set (0x%"PRIx64")\n",
+                vmcb->eventinj.bytes);
+    }
+
+#undef PRINTF
+    return ret;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 1a1837336ffd -r c6a5143c1c33 xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c
+++ b/xen/arch/x86/hvm/svm/vmcb.c
@@ -33,6 +33,7 @@
 #include <asm/hvm/svm/svm.h>
 #include <asm/hvm/svm/intr.h>
 #include <asm/hvm/svm/asid.h>
+#include <asm/hvm/svm/svmdebug.h>
 #include <xen/event.h>
 #include <xen/kernel.h>
 #include <xen/domain_page.h>
@@ -75,37 +76,6 @@ struct host_save_area *alloc_host_save_a
     return hsa;
 }
 
-void svm_intercept_msr(struct vcpu *v, uint32_t msr, int enable)
-{
-    unsigned long *msr_bitmap = v->arch.hvm_svm.msrpm;
-    unsigned long *msr_bit = NULL;
-
-    /*
-     * See AMD64 Programmers Manual, Vol 2, Section 15.10 (MSR-Bitmap Address).
-     */
-    if ( msr <= 0x1fff )
-        msr_bit = msr_bitmap + 0x0000 / BYTES_PER_LONG;
-    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
-        msr_bit = msr_bitmap + 0x0800 / BYTES_PER_LONG;
-    else if ( (msr >= 0xc0010000) && (msr <= 0xc0011fff) )
-        msr_bit = msr_bitmap + 0x1000 / BYTES_PER_LONG;
-
-    BUG_ON(msr_bit == NULL);
-
-    msr &= 0x1fff;
-
-    if ( enable )
-    {
-        __set_bit(msr * 2, msr_bit);
-        __set_bit(msr * 2 + 1, msr_bit);
-    }
-    else
-    {
-        __clear_bit(msr * 2, msr_bit);
-        __clear_bit(msr * 2 + 1, msr_bit);
-    }
-}
-
 static int construct_vmcb(struct vcpu *v)
 {
     struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
@@ -298,76 +268,6 @@ void svm_destroy_vmcb(struct vcpu *v)
     arch_svm->vmcb = NULL;
 }
 
-static void svm_dump_sel(char *name, svm_segment_register_t *s)
-{
-    printk("%s: sel=0x%04x, attr=0x%04x, limit=0x%08x, base=0x%016llx\n", 
-           name, s->sel, s->attr.bytes, s->limit,
-           (unsigned long long)s->base);
-}
-
-void svm_dump_vmcb(const char *from, struct vmcb_struct *vmcb)
-{
-    printk("Dumping guest's current state at %s...\n", from);
-    printk("Size of VMCB = %d, address = %p\n", 
-            (int) sizeof(struct vmcb_struct), vmcb);
-
-    printk("cr_intercepts = 0x%08x dr_intercepts = 0x%08x "
-           "exception_intercepts = 0x%08x\n", 
-           vmcb->cr_intercepts, vmcb->dr_intercepts, 
-           vmcb->exception_intercepts);
-    printk("general1_intercepts = 0x%08x general2_intercepts = 0x%08x\n", 
-           vmcb->general1_intercepts, vmcb->general2_intercepts);
-    printk("iopm_base_pa = %016llx msrpm_base_pa = 0x%016llx tsc_offset = "
-            "0x%016llx\n", 
-           (unsigned long long) vmcb->iopm_base_pa,
-           (unsigned long long) vmcb->msrpm_base_pa,
-           (unsigned long long) vmcb->tsc_offset);
-    printk("tlb_control = 0x%08x vintr = 0x%016llx interrupt_shadow = "
-            "0x%016llx\n", vmcb->tlb_control,
-           (unsigned long long) vmcb->vintr.bytes,
-           (unsigned long long) vmcb->interrupt_shadow);
-    printk("exitcode = 0x%016llx exitintinfo = 0x%016llx\n", 
-           (unsigned long long) vmcb->exitcode,
-           (unsigned long long) vmcb->exitintinfo.bytes);
-    printk("exitinfo1 = 0x%016llx exitinfo2 = 0x%016llx \n",
-           (unsigned long long) vmcb->exitinfo1,
-           (unsigned long long) vmcb->exitinfo2);
-    printk("np_enable = 0x%016llx guest_asid = 0x%03x\n", 
-           (unsigned long long) vmcb->np_enable, vmcb->guest_asid);
-    printk("cpl = %d efer = 0x%016llx star = 0x%016llx lstar = 0x%016llx\n", 
-           vmcb->cpl, (unsigned long long) vmcb->efer,
-           (unsigned long long) vmcb->star, (unsigned long long) vmcb->lstar);
-    printk("CR0 = 0x%016llx CR2 = 0x%016llx\n",
-           (unsigned long long) vmcb->cr0, (unsigned long long) vmcb->cr2);
-    printk("CR3 = 0x%016llx CR4 = 0x%016llx\n", 
-           (unsigned long long) vmcb->cr3, (unsigned long long) vmcb->cr4);
-    printk("RSP = 0x%016llx  RIP = 0x%016llx\n", 
-           (unsigned long long) vmcb->rsp, (unsigned long long) vmcb->rip);
-    printk("RAX = 0x%016llx  RFLAGS=0x%016llx\n",
-           (unsigned long long) vmcb->rax, (unsigned long long) vmcb->rflags);
-    printk("DR6 = 0x%016llx, DR7 = 0x%016llx\n", 
-           (unsigned long long) vmcb->dr6, (unsigned long long) vmcb->dr7);
-    printk("CSTAR = 0x%016llx SFMask = 0x%016llx\n",
-           (unsigned long long) vmcb->cstar, 
-           (unsigned long long) vmcb->sfmask);
-    printk("KernGSBase = 0x%016llx PAT = 0x%016llx \n", 
-           (unsigned long long) vmcb->kerngsbase,
-           (unsigned long long) vmcb->g_pat);
-    printk("H_CR3 = 0x%016llx\n", (unsigned long long)vmcb->h_cr3);
-
-    /* print out all the selectors */
-    svm_dump_sel("CS", &vmcb->cs);
-    svm_dump_sel("DS", &vmcb->ds);
-    svm_dump_sel("SS", &vmcb->ss);
-    svm_dump_sel("ES", &vmcb->es);
-    svm_dump_sel("FS", &vmcb->fs);
-    svm_dump_sel("GS", &vmcb->gs);
-    svm_dump_sel("GDTR", &vmcb->gdtr);
-    svm_dump_sel("LDTR", &vmcb->ldtr);
-    svm_dump_sel("IDTR", &vmcb->idtr);
-    svm_dump_sel("TR", &vmcb->tr);
-}
-
 static void vmcb_dump(unsigned char ch)
 {
     struct domain *d;
@@ -385,7 +285,7 @@ static void vmcb_dump(unsigned char ch)
         for_each_vcpu ( d, v )
         {
             printk("\tVCPU %d\n", v->vcpu_id);
-            svm_dump_vmcb("key_handler", v->arch.hvm_svm.vmcb);
+            svm_vmcb_dump("key_handler", v->arch.hvm_svm.vmcb);
         }
     }
 
diff -r 1a1837336ffd -r c6a5143c1c33 xen/include/asm-x86/hvm/svm/emulate.h
--- a/xen/include/asm-x86/hvm/svm/emulate.h
+++ b/xen/include/asm-x86/hvm/svm/emulate.h
@@ -31,6 +31,11 @@ enum instruction_index {
     INSTR_HLT,
     INSTR_INT3,
     INSTR_RDTSC,
+    INSTR_VMRUN,
+    INSTR_VMLOAD,
+    INSTR_VMSAVE,
+    INSTR_STGI,
+    INSTR_CLGI,
     INSTR_MAX_COUNT /* Must be last - Number of instructions supported */
 };
 
diff -r 1a1837336ffd -r c6a5143c1c33 xen/include/asm-x86/hvm/svm/svm.h
--- a/xen/include/asm-x86/hvm/svm/svm.h
+++ b/xen/include/asm-x86/hvm/svm/svm.h
@@ -29,8 +29,6 @@
 #include <asm/i387.h>
 #include <asm/hvm/vpmu.h>
 
-void svm_dump_vmcb(const char *from, struct vmcb_struct *vmcb);
-
 #define SVM_REG_EAX (0) 
 #define SVM_REG_ECX (1) 
 #define SVM_REG_EDX (2) 
diff -r 1a1837336ffd -r c6a5143c1c33 xen/include/asm-x86/hvm/svm/svmdebug.h
--- /dev/null
+++ b/xen/include/asm-x86/hvm/svm/svmdebug.h
@@ -0,0 +1,30 @@
+/*
+ * svmdebug.h: SVM related debug definitions
+ * Copyright (c) 2010, AMD Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef __ASM_X86_HVM_SVM_SVMDEBUG_H__
+#define __ASM_X86_HVM_SVM_SVMDEBUG_H__
+
+#include <asm/types.h>
+#include <asm/hvm/svm/vmcb.h>
+
+void svm_vmcb_dump(const char *from, struct vmcb_struct *vmcb);
+bool_t svm_vmcb_isvalid(const char *from, struct vmcb_struct *vmcb,
+                        bool_t verbose);
+
+#endif /* __ASM_X86_HVM_SVM_SVMDEBUG_H__ */
diff -r 1a1837336ffd -r c6a5143c1c33 xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -474,6 +474,20 @@ struct arch_svm_struct {
     uint64_t guest_sysenter_eip;
 };
 
+struct nestedsvm { /* SVM nested state, reached as VCPU_NESTEDHVM(v).nh_vm */
+    uint64_t ns_msr_hsavepa; /* value accepted for MSR_K8_VM_HSAVE_PA writes */
+
+    /* Cached real intercepts of the nested guest */
+    uint32_t ns_cr_intercepts;
+    uint32_t ns_dr_intercepts;
+    uint32_t ns_exception_intercepts;
+    uint32_t ns_general1_intercepts;
+    uint32_t ns_general2_intercepts;
+
+    /* Cached real lbr of the nested guest */
+    lbrctrl_t ns_lbr_control;
+};
+
 struct vmcb_struct *alloc_vmcb(void);
 struct host_save_area *alloc_host_save_area(void);
 void free_vmcb(struct vmcb_struct *vmcb);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

             reply	other threads:[~2010-08-05 15:04 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-08-05 15:04 Christoph Egger [this message]
2010-08-09 12:57 ` [PATCH 10/14] Nested Virtualization: svm specific implementation Tim Deegan
2010-08-17 15:49   ` Christoph Egger
2010-08-17 16:57     ` Tim Deegan
2010-08-18  7:48       ` Christoph Egger
2010-08-18 10:45         ` Tim Deegan
2010-08-18 10:54           ` Christoph Egger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=201008051704.03074.Christoph.Egger@amd.com \
    --to=christoph.egger@amd.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).