* [PATCH 09/13] Nested Virtualization: svm specific implementation
@ 2010-09-01 15:14 Christoph Egger
0 siblings, 0 replies; 5+ messages in thread
From: Christoph Egger @ 2010-09-01 15:14 UTC (permalink / raw)
To: xen-devel@lists.xensource.com; +Cc: Dong, Eddie, Tim Deegan
[-- Attachment #1: Type: text/plain, Size: 322 bytes --]
Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
--
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632
[-- Attachment #2: xen_nh09_svm.diff --]
[-- Type: text/x-diff, Size: 65068 bytes --]
# HG changeset patch
# User cegger
# Date 1283345885 -7200
Implement SVM specific part for Nested Virtualization
diff -r 0fcb32521d57 -r fa992936dba4 xen/arch/x86/hvm/svm/Makefile
--- a/xen/arch/x86/hvm/svm/Makefile
+++ b/xen/arch/x86/hvm/svm/Makefile
@@ -3,5 +3,6 @@ obj-y += emulate.o
obj-y += entry.o
obj-y += intr.o
obj-y += svm.o
+obj-y += svmdebug.o
obj-y += vmcb.o
obj-y += vpmu.o
diff -r 0fcb32521d57 -r fa992936dba4 xen/arch/x86/hvm/svm/emulate.c
--- a/xen/arch/x86/hvm/svm/emulate.c
+++ b/xen/arch/x86/hvm/svm/emulate.c
@@ -101,6 +101,11 @@ MAKE_INSTR(HLT, 1, 0xf4);
MAKE_INSTR(INT3, 1, 0xcc);
MAKE_INSTR(RDTSC, 2, 0x0f, 0x31);
MAKE_INSTR(PAUSE, 1, 0x90);
+MAKE_INSTR(VMRUN, 3, 0x0f, 0x01, 0xd8);
+MAKE_INSTR(VMLOAD, 3, 0x0f, 0x01, 0xda);
+MAKE_INSTR(VMSAVE, 3, 0x0f, 0x01, 0xdb);
+MAKE_INSTR(STGI, 3, 0x0f, 0x01, 0xdc);
+MAKE_INSTR(CLGI, 3, 0x0f, 0x01, 0xdd);
static const u8 *opc_bytes[INSTR_MAX_COUNT] =
{
@@ -114,6 +119,11 @@ static const u8 *opc_bytes[INSTR_MAX_COU
[INSTR_INT3] = OPCODE_INT3,
[INSTR_RDTSC] = OPCODE_RDTSC,
[INSTR_PAUSE] = OPCODE_PAUSE,
+ [INSTR_VMRUN] = OPCODE_VMRUN,
+ [INSTR_VMLOAD] = OPCODE_VMLOAD,
+ [INSTR_VMSAVE] = OPCODE_VMSAVE,
+ [INSTR_STGI] = OPCODE_STGI,
+ [INSTR_CLGI] = OPCODE_CLGI,
};
static int fetch(struct vcpu *v, u8 *buf, unsigned long addr, int len)
diff -r 0fcb32521d57 -r fa992936dba4 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -49,6 +49,8 @@
#include <asm/hvm/svm/vmcb.h>
#include <asm/hvm/svm/emulate.h>
#include <asm/hvm/svm/intr.h>
+#include <asm/hvm/svm/svmdebug.h>
+#include <asm/hvm/nestedhvm.h>
#include <asm/x86_emulate.h>
#include <public/sched.h>
#include <asm/hvm/vpt.h>
@@ -108,6 +110,44 @@ static void svm_cpu_down(void)
write_efer(read_efer() & ~EFER_SVME);
}
+/*
+ * Locate the region of an MSR permission map that covers @msr.
+ *
+ * Three MSR ranges are recognised, each 0x2000 MSRs wide; their regions
+ * start at byte offsets 0x0000, 0x0800 and 0x1000 of @msr_bitmap
+ * (see AMD64 Programmers Manual, Vol 2, Section 15.10, MSR-Bitmap Address).
+ *
+ * Returns a pointer to the first word of the matching region, or NULL
+ * when @msr lies outside all three ranges.
+ */
+static unsigned long *
+svm_msrbit(unsigned long *msr_bitmap, uint32_t msr)
+{
+    if ( msr <= 0x1fff )
+        return msr_bitmap + 0x0000 / BYTES_PER_LONG;
+
+    if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+        return msr_bitmap + 0x0800 / BYTES_PER_LONG;
+
+    if ( (msr >= 0xc0010000) && (msr <= 0xc0011fff) )
+        return msr_bitmap + 0x1000 / BYTES_PER_LONG;
+
+    return NULL;
+}
+
+/*
+ * Switch interception of @msr on (@enable non-zero) or off for vcpu @v.
+ *
+ * Every MSR owns two adjacent bits in its permission-map region; both
+ * bits are set or cleared together. An MSR that svm_msrbit() does not
+ * know about triggers BUG_ON().
+ */
+void svm_intercept_msr(struct vcpu *v, uint32_t msr, int enable)
+{
+    unsigned long *region = svm_msrbit(v->arch.hvm_svm.msrpm, msr);
+    uint32_t first_bit;
+
+    BUG_ON(region == NULL);
+
+    /* Position inside the region: low 13 bits of the MSR, two bits each. */
+    first_bit = (msr & 0x1fff) * 2;
+
+    if ( !enable )
+    {
+        __clear_bit(first_bit, region);
+        __clear_bit(first_bit + 1, region);
+        return;
+    }
+
+    __set_bit(first_bit, region);
+    __set_bit(first_bit + 1, region);
+}
+
static enum handler_return
long_mode_do_msr_write(unsigned int msr, uint64_t msr_content)
{
@@ -325,7 +365,7 @@ static int svm_load_vmcb_ctxt(struct vcp
{
svm_load_cpu_state(v, ctxt);
if (svm_vmcb_restore(v, ctxt)) {
- printk("svm_vmcb restore failed!\n");
+ gdprintk(XENLOG_ERR, "svm_vmcb restore failed!\n");
domain_crash(v->domain);
return -EINVAL;
}
@@ -692,8 +732,10 @@ static void svm_ctxt_switch_to(struct vc
static void svm_do_resume(struct vcpu *v)
{
bool_t debug_state = v->domain->debugger_attached;
-
- if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
+ bool_t guestmode = nestedhvm_vcpu_in_guestmode(v);
+
+ if ( !guestmode &&
+ unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
{
uint32_t mask = (1U << TRAP_debug) | (1U << TRAP_int3);
v->arch.hvm_vcpu.debug_state_latch = debug_state;
@@ -712,11 +754,14 @@ static void svm_do_resume(struct vcpu *v
hvm_asid_flush_vcpu(v);
}
- /* Reflect the vlapic's TPR in the hardware vtpr */
- v->arch.hvm_svm.vmcb->vintr.fields.tpr =
- (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
-
- hvm_do_resume(v);
+ if ( !guestmode )
+ {
+ /* Reflect the vlapic's TPR in the hardware vtpr */
+ v->arch.hvm_svm.vmcb->vintr.fields.tpr =
+ (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
+
+ hvm_do_resume(v);
+ }
reset_stack_and_jump(svm_asm_do_resume);
}
@@ -861,6 +906,998 @@ static void svm_init_erratum_383(struct
}
}
+/*
+ * Nested SVM
+ */
+static int nsvm_vcpu_destroy(struct vcpu *v);
+
+/*
+ * Allocate the per-vcpu nested SVM state: the host save VMCB, the cached
+ * and the merged MSR permission maps (both zeroed), the nested guest VMCB,
+ * the zeroed struct nestedsvm and its temporary VMCB.
+ *
+ * Returns 0 on success. If any allocation fails, everything allocated so
+ * far is released through nsvm_vcpu_destroy() and -ENOMEM is returned.
+ */
+static int nsvm_vcpu_initialise(struct vcpu *v)
+{
+    struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+    struct nestedsvm *svm;
+    void *bitmap;
+
+    ASSERT(hvm->nh_hostsave == NULL);
+
+    hvm->nh_hostsave = alloc_vmcb();
+    if ( hvm->nh_hostsave == NULL )
+        goto err;
+
+    /* Copy of the guest's MSR permission map. */
+    bitmap = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0);
+    hvm->nh_cached_msrpm = bitmap;
+    if ( bitmap == NULL )
+        goto err;
+    memset(bitmap, 0x0, MSRPM_SIZE);
+    hvm->nh_cached_msrpm_size = MSRPM_SIZE;
+
+    /* Permission map handed to the hardware while the nested guest runs. */
+    bitmap = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0);
+    hvm->nh_merged_msrpm = bitmap;
+    if ( bitmap == NULL )
+        goto err;
+    memset(bitmap, 0x0, MSRPM_SIZE);
+    hvm->nh_merged_msrpm_size = MSRPM_SIZE;
+
+    hvm->nh_vm = alloc_vmcb();
+    hvm->nh_vmsize = sizeof(struct vmcb_struct);
+    if ( hvm->nh_vm == NULL )
+        goto err;
+
+    svm = xmalloc_bytes(sizeof(*svm));
+    hvm->nh_arch = svm;
+    if ( svm == NULL )
+        goto err;
+    hvm->nh_arch_size = sizeof(*svm);
+    memset(svm, 0x0, sizeof(*svm));
+
+    svm->ns_tmpvmcb = alloc_vmcb();
+    if ( svm->ns_tmpvmcb == NULL )
+        goto err;
+
+    return 0;
+
+err:
+    nsvm_vcpu_destroy(v);
+    return -ENOMEM;
+}
+
+/*
+ * Release everything nsvm_vcpu_initialise() allocated for vcpu @v.
+ *
+ * Each resource is freed only if its pointer is set and the pointer is
+ * cleared afterwards, so this also serves as the cleanup path of a
+ * partially completed initialisation. Always returns 0.
+ */
+static int nsvm_vcpu_destroy(struct vcpu *v)
+{
+    struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+    struct nestedsvm *svm = hvm->nh_arch;
+
+    if ( hvm->nh_vm != NULL )
+    {
+        free_vmcb(hvm->nh_vm);
+        hvm->nh_vm = NULL;
+    }
+
+    if ( hvm->nh_hostsave != NULL )
+    {
+        free_vmcb(hvm->nh_hostsave);
+        hvm->nh_hostsave = NULL;
+    }
+
+    if ( hvm->nh_cached_msrpm != NULL )
+    {
+        ASSERT(hvm->nh_cached_msrpm_size > 0);
+        free_xenheap_pages(hvm->nh_cached_msrpm,
+                           get_order_from_bytes(hvm->nh_cached_msrpm_size));
+        hvm->nh_cached_msrpm = NULL;
+        hvm->nh_cached_msrpm_size = 0;
+    }
+
+    if ( hvm->nh_merged_msrpm != NULL )
+    {
+        ASSERT(hvm->nh_merged_msrpm_size > 0);
+        free_xenheap_pages(hvm->nh_merged_msrpm,
+                           get_order_from_bytes(hvm->nh_merged_msrpm_size));
+        hvm->nh_merged_msrpm = NULL;
+        hvm->nh_merged_msrpm_size = 0;
+    }
+
+    if ( svm != NULL )
+    {
+        /* The temporary VMCB hangs off the arch state; free it first. */
+        if ( svm->ns_tmpvmcb )
+            free_vmcb(svm->ns_tmpvmcb);
+        xfree(hvm->nh_arch);
+        hvm->nh_arch = NULL;
+        hvm->nh_arch_size = 0;
+    }
+
+    return 0;
+}
+
+/*
+ * Put the nested SVM state of vcpu @v into its reset state: nested paging
+ * is switched off in the nested guest VMCB, its guest PAT gets the reset
+ * value and the maximum VM address is set. Always returns 0.
+ */
+static int nsvm_vcpu_reset(struct vcpu *v)
+{
+    struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+    struct vmcb_struct *ns_vmcb = hvm->nh_vm;
+
+    ns_vmcb->np_enable = 0;
+    ns_vmcb->g_pat = MSR_IA32_CR_PAT_RESET;
+
+    hvm->nh_vmmaxaddr = 0xfd00000000ULL;
+
+    return 0;
+}
+
+/*
+ * Nothing to adjust here: the SVM features are handled via cpuid by
+ * tools/libxc, so none of the register pointers is touched.
+ * Always returns 0.
+ */
+static int nsvm_vcpu_features(struct vcpu *v,
+    uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
+{
+    return 0;
+}
+
+/*
+ * Copy the twelve VMCB fields listed below from @from to @to; all other
+ * fields of @to are left alone. Callers in this file use it to carry the
+ * VMSAVE/VMLOAD-handled register values across a whole-VMCB copy.
+ */
+static void nsvm_vmcb_loadsave(struct vmcb_struct *from,
+    struct vmcb_struct *to)
+{
+    /* Segment state. */
+    to->fs   = from->fs;
+    to->gs   = from->gs;
+    to->tr   = from->tr;
+    to->ldtr = from->ldtr;
+
+    /* Base and SYSCALL related values. */
+    to->kerngsbase = from->kerngsbase;
+    to->star       = from->star;
+    to->lstar      = from->lstar;
+    to->cstar      = from->cstar;
+    to->sfmask     = from->sfmask;
+
+    /* SYSENTER related values. */
+    to->sysenter_cs  = from->sysenter_cs;
+    to->sysenter_esp = from->sysenter_esp;
+    to->sysenter_eip = from->sysenter_eip;
+}
+
+/*
+ * Save the current (host) guest state of vcpu @v into the host save area
+ * (hvm->nh_hostsave) before the nested guest is entered.
+ *
+ * The active VMCB is copied as a whole, the saved rip is advanced by
+ * @inst_len, the interrupt flag is remembered in nh_hostflags and the
+ * saved efer/cr0/cr2/cr3/cr4 are replaced by the guest-visible values
+ * kept in v->arch.hvm_vcpu. Always returns 0.
+ */
+static int nsvm_vcpu_hostsave(struct vcpu *v, unsigned int inst_len)
+{
+    struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+    struct vmcb_struct *hsave, *vmcb;
+
+    hsave = hvm->nh_hostsave;
+    vmcb = v->arch.hvm_svm.vmcb;
+
+    memcpy(hsave, vmcb, sizeof(struct vmcb_struct));
+    hsave->rip += inst_len;
+
+    /* Remember the host interrupt flag */
+    hvm->nh_hostflags.fields.rflagsif = (hsave->rflags & X86_EFLAGS_IF) ? 1 : 0;
+
+    /*
+     * Paging state. The former unbraced
+     * "if (nestedhvm_paging_mode_hap(v)) hsave->cr3 = vmcb->cr3;" was a
+     * dead store: cr3 is unconditionally assigned by the if/else below,
+     * so only the host paging mode decides which cr3 gets saved.
+     */
+    hsave->h_cr3 = vmcb->h_cr3;
+    if ( paging_mode_hap(v->domain) )
+    {
+        /* Host uses nested paging: the VMCB holds the guest's own cr3. */
+        hsave->cr3 = vmcb->cr3;
+    }
+    else
+    {
+        /* Host uses shadow paging: take the guest-visible cr3. */
+        hsave->cr3 = v->arch.hvm_vcpu.guest_cr[3];
+    }
+
+    hsave->efer = v->arch.hvm_vcpu.guest_efer;
+    hsave->cr0 = v->arch.hvm_vcpu.guest_cr[0];
+    hsave->cr2 = v->arch.hvm_vcpu.guest_cr[2];
+    hsave->cr4 = v->arch.hvm_vcpu.guest_cr[4];
+
+    return 0;
+}
+
+/*
+ * Restore the state kept in the host save area (hvm->nh_hostsave) into
+ * the active VMCB of vcpu @v.
+ *
+ * The save area is copied over the VMCB, EFER/CR4/CR0/CR2/CR3 are
+ * re-applied through the hvm_set_*() / hvm_update_guest_cr() helpers and
+ * eax/esp/eip/eflags of @regs are loaded from the restored VMCB.
+ * A failing hvm_set_*() call is only logged; the function always
+ * returns 0.
+ */
+static int nsvm_vcpu_hostrestore(struct vcpu *v, struct cpu_user_regs *regs)
+{
+ struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+ struct vmcb_struct *hsave, *vmcb;
+ int rc;
+
+ hsave = hvm->nh_hostsave;
+ vmcb = v->arch.hvm_svm.vmcb;
+
+ /* Must keep register values handled by VMSAVE/VMLOAD: copy them from
+ * the live VMCB into the save area first, so that the memcpy below
+ * does not overwrite them with older values.
+ */
+ nsvm_vmcb_loadsave(vmcb, hsave);
+ memcpy(vmcb, hsave, sizeof(struct vmcb_struct));
+
+ /* EFER */
+ v->arch.hvm_vcpu.guest_efer = vmcb->efer;
+ rc = hvm_set_efer(vmcb->efer);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);
+
+ /* CR4 */
+ v->arch.hvm_vcpu.guest_cr[4] = vmcb->cr4;
+ rc = hvm_set_cr4(vmcb->cr4);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);
+
+ /* CR0: protected mode is forced on and the VM flag is cleared. */
+ v->arch.hvm_vcpu.guest_cr[0] = vmcb->cr0 | X86_CR0_PE;
+ vmcb->rflags &= ~X86_EFLAGS_VM;
+ rc = hvm_set_cr0(vmcb->cr0 | X86_CR0_PE);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);
+
+ /* CR2 */
+ v->arch.hvm_vcpu.guest_cr[2] = vmcb->cr2;
+ hvm_update_guest_cr(v, 2);
+
+ /* CR3 */
+ /* Nested paging mode */
+ if (nestedhvm_paging_mode_hap(v)) {
+ /* host nested paging + guest nested paging. */
+ /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+ } else if (paging_mode_hap(v->domain)) {
+ /* host nested paging + guest shadow paging. */
+ /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+ } else {
+ /* host shadow paging + guest shadow paging. */
+
+ /* Reset MMU context -- XXX (hostrestore) not yet working */
+ if (!pagetable_is_null(v->arch.guest_table))
+ put_page(pagetable_get_page(v->arch.guest_table));
+ v->arch.guest_table = pagetable_null();
+ /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+ }
+ rc = hvm_set_cr3(vmcb->cr3);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
+
+ /* Hand the restored general purpose state back to the caller. */
+ regs->eax = vmcb->rax;
+ regs->esp = vmcb->rsp;
+ regs->eip = vmcb->rip;
+ regs->eflags = vmcb->rflags;
+ vmcb->dr7 = 0; /* disable all breakpoints */
+ vmcb->cpl = 0;
+
+ /* Clear exitintinfo to prevent a fault loop of re-injecting
+ * exceptions forever.
+ */
+ vmcb->exitintinfo.bytes = 0;
+
+ hvm_asid_flush_vcpu(v);
+
+ return 0;
+}
+
+/*
+ * Record the parts of the nested guest VMCB (hvm->nh_vm) that are needed
+ * later on: guest and host cr3, whether the p2m has to be flushed, the
+ * guest ASID, whether the guest enabled nested paging and the guest's
+ * V_INTR_MASK setting. Always returns 0.
+ */
+static int
+nsvm_vcpu_vmrun(struct vcpu *v)
+{
+    struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+    struct vmcb_struct *ns_vmcb = hvm->nh_vm;
+    int asid_changed;
+
+    /* Needed for Nested-on-Nested and Shadow-on-Shadow paging. */
+    hvm->nh_vm_guestcr3 = ns_vmcb->cr3;
+    hvm->nh_vm_hostcr3 = ns_vmcb->h_cr3;
+
+    /*
+     * A flush is wanted when the guest asks for one via tlb_control or
+     * when it switched to another ASID. Compare before the remembered
+     * ASID is updated.
+     */
+    asid_changed = (hvm->nh_guest_asid != ns_vmcb->guest_asid);
+    hvm->nh_flushp2m = (ns_vmcb->tlb_control || asid_changed);
+    hvm->nh_guest_asid = ns_vmcb->guest_asid;
+
+    /* Nested paging for the guest. */
+    hvm->nh_hap_enabled = !!ns_vmcb->np_enable;
+
+    /* Remember the V_INTR_MASK in hostflags. */
+    hvm->nh_hostflags.fields.vintrmask =
+        !!ns_vmcb->vintr.fields.intr_masking;
+
+    return 0;
+}
+
+/*
+ * Translate a generic nested-HVM intercept code into the matching native
+ * SVM VMEXIT_* exit code.
+ *
+ * Both MSR read and MSR write intercepts map to VMEXIT_MSR.
+ * NESTEDHVM_INTERCEPT_LAST is logged and hits BUG(). Any value without a
+ * case label falls out of the switch and yields NESTEDHVM_INTERCEPT_LAST.
+ *
+ * NOTE(review): that fallback returns a generic enum value from a function
+ * whose other return values are native VMEXIT_* codes -- confirm callers
+ * never rely on it.
+ */
+static uint64_t
+nsvm_vmcb_exitcode_generic2native(enum nestedhvm_intercepts exitcode)
+{
+ switch (exitcode) {
+ case NESTEDHVM_INTERCEPT_INVALID:
+ return VMEXIT_INVALID;
+ case NESTEDHVM_INTERCEPT_SHUTDOWN:
+ return VMEXIT_SHUTDOWN;
+ case NESTEDHVM_INTERCEPT_VMMCALL:
+ return VMEXIT_VMMCALL;
+ case NESTEDHVM_INTERCEPT_INTR:
+ return VMEXIT_INTR;
+ case NESTEDHVM_INTERCEPT_NMI:
+ return VMEXIT_NMI;
+ case NESTEDHVM_INTERCEPT_MCE:
+ return VMEXIT_EXCEPTION_MC;
+ case NESTEDHVM_INTERCEPT_MSR_READ:
+ case NESTEDHVM_INTERCEPT_MSR_WRITE:
+ return VMEXIT_MSR;
+ case NESTEDHVM_INTERCEPT_IOIO:
+ return VMEXIT_IOIO;
+ case NESTEDHVM_INTERCEPT_NPF:
+ return VMEXIT_NPF;
+ case NESTEDHVM_INTERCEPT_PF:
+ return VMEXIT_EXCEPTION_PF;
+ case NESTEDHVM_INTERCEPT_DE:
+ return VMEXIT_EXCEPTION_DE;
+ case NESTEDHVM_INTERCEPT_OF:
+ return VMEXIT_EXCEPTION_OF;
+ case NESTEDHVM_INTERCEPT_BR:
+ return VMEXIT_EXCEPTION_BR;
+ case NESTEDHVM_INTERCEPT_UD:
+ return VMEXIT_EXCEPTION_UD;
+ case NESTEDHVM_INTERCEPT_NM:
+ return VMEXIT_EXCEPTION_NM;
+ case NESTEDHVM_INTERCEPT_DF:
+ return VMEXIT_EXCEPTION_DF;
+ case NESTEDHVM_INTERCEPT_09:
+ return VMEXIT_EXCEPTION_09;
+ case NESTEDHVM_INTERCEPT_XF:
+ return VMEXIT_EXCEPTION_XF;
+ case NESTEDHVM_INTERCEPT_DB:
+ return VMEXIT_EXCEPTION_DB;
+ case NESTEDHVM_INTERCEPT_BP:
+ return VMEXIT_EXCEPTION_BP;
+ case NESTEDHVM_INTERCEPT_TS:
+ return VMEXIT_EXCEPTION_TS;
+ case NESTEDHVM_INTERCEPT_NP:
+ return VMEXIT_EXCEPTION_NP;
+ case NESTEDHVM_INTERCEPT_SS:
+ return VMEXIT_EXCEPTION_SS;
+ case NESTEDHVM_INTERCEPT_GP:
+ return VMEXIT_EXCEPTION_GP;
+ case NESTEDHVM_INTERCEPT_15:
+ return VMEXIT_EXCEPTION_15;
+ case NESTEDHVM_INTERCEPT_MF:
+ return VMEXIT_EXCEPTION_MF;
+ case NESTEDHVM_INTERCEPT_AC:
+ return VMEXIT_EXCEPTION_AC;
+
+ /* The end marker is not a real intercept: log it and stop. */
+ case NESTEDHVM_INTERCEPT_LAST:
+ gdprintk(XENLOG_ERR, "generic to native exitcode mapping failed %u\n",
+ exitcode);
+ BUG();
+ return NESTEDHVM_INTERCEPT_LAST;
+ }
+
+ /* Reached only for values that have no case label above. */
+ return NESTEDHVM_INTERCEPT_LAST;
+}
+
+/*
+ * Fill in the exit information of the nested guest VMCB (hvm->nh_vm) for
+ * a #VMEXIT with the native exit code @exitcode.
+ *
+ * The exit code is stored and eventinj is cleared. If a forced vmexit is
+ * flagged in nh_hostflags, the exit code is replaced by the native
+ * translation of hvm->nh_forcevmexit.exitcode and, depending on the
+ * intercept, exitinfo1/exitinfo2 of the VMCB or ecx/eax/edx of @regs are
+ * loaded from hvm->nh_forcevmexit. Always returns 0.
+ */
+static int
+nsvm_vcpu_vmexit(struct vcpu *v, struct cpu_user_regs *regs, uint64_t exitcode)
+{
+ struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+ struct vmcb_struct *ns_vmcb;
+
+ ns_vmcb = hvm->nh_vm;
+
+ ns_vmcb->exitcode = exitcode;
+ ns_vmcb->eventinj.bytes = 0;
+
+ if (hvm->nh_hostflags.fields.forcevmexit) {
+ enum nestedhvm_intercepts nh_exitcode;
+
+ nh_exitcode = hvm->nh_forcevmexit.exitcode;
+
+ switch (nh_exitcode) {
+ /* Intercepts that carry no additional exit information. */
+ case NESTEDHVM_INTERCEPT_INVALID:
+ case NESTEDHVM_INTERCEPT_SHUTDOWN:
+ case NESTEDHVM_INTERCEPT_VMMCALL:
+ break;
+ case NESTEDHVM_INTERCEPT_INTR:
+ case NESTEDHVM_INTERCEPT_NMI:
+ break;
+ case NESTEDHVM_INTERCEPT_PF:
+ /* NOTE(review): cr2 is loaded from ns_vmcb->exitinfo2 before
+ * exitinfo2 is overwritten with the forced fault address below,
+ * so it receives whatever exitinfo2 held on entry. Confirm this
+ * is intended rather than hvm->nh_forcevmexit.exitinfo2.
+ */
+ ns_vmcb->cr2 = ns_vmcb->exitinfo2;
+ /* fall through */
+ case NESTEDHVM_INTERCEPT_NPF:
+ ns_vmcb->exitinfo1 = hvm->nh_forcevmexit.exitinfo1; /* error code */
+ ns_vmcb->exitinfo2 = hvm->nh_forcevmexit.exitinfo2; /* fault address */
+ break;
+ case NESTEDHVM_INTERCEPT_MCE:
+ break;
+ case NESTEDHVM_INTERCEPT_MSR_READ:
+ /* exitinfo1 == 0 marks a read; ecx gets the forced exitinfo1. */
+ ns_vmcb->exitinfo1 = 0;
+ regs->ecx = hvm->nh_forcevmexit.exitinfo1;
+ break;
+ case NESTEDHVM_INTERCEPT_MSR_WRITE:
+ /* exitinfo1 == 1 marks a write; the forced exitinfo2 is split
+ * into eax (low 32 bits) and edx (high 32 bits).
+ */
+ ns_vmcb->exitinfo1 = 1;
+ regs->ecx = hvm->nh_forcevmexit.exitinfo1;
+ regs->eax = (uint32_t)hvm->nh_forcevmexit.exitinfo2;
+ regs->edx = (uint32_t)(hvm->nh_forcevmexit.exitinfo2 >> 32);
+ break;
+ case NESTEDHVM_INTERCEPT_IOIO:
+ /* Not implemented, the path shouldn't run anyway. */
+ BUG();
+ break;
+
+ case NESTEDHVM_INTERCEPT_DE:
+ case NESTEDHVM_INTERCEPT_OF:
+ case NESTEDHVM_INTERCEPT_BR:
+ case NESTEDHVM_INTERCEPT_UD:
+ case NESTEDHVM_INTERCEPT_NM:
+ case NESTEDHVM_INTERCEPT_DF:
+ case NESTEDHVM_INTERCEPT_09:
+ case NESTEDHVM_INTERCEPT_XF:
+ break;
+ case NESTEDHVM_INTERCEPT_DB:
+ case NESTEDHVM_INTERCEPT_BP:
+ case NESTEDHVM_INTERCEPT_TS:
+ break;
+ /* For these intercepts only exitinfo1 is passed on. */
+ case NESTEDHVM_INTERCEPT_NP:
+ case NESTEDHVM_INTERCEPT_SS:
+ case NESTEDHVM_INTERCEPT_GP:
+ case NESTEDHVM_INTERCEPT_15:
+ case NESTEDHVM_INTERCEPT_MF:
+ case NESTEDHVM_INTERCEPT_AC:
+ ns_vmcb->exitinfo1 = hvm->nh_forcevmexit.exitinfo1;
+ break;
+
+ case NESTEDHVM_INTERCEPT_LAST:
+ BUG();
+ break;
+ }
+ /* The forced intercept replaces the exit code stored above. */
+ exitcode = nsvm_vmcb_exitcode_generic2native(nh_exitcode);
+ ns_vmcb->exitcode = exitcode;
+ }
+
+ return 0;
+}
+
+/*
+ * Translate a native SVM @exitcode into the generic nested-HVM intercept
+ * code and collect the accompanying information.
+ *
+ * *info1 and *info2 are zeroed first and then filled per exit code from
+ * the nested guest VMCB (exitinfo1/exitinfo2, or cs attributes and rip
+ * for #DB/#BP) or, for MSR exits, from ecx and edx:eax of @regs.
+ * Exit codes without a case label yield NESTEDHVM_INTERCEPT_LAST.
+ */
+static uint64_t
+nsvm_vmcb_exitcode_native2generic(struct vcpu *v, struct cpu_user_regs *regs,
+ uint64_t exitcode, uint64_t *info1, uint64_t *info2)
+{
+ struct vmcb_struct *ns_vmcb = vcpu_nestedhvm(v).nh_vm;
+
+ *info1 = *info2 = 0;
+ switch (exitcode) {
+ case VMEXIT_INVALID:
+ return NESTEDHVM_INTERCEPT_INVALID;
+ case VMEXIT_SHUTDOWN:
+ return NESTEDHVM_INTERCEPT_SHUTDOWN;
+ case VMEXIT_VMMCALL:
+ return NESTEDHVM_INTERCEPT_VMMCALL;
+ case VMEXIT_INTR:
+ return NESTEDHVM_INTERCEPT_INTR;
+ case VMEXIT_NMI:
+ return NESTEDHVM_INTERCEPT_NMI;
+ case VMEXIT_NPF:
+ *info1 = ns_vmcb->exitinfo1; /* #PF error code */
+ *info2 = ns_vmcb->exitinfo2; /* #PF guest physical address */
+ return NESTEDHVM_INTERCEPT_NPF;
+ case VMEXIT_EXCEPTION_PF:
+ *info1 = ns_vmcb->exitinfo1; /* #PF error code */
+ *info2 = ns_vmcb->exitinfo2; /* #PF virtual address */
+ return NESTEDHVM_INTERCEPT_PF;
+ case VMEXIT_EXCEPTION_MC:
+ return NESTEDHVM_INTERCEPT_MCE;
+ case VMEXIT_MSR:
+ /* info1: value of ecx, info2: edx:eax combined into 64 bits.
+ * exitinfo1 == 0 selects a read, anything else a write.
+ */
+ *info1 = regs->ecx;
+ *info2 = ((uint64_t)regs->edx << 32) | regs->eax;
+ return (ns_vmcb->exitinfo1 == 0) ?
+ NESTEDHVM_INTERCEPT_MSR_READ : NESTEDHVM_INTERCEPT_MSR_WRITE;
+ case VMEXIT_IOIO:
+ return NESTEDHVM_INTERCEPT_IOIO;
+
+ /* Exceptions reported without additional information. */
+ case VMEXIT_EXCEPTION_DE:
+ return NESTEDHVM_INTERCEPT_DE;
+ case VMEXIT_EXCEPTION_OF:
+ return NESTEDHVM_INTERCEPT_OF;
+ case VMEXIT_EXCEPTION_BR:
+ return NESTEDHVM_INTERCEPT_BR;
+ case VMEXIT_EXCEPTION_UD:
+ return NESTEDHVM_INTERCEPT_UD;
+ case VMEXIT_EXCEPTION_NM:
+ return NESTEDHVM_INTERCEPT_NM;
+ case VMEXIT_EXCEPTION_DF:
+ return NESTEDHVM_INTERCEPT_DF;
+ case VMEXIT_EXCEPTION_09:
+ return NESTEDHVM_INTERCEPT_09;
+ case VMEXIT_EXCEPTION_XF:
+ return NESTEDHVM_INTERCEPT_XF;
+
+ /* #DB and #BP report the cs attributes and the rip. */
+ case VMEXIT_EXCEPTION_DB:
+ *info1 = ns_vmcb->cs.attr.bytes;
+ *info2 = ns_vmcb->rip;
+ return NESTEDHVM_INTERCEPT_DB;
+ case VMEXIT_EXCEPTION_BP:
+ *info1 = ns_vmcb->cs.attr.bytes;
+ *info2 = ns_vmcb->rip;
+ return NESTEDHVM_INTERCEPT_BP;
+
+ case VMEXIT_EXCEPTION_TS:
+ return NESTEDHVM_INTERCEPT_TS;
+
+ /* The remaining exceptions pass exitinfo1 on in info1. */
+ case VMEXIT_EXCEPTION_NP:
+ *info1 = ns_vmcb->exitinfo1;
+ return NESTEDHVM_INTERCEPT_NP;
+ case VMEXIT_EXCEPTION_SS:
+ *info1 = ns_vmcb->exitinfo1;
+ return NESTEDHVM_INTERCEPT_SS;
+ case VMEXIT_EXCEPTION_GP:
+ *info1 = ns_vmcb->exitinfo1;
+ return NESTEDHVM_INTERCEPT_GP;
+ case VMEXIT_EXCEPTION_15:
+ *info1 = ns_vmcb->exitinfo1;
+ return NESTEDHVM_INTERCEPT_15;
+ case VMEXIT_EXCEPTION_MF:
+ *info1 = ns_vmcb->exitinfo1;
+ return NESTEDHVM_INTERCEPT_MF;
+ case VMEXIT_EXCEPTION_AC:
+ *info1 = ns_vmcb->exitinfo1;
+ return NESTEDHVM_INTERCEPT_AC;
+ }
+
+ /* No generic equivalent for this exit code. */
+ return NESTEDHVM_INTERCEPT_LAST;
+}
+
+/*
+ * Tell whether the guest asked to intercept the event behind @exitcode.
+ *
+ * If a forced vmexit is flagged, @exitcode is replaced by the native
+ * translation of hvm->nh_forcevmexit.exitcode. The exit code is then
+ * looked up in the intercept vectors cached in struct nestedsvm.
+ *
+ * Returns 1 if the guest intercepts the event, 0 otherwise.
+ *
+ * Exit codes outside the intercept vectors used to reach the general2
+ * shift with a count of 64 or more (e.g. VMEXIT_NPF, VMEXIT_INVALID or
+ * codes below VMEXIT_VMRUN), which is undefined behaviour in C. They are
+ * handled explicitly now:
+ *  - VMEXIT_NPF is intercepted iff the guest runs with nested paging,
+ *  - VMEXIT_INVALID is always reported to the guest,
+ *  - anything else that has no intercept bit is logged and treated as
+ *    not intercepted.
+ */
+static int
+nsvm_vmcb_intercepted_by_guest(struct vcpu *v, uint64_t exitcode)
+{
+    uint64_t exit_bits;
+    struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+    struct nestedsvm *svm = hvm->nh_arch;
+
+    if (hvm->nh_hostflags.fields.forcevmexit)
+        exitcode = nsvm_vmcb_exitcode_generic2native(hvm->nh_forcevmexit.exitcode);
+
+    switch (exitcode) {
+    case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
+    case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
+        exit_bits = 1ULL << (exitcode - VMEXIT_CR0_READ);
+        if (svm->ns_cr_intercepts & exit_bits)
+            break;
+        return 0;
+
+    case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
+    case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
+        exit_bits = 1ULL << (exitcode - VMEXIT_DR0_READ);
+        if (svm->ns_dr_intercepts & exit_bits)
+            break;
+        return 0;
+
+    case VMEXIT_EXCEPTION_DE ... VMEXIT_EXCEPTION_XF:
+        exit_bits = 1ULL << (exitcode - VMEXIT_EXCEPTION_DE);
+        if (svm->ns_exception_intercepts & exit_bits)
+            break;
+        return 0;
+
+    case VMEXIT_INTR ... VMEXIT_SHUTDOWN:
+        exit_bits = 1ULL << (exitcode - VMEXIT_INTR);
+        if (svm->ns_general1_intercepts & exit_bits)
+            break;
+        return 0;
+
+    case VMEXIT_NPF:
+        /* A nested page fault belongs to the guest only if the guest
+         * itself uses nested paging for its nested guest. */
+        if (nestedhvm_paging_mode_hap(v))
+            break;
+        return 0;
+
+    case VMEXIT_INVALID:
+        /* No intercept bit exists for this one: always intercepted. */
+        break;
+
+    /* case VMEXIT_VMRUN ... VMEXIT_MWAIT_CONDITIONAL: */
+    default:
+        /* Only shift by a count that addresses a bit of the general2
+         * intercept vector; everything else has no intercept bit. */
+        if (exitcode < VMEXIT_VMRUN ||
+            (exitcode - VMEXIT_VMRUN) >=
+            8 * sizeof(svm->ns_general2_intercepts)) {
+            gdprintk(XENLOG_ERR, "exitcode 0x%"PRIx64" has no intercept bit\n",
+                exitcode);
+            return 0;
+        }
+        exit_bits = 1ULL << (exitcode - VMEXIT_VMRUN);
+        if (svm->ns_general2_intercepts & exit_bits)
+            break;
+        return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * Build the permission maps used while the nested guest runs.
+ *
+ * The MSR permission map the guest named in its VMCB (msrpm_base_pa) is
+ * copied into hvm->nh_cached_msrpm and OR-ed word by word with the
+ * vcpu's own map into hvm->nh_merged_msrpm. The active VMCB is then
+ * pointed at hvm_io_bitmap and at the merged MSR map.
+ *
+ * Returns 0 on success, 1 if the guest's map could not be copied.
+ */
+static int nsvm_vmrun_permissionmap(struct vcpu *v)
+{
+    struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+    struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
+    struct vmcb_struct *ns_vmcb = hvm->nh_vm;
+    struct vmcb_struct *host_vmcb = arch_svm->vmcb;
+    unsigned long *guest_map;
+    enum hvm_copy_result ret;
+    unsigned int i;
+
+    ret = hvm_copy_from_guest_phys(hvm->nh_cached_msrpm,
+                                   ns_vmcb->msrpm_base_pa,
+                                   hvm->nh_cached_msrpm_size);
+    if ( ret != HVMCOPY_okay )
+    {
+        gdprintk(XENLOG_ERR, "hvm_copy_from_guest_phys msrpm %u\n", ret);
+        return 1;
+    }
+
+    /* Skip io bitmap merge since hvm_io_bitmap has all bits set but
+     * 0x80 and 0xed.
+     */
+
+    /* v->arch.hvm_svm.msrpm has type unsigned long, thus the maps are
+     * merged in units of BYTES_PER_LONG.
+     */
+    guest_map = (unsigned long *)hvm->nh_cached_msrpm;
+    i = 0;
+    while ( i < MSRPM_SIZE / BYTES_PER_LONG )
+    {
+        hvm->nh_merged_msrpm[i] = arch_svm->msrpm[i] | guest_map[i];
+        i++;
+    }
+
+    host_vmcb->iopm_base_pa = (uint64_t)virt_to_maddr(hvm_io_bitmap);
+    host_vmcb->msrpm_base_pa = (uint64_t)virt_to_maddr(hvm->nh_merged_msrpm);
+
+    return 0;
+}
+
+/*
+ * Load the nested guest state from the guest's VMCB (hvm->nh_vm) into
+ * the active VMCB of vcpu @v in preparation for running the nested guest.
+ *
+ * The guest's intercepts are cached in struct nestedsvm and OR-ed into
+ * the active VMCB, the permission maps are merged, control fields and
+ * the save state area are copied, EFER/CR0/CR2/CR3/CR4 are applied via
+ * the hvm_set_*() / hvm_update_guest_cr() helpers and finally
+ * eax/eip/esp/eflags of @regs are switched to the nested guest values.
+ *
+ * Returns 0 on success, or the non-zero result of
+ * nsvm_vmrun_permissionmap() / svm_vmcb_isvalid() on failure. A failing
+ * hvm_set_*() call is only logged.
+ *
+ * Note that both svm_vmcb_isvalid() checks run only after the active
+ * VMCB has already been modified.
+ */
+static int nsvm_vmcb_prepare4vmrun(struct vcpu *v, struct cpu_user_regs *regs)
+{
+ struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+ struct nestedsvm *svm = hvm->nh_arch;
+ struct vmcb_struct *ns_vmcb = hvm->nh_vm;
+ struct vmcb_struct *host_vmcb = v->arch.hvm_svm.vmcb;
+ int rc;
+
+ /* Enable nested guest intercepts */
+ /* Keep a copy of the guest's own intercepts; it is used to decide
+ * who handles an exit and to restore the guest's VMCB on #VMEXIT.
+ */
+ svm->ns_cr_intercepts = ns_vmcb->cr_intercepts;
+ svm->ns_dr_intercepts = ns_vmcb->dr_intercepts;
+ svm->ns_exception_intercepts = ns_vmcb->exception_intercepts;
+ svm->ns_general1_intercepts = ns_vmcb->general1_intercepts;
+ svm->ns_general2_intercepts = ns_vmcb->general2_intercepts;
+
+ /* The active VMCB intercepts the union of both sets. */
+ host_vmcb->cr_intercepts |= ns_vmcb->cr_intercepts;
+ host_vmcb->dr_intercepts |= ns_vmcb->dr_intercepts;
+ host_vmcb->exception_intercepts |= ns_vmcb->exception_intercepts;
+ host_vmcb->general1_intercepts |= ns_vmcb->general1_intercepts;
+ host_vmcb->general2_intercepts |= ns_vmcb->general2_intercepts;
+
+ /* Nested Pause Filter */
+ if (ns_vmcb->general1_intercepts & GENERAL1_INTERCEPT_PAUSE)
+ host_vmcb->pause_filter_count =
+ min(ns_vmcb->pause_filter_count, host_vmcb->pause_filter_count);
+ else
+ host_vmcb->pause_filter_count = SVM_PAUSEFILTER_INIT;
+
+ /* Nested IO permission bitmaps */
+ rc = nsvm_vmrun_permissionmap(v);
+ if (rc)
+ return rc;
+
+ /* TSC offset */
+ hvm_set_guest_tsc(v, host_vmcb->tsc_offset + ns_vmcb->tsc_offset);
+
+ /* ASID */
+ hvm_asid_flush_vcpu(v);
+ /* host_vmcb->guest_asid = ns_vmcb->guest_asid; */
+
+ /* TLB control */
+ host_vmcb->tlb_control |= ns_vmcb->tlb_control;
+
+ /* Virtual Interrupts */
+ /* intr_masking is forced on regardless of the guest's setting. */
+ host_vmcb->vintr = ns_vmcb->vintr;
+ host_vmcb->vintr.fields.intr_masking = 1;
+
+ /* Shadow Mode */
+ host_vmcb->interrupt_shadow = ns_vmcb->interrupt_shadow;
+
+ /* Exit codes */
+ host_vmcb->exitcode = ns_vmcb->exitcode;
+ host_vmcb->exitinfo1 = ns_vmcb->exitinfo1;
+ host_vmcb->exitinfo2 = ns_vmcb->exitinfo2;
+ host_vmcb->exitintinfo = ns_vmcb->exitintinfo;
+
+ /* Pending Interrupts */
+ host_vmcb->eventinj = ns_vmcb->eventinj;
+
+ /* LBR virtualization */
+ svm->ns_lbr_control = ns_vmcb->lbr_control;
+ host_vmcb->lbr_control.bytes |= ns_vmcb->lbr_control.bytes;
+
+ /* NextRIP */
+ host_vmcb->nextrip = ns_vmcb->nextrip;
+
+ /*
+ * VMCB Save State Area
+ */
+
+ /* Segments */
+ host_vmcb->es = ns_vmcb->es;
+ host_vmcb->cs = ns_vmcb->cs;
+ host_vmcb->ss = ns_vmcb->ss;
+ host_vmcb->ds = ns_vmcb->ds;
+ host_vmcb->gdtr = ns_vmcb->gdtr;
+ host_vmcb->idtr = ns_vmcb->idtr;
+
+ /* CPL */
+ host_vmcb->cpl = ns_vmcb->cpl;
+
+ /* EFER */
+ v->arch.hvm_vcpu.guest_efer = ns_vmcb->efer;
+ rc = hvm_set_efer(ns_vmcb->efer);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);
+
+ /* CR4 */
+ v->arch.hvm_vcpu.guest_cr[4] = ns_vmcb->cr4;
+ rc = hvm_set_cr4(ns_vmcb->cr4);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);
+
+ /* CR0 */
+ v->arch.hvm_vcpu.guest_cr[0] = ns_vmcb->cr0;
+ rc = hvm_set_cr0(ns_vmcb->cr0);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);
+
+ /* CR2 */
+ v->arch.hvm_vcpu.guest_cr[2] = ns_vmcb->cr2;
+ hvm_update_guest_cr(v, 2);
+
+ /* Nested paging mode */
+ if (nestedhvm_paging_mode_hap(v)) {
+ /* host nested paging + guest nested paging. */
+
+ /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+ rc = hvm_set_cr3(ns_vmcb->cr3);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
+ } else if (paging_mode_hap(v->domain)) {
+ /* host nested paging + guest shadow paging. */
+ host_vmcb->np_enable = 1;
+ /* Keep h_cr3 as it is. */
+ /* Guest shadow paging: Must intercept pagefaults. */
+ host_vmcb->exception_intercepts |= (1U << TRAP_page_fault);
+ /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+ rc = hvm_set_cr3(ns_vmcb->cr3);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
+ } else {
+ /* host shadow paging + guest shadow paging. */
+ host_vmcb->np_enable = 0;
+ host_vmcb->h_cr3 = 0x0;
+
+ /* TODO: Once shadow-shadow paging is in place come back to here
+ * and set host_vmcb->cr3 to the shadowed shadow table.
+ */
+ }
+
+ /* DRn */
+ host_vmcb->dr7 = ns_vmcb->dr7;
+ host_vmcb->dr6 = ns_vmcb->dr6;
+
+ /* RFLAGS */
+ host_vmcb->rflags = ns_vmcb->rflags;
+
+ /* RIP */
+ host_vmcb->rip = ns_vmcb->rip;
+
+ /* RSP */
+ host_vmcb->rsp = ns_vmcb->rsp;
+
+ /* RAX */
+ host_vmcb->rax = ns_vmcb->rax;
+
+ /* Keep the host values of the fs, gs, ldtr, tr, kerngsbase,
+ * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
+ * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
+ */
+
+ /* Page tables */
+ host_vmcb->pdpe0 = ns_vmcb->pdpe0;
+ host_vmcb->pdpe1 = ns_vmcb->pdpe1;
+ host_vmcb->pdpe2 = ns_vmcb->pdpe2;
+ host_vmcb->pdpe3 = ns_vmcb->pdpe3;
+
+ /* PAT */
+ host_vmcb->g_pat = ns_vmcb->g_pat;
+
+ /* Debug Control MSR */
+ host_vmcb->debugctlmsr = ns_vmcb->debugctlmsr;
+
+ /* LBR MSRs */
+ host_vmcb->lastbranchfromip = ns_vmcb->lastbranchfromip;
+ host_vmcb->lastbranchtoip = ns_vmcb->lastbranchtoip;
+ host_vmcb->lastintfromip = ns_vmcb->lastintfromip;
+ host_vmcb->lastinttoip = ns_vmcb->lastinttoip;
+
+ /* Validate the guest's VMCB first, then the resulting active VMCB. */
+ rc = svm_vmcb_isvalid(__func__, ns_vmcb, 1);
+ if (rc) {
+ gdprintk(XENLOG_ERR, "nested vmcb invalid\n");
+ return rc;
+ }
+
+ rc = svm_vmcb_isvalid(__func__, host_vmcb, 1);
+ if (rc) {
+ gdprintk(XENLOG_ERR, "host vmcb invalid\n");
+ return rc;
+ }
+
+ /* Switch guest registers to nested guest */
+ regs->eax = ns_vmcb->rax;
+ regs->eip = ns_vmcb->rip;
+ regs->esp = ns_vmcb->rsp;
+ regs->eflags = ns_vmcb->rflags;
+
+ return 0;
+}
+
+/*
+ * Write the state of the nested guest back from the active VMCB of
+ * vcpu @v into the guest's VMCB (hvm->nh_vm) in preparation for a
+ * #VMEXIT to the guest.
+ *
+ * svm_vmsave() is called on the active VMCB first. The guest's original
+ * intercepts and LBR control are restored from the copies cached in
+ * struct nestedsvm; the remaining control fields and the save state area
+ * are copied from the active VMCB. The iopm/msrpm base addresses and the
+ * VMSAVE/VMLOAD-handled registers of the guest's VMCB are left untouched.
+ * Always returns 0.
+ */
+static int nsvm_vmcb_prepare4vmexit(struct vcpu *v)
+{
+ struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+ struct nestedsvm *svm = hvm->nh_arch;
+ struct vmcb_struct *vmcb, *ns_vmcb;
+
+ vmcb = v->arch.hvm_svm.vmcb;
+ ns_vmcb = hvm->nh_vm;
+
+ svm_vmsave(vmcb);
+
+ /* Intercepts */
+ /* Copy cached intercepts since they are the guest's original
+ * intercepts.
+ */
+ ns_vmcb->cr_intercepts = svm->ns_cr_intercepts;
+ ns_vmcb->dr_intercepts = svm->ns_dr_intercepts;
+ ns_vmcb->exception_intercepts = svm->ns_exception_intercepts;
+ ns_vmcb->general1_intercepts = svm->ns_general1_intercepts;
+ ns_vmcb->general2_intercepts = svm->ns_general2_intercepts;
+
+ /* Nested Pause Filter */
+ ns_vmcb->pause_filter_count = vmcb->pause_filter_count;
+
+ /* Nested IO permission bitmap */
+ /* Just keep the iopm_base_pa and msrpm_base_pa values.
+ * The guest must not see the virtualized values.
+ */
+
+ /* TSC offset */
+ ns_vmcb->tsc_offset = vmcb->tsc_offset;
+
+ /* ASID */
+ /* ns_vmcb->guest_asid = vmcb->guest_asid; */
+
+ /* TLB control */
+ ns_vmcb->tlb_control = 0;
+
+ /* Virtual Interrupts */
+ /* intr_masking is cleared again if the guest had not set it at VMRUN
+ * time (remembered in nh_hostflags.fields.vintrmask).
+ */
+ ns_vmcb->vintr = vmcb->vintr;
+ if (!(hvm->nh_hostflags.fields.vintrmask))
+ ns_vmcb->vintr.fields.intr_masking = 0;
+
+ /* Shadow mode */
+ ns_vmcb->interrupt_shadow = vmcb->interrupt_shadow;
+
+ /* Exit codes */
+ ns_vmcb->exitcode = vmcb->exitcode;
+ ns_vmcb->exitinfo1 = vmcb->exitinfo1;
+ ns_vmcb->exitinfo2 = vmcb->exitinfo2;
+ ns_vmcb->exitintinfo = vmcb->exitintinfo;
+
+ /* Interrupts */
+ /* If we emulate a VMRUN/#VMEXIT in the same host #VMEXIT cycle we have
+ * to make sure that we do not lose injected events. So check eventinj
+ * here and copy it to exitintinfo if it is valid.
+ * exitintinfo and eventinj can't be both valid because the case below
+ * only happens on a VMRUN instruction intercept which has no valid
+ * exitintinfo set.
+ */
+ if ( unlikely(vmcb->eventinj.fields.v) &&
+ hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
+ vmcb->eventinj.fields.vector) )
+ {
+ ns_vmcb->exitintinfo = vmcb->eventinj;
+ }
+
+ ns_vmcb->eventinj.bytes = 0;
+
+ /* Nested paging mode */
+ if (nestedhvm_paging_mode_hap(v)) {
+ /* host nested paging + guest nested paging. */
+ ns_vmcb->np_enable = vmcb->np_enable;
+ ns_vmcb->cr3 = vmcb->cr3;
+ /* The vmcb->h_cr3 is the shadowed h_cr3. The original
+ * unshadowed guest h_cr3 is kept in ns_vmcb->h_cr3,
+ * hence we keep the ns_vmcb->h_cr3 value. */
+ } else if (paging_mode_hap(v->domain)) {
+ /* host nested paging + guest shadow paging. */
+ ns_vmcb->np_enable = 0;
+ /* Throw h_cr3 away. Guest is not allowed to set it or
+ * it can break out, otherwise (security hole!) */
+ ns_vmcb->h_cr3 = 0x0;
+ /* Stop intercepting #PF (already done above
+ * by restoring cached intercepts). */
+ ns_vmcb->cr3 = vmcb->cr3;
+ } else {
+ /* host shadow paging + guest shadow paging. */
+ ns_vmcb->np_enable = 0;
+ ns_vmcb->h_cr3 = 0x0;
+ /* The vmcb->cr3 is the shadowed cr3. The original
+ * unshadowed guest cr3 is kept in ns_vmcb->cr3,
+ * hence we keep the ns_vmcb->cr3 value. */
+ }
+
+ /* LBR virtualization */
+ ns_vmcb->lbr_control = svm->ns_lbr_control;
+
+ /* NextRIP */
+ ns_vmcb->nextrip = vmcb->nextrip;
+
+ /*
+ * VMCB Save State Area
+ */
+
+ /* Segments */
+ ns_vmcb->es = vmcb->es;
+ ns_vmcb->cs = vmcb->cs;
+ ns_vmcb->ss = vmcb->ss;
+ ns_vmcb->ds = vmcb->ds;
+ ns_vmcb->gdtr = vmcb->gdtr;
+ ns_vmcb->idtr = vmcb->idtr;
+
+ /* CPL */
+ ns_vmcb->cpl = vmcb->cpl;
+
+ /* EFER */
+ ns_vmcb->efer = vmcb->efer;
+
+ /* CRn */
+ ns_vmcb->cr4 = vmcb->cr4;
+ ns_vmcb->cr0 = vmcb->cr0;
+
+ /* DRn */
+ ns_vmcb->dr7 = vmcb->dr7;
+ ns_vmcb->dr6 = vmcb->dr6;
+
+ /* RFLAGS */
+ ns_vmcb->rflags = vmcb->rflags;
+
+ /* RIP */
+ ns_vmcb->rip = vmcb->rip;
+
+ /* RSP */
+ ns_vmcb->rsp = vmcb->rsp;
+
+ /* RAX */
+ ns_vmcb->rax = vmcb->rax;
+
+ /* Keep the nested guest values of the fs, gs, ldtr, tr, kerngsbase,
+ * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
+ * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
+ */
+
+ /* CR2 */
+ ns_vmcb->cr2 = vmcb->cr2;
+
+ /* Page tables */
+ ns_vmcb->pdpe0 = vmcb->pdpe0;
+ ns_vmcb->pdpe1 = vmcb->pdpe1;
+ ns_vmcb->pdpe2 = vmcb->pdpe2;
+ ns_vmcb->pdpe3 = vmcb->pdpe3;
+
+ /* PAT */
+ ns_vmcb->g_pat = vmcb->g_pat;
+
+ /* Debug Control MSR */
+ ns_vmcb->debugctlmsr = vmcb->debugctlmsr;
+
+ /* LBR MSRs */
+ ns_vmcb->lastbranchfromip = vmcb->lastbranchfromip;
+ ns_vmcb->lastbranchtoip = vmcb->lastbranchtoip;
+ ns_vmcb->lastintfromip = vmcb->lastintfromip;
+ ns_vmcb->lastinttoip = vmcb->lastinttoip;
+
+ return 0;
+}
+
+/*
+ * Read handler for the MSRs owned by nested SVM.
+ *
+ * *msr_content is always zeroed first. MSR_K8_VM_CR reads as zero,
+ * MSR_K8_VM_HSAVE_PA returns the value remembered in struct nestedsvm.
+ *
+ * Returns 1 if @msr was handled here, 0 if it is not one of ours.
+ */
+static int nsvm_rdmsr(struct vcpu *v, unsigned int msr, uint64_t *msr_content)
+{
+    struct nestedsvm *svm = vcpu_nestedhvm(v).nh_arch;
+
+    *msr_content = 0;
+
+    if ( msr == MSR_K8_VM_CR )
+        return 1;
+
+    if ( msr == MSR_K8_VM_HSAVE_PA )
+    {
+        *msr_content = svm->ns_msr_hsavepa;
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Write handler for the MSRs owned by nested SVM.
+ *
+ * Writes to MSR_K8_VM_CR are accepted and dropped. A write to
+ * MSR_K8_VM_HSAVE_PA is remembered in struct nestedsvm if
+ * nestedhvm_vmaddr_isvalid() accepts the address.
+ *
+ * Returns 1 if @msr was handled here, 0 if it is not one of ours and
+ * -1 if the value is invalid (the comment at the -1 return asks for a
+ * #GP to be injected).
+ */
+static int nsvm_wrmsr(struct vcpu *v, unsigned int msr, uint64_t msr_content)
+{
+    struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+    struct nestedsvm *svm = hvm->nh_arch;
+
+    if ( msr == MSR_K8_VM_CR )
+    {
+        /* ignore write. handle all bits as read-only. */
+        return 1;
+    }
+
+    if ( msr != MSR_K8_VM_HSAVE_PA )
+        return 0;
+
+    if ( !nestedhvm_vmaddr_isvalid(hvm, msr_content) )
+    {
+        gdprintk(XENLOG_ERR,
+            "MSR_K8_VM_HSAVE_PA value invalid 0x%"PRIx64"\n", msr_content);
+        return -1; /* inject #GP */
+    }
+
+    svm->ns_msr_hsavepa = msr_content;
+    return 1;
+}
+
static int svm_cpu_up(void)
{
uint64_t msr_content;
@@ -955,8 +1992,8 @@ static void svm_do_nested_pgfault(paddr_
struct {
uint64_t gpa;
uint64_t mfn;
- u32 qualification;
- u32 p2mt;
+ uint32_t qualification;
+ uint32_t p2mt;
} _d;
_d.gpa = gpa;
@@ -997,11 +2034,24 @@ static void svm_cpuid_intercept(
hvm_cpuid(input, eax, ebx, ecx, edx);
- if ( input == 0x80000001 )
- {
+ switch (input) {
+ case 0x80000001:
/* Fix up VLAPIC details. */
if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
__clear_bit(X86_FEATURE_APIC & 31, edx);
+ break;
+ case 0x8000000a:
+ /* We require the host to use nested paging as
+ * hap-on-shadow is not supported.
+ * The tools have no way to check this case and
+ * thus always enable it. So we mask hap unless
+ * we use hap-on-hap.
+ */
+ if ( cpu_has_svm_npt && !paging_mode_hap(v->domain) )
+ *edx &= ~(1U << SVM_FEATURE_NPT);
+ break;
+ default:
+ break;
}
HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
@@ -1037,6 +2087,7 @@ static void svm_dr_access(struct vcpu *v
static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
{
+ int ret;
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
@@ -1074,9 +2125,6 @@ static int svm_msr_read_intercept(unsign
*msr_content = 0;
break;
- case MSR_K8_VM_HSAVE_PA:
- goto gpf;
-
case MSR_IA32_DEBUGCTLMSR:
*msr_content = vmcb->debugctlmsr;
break;
@@ -1109,6 +2157,11 @@ static int svm_msr_read_intercept(unsign
break;
default:
+ ret = nsvm_rdmsr(v, msr, msr_content);
+ if ( ret < 0 )
+ goto gpf;
+ else if ( ret )
+ break;
if ( rdmsr_viridian_regs(msr, msr_content) ||
rdmsr_hypervisor_regs(msr, msr_content) )
@@ -1131,14 +2184,12 @@ static int svm_msr_read_intercept(unsign
static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
{
+ int ret;
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
switch ( msr )
{
- case MSR_K8_VM_HSAVE_PA:
- goto gpf;
-
case MSR_IA32_SYSENTER_CS:
v->arch.hvm_svm.guest_sysenter_cs = msr_content;
break;
@@ -1189,6 +2240,12 @@ static int svm_msr_write_intercept(unsig
break;
default:
+ ret = nsvm_wrmsr(v, msr, msr_content);
+ if ( ret < 0 )
+ goto gpf;
+ else if ( ret )
+ break;
+
if ( wrmsr_viridian_regs(msr, msr_content) )
break;
@@ -1277,6 +2334,179 @@ static void svm_vmexit_do_pause(struct c
do_sched_op_compat(SCHEDOP_yield, 0);
}
+/*
+ * Handle an intercepted VMRUN instruction.
+ *
+ * Injects #UD when nested virtualisation is not enabled for the domain.
+ * Otherwise the instruction length is decoded and the request is handed
+ * to nestedhvm_vcpu_vmentry() together with the VMCB address @vmcbaddr.
+ * The guest instruction pointer is not advanced here; the instruction
+ * length is passed on to nestedhvm_vcpu_vmentry() instead.
+ */
+static void svm_vmexit_do_vmrun(struct cpu_user_regs *regs,
+                                struct vcpu *v, uint64_t vmcbaddr)
+{
+    unsigned int inst_len;
+
+    if ( !nestedhvm_enabled(v->domain) )
+    {
+        hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+        return;
+    }
+
+    /* Decode against the vcpu we were handed, like the sibling handlers. */
+    if ( (inst_len = __get_instruction_length(v, INSTR_VMRUN)) == 0 )
+        return;
+
+    /*
+     * On failure, nestedhvm_vcpu_vmentry has already injected an exception
+     * (mostly a #GP or #UD), so the result needs no further handling here.
+     */
+    (void)nestedhvm_vcpu_vmentry(v, regs, vmcbaddr, inst_len);
+}
+
+/*
+ * Emulate an intercepted VMLOAD instruction.
+ *
+ * @vmcb:     VMCB that receives the loaded state and is passed to
+ *            svm_vmload().
+ * @regs:     guest registers; the instruction pointer is advanced on
+ *            success.
+ * @v:        vcpu that executed the instruction.
+ * @vmcbaddr: guest-physical address of the VMCB to load from.
+ *
+ * Injects #UD when nested virtualisation is disabled for the domain.
+ * When nestedhvm_vcpu_state_validate() fails, the vector it returned is
+ * injected; when the guest VMCB cannot be read, #GP is injected.
+ * The per-vcpu scratch VMCB is wiped again on every path that used it.
+ */
+static void
+svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
+                     struct cpu_user_regs *regs,
+                     struct vcpu *v, uint64_t vmcbaddr)
+{
+    int ret;
+    unsigned int inst_len;
+    struct nestedsvm *svm;
+    struct vmcb_struct *tmp_vmcb;
+
+    if ( !nestedhvm_enabled(v->domain) )
+    {
+        hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+        return;
+    }
+
+    if ( (inst_len = __get_instruction_length(v, INSTR_VMLOAD)) == 0 )
+        return;
+
+    /* Only touch the nested state once nesting is known to be enabled. */
+    svm = vcpu_nestedhvm(v).nh_arch;
+    tmp_vmcb = svm->ns_tmpvmcb;
+
+    ret = nestedhvm_vcpu_state_validate(v, vmcbaddr);
+    if ( ret )
+    {
+        gdprintk(XENLOG_ERR,
+                 "nestedhvm_vcpu_state_validate failed, injecting 0x%x\n", ret);
+        goto inject;
+    }
+
+    if ( hvm_copy_from_guest_phys(tmp_vmcb, vmcbaddr,
+                                  sizeof(struct vmcb_struct)) )
+    {
+        /*
+         * The copy helpers report an HVMCOPY_* status, not an exception
+         * vector, so it must not be fed to hvm_inject_exception().
+         * An unusable VMCB address is reported to the guest as #GP.
+         */
+        ret = TRAP_gp_fault;
+        gdprintk(XENLOG_ERR,
+                 "hvm_copy_from_guest_phys failed, injecting 0x%x\n", ret);
+        goto inject;
+    }
+
+    nsvm_vmcb_loadsave(tmp_vmcb, vmcb);
+    svm_vmload(vmcb);
+
+    __update_guest_eip(regs, inst_len);
+    memset(tmp_vmcb, 0x0, sizeof(struct vmcb_struct));
+    return;
+
+ inject:
+    memset(tmp_vmcb, 0x0, sizeof(struct vmcb_struct));
+    /* #GP always carries an error code (zero here); other vectors do not. */
+    hvm_inject_exception(ret,
+                         (ret == TRAP_gp_fault) ? 0 : HVM_DELIVER_NO_ERROR_CODE,
+                         0);
+    return;
+}
+
+/*
+ * Emulate an intercepted VMSAVE instruction.
+ *
+ * @vmcb:     VMCB that svm_vmsave() stores the state into and that the
+ *            saved fields are taken from.
+ * @regs:     guest registers; the instruction pointer is advanced on
+ *            success.
+ * @v:        vcpu that executed the instruction.
+ * @vmcbaddr: guest-physical address of the VMCB to save into.
+ *
+ * The guest VMCB is read into the per-vcpu scratch VMCB, updated with the
+ * fields handled by nsvm_vmcb_loadsave() and written back as a whole.
+ *
+ * Injects #UD when nested virtualisation is disabled for the domain.
+ * When nestedhvm_vcpu_state_validate() fails, the vector it returned is
+ * injected; when the guest VMCB cannot be read or written, #GP is
+ * injected.  The scratch VMCB is wiped again on every path that used it.
+ */
+static void
+svm_vmexit_do_vmsave(struct vmcb_struct *vmcb,
+                     struct cpu_user_regs *regs,
+                     struct vcpu *v, uint64_t vmcbaddr)
+{
+    int ret;
+    unsigned int inst_len;
+    struct nestedsvm *svm;
+    struct vmcb_struct *tmp_vmcb;
+
+    if ( !nestedhvm_enabled(v->domain) )
+    {
+        hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+        return;
+    }
+
+    if ( (inst_len = __get_instruction_length(v, INSTR_VMSAVE)) == 0 )
+        return;
+
+    /* Only touch the nested state once nesting is known to be enabled. */
+    svm = vcpu_nestedhvm(v).nh_arch;
+    tmp_vmcb = svm->ns_tmpvmcb;
+
+    ret = nestedhvm_vcpu_state_validate(v, vmcbaddr);
+    if ( ret )
+    {
+        gdprintk(XENLOG_ERR,
+                 "nestedhvm_vcpu_state_validate failed, injecting 0x%x\n", ret);
+        goto inject;
+    }
+
+    if ( hvm_copy_from_guest_phys(tmp_vmcb, vmcbaddr,
+                                  sizeof(struct vmcb_struct)) )
+    {
+        /*
+         * The copy helpers report an HVMCOPY_* status, not an exception
+         * vector, so it must not be fed to hvm_inject_exception().
+         * An unusable VMCB address is reported to the guest as #GP.
+         */
+        ret = TRAP_gp_fault;
+        gdprintk(XENLOG_ERR,
+                 "hvm_copy_from_guest_phys failed, injecting 0x%x\n", ret);
+        goto inject;
+    }
+
+    svm_vmsave(vmcb);
+    nsvm_vmcb_loadsave(vmcb, tmp_vmcb);
+
+    if ( hvm_copy_to_guest_phys(vmcbaddr, tmp_vmcb,
+                                sizeof(struct vmcb_struct)) )
+    {
+        /* Same reasoning as for the read above: report #GP, not the status. */
+        ret = TRAP_gp_fault;
+        gdprintk(XENLOG_ERR,
+                 "hvm_copy_to_guest_phys failed, injecting 0x%x\n", ret);
+        goto inject;
+    }
+
+    __update_guest_eip(regs, inst_len);
+    memset(tmp_vmcb, 0x0, sizeof(struct vmcb_struct));
+    return;
+
+ inject:
+    memset(tmp_vmcb, 0x0, sizeof(struct vmcb_struct));
+    /* #GP always carries an error code (zero here); other vectors do not. */
+    hvm_inject_exception(ret,
+                         (ret == TRAP_gp_fault) ? 0 : HVM_DELIVER_NO_ERROR_CODE,
+                         0);
+    return;
+}
+
+/*
+ * Handle an intercepted CLGI instruction.
+ *
+ * Injects #UD when nested virtualisation is disabled for the domain.
+ * Otherwise nestedsvm_vcpu_clgi() does the actual work; when it succeeds
+ * the pending virtual interrupt is dropped, the VINTR intercept is
+ * removed and the guest instruction pointer is moved past the CLGI.
+ */
+static void svm_vmexit_do_clgi(struct cpu_user_regs *regs, struct vcpu *v)
+{
+    struct vmcb_struct *guest_vmcb = v->arch.hvm_svm.vmcb;
+    unsigned int insn_bytes;
+
+    if ( !nestedhvm_enabled(v->domain) )
+    {
+        hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+        return;
+    }
+
+    insn_bytes = __get_instruction_length(v, INSTR_CLGI);
+    if ( insn_bytes == 0 )
+        return;
+
+    /*
+     * On failure nestedsvm_vcpu_clgi has already injected an exception
+     * (mostly a #GP or #UD), so just bail out.
+     */
+    if ( nestedsvm_vcpu_clgi(v) != 0 )
+        return;
+
+    /* After a CLGI no interrupts should come */
+    guest_vmcb->vintr.fields.irq = 0;
+    guest_vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
+
+    __update_guest_eip(regs, insn_bytes);
+}
+
+/*
+ * Handle an intercepted STGI instruction.
+ *
+ * Injects #UD when nested virtualisation is disabled for the domain.
+ * Otherwise nestedsvm_vcpu_stgi() does the actual work; when it succeeds
+ * the guest instruction pointer is moved past the STGI.
+ */
+static void svm_vmexit_do_stgi(struct cpu_user_regs *regs, struct vcpu *v)
+{
+    unsigned int insn_bytes;
+
+    if ( !nestedhvm_enabled(v->domain) )
+    {
+        hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+        return;
+    }
+
+    insn_bytes = __get_instruction_length(v, INSTR_STGI);
+    if ( insn_bytes == 0 )
+        return;
+
+    /*
+     * On failure nestedsvm_vcpu_stgi has already injected an exception
+     * (mostly a #GP or #UD), so just bail out.
+     */
+    if ( nestedsvm_vcpu_stgi(v) != 0 )
+        return;
+
+    __update_guest_eip(regs, insn_bytes);
+}
+
static void svm_vmexit_ud_intercept(struct cpu_user_regs *regs)
{
struct hvm_emulate_ctxt ctxt;
@@ -1424,20 +2654,37 @@ static struct hvm_function_table __read_
.msr_read_intercept = svm_msr_read_intercept,
.msr_write_intercept = svm_msr_write_intercept,
.invlpg_intercept = svm_invlpg_intercept,
- .set_rdtsc_exiting = svm_set_rdtsc_exiting
+ .set_rdtsc_exiting = svm_set_rdtsc_exiting,
+
+ .nestedhvm_vcpu_initialise = nsvm_vcpu_initialise,
+ .nestedhvm_vcpu_destroy = nsvm_vcpu_destroy,
+ .nestedhvm_vcpu_reset = nsvm_vcpu_reset,
+ .nestedhvm_vcpu_features = nsvm_vcpu_features,
+ .nestedhvm_vcpu_hostsave = nsvm_vcpu_hostsave,
+ .nestedhvm_vcpu_hostrestore = nsvm_vcpu_hostrestore,
+ .nestedhvm_vcpu_vmentry = nsvm_vcpu_vmrun,
+ .nestedhvm_vcpu_vmexit = nsvm_vcpu_vmexit,
+ .nestedhvm_vm_exitcode_native2generic = nsvm_vmcb_exitcode_native2generic,
+ .nestedhvm_vm_intercepted_by_guest = nsvm_vmcb_intercepted_by_guest,
+ .nestedhvm_vm_prepare4vmentry = nsvm_vmcb_prepare4vmrun,
+ .nestedhvm_vm_prepare4vmexit = nsvm_vmcb_prepare4vmexit,
};
asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
{
- unsigned int exit_reason;
+ uint64_t exit_reason;
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
eventinj_t eventinj;
int inst_len, rc;
+ bool_t vcpu_guestmode = 0;
if ( paging_mode_hap(v->domain) )
v->arch.hvm_vcpu.guest_cr[3] = v->arch.hvm_vcpu.hw_cr[3] = vmcb->cr3;
+ if ( nestedhvm_enabled(v->domain) && nestedhvm_vcpu_in_guestmode(v) )
+ vcpu_guestmode = 1;
+
/*
* Before doing anything else, we need to sync up the VLAPIC's TPR with
* SVM's vTPR. It's OK if the guest doesn't touch CR8 (e.g. 32-bit Windows)
@@ -1445,12 +2692,39 @@ asmlinkage void svm_vmexit_handler(struc
* NB. We need to preserve the low bits of the TPR to make checked builds
* of Windows work, even though they don't actually do anything.
*/
- vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
- ((vmcb->vintr.fields.tpr & 0x0F) << 4) |
- (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0x0F));
+ if ( !vcpu_guestmode ) {
+ vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
+ ((vmcb->vintr.fields.tpr & 0x0F) << 4) |
+ (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0x0F));
+ }
exit_reason = vmcb->exitcode;
+ if ( vcpu_guestmode ) {
+ enum nestedhvm_vmexits nsret;
+
+ nsret = nestedhvm_vcpu_vmexit(v, regs, exit_reason);
+ vcpu_nestedhvm(v).nh_hostflags.fields.forcevmexit = 0;
+ switch (nsret) {
+ case NESTEDHVM_VMEXIT_DONE:
+ goto out;
+ case NESTEDHVM_VMEXIT_ERROR:
+ gdprintk(XENLOG_ERR,
+ "nestedhvm_vcpu_vmexit() returned NESTEDHVM_VMEXIT_ERROR\n");
+ goto out;
+ case NESTEDHVM_VMEXIT_HOST:
+ case NESTEDHVM_VMEXIT_CONTINUE:
+ break;
+ case NESTEDHVM_VMEXIT_FATALERROR:
+ gdprintk(XENLOG_ERR, "unexpected nestedhvm error\n");
+ goto exit_and_crash;
+ default:
+ gdprintk(XENLOG_INFO, "nestedhvm_vcpu_vmexit returned %i\n",
+ nsret);
+ goto exit_and_crash;
+ }
+ }
+
if ( hvm_long_mode_enabled(v) )
HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason,
(uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32),
@@ -1462,7 +2736,7 @@ asmlinkage void svm_vmexit_handler(struc
if ( unlikely(exit_reason == VMEXIT_INVALID) )
{
- svm_dump_vmcb(__func__, vmcb);
+ svm_vmcb_dump(__func__, vmcb);
goto exit_and_crash;
}
@@ -1610,6 +2884,10 @@ asmlinkage void svm_vmexit_handler(struc
case VMEXIT_VMMCALL:
if ( (inst_len = __get_instruction_length(v, INSTR_VMCALL)) == 0 )
break;
+ if ( vcpu_guestmode ) {
+ hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+ break;
+ }
HVMTRACE_1D(VMMCALL, regs->eax);
rc = hvm_do_hypercall(regs);
if ( rc != HVM_HCALL_preempted )
@@ -1642,11 +2920,25 @@ asmlinkage void svm_vmexit_handler(struc
case VMEXIT_MONITOR:
case VMEXIT_MWAIT:
+ hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+ break;
+
case VMEXIT_VMRUN:
+ svm_vmexit_do_vmrun(regs, v,
+ regs->eax);
+ break;
case VMEXIT_VMLOAD:
+ svm_vmexit_do_vmload(vmcb, regs, v, regs->eax);
+ break;
case VMEXIT_VMSAVE:
+ svm_vmexit_do_vmsave(vmcb, regs, v, regs->eax);
+ break;
case VMEXIT_STGI:
+ svm_vmexit_do_stgi(regs, v);
+ break;
case VMEXIT_CLGI:
+ svm_vmexit_do_clgi(regs, v);
+ break;
case VMEXIT_SKINIT:
hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
break;
@@ -1676,7 +2968,7 @@ asmlinkage void svm_vmexit_handler(struc
default:
exit_and_crash:
- gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
+ gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%"PRIx64", "
"exitinfo1 = %"PRIx64", exitinfo2 = %"PRIx64"\n",
exit_reason,
(u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
@@ -1684,6 +2976,11 @@ asmlinkage void svm_vmexit_handler(struc
break;
}
+ out:
+ if ( vcpu_guestmode )
+ /* Don't clobber TPR of the nested guest. */
+ return;
+
/* The exit may have updated the TPR: reflect this in the hardware vtpr */
vmcb->vintr.fields.tpr =
(vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
diff -r 0fcb32521d57 -r fa992936dba4 xen/arch/x86/hvm/svm/svmdebug.c
--- /dev/null
+++ b/xen/arch/x86/hvm/svm/svmdebug.c
@@ -0,0 +1,185 @@
+/*
+ * svmdebug.c: debug functions
+ * Copyright (c) 2010, Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/msr-index.h>
+#include <asm/hvm/svm/svmdebug.h>
+
+/*
+ * Print one VMCB segment register on a single line, labelled with @name:
+ * selector, attribute bytes, limit and base.
+ */
+static void svm_dump_sel(const char *name, svm_segment_register_t *s)
+{
+    /* Widen explicitly so the value matches the %llx conversion. */
+    unsigned long long seg_base = (unsigned long long)s->base;
+
+    printk("%s: sel=0x%04x, attr=0x%04x, limit=0x%08x, base=0x%016llx\n",
+           name, s->sel, s->attr.bytes, s->limit, seg_base);
+}
+
+/*
+ * Dump the contents of @vmcb to the console via printk().
+ *
+ * @from names the caller and is printed in the first line of the dump.
+ * The control fields come first, then the saved register state, and
+ * finally every segment register through svm_dump_sel().
+ */
+void svm_vmcb_dump(const char *from, struct vmcb_struct *vmcb)
+{
+    printk("Dumping guest's current state at %s...\n", from);
+    printk("Size of VMCB = %d, address = %p\n",
+           (int) sizeof(struct vmcb_struct), vmcb);
+
+    printk("cr_intercepts = 0x%08x dr_intercepts = 0x%08x "
+           "exception_intercepts = 0x%08x\n",
+           vmcb->cr_intercepts, vmcb->dr_intercepts,
+           vmcb->exception_intercepts);
+    printk("general1_intercepts = 0x%08x general2_intercepts = 0x%08x\n",
+           vmcb->general1_intercepts, vmcb->general2_intercepts);
+    /* All hex values in this dump are printed with a 0x prefix. */
+    printk("iopm_base_pa = 0x%016llx msrpm_base_pa = 0x%016llx tsc_offset = "
+           "0x%016llx\n",
+           (unsigned long long) vmcb->iopm_base_pa,
+           (unsigned long long) vmcb->msrpm_base_pa,
+           (unsigned long long) vmcb->tsc_offset);
+    printk("tlb_control = 0x%08x vintr = 0x%016llx interrupt_shadow = "
+           "0x%016llx\n", vmcb->tlb_control,
+           (unsigned long long) vmcb->vintr.bytes,
+           (unsigned long long) vmcb->interrupt_shadow);
+    printk("exitcode = 0x%016llx exitintinfo = 0x%016llx\n",
+           (unsigned long long) vmcb->exitcode,
+           (unsigned long long) vmcb->exitintinfo.bytes);
+    printk("exitinfo1 = 0x%016llx exitinfo2 = 0x%016llx \n",
+           (unsigned long long) vmcb->exitinfo1,
+           (unsigned long long) vmcb->exitinfo2);
+    printk("np_enable = 0x%016llx guest_asid = 0x%03x\n",
+           (unsigned long long) vmcb->np_enable, vmcb->guest_asid);
+    printk("cpl = %d efer = 0x%016llx star = 0x%016llx lstar = 0x%016llx\n",
+           vmcb->cpl, (unsigned long long) vmcb->efer,
+           (unsigned long long) vmcb->star, (unsigned long long) vmcb->lstar);
+    printk("CR0 = 0x%016llx CR2 = 0x%016llx\n",
+           (unsigned long long) vmcb->cr0, (unsigned long long) vmcb->cr2);
+    printk("CR3 = 0x%016llx CR4 = 0x%016llx\n",
+           (unsigned long long) vmcb->cr3, (unsigned long long) vmcb->cr4);
+    printk("RSP = 0x%016llx RIP = 0x%016llx\n",
+           (unsigned long long) vmcb->rsp, (unsigned long long) vmcb->rip);
+    printk("RAX = 0x%016llx RFLAGS=0x%016llx\n",
+           (unsigned long long) vmcb->rax, (unsigned long long) vmcb->rflags);
+    printk("DR6 = 0x%016llx, DR7 = 0x%016llx\n",
+           (unsigned long long) vmcb->dr6, (unsigned long long) vmcb->dr7);
+    printk("CSTAR = 0x%016llx SFMask = 0x%016llx\n",
+           (unsigned long long) vmcb->cstar,
+           (unsigned long long) vmcb->sfmask);
+    printk("KernGSBase = 0x%016llx PAT = 0x%016llx \n",
+           (unsigned long long) vmcb->kerngsbase,
+           (unsigned long long) vmcb->g_pat);
+    printk("H_CR3 = 0x%016llx\n", (unsigned long long)vmcb->h_cr3);
+
+    /* print out all the selectors */
+    svm_dump_sel("CS", &vmcb->cs);
+    svm_dump_sel("DS", &vmcb->ds);
+    svm_dump_sel("SS", &vmcb->ss);
+    svm_dump_sel("ES", &vmcb->es);
+    svm_dump_sel("FS", &vmcb->fs);
+    svm_dump_sel("GS", &vmcb->gs);
+    svm_dump_sel("GDTR", &vmcb->gdtr);
+    svm_dump_sel("LDTR", &vmcb->ldtr);
+    svm_dump_sel("IDTR", &vmcb->idtr);
+    svm_dump_sel("TR", &vmcb->tr);
+}
+/*
+ * Sanity-check the EFER, control-register, debug-register, CS-attribute,
+ * intercept and event-injection fields of @vmcb.
+ *
+ * NOTE: despite the name, the return value is 0 when every check passes
+ * and 1 when at least one check fails.
+ *
+ * With @verbose set, every failing check is reported through printk(),
+ * prefixed with @from, and all checks are evaluated.  With @verbose
+ * clear, nothing is printed and the function returns 1 straight from the
+ * first failing check (the PRINTF helper macro below contains that
+ * return statement).
+ */
+bool_t
+svm_vmcb_isvalid(const char *from, struct vmcb_struct *vmcb,
+ bool_t verbose)
+{
+ bool_t ret = 0; /* 0 = no check has failed so far */
+
+#define PRINTF(...) \
+ if (verbose) { ret = 1; printk("%s: ", from); printk(__VA_ARGS__); \
+ } else return 1;
+
+ if ((vmcb->efer & EFER_SVME) == 0) {
+ PRINTF("EFER: SVME bit not set (0x%"PRIx64")\n", vmcb->efer);
+ }
+
+ if ((vmcb->cr0 & X86_CR0_CD) == 0 && (vmcb->cr0 & X86_CR0_NW) != 0) {
+ PRINTF("CR0: CD bit is zero and NW bit set (0x%"PRIx64")\n",
+ vmcb->cr0);
+ }
+
+ if ((vmcb->cr0 >> 32U) != 0) {
+ PRINTF("CR0: bits [63:32] are not zero (0x%"PRIx64")\n",
+ vmcb->cr0);
+ }
+
+ if ((vmcb->cr3 & 0x7) != 0) { /* low three bits, checked in every mode */
+ PRINTF("CR3: MBZ bits are set (0x%"PRIx64")\n", vmcb->cr3);
+ }
+ if ((vmcb->efer & EFER_LMA) && (vmcb->cr3 & 0xfe) != 0) { /* bits [7:1] */
+ PRINTF("CR3: MBZ bits are set (0x%"PRIx64")\n", vmcb->cr3); /* NOTE(review): mask also covers bits 4:3 (PWT/PCD) - confirm */
+ }
+
+ if ((vmcb->cr4 >> 11U) != 0) {
+ PRINTF("CR4: bits [63:11] are not zero (0x%"PRIx64")\n",
+ vmcb->cr4);
+ }
+
+ if ((vmcb->dr6 >> 32U) != 0) {
+ PRINTF("DR6: bits [63:32] are not zero (0x%"PRIx64")\n",
+ vmcb->dr6);
+ }
+
+ if ((vmcb->dr7 >> 32U) != 0) {
+ PRINTF("DR7: bits [63:32] are not zero (0x%"PRIx64")\n",
+ vmcb->dr7);
+ }
+
+ if ((vmcb->efer >> 15U) != 0) {
+ PRINTF("EFER: bits [63:15] are not zero (0x%"PRIx64")\n",
+ vmcb->efer);
+ }
+
+ if ((vmcb->efer & EFER_LME) != 0 && ((vmcb->cr0 & X86_CR0_PG) != 0)) { /* long mode with paging on */
+ if ((vmcb->cr4 & X86_CR4_PAE) == 0) {
+ PRINTF("EFER_LME and CR0.PG are both set and CR4.PAE is zero.\n");
+ }
+ if ((vmcb->cr0 & X86_CR0_PE) == 0) {
+ PRINTF("EFER_LME and CR0.PG are both set and CR0.PE is zero.\n");
+ }
+ }
+
+ if ((vmcb->efer & EFER_LME) != 0
+ && (vmcb->cr0 & X86_CR0_PG) != 0
+ && (vmcb->cr4 & X86_CR4_PAE) != 0
+ && (vmcb->cs.attr.fields.l != 0)
+ && (vmcb->cs.attr.fields.db != 0))
+ {
+ PRINTF("EFER_LME, CR0.PG, CR4.PAE, CS.L and CS.D are all non-zero.\n");
+ }
+
+ if ((vmcb->general2_intercepts & GENERAL2_INTERCEPT_VMRUN) == 0) { /* the VMRUN intercept must be set */
+ PRINTF("GENERAL2_INTERCEPT: VMRUN intercept bit is clear (0x%"PRIx32")\n",
+ vmcb->general2_intercepts);
+ }
+
+ if (vmcb->eventinj.fields.resvd1 != 0) {
+ PRINTF("eventinj: MBZ bits are set (0x%"PRIx64")\n",
+ vmcb->eventinj.bytes);
+ }
+
+#undef PRINTF
+ return ret; /* only reached with ret == 1 when verbose is set */
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 0fcb32521d57 -r fa992936dba4 xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c
+++ b/xen/arch/x86/hvm/svm/vmcb.c
@@ -33,6 +33,7 @@
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/intr.h>
#include <asm/hvm/svm/asid.h>
+#include <asm/hvm/svm/svmdebug.h>
#include <xen/event.h>
#include <xen/kernel.h>
#include <xen/domain_page.h>
@@ -75,37 +76,6 @@ struct host_save_area *alloc_host_save_a
return hsa;
}
-void svm_intercept_msr(struct vcpu *v, uint32_t msr, int enable)
-{
- unsigned long *msr_bitmap = v->arch.hvm_svm.msrpm;
- unsigned long *msr_bit = NULL;
-
- /*
- * See AMD64 Programmers Manual, Vol 2, Section 15.10 (MSR-Bitmap Address).
- */
- if ( msr <= 0x1fff )
- msr_bit = msr_bitmap + 0x0000 / BYTES_PER_LONG;
- else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
- msr_bit = msr_bitmap + 0x0800 / BYTES_PER_LONG;
- else if ( (msr >= 0xc0010000) && (msr <= 0xc0011fff) )
- msr_bit = msr_bitmap + 0x1000 / BYTES_PER_LONG;
-
- BUG_ON(msr_bit == NULL);
-
- msr &= 0x1fff;
-
- if ( enable )
- {
- __set_bit(msr * 2, msr_bit);
- __set_bit(msr * 2 + 1, msr_bit);
- }
- else
- {
- __clear_bit(msr * 2, msr_bit);
- __clear_bit(msr * 2 + 1, msr_bit);
- }
-}
-
static int construct_vmcb(struct vcpu *v)
{
struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
@@ -250,7 +220,7 @@ static int construct_vmcb(struct vcpu *v
if ( cpu_has_pause_filter )
{
- vmcb->pause_filter_count = 3000;
+ vmcb->pause_filter_count = SVM_PAUSEFILTER_INIT;
vmcb->general1_intercepts |= GENERAL1_INTERCEPT_PAUSE;
}
@@ -298,76 +268,6 @@ void svm_destroy_vmcb(struct vcpu *v)
arch_svm->vmcb = NULL;
}
-static void svm_dump_sel(char *name, svm_segment_register_t *s)
-{
- printk("%s: sel=0x%04x, attr=0x%04x, limit=0x%08x, base=0x%016llx\n",
- name, s->sel, s->attr.bytes, s->limit,
- (unsigned long long)s->base);
-}
-
-void svm_dump_vmcb(const char *from, struct vmcb_struct *vmcb)
-{
- printk("Dumping guest's current state at %s...\n", from);
- printk("Size of VMCB = %d, address = %p\n",
- (int) sizeof(struct vmcb_struct), vmcb);
-
- printk("cr_intercepts = 0x%08x dr_intercepts = 0x%08x "
- "exception_intercepts = 0x%08x\n",
- vmcb->cr_intercepts, vmcb->dr_intercepts,
- vmcb->exception_intercepts);
- printk("general1_intercepts = 0x%08x general2_intercepts = 0x%08x\n",
- vmcb->general1_intercepts, vmcb->general2_intercepts);
- printk("iopm_base_pa = %016llx msrpm_base_pa = 0x%016llx tsc_offset = "
- "0x%016llx\n",
- (unsigned long long) vmcb->iopm_base_pa,
- (unsigned long long) vmcb->msrpm_base_pa,
- (unsigned long long) vmcb->tsc_offset);
- printk("tlb_control = 0x%08x vintr = 0x%016llx interrupt_shadow = "
- "0x%016llx\n", vmcb->tlb_control,
- (unsigned long long) vmcb->vintr.bytes,
- (unsigned long long) vmcb->interrupt_shadow);
- printk("exitcode = 0x%016llx exitintinfo = 0x%016llx\n",
- (unsigned long long) vmcb->exitcode,
- (unsigned long long) vmcb->exitintinfo.bytes);
- printk("exitinfo1 = 0x%016llx exitinfo2 = 0x%016llx \n",
- (unsigned long long) vmcb->exitinfo1,
- (unsigned long long) vmcb->exitinfo2);
- printk("np_enable = 0x%016llx guest_asid = 0x%03x\n",
- (unsigned long long) vmcb->np_enable, vmcb->guest_asid);
- printk("cpl = %d efer = 0x%016llx star = 0x%016llx lstar = 0x%016llx\n",
- vmcb->cpl, (unsigned long long) vmcb->efer,
- (unsigned long long) vmcb->star, (unsigned long long) vmcb->lstar);
- printk("CR0 = 0x%016llx CR2 = 0x%016llx\n",
- (unsigned long long) vmcb->cr0, (unsigned long long) vmcb->cr2);
- printk("CR3 = 0x%016llx CR4 = 0x%016llx\n",
- (unsigned long long) vmcb->cr3, (unsigned long long) vmcb->cr4);
- printk("RSP = 0x%016llx RIP = 0x%016llx\n",
- (unsigned long long) vmcb->rsp, (unsigned long long) vmcb->rip);
- printk("RAX = 0x%016llx RFLAGS=0x%016llx\n",
- (unsigned long long) vmcb->rax, (unsigned long long) vmcb->rflags);
- printk("DR6 = 0x%016llx, DR7 = 0x%016llx\n",
- (unsigned long long) vmcb->dr6, (unsigned long long) vmcb->dr7);
- printk("CSTAR = 0x%016llx SFMask = 0x%016llx\n",
- (unsigned long long) vmcb->cstar,
- (unsigned long long) vmcb->sfmask);
- printk("KernGSBase = 0x%016llx PAT = 0x%016llx \n",
- (unsigned long long) vmcb->kerngsbase,
- (unsigned long long) vmcb->g_pat);
- printk("H_CR3 = 0x%016llx\n", (unsigned long long)vmcb->h_cr3);
-
- /* print out all the selectors */
- svm_dump_sel("CS", &vmcb->cs);
- svm_dump_sel("DS", &vmcb->ds);
- svm_dump_sel("SS", &vmcb->ss);
- svm_dump_sel("ES", &vmcb->es);
- svm_dump_sel("FS", &vmcb->fs);
- svm_dump_sel("GS", &vmcb->gs);
- svm_dump_sel("GDTR", &vmcb->gdtr);
- svm_dump_sel("LDTR", &vmcb->ldtr);
- svm_dump_sel("IDTR", &vmcb->idtr);
- svm_dump_sel("TR", &vmcb->tr);
-}
-
static void vmcb_dump(unsigned char ch)
{
struct domain *d;
@@ -385,7 +285,7 @@ static void vmcb_dump(unsigned char ch)
for_each_vcpu ( d, v )
{
printk("\tVCPU %d\n", v->vcpu_id);
- svm_dump_vmcb("key_handler", v->arch.hvm_svm.vmcb);
+ svm_vmcb_dump("key_handler", v->arch.hvm_svm.vmcb);
}
}
diff -r 0fcb32521d57 -r fa992936dba4 xen/include/asm-x86/hvm/svm/emulate.h
--- a/xen/include/asm-x86/hvm/svm/emulate.h
+++ b/xen/include/asm-x86/hvm/svm/emulate.h
@@ -32,6 +32,11 @@ enum instruction_index {
INSTR_INT3,
INSTR_RDTSC,
INSTR_PAUSE,
+ INSTR_VMRUN,
+ INSTR_VMLOAD,
+ INSTR_VMSAVE,
+ INSTR_STGI,
+ INSTR_CLGI,
INSTR_MAX_COUNT /* Must be last - Number of instructions supported */
};
diff -r 0fcb32521d57 -r fa992936dba4 xen/include/asm-x86/hvm/svm/svm.h
--- a/xen/include/asm-x86/hvm/svm/svm.h
+++ b/xen/include/asm-x86/hvm/svm/svm.h
@@ -29,8 +29,6 @@
#include <asm/i387.h>
#include <asm/hvm/vpmu.h>
-void svm_dump_vmcb(const char *from, struct vmcb_struct *vmcb);
-
#define SVM_REG_EAX (0)
#define SVM_REG_ECX (1)
#define SVM_REG_EDX (2)
@@ -76,4 +74,6 @@ extern u32 svm_feature_flags;
#define cpu_has_svm_nrips test_bit(SVM_FEATURE_NRIPS, &svm_feature_flags)
#define cpu_has_pause_filter test_bit(SVM_FEATURE_PAUSEF, &svm_feature_flags)
+#define SVM_PAUSEFILTER_INIT 3000
+
#endif /* __ASM_X86_HVM_SVM_H__ */
diff -r 0fcb32521d57 -r fa992936dba4 xen/include/asm-x86/hvm/svm/svmdebug.h
--- /dev/null
+++ b/xen/include/asm-x86/hvm/svm/svmdebug.h
@@ -0,0 +1,30 @@
+/*
+ * svmdebug.h: SVM related debug definitions
+ * Copyright (c) 2010, AMD Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef __ASM_X86_HVM_SVM_SVMDEBUG_H__
+#define __ASM_X86_HVM_SVM_SVMDEBUG_H__
+
+#include <asm/types.h>
+#include <asm/hvm/svm/vmcb.h>
+
+void svm_vmcb_dump(const char *from, struct vmcb_struct *vmcb);
+bool_t svm_vmcb_isvalid(const char *from, struct vmcb_struct *vmcb,
+ bool_t verbose);
+
+#endif /* __ASM_X86_HVM_SVM_SVMDEBUG_H__ */
diff -r 0fcb32521d57 -r fa992936dba4 xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -474,6 +474,27 @@ struct arch_svm_struct {
uint64_t guest_sysenter_eip;
};
+struct nestedsvm { /* SVM-specific nested virtualisation state of a vcpu */
+ uint64_t ns_msr_hsavepa; /* MSR HSAVE_PA value, as last written by the guest */
+
+ /* Cached real intercepts of the nested guest */
+ uint32_t ns_cr_intercepts;
+ uint32_t ns_dr_intercepts;
+ uint32_t ns_exception_intercepts;
+ uint32_t ns_general1_intercepts;
+ uint32_t ns_general2_intercepts;
+
+ /* Cached real lbr of the nested guest */
+ lbrctrl_t ns_lbr_control;
+
+ /* Scratch vmcb, permanently allocated for the lifetime of the vcpu.
+ * Used for VMLOAD/VMSAVE instruction emulation.
+ * It is kept here rather than being xmalloc()ed and freed on every
+ * emulation, and rather than living on the (small) Xen stack.
+ */
+ struct vmcb_struct *ns_tmpvmcb;
+};
+
struct vmcb_struct *alloc_vmcb(void);
struct host_save_area *alloc_host_save_area(void);
void free_vmcb(struct vmcb_struct *vmcb);
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 09/13] Nested Virtualization: svm specific implementation
@ 2010-10-15 13:07 Christoph Egger
0 siblings, 0 replies; 5+ messages in thread
From: Christoph Egger @ 2010-10-15 13:07 UTC (permalink / raw)
To: xen-devel@lists.xensource.com; +Cc: Dong, Eddie, Tim Deegan
[-- Attachment #1: Type: text/plain, Size: 264 bytes --]
--
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632
[-- Attachment #2: xen_nh09_svm.diff --]
[-- Type: text/x-diff, Size: 64336 bytes --]
# HG changeset patch
# User cegger
# Date 1287145213 -7200
Implement SVM specific part for Nested Virtualization
Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
diff -r 584b6f1a0511 -r 87774fa72e28 xen/arch/x86/hvm/svm/Makefile
--- a/xen/arch/x86/hvm/svm/Makefile
+++ b/xen/arch/x86/hvm/svm/Makefile
@@ -2,6 +2,8 @@ obj-y += asid.o
obj-y += emulate.o
obj-y += entry.o
obj-y += intr.o
+obj-y += nestedsvm.o
obj-y += svm.o
+obj-y += svmdebug.o
obj-y += vmcb.o
obj-y += vpmu.o
diff -r 584b6f1a0511 -r 87774fa72e28 xen/arch/x86/hvm/svm/emulate.c
--- a/xen/arch/x86/hvm/svm/emulate.c
+++ b/xen/arch/x86/hvm/svm/emulate.c
@@ -101,6 +101,11 @@ MAKE_INSTR(HLT, 1, 0xf4);
MAKE_INSTR(INT3, 1, 0xcc);
MAKE_INSTR(RDTSC, 2, 0x0f, 0x31);
MAKE_INSTR(PAUSE, 1, 0x90);
+MAKE_INSTR(VMRUN, 3, 0x0f, 0x01, 0xd8);
+MAKE_INSTR(VMLOAD, 3, 0x0f, 0x01, 0xda);
+MAKE_INSTR(VMSAVE, 3, 0x0f, 0x01, 0xdb);
+MAKE_INSTR(STGI, 3, 0x0f, 0x01, 0xdc);
+MAKE_INSTR(CLGI, 3, 0x0f, 0x01, 0xdd);
static const u8 *opc_bytes[INSTR_MAX_COUNT] =
{
@@ -114,6 +119,11 @@ static const u8 *opc_bytes[INSTR_MAX_COU
[INSTR_INT3] = OPCODE_INT3,
[INSTR_RDTSC] = OPCODE_RDTSC,
[INSTR_PAUSE] = OPCODE_PAUSE,
+ [INSTR_VMRUN] = OPCODE_VMRUN,
+ [INSTR_VMLOAD] = OPCODE_VMLOAD,
+ [INSTR_VMSAVE] = OPCODE_VMSAVE,
+ [INSTR_STGI] = OPCODE_STGI,
+ [INSTR_CLGI] = OPCODE_CLGI,
};
static int fetch(struct vcpu *v, u8 *buf, unsigned long addr, int len)
diff -r 584b6f1a0511 -r 87774fa72e28 xen/arch/x86/hvm/svm/nestedsvm.c
--- /dev/null
+++ b/xen/arch/x86/hvm/svm/nestedsvm.c
@@ -0,0 +1,966 @@
+/*
+ * nestedsvm.c: Nested Virtualization
+ * Copyright (c) 2010, Advanced Micro Devices, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <asm/hvm/support.h>
+#include <asm/hvm/svm/emulate.h>
+#include <asm/hvm/svm/svm.h>
+#include <asm/hvm/nestedhvm.h>
+#include <asm/hvm/svm/nestedsvm.h>
+#include <asm/hvm/svm/svmdebug.h>
+#include <asm/paging.h> /* paging_mode_hap */
+
+/* Interface methods */
+int nsvm_vcpu_initialise(struct vcpu *v)
+{
+ void *msrpm;
+ struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+ struct nestedsvm *svm;
+ /* Allocate the per-vcpu nested SVM state. Returns 0 or -ENOMEM. */
+ msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0);
+ hvm->nh_cached_msrpm = msrpm;
+ if (msrpm == NULL)
+ goto err;
+ memset(msrpm, 0x0, MSRPM_SIZE);
+ hvm->nh_cached_msrpm_size = MSRPM_SIZE;
+ /* Second zeroed MSRPM_SIZE buffer, kept in nh_merged_msrpm. */
+ msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0);
+ hvm->nh_merged_msrpm = msrpm;
+ if (msrpm == NULL)
+ goto err;
+ memset(msrpm, 0x0, MSRPM_SIZE);
+ hvm->nh_merged_msrpm_size = MSRPM_SIZE;
+ /* SVM specific part of the nested state (struct nestedsvm), zero-filled. */
+ svm = hvm->nh_arch = xmalloc_bytes(sizeof(struct nestedsvm));
+ if (hvm->nh_arch == NULL)
+ goto err;
+ hvm->nh_arch_size = sizeof(struct nestedsvm);
+ memset(hvm->nh_arch, 0x0, hvm->nh_arch_size);
+ /* VMCB that nsvm_vcpu_hostsave() copies the current VMCB into. */
+ svm->ns_hostsave = alloc_vmcb();
+ if (svm->ns_hostsave == NULL)
+ goto err;
+ /* Record the SVM exit code for each generic NESTEDHVM_INTERCEPT_* number. */
+ nhvm_exitcode[NESTEDHVM_INTERCEPT_INVALID] = VMEXIT_INVALID;
+ nhvm_exitcode[NESTEDHVM_INTERCEPT_SHUTDOWN] = VMEXIT_SHUTDOWN;
+ nhvm_exitcode[NESTEDHVM_INTERCEPT_MCE] = VMEXIT_EXCEPTION_MC;
+ nhvm_exitcode[NESTEDHVM_INTERCEPT_VMMCALL] = VMEXIT_VMMCALL;
+ nhvm_exitcode[NESTEDHVM_INTERCEPT_INTR] = VMEXIT_INTR;
+ nhvm_exitcode[NESTEDHVM_INTERCEPT_NMI] = VMEXIT_NMI;
+ nhvm_exitcode[NESTEDHVM_INTERCEPT_NPF] = VMEXIT_NPF;
+ nhvm_exitcode[NESTEDHVM_INTERCEPT_PF] = VMEXIT_EXCEPTION_PF;
+ nhvm_exitcode[NESTEDHVM_INTERCEPT_NM] = VMEXIT_EXCEPTION_NM;
+
+ return 0;
+
+err:
+ nsvm_vcpu_destroy(v); /* frees whatever was successfully allocated above */
+ return -ENOMEM;
+}
+
+int nsvm_vcpu_destroy(struct vcpu *v)
+{
+    struct nestedhvm *nhvm = &vcpu_nestedhvm(v);
+    struct nestedsvm *nsvm = nhvm->nh_arch;
+    /* Free the nested state piecewise; unset pieces are skipped. Returns 0. */
+    if (nhvm->nh_cached_msrpm != NULL) {
+        ASSERT(nhvm->nh_cached_msrpm_size > 0);
+        free_xenheap_pages(nhvm->nh_cached_msrpm,
+            get_order_from_bytes(nhvm->nh_cached_msrpm_size));
+        nhvm->nh_cached_msrpm = NULL;
+        nhvm->nh_cached_msrpm_size = 0;
+    }
+    if (nhvm->nh_merged_msrpm != NULL) {
+        ASSERT(nhvm->nh_merged_msrpm_size > 0);
+        free_xenheap_pages(nhvm->nh_merged_msrpm,
+            get_order_from_bytes(nhvm->nh_merged_msrpm_size));
+        nhvm->nh_merged_msrpm = NULL;
+        nhvm->nh_merged_msrpm_size = 0;
+    }
+    if (nsvm != NULL) {
+        /* The host save VMCB hangs off the arch state: release it first. */
+        if (nsvm->ns_hostsave != NULL) {
+            free_vmcb(nsvm->ns_hostsave);
+            nsvm->ns_hostsave = NULL;
+        }
+        xfree(nsvm);
+        nhvm->nh_arch = NULL;
+        nhvm->nh_arch_size = 0;
+    }
+    return 0;
+}
+
+int nsvm_vcpu_reset(struct vcpu *v)
+{
+ return 0; /* no state is touched here; always reports success */
+}
+
+static void nsvm_vmcb_loadsave(struct vmcb_struct *from, struct vmcb_struct *to)
+{
+    /* Copy the fields handled via VMLOAD/VMSAVE emulation from *from to *to. */
+    to->sysenter_eip = from->sysenter_eip;
+    to->sysenter_esp = from->sysenter_esp;
+    to->sysenter_cs = from->sysenter_cs;
+    to->sfmask = from->sfmask;
+    to->cstar = from->cstar;
+    to->lstar = from->lstar;
+    to->star = from->star;
+    to->kerngsbase = from->kerngsbase;
+    to->ldtr = from->ldtr;
+    to->tr = from->tr;
+    to->gs = from->gs;
+    to->fs = from->fs;
+}
+
+static int nsvm_vcpu_hostsave(struct vcpu *v, unsigned int inst_len)
+{
+ struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+ struct nestedsvm *svm = hvm->nh_arch;
+ struct vmcb_struct *hsave, *vmcb;
+ /* Snapshot the current VMCB into ns_hostsave; inst_len is added to rip. */
+ hsave = svm->ns_hostsave;
+ vmcb = v->arch.hvm_svm.vmcb;
+ /* Start from a full copy; selected fields are overridden below. */
+ memcpy(hsave, vmcb, sizeof(struct vmcb_struct));
+ hsave->rip += inst_len;
+
+ /* Remember the host interrupt flag */
+ svm->ns_hostflags.fields.rflagsif = (hsave->rflags & X86_EFLAGS_IF) ? 1 : 0;
+
+ if (nestedhvm_paging_mode_hap(v)) {
+ /* guest: nested paging mode */
+ hsave->cr3 = vmcb->cr3;
+ hsave->h_cr3 = vmcb->h_cr3;
+ } else if (paging_mode_hap(v->domain))
+ /* host: nested paging, guest: shadow paging */
+ hsave->cr3 = vmcb->cr3;
+ else
+ /* host: shadow paging, guest: shadow paging */
+ hsave->cr3 = v->arch.hvm_vcpu.guest_cr[3];
+ /* Save the guest-visible register values rather than the VMCB contents. */
+ hsave->efer = v->arch.hvm_vcpu.guest_efer;
+ hsave->cr0 = v->arch.hvm_vcpu.guest_cr[0];
+ hsave->cr2 = v->arch.hvm_vcpu.guest_cr[2];
+ hsave->cr4 = v->arch.hvm_vcpu.guest_cr[4];
+
+ return 0; /* there is no failure path; always 0 */
+}
+
+int nsvm_vcpu_hostrestore(struct vcpu *v, struct cpu_user_regs *regs)
+{
+ struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+ struct nestedsvm *svm = hvm->nh_arch;
+ struct vmcb_struct *hsave, *vmcb;
+ int rc;
+ /* Load the state kept in ns_hostsave back into the VMCB and into regs. */
+ hsave = svm->ns_hostsave;
+ vmcb = v->arch.hvm_svm.vmcb;
+
+ /* Must keep register values handled by VMSAVE/VMLOAD */
+ nsvm_vmcb_loadsave(vmcb, hsave); /* current values go into hsave first */
+ memcpy(vmcb, hsave, sizeof(struct vmcb_struct));
+
+ /* EFER */
+ v->arch.hvm_vcpu.guest_efer = vmcb->efer;
+ rc = hvm_set_efer(vmcb->efer);
+ if (rc != X86EMUL_OKAY) /* failures below are logged only; restore goes on */
+ gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);
+
+ /* CR4 */
+ v->arch.hvm_vcpu.guest_cr[4] = vmcb->cr4;
+ rc = hvm_set_cr4(vmcb->cr4);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);
+
+ /* CR0 */
+ v->arch.hvm_vcpu.guest_cr[0] = vmcb->cr0 | X86_CR0_PE;
+ vmcb->rflags &= ~X86_EFLAGS_VM;
+ rc = hvm_set_cr0(vmcb->cr0 | X86_CR0_PE);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);
+
+ /* CR2 */
+ v->arch.hvm_vcpu.guest_cr[2] = vmcb->cr2;
+ hvm_update_guest_cr(v, 2);
+
+ /* CR3 */
+ /* Nested paging mode */
+ if (nestedhvm_paging_mode_hap(v)) {
+ /* host nested paging + guest nested paging. */
+ /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+ } else if (paging_mode_hap(v->domain)) {
+ /* host nested paging + guest shadow paging. */
+ /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+ } else {
+ /* host shadow paging + guest shadow paging. */
+
+ /* Reset MMU context -- XXX (hostrestore) not yet working*/
+ if (!pagetable_is_null(v->arch.guest_table))
+ put_page(pagetable_get_page(v->arch.guest_table));
+ v->arch.guest_table = pagetable_null();
+ /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+ }
+ rc = hvm_set_cr3(vmcb->cr3);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
+ /* NOTE(review): rc is an int but is printed with %u throughout - confirm. */
+ regs->eax = vmcb->rax;
+ regs->esp = vmcb->rsp;
+ regs->eip = vmcb->rip;
+ regs->eflags = vmcb->rflags;
+ vmcb->dr7 = 0; /* disable all breakpoints */
+ vmcb->cpl = 0;
+
+ /* Clear exitintinfo to prevent a fault loop of re-injecting
+ * exceptions forever.
+ */
+ vmcb->exitintinfo.bytes = 0;
+
+ hvm_asid_flush_vcpu(v);
+
+ return 0; /* always 0, even if one of the hvm_set_*() calls failed */
+}
+
+static int nsvm_vmrun_permissionmap(struct vcpu *v)
+{
+ struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
+ struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+ struct vmcb_struct *ns_vmcb = hvm->nh_vmcx;
+ struct vmcb_struct *host_vmcb = arch_svm->vmcb;
+ unsigned long *ns_msrpm_ptr;
+ unsigned int i;
+ enum hvm_copy_result ret;
+ /* Build the MSR permission map used while the nested guest runs. */
+ ns_msrpm_ptr = (unsigned long *)hvm->nh_cached_msrpm;
+ /* Copy the map at ns_vmcb->msrpm_base_pa out of guest memory. */
+ ret = hvm_copy_from_guest_phys(hvm->nh_cached_msrpm,
+ ns_vmcb->msrpm_base_pa,
+ hvm->nh_cached_msrpm_size);
+ if (ret != HVMCOPY_okay) {
+ gdprintk(XENLOG_ERR, "hvm_copy_from_guest_phys msrpm %u\n", ret);
+ return 1; /* non-zero: the caller aborts the VMRUN preparation */
+ }
+
+ /* Skip io bitmap merge since hvm_io_bitmap has all bits set but
+ * 0x80 and 0xed.
+ */
+
+ /* v->arch.hvm_svm.msrpm has type unsigned long, thus
+ * BYTES_PER_LONG.
+ */
+ for (i = 0; i < MSRPM_SIZE / BYTES_PER_LONG; i++)
+ hvm->nh_merged_msrpm[i] = arch_svm->msrpm[i] | ns_msrpm_ptr[i];
+ /* OR: a bit set in either map is set in the merged map. */
+ host_vmcb->iopm_base_pa =
+ (uint64_t)virt_to_maddr(hvm_io_bitmap);
+ host_vmcb->msrpm_base_pa =
+ (uint64_t)virt_to_maddr(hvm->nh_merged_msrpm);
+
+ return 0;
+}
+
+static int nsvm_vmcb_prepare4vmrun(struct vcpu *v, struct cpu_user_regs *regs)
+{
+ struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+ struct nestedsvm *svm = hvm->nh_arch;
+ struct vmcb_struct *ns_vmcb = hvm->nh_vmcx;
+ struct vmcb_struct *host_vmcb = v->arch.hvm_svm.vmcb;
+ int rc;
+ /* Load the nested VMCB (nh_vmcx) into the VMCB Xen runs; 0 on success. */
+ /* Enable nested guest intercepts */
+ svm->ns_cr_intercepts = ns_vmcb->cr_intercepts;
+ svm->ns_dr_intercepts = ns_vmcb->dr_intercepts;
+ svm->ns_exception_intercepts = ns_vmcb->exception_intercepts;
+ svm->ns_general1_intercepts = ns_vmcb->general1_intercepts;
+ svm->ns_general2_intercepts = ns_vmcb->general2_intercepts;
+ /* The VMCB gets the union of its current and the nested intercepts. */
+ host_vmcb->cr_intercepts |= ns_vmcb->cr_intercepts;
+ host_vmcb->dr_intercepts |= ns_vmcb->dr_intercepts;
+ host_vmcb->exception_intercepts |= ns_vmcb->exception_intercepts;
+ host_vmcb->general1_intercepts |= ns_vmcb->general1_intercepts;
+ host_vmcb->general2_intercepts |= ns_vmcb->general2_intercepts;
+
+ /* Nested Pause Filter */
+ if (ns_vmcb->general1_intercepts & GENERAL1_INTERCEPT_PAUSE)
+ host_vmcb->pause_filter_count =
+ min(ns_vmcb->pause_filter_count, host_vmcb->pause_filter_count);
+ else
+ host_vmcb->pause_filter_count = SVM_PAUSEFILTER_INIT;
+
+ /* Nested IO permission bitmaps */
+ rc = nsvm_vmrun_permissionmap(v);
+ if (rc)
+ return rc;
+
+ /* TSC offset */
+ hvm_set_guest_tsc(v, host_vmcb->tsc_offset + ns_vmcb->tsc_offset);
+
+ /* ASID */
+ hvm_asid_flush_vcpu(v);
+ /* host_vmcb->guest_asid = ns_vmcb->guest_asid; */
+
+ /* TLB control */
+ host_vmcb->tlb_control |= ns_vmcb->tlb_control;
+
+ /* Virtual Interrupts */
+ host_vmcb->vintr = ns_vmcb->vintr;
+ host_vmcb->vintr.fields.intr_masking = 1;
+
+ /* Shadow Mode */
+ host_vmcb->interrupt_shadow = ns_vmcb->interrupt_shadow;
+
+ /* Exit codes */
+ host_vmcb->exitcode = ns_vmcb->exitcode;
+ host_vmcb->exitinfo1 = ns_vmcb->exitinfo1;
+ host_vmcb->exitinfo2 = ns_vmcb->exitinfo2;
+ host_vmcb->exitintinfo = ns_vmcb->exitintinfo;
+
+ /* Pending Interrupts */
+ host_vmcb->eventinj = ns_vmcb->eventinj;
+
+ /* LBR virtualization */
+ svm->ns_lbr_control = ns_vmcb->lbr_control;
+ host_vmcb->lbr_control.bytes |= ns_vmcb->lbr_control.bytes;
+
+ /* NextRIP */
+ host_vmcb->nextrip = ns_vmcb->nextrip;
+
+ /*
+ * VMCB Save State Area
+ */
+
+ /* Segments */
+ host_vmcb->es = ns_vmcb->es;
+ host_vmcb->cs = ns_vmcb->cs;
+ host_vmcb->ss = ns_vmcb->ss;
+ host_vmcb->ds = ns_vmcb->ds;
+ host_vmcb->gdtr = ns_vmcb->gdtr;
+ host_vmcb->idtr = ns_vmcb->idtr;
+
+ /* CPL */
+ host_vmcb->cpl = ns_vmcb->cpl;
+
+ /* EFER */
+ v->arch.hvm_vcpu.guest_efer = ns_vmcb->efer;
+ rc = hvm_set_efer(ns_vmcb->efer);
+ if (rc != X86EMUL_OKAY) /* hvm_set_*() failures are logged only */
+ gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);
+
+ /* CR4 */
+ v->arch.hvm_vcpu.guest_cr[4] = ns_vmcb->cr4;
+ rc = hvm_set_cr4(ns_vmcb->cr4);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);
+
+ /* CR0 */
+ v->arch.hvm_vcpu.guest_cr[0] = ns_vmcb->cr0;
+ rc = hvm_set_cr0(ns_vmcb->cr0);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);
+
+ /* CR2 */
+ v->arch.hvm_vcpu.guest_cr[2] = ns_vmcb->cr2;
+ hvm_update_guest_cr(v, 2);
+
+ /* Nested paging mode */
+ if (nestedhvm_paging_mode_hap(v)) {
+ /* host nested paging + guest nested paging. */
+
+ /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+ rc = hvm_set_cr3(ns_vmcb->cr3);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
+ } else if (paging_mode_hap(v->domain)) {
+ /* host nested paging + guest shadow paging. */
+ host_vmcb->np_enable = 1;
+ /* Keep h_cr3 as it is. */
+ /* Guest shadow paging: Must intercept pagefaults. */
+ host_vmcb->exception_intercepts |= (1U << TRAP_page_fault);
+ /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+ rc = hvm_set_cr3(ns_vmcb->cr3);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
+ } else {
+ /* host shadow paging + guest shadow paging. */
+ host_vmcb->np_enable = 0;
+ host_vmcb->h_cr3 = 0x0;
+
+ /* TODO: Once shadow-shadow paging is in place come back to here
+ * and set host_vmcb->cr3 to the shadowed shadow table.
+ */
+ }
+
+ /* DRn */
+ host_vmcb->dr7 = ns_vmcb->dr7;
+ host_vmcb->dr6 = ns_vmcb->dr6;
+
+ /* RFLAGS */
+ host_vmcb->rflags = ns_vmcb->rflags;
+
+ /* RIP */
+ host_vmcb->rip = ns_vmcb->rip;
+
+ /* RSP */
+ host_vmcb->rsp = ns_vmcb->rsp;
+
+ /* RAX */
+ host_vmcb->rax = ns_vmcb->rax;
+
+ /* Keep the host values of the fs, gs, ldtr, tr, kerngsbase,
+ * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
+ * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
+ */
+
+ /* Page tables */
+ host_vmcb->pdpe0 = ns_vmcb->pdpe0;
+ host_vmcb->pdpe1 = ns_vmcb->pdpe1;
+ host_vmcb->pdpe2 = ns_vmcb->pdpe2;
+ host_vmcb->pdpe3 = ns_vmcb->pdpe3;
+
+ /* PAT */
+ host_vmcb->g_pat = ns_vmcb->g_pat;
+
+ /* Debug Control MSR */
+ host_vmcb->debugctlmsr = ns_vmcb->debugctlmsr;
+
+ /* LBR MSRs */
+ host_vmcb->lastbranchfromip = ns_vmcb->lastbranchfromip;
+ host_vmcb->lastbranchtoip = ns_vmcb->lastbranchtoip;
+ host_vmcb->lastintfromip = ns_vmcb->lastintfromip;
+ host_vmcb->lastinttoip = ns_vmcb->lastinttoip;
+ /* NOTE(review): the VMCB was already modified above when these checks run. */
+ rc = svm_vmcb_isvalid(__func__, ns_vmcb, 1);
+ if (rc) {
+ gdprintk(XENLOG_ERR, "nested vmcb invalid\n");
+ return rc;
+ }
+
+ rc = svm_vmcb_isvalid(__func__, host_vmcb, 1);
+ if (rc) {
+ gdprintk(XENLOG_ERR, "host vmcb invalid\n");
+ return rc;
+ }
+
+ /* Switch guest registers to nested guest */
+ regs->eax = ns_vmcb->rax;
+ regs->eip = ns_vmcb->rip;
+ regs->esp = ns_vmcb->rsp;
+ regs->eflags = ns_vmcb->rflags;
+
+ return 0;
+}
+
+int
+nsvm_vcpu_vmrun(struct vcpu *v, struct cpu_user_regs *regs,
+    unsigned int inst_len)
+{
+    int ret;
+    struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+    struct nestedsvm *svm = hvm->nh_arch;
+    struct vmcb_struct *ns_vmcb = hvm->nh_vmcx;
+
+    /* Emulate VMRUN: latch per-run state from the nested VMCB (nh_vmcx),
+     * save the current VMCB and load the nested guest state into it.
+     * inst_len is added to the saved rip. Returns 0 on success, else the
+     * non-zero result of the step that failed.
+     */
+    ASSERT(ns_vmcb != NULL);
+
+    /* Save values for later use. Needed for Nested-on-Nested and
+     * Shadow-on-Shadow paging.
+     */
+    hvm->nh_vm_guestcr3 = ns_vmcb->cr3;
+    hvm->nh_vm_hostcr3 = ns_vmcb->h_cr3;
+
+    hvm->nh_flushp2m = (ns_vmcb->tlb_control
+        || (hvm->nh_guest_asid != ns_vmcb->guest_asid));
+    hvm->nh_guest_asid = ns_vmcb->guest_asid;
+
+    /* nested paging for the guest */
+    hvm->nh_hap_enabled = (ns_vmcb->np_enable) ? 1 : 0;
+
+    /* Remember the V_INTR_MASK in hostflags */
+    svm->ns_hostflags.fields.vintrmask =
+        (ns_vmcb->vintr.fields.intr_masking) ? 1 : 0;
+
+    /* Save l1 guest state (= host state) */
+    ret = nsvm_vcpu_hostsave(v, inst_len);
+    if (ret) {
+        gdprintk(XENLOG_ERR, "hostsave failed\n");
+        return ret;
+    }
+
+    ret = nsvm_vmcb_prepare4vmrun(v, regs);
+    if (ret)
+        gdprintk(XENLOG_ERR, "prepare4vmrun failed\n");
+
+    return ret;
+}
+
+int
+nsvm_vcpu_vmexit(struct vcpu *v, struct cpu_user_regs *regs, uint64_t exitcode)
+{
+ struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+ struct vmcb_struct *ns_vmcb;
+ /* Store exitcode, and for forced exits the exit info, in the nested VMCB. */
+ ns_vmcb = hvm->nh_vmcx;
+
+ ns_vmcb->exitcode = exitcode;
+ ns_vmcb->eventinj.bytes = 0;
+
+ if (hvm->nh_hostflags.fields.forcevmexit) {
+ /* Forced exit: the exit info comes from nh_forcevmexit. */
+ ASSERT(hvm->nh_hostflags.fields.use_native_exitcode);
+ switch (exitcode) {
+ case VMEXIT_EXCEPTION_PF:
+ ns_vmcb->cr2 = ns_vmcb->exitinfo2; /* NOTE(review): read before exitinfo2 is updated below - confirm */
+ /* fall through */
+ case VMEXIT_NPF:
+ /* PF error code */
+ ns_vmcb->exitinfo1 = hvm->nh_forcevmexit.exitinfo1;
+ /* fault address */
+ ns_vmcb->exitinfo2 = hvm->nh_forcevmexit.exitinfo2;
+ break;
+ case VMEXIT_MSR:
+ if (ns_vmcb->exitinfo1 == 0) { /* read */
+ regs->ecx = hvm->nh_forcevmexit.exitinfo1;
+ } else {
+ regs->ecx = hvm->nh_forcevmexit.exitinfo1; /* same as the read branch */
+ regs->eax = (uint32_t)hvm->nh_forcevmexit.exitinfo2;
+ regs->edx = (uint32_t)(hvm->nh_forcevmexit.exitinfo2 >> 32);
+ }
+ break;
+ case VMEXIT_EXCEPTION_NP:
+ case VMEXIT_EXCEPTION_SS:
+ case VMEXIT_EXCEPTION_GP:
+ case VMEXIT_EXCEPTION_15:
+ case VMEXIT_EXCEPTION_MF:
+ case VMEXIT_EXCEPTION_AC:
+ ns_vmcb->exitinfo1 = hvm->nh_forcevmexit.exitinfo1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return 0; /* always 0 */
+}
+
+uint64_t
+nsvm_vmcb_exitcode_native2generic(struct vcpu *v, uint64_t exitcode)
+{
+    uint64_t generic = NESTEDHVM_INTERCEPT_LAST; /* no generic counterpart */
+
+    switch (exitcode) { /* SVM exit code -> NESTEDHVM_INTERCEPT_* number */
+    case VMEXIT_INVALID:
+        generic = NESTEDHVM_INTERCEPT_INVALID; break;
+    case VMEXIT_SHUTDOWN:
+        generic = NESTEDHVM_INTERCEPT_SHUTDOWN; break;
+    case VMEXIT_VMMCALL:
+        generic = NESTEDHVM_INTERCEPT_VMMCALL; break;
+    case VMEXIT_INTR:
+        generic = NESTEDHVM_INTERCEPT_INTR; break;
+    case VMEXIT_NMI:
+        generic = NESTEDHVM_INTERCEPT_NMI; break;
+    case VMEXIT_NPF:
+        generic = NESTEDHVM_INTERCEPT_NPF; break;
+    case VMEXIT_EXCEPTION_PF:
+        generic = NESTEDHVM_INTERCEPT_PF; break;
+    case VMEXIT_EXCEPTION_MC:
+        generic = NESTEDHVM_INTERCEPT_MCE; break;
+    case VMEXIT_EXCEPTION_NM:
+        generic = NESTEDHVM_INTERCEPT_NM; break;
+    }
+    return generic;
+}
+
+static int
+nsvm_vmcb_guest_intercepts_msr(unsigned long *msr_bitmap,
+    uint32_t msr, bool_t write)
+{
+    bool_t enabled;
+    unsigned long *msr_bit;
+
+    /* Look up the read (write == 0) or write intercept bit of msr in
+     * msr_bitmap. Returns NESTEDHVM_VMEXIT_INJECT when svm_msrbit() has
+     * no range for msr, NESTEDHVM_VMEXIT_HOST when the bit is clear and
+     * NESTEDHVM_VMEXIT_CONTINUE when it is set.
+     */
+    msr_bit = svm_msrbit(msr_bitmap, msr);
+    if (msr_bit == NULL)
+        /* MSR not in the permission map: Let the guest handle it. */
+        return NESTEDHVM_VMEXIT_INJECT;
+
+    /* Two bits per MSR inside a range: even = read, odd = write. */
+    msr &= 0x1fff;
+    if (write)
+        enabled = test_bit(msr * 2 + 1, msr_bit);
+    else
+        enabled = test_bit(msr * 2, msr_bit);
+    if (!enabled)
+        return NESTEDHVM_VMEXIT_HOST;
+
+    return NESTEDHVM_VMEXIT_CONTINUE;
+}
+
+int
+nsvm_vmcb_guest_intercepts_exitcode(struct vcpu *v,
+ struct cpu_user_regs *regs, uint64_t exitcode)
+{
+ uint64_t exit_bits;
+ struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+ struct nestedsvm *svm = hvm->nh_arch;
+ struct vmcb_struct *ns_vmcb = hvm->nh_vmcx;
+ enum nestedhvm_vmexits vmexits;
+ bool_t unmap = (ns_vmcb == NULL);
+ /* Returns 1 if the cached nested intercepts cover exitcode, else 0. */
+ if (hvm->nh_hostflags.fields.forcevmexit)
+ if (!hvm->nh_hostflags.fields.use_native_exitcode) {
+ BUG_ON(hvm->nh_forcevmexit.exitcode == NESTEDHVM_INTERCEPT_LAST);
+ exitcode = nhvm_exitcode[hvm->nh_forcevmexit.exitcode];
+ }
+ /* Each group: bit index = offset of exitcode from the group's first code. */
+ switch (exitcode) {
+ case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
+ case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
+ exit_bits = 1ULL << (exitcode - VMEXIT_CR0_READ);
+ if (svm->ns_cr_intercepts & exit_bits)
+ break;
+ return 0;
+
+ case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
+ case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
+ exit_bits = 1ULL << (exitcode - VMEXIT_DR0_READ);
+ if (svm->ns_dr_intercepts & exit_bits)
+ break;
+ return 0;
+
+ case VMEXIT_EXCEPTION_DE ... VMEXIT_EXCEPTION_XF:
+ exit_bits = 1ULL << (exitcode - VMEXIT_EXCEPTION_DE);
+ if (svm->ns_exception_intercepts & exit_bits)
+ break;
+ return 0;
+
+ case VMEXIT_INTR ... VMEXIT_SHUTDOWN:
+ exit_bits = 1ULL << (exitcode - VMEXIT_INTR);
+ if (svm->ns_general1_intercepts & exit_bits)
+ break;
+ return 0;
+
+ case VMEXIT_VMRUN ... VMEXIT_MWAIT_CONDITIONAL:
+ exit_bits = 1ULL << (exitcode - VMEXIT_VMRUN);
+ if (svm->ns_general2_intercepts & exit_bits)
+ break;
+ return 0;
+
+ case VMEXIT_NPF:
+ case VMEXIT_INVALID:
+ /* Always intercepted */
+ break;
+
+ default:
+ gdprintk(XENLOG_ERR, "Illegal exitcode 0x%"PRIx64"\n", exitcode);
+ BUG();
+ break;
+ }
+
+ /* Special cases: Do more detailed checks */
+ switch (exitcode) {
+ case VMEXIT_MSR:
+ if (ns_vmcb == NULL) /* nh_vmcx not set: map the VMCB at nh_vmaddr read-only */
+ ns_vmcb = hvm_map_guest_frame_ro(hvm->nh_vmaddr >> PAGE_SHIFT);
+ ASSERT(ns_vmcb != NULL); /* NOTE(review): a failed mapping is only caught where ASSERT is active - confirm */
+ vmexits = nsvm_vmcb_guest_intercepts_msr(hvm->nh_cached_msrpm,
+ regs->ecx, ns_vmcb->exitinfo1 != 0);
+ if (unmap) /* drop the temporary mapping made above */
+ hvm_unmap_guest_frame(ns_vmcb);
+ if (vmexits == NESTEDHVM_VMEXIT_HOST)
+ return 0;
+ break;
+
+ case VMEXIT_IOIO:
+ /* always intercepted */
+ break;
+ }
+
+ return 1;
+}
+
+int
+nsvm_vmcb_guest_intercepts_trap(struct vcpu *v, unsigned int trapnr)
+{
+    ASSERT(vcpu_nestedhvm(v).nh_hostflags.fields.forcevmexit == 0);
+
+    /* The exit code of trap number N is VMEXIT_EXCEPTION_DE + N. */
+    return nsvm_vmcb_guest_intercepts_exitcode(v, guest_cpu_user_regs(),
+        (uint64_t)(VMEXIT_EXCEPTION_DE + trapnr));
+}
+
+int nsvm_vmcb_prepare4vmexit(struct vcpu *v)
+{
+ struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+ struct nestedsvm *svm = hvm->nh_arch;
+ struct vmcb_struct *vmcb, *ns_vmcb;
+ /* Write the state of the current VMCB back into the nested VMCB (nh_vmcx). */
+ vmcb = v->arch.hvm_svm.vmcb;
+ ns_vmcb = hvm->nh_vmcx;
+
+ svm_vmsave(vmcb);
+
+ /* Intercepts */
+ /* Copy cached intercepts since they are the guest's original
+ * intercepts.
+ */
+ ns_vmcb->cr_intercepts = svm->ns_cr_intercepts;
+ ns_vmcb->dr_intercepts = svm->ns_dr_intercepts;
+ ns_vmcb->exception_intercepts = svm->ns_exception_intercepts;
+ ns_vmcb->general1_intercepts = svm->ns_general1_intercepts;
+ ns_vmcb->general2_intercepts = svm->ns_general2_intercepts;
+
+ /* Nested Pause Filter */
+ ns_vmcb->pause_filter_count = vmcb->pause_filter_count;
+
+ /* Nested IO permission bitmap */
+ /* Just keep the iopm_base_pa and msrpm_base_pa values.
+ * The guest must not see the virtualized values.
+ */
+
+ /* TSC offset */
+ ns_vmcb->tsc_offset = vmcb->tsc_offset;
+
+ /* ASID */
+ /* ns_vmcb->guest_asid = vmcb->guest_asid; */
+
+ /* TLB control */
+ ns_vmcb->tlb_control = 0;
+
+ /* Virtual Interrupts */
+ ns_vmcb->vintr = vmcb->vintr;
+ if (!(svm->ns_hostflags.fields.vintrmask)) /* value latched by nsvm_vcpu_vmrun() */
+ ns_vmcb->vintr.fields.intr_masking = 0;
+
+ /* Shadow mode */
+ ns_vmcb->interrupt_shadow = vmcb->interrupt_shadow;
+
+ /* Exit codes */
+ ns_vmcb->exitcode = vmcb->exitcode;
+ ns_vmcb->exitinfo1 = vmcb->exitinfo1;
+ ns_vmcb->exitinfo2 = vmcb->exitinfo2;
+ ns_vmcb->exitintinfo = vmcb->exitintinfo;
+
+ /* Interrupts */
+ /* If we emulate a VMRUN/#VMEXIT in the same host #VMEXIT cycle we have
+ * to make sure that we do not lose injected events. So check eventinj
+ * here and copy it to exitintinfo if it is valid.
+ * exitintinfo and eventinj can't be both valid because the case below
+ * only happens on a VMRUN instruction intercept which has no valid
+ * exitintinfo set.
+ */
+ if ( unlikely(vmcb->eventinj.fields.v) &&
+ hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
+ vmcb->eventinj.fields.vector) )
+ {
+ ns_vmcb->exitintinfo = vmcb->eventinj;
+ }
+
+ ns_vmcb->eventinj.bytes = 0;
+
+ /* Nested paging mode */
+ if (nestedhvm_paging_mode_hap(v)) {
+ /* host nested paging + guest nested paging. */
+ ns_vmcb->np_enable = vmcb->np_enable;
+ ns_vmcb->cr3 = vmcb->cr3;
+ /* The vmcb->h_cr3 is the shadowed h_cr3. The original
+ * unshadowed guest h_cr3 is kept in ns_vmcb->h_cr3,
+ * hence we keep the ns_vmcb->h_cr3 value. */
+ } else if (paging_mode_hap(v->domain)) {
+ /* host nested paging + guest shadow paging. */
+ ns_vmcb->np_enable = 0;
+ /* Throw h_cr3 away. Guest is not allowed to set it or
+ * it can break out, otherwise (security hole!) */
+ ns_vmcb->h_cr3 = 0x0;
+ /* Stop intercepting #PF (already done above
+ * by restoring cached intercepts). */
+ ns_vmcb->cr3 = vmcb->cr3;
+ } else {
+ /* host shadow paging + guest shadow paging. */
+ ns_vmcb->np_enable = 0;
+ ns_vmcb->h_cr3 = 0x0;
+ /* The vmcb->cr3 is the shadowed cr3. The original
+ * unshadowed guest cr3 is kept in ns_vmcb->cr3,
+ * hence we keep the ns_vmcb->cr3 value. */
+ }
+
+ /* LBR virtualization */
+ ns_vmcb->lbr_control = svm->ns_lbr_control;
+
+ /* NextRIP */
+ ns_vmcb->nextrip = vmcb->nextrip;
+
+ /*
+ * VMCB Save State Area
+ */
+
+ /* Segments */
+ ns_vmcb->es = vmcb->es;
+ ns_vmcb->cs = vmcb->cs;
+ ns_vmcb->ss = vmcb->ss;
+ ns_vmcb->ds = vmcb->ds;
+ ns_vmcb->gdtr = vmcb->gdtr;
+ ns_vmcb->idtr = vmcb->idtr;
+
+ /* CPL */
+ ns_vmcb->cpl = vmcb->cpl;
+
+ /* EFER */
+ ns_vmcb->efer = vmcb->efer;
+
+ /* CRn */
+ ns_vmcb->cr4 = vmcb->cr4;
+ ns_vmcb->cr0 = vmcb->cr0;
+
+ /* DRn */
+ ns_vmcb->dr7 = vmcb->dr7;
+ ns_vmcb->dr6 = vmcb->dr6;
+
+ /* RFLAGS */
+ ns_vmcb->rflags = vmcb->rflags;
+
+ /* RIP */
+ ns_vmcb->rip = vmcb->rip;
+
+ /* RSP */
+ ns_vmcb->rsp = vmcb->rsp;
+
+ /* RAX */
+ ns_vmcb->rax = vmcb->rax;
+
+ /* Keep the nested guest values of the fs, gs, ldtr, tr, kerngsbase,
+ * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
+ * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
+ */
+
+ /* CR2 */
+ ns_vmcb->cr2 = vmcb->cr2;
+
+ /* Page tables */
+ ns_vmcb->pdpe0 = vmcb->pdpe0;
+ ns_vmcb->pdpe1 = vmcb->pdpe1;
+ ns_vmcb->pdpe2 = vmcb->pdpe2;
+ ns_vmcb->pdpe3 = vmcb->pdpe3;
+
+ /* PAT */
+ ns_vmcb->g_pat = vmcb->g_pat;
+
+ /* Debug Control MSR */
+ ns_vmcb->debugctlmsr = vmcb->debugctlmsr;
+
+ /* LBR MSRs */
+ ns_vmcb->lastbranchfromip = vmcb->lastbranchfromip;
+ ns_vmcb->lastbranchtoip = vmcb->lastbranchtoip;
+ ns_vmcb->lastintfromip = vmcb->lastintfromip;
+ ns_vmcb->lastinttoip = vmcb->lastinttoip;
+
+ return 0; /* there is no failure path; always 0 */
+}
+
+int
+nsvm_vmcb_isvalid(struct vcpu *v, uint64_t vmcxaddr)
+{
+    /* Maximum valid physical address.
+     * See AMD BKDG for HSAVE_PA MSR.
+     */
+    const uint64_t max_valid_pa = 0xfd00000000ULL;
+
+    /* Returns 1 only with SVM enabled, x86 mode >= 2 and an address in range. */
+    if ( hvm_svm_enabled(v) && hvm_guest_x86_mode(v) >= 2 )
+        return ( vmcxaddr <= max_valid_pa ) ? 1 : 0;
+
+    return 0;
+}
+
+int
+nsvm_vcpu_vmexit_trap(struct vcpu *v, unsigned int trapnr,
+ int errcode, unsigned long cr2)
+{
+ struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+ enum nestedhvm_vmexits nsret;
+ struct cpu_user_regs *regs = guest_cpu_user_regs();
+ /* Force a nested #VMEXIT for exception trapnr; errcode/cr2 become exitinfo1/2. */
+ ASSERT(hvm->nh_vmcx == NULL); /* presumably nestedhvm_vcpu_vmexit() maps it - TODO confirm */
+ /* Mark the exit as forced so that nsvm_vcpu_vmexit() uses nh_forcevmexit. */
+ hvm->nh_hostflags.fields.forcevmexit = 1;
+ hvm->nh_hostflags.fields.use_native_exitcode = 1;
+ hvm->nh_forcevmexit.exitinfo1 = errcode;
+ hvm->nh_forcevmexit.exitinfo2 = cr2;
+
+ nsret = nestedhvm_vcpu_vmexit(v, regs, VMEXIT_EXCEPTION_DE + trapnr);
+ hvm->nh_hostflags.fields.forcevmexit = 0;
+ hvm->nh_hostflags.fields.use_native_exitcode = 0;
+
+ return nsret; /* the enum nestedhvm_vmexits value, returned as int */
+}
+
+/* MSR handling */
+int nsvm_rdmsr(struct vcpu *v, unsigned int msr, uint64_t *msr_content)
+{
+    struct nestedsvm *nsvm = vcpu_nestedhvm(v).nh_arch;
+
+    /* Read one of the MSRs handled by the nested SVM code.
+     * Returns 1 if msr was handled here and 0 if it was not.
+     * *msr_content is zeroed first in either case.
+     */
+    *msr_content = 0;
+
+    if (msr == MSR_K8_VM_CR)
+        return 1; /* reads as zero */
+
+    if (msr == MSR_K8_VM_HSAVE_PA) {
+        *msr_content = nsvm->ns_msr_hsavepa;
+        return 1;
+    }
+
+    return 0;
+}
+
+int nsvm_wrmsr(struct vcpu *v, unsigned int msr, uint64_t msr_content)
+{
+    struct nestedsvm *nsvm = vcpu_nestedhvm(v).nh_arch;
+
+    /* Write one of the MSRs handled by the nested SVM code.
+     * Returns 1 if msr was handled here, 0 if it was not and -1 if
+     * the value was rejected.
+     */
+    if (msr == MSR_K8_VM_CR)
+        /* ignore write. handle all bits as read-only. */
+        return 1;
+
+    if (msr != MSR_K8_VM_HSAVE_PA)
+        return 0;
+
+    if (!nhvm_vmcx_isvalid(v, msr_content)) {
+        gdprintk(XENLOG_ERR,
+            "MSR_K8_VM_HSAVE_PA value invalid 0x%"PRIx64"\n", msr_content);
+        return -1; /* inject #GP */
+    }
+
+    /* Accepted: nsvm_rdmsr() hands this value back. */
+    nsvm->ns_msr_hsavepa = msr_content;
+
+    return 1;
+}
diff -r 584b6f1a0511 -r 87774fa72e28 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -49,6 +49,9 @@
#include <asm/hvm/svm/vmcb.h>
#include <asm/hvm/svm/emulate.h>
#include <asm/hvm/svm/intr.h>
+#include <asm/hvm/svm/svmdebug.h>
+#include <asm/hvm/svm/nestedsvm.h>
+#include <asm/hvm/nestedhvm.h>
#include <asm/x86_emulate.h>
#include <public/sched.h>
#include <asm/hvm/vpt.h>
@@ -106,6 +109,44 @@ static void svm_cpu_down(void)
write_efer(read_efer() & ~EFER_SVME);
}
+unsigned long *
+svm_msrbit(unsigned long *msr_bitmap, uint32_t msr)
+{
+    /*
+     * See AMD64 Programmers Manual, Vol 2, Section 15.10 (MSR-Bitmap Address).
+     * Returns the start of the part of msr_bitmap that covers msr, or
+     * NULL if msr lies in none of the three ranges below.
+     */
+    if ( msr <= 0x1fff )
+        return msr_bitmap + 0x0000 / BYTES_PER_LONG;
+    if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+        return msr_bitmap + 0x0800 / BYTES_PER_LONG;
+    if ( (msr >= 0xc0010000) && (msr <= 0xc0011fff) )
+        return msr_bitmap + 0x1000 / BYTES_PER_LONG;
+
+    return NULL;
+}
+
+void svm_intercept_msr(struct vcpu *v, uint32_t msr, int enable)
+{
+    unsigned long *range = svm_msrbit(v->arch.hvm_svm.msrpm, msr);
+    unsigned int rd_bit, wr_bit;
+
+    /* Set (enable != 0) or clear both the read and write bit of msr. */
+    BUG_ON(range == NULL);
+    rd_bit = (msr & 0x1fff) * 2;
+    wr_bit = rd_bit + 1;
+
+    if ( !enable )
+    {
+        __clear_bit(rd_bit, range);
+        __clear_bit(wr_bit, range);
+        return;
+    }
+    __set_bit(rd_bit, range);
+    __set_bit(wr_bit, range);
+}
+
static void svm_save_dr(struct vcpu *v)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
@@ -294,7 +335,7 @@ static int svm_load_vmcb_ctxt(struct vcp
{
svm_load_cpu_state(v, ctxt);
if (svm_vmcb_restore(v, ctxt)) {
- printk("svm_vmcb restore failed!\n");
+ gdprintk(XENLOG_ERR, "svm_vmcb restore failed!\n");
domain_crash(v->domain);
return -EINVAL;
}
@@ -661,8 +702,10 @@ static void svm_ctxt_switch_to(struct vc
static void svm_do_resume(struct vcpu *v)
{
bool_t debug_state = v->domain->debugger_attached;
+ bool_t guestmode = nestedhvm_vcpu_in_guestmode(v);
- if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
+ if ( !guestmode &&
+ unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
{
uint32_t mask = (1U << TRAP_debug) | (1U << TRAP_int3);
v->arch.hvm_vcpu.debug_state_latch = debug_state;
@@ -681,11 +724,14 @@ static void svm_do_resume(struct vcpu *v
hvm_asid_flush_vcpu(v);
}
- /* Reflect the vlapic's TPR in the hardware vtpr */
- v->arch.hvm_svm.vmcb->vintr.fields.tpr =
- (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
+ if ( !guestmode )
+ {
+ /* Reflect the vlapic's TPR in the hardware vtpr */
+ v->arch.hvm_svm.vmcb->vintr.fields.tpr =
+ (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
- hvm_do_resume(v);
+ hvm_do_resume(v);
+ }
reset_stack_and_jump(svm_asm_do_resume);
}
@@ -924,8 +970,8 @@ static void svm_do_nested_pgfault(paddr_
struct {
uint64_t gpa;
uint64_t mfn;
- u32 qualification;
- u32 p2mt;
+ uint32_t qualification;
+ uint32_t p2mt;
} _d;
_d.gpa = gpa;
@@ -947,12 +993,21 @@ static void svm_do_nested_pgfault(paddr_
static void svm_fpu_dirty_intercept(void)
{
- struct vcpu *curr = current;
- struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
+ struct vcpu *v = current;
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- svm_fpu_enter(curr);
+ svm_fpu_enter(v);
- if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
+ if ( nestedhvm_enabled(v->domain) && nestedhvm_vcpu_in_guestmode(v) ) {
+ /* Check if guest must make FPU ready for the nested guest */
+ if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS )
+ hvm_inject_exception(TRAP_no_device, HVM_DELIVER_NO_ERROR_CODE, 0);
+ else
+ vmcb->cr0 &= ~X86_CR0_TS;
+ return;
+ }
+
+ if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
vmcb->cr0 &= ~X86_CR0_TS;
}
@@ -966,11 +1021,14 @@ static void svm_cpuid_intercept(
hvm_cpuid(input, eax, ebx, ecx, edx);
- if ( input == 0x80000001 )
- {
+ switch (input) {
+ case 0x80000001:
/* Fix up VLAPIC details. */
if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
__clear_bit(X86_FEATURE_APIC & 31, edx);
+ break;
+ default:
+ break;
}
HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
@@ -1006,6 +1064,7 @@ static void svm_dr_access(struct vcpu *v
static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
{
+ int ret;
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
@@ -1039,9 +1098,6 @@ static int svm_msr_read_intercept(unsign
*msr_content = 0;
break;
- case MSR_K8_VM_HSAVE_PA:
- goto gpf;
-
case MSR_IA32_DEBUGCTLMSR:
*msr_content = vmcb->debugctlmsr;
break;
@@ -1074,6 +1130,11 @@ static int svm_msr_read_intercept(unsign
break;
default:
+ ret = nsvm_rdmsr(v, msr, msr_content);
+ if ( ret < 0 )
+ goto gpf;
+ else if ( ret )
+ break;
if ( rdmsr_viridian_regs(msr, msr_content) ||
rdmsr_hypervisor_regs(msr, msr_content) )
@@ -1096,14 +1157,12 @@ static int svm_msr_read_intercept(unsign
static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
{
+ int ret;
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
switch ( msr )
{
- case MSR_K8_VM_HSAVE_PA:
- goto gpf;
-
case MSR_IA32_SYSENTER_CS:
v->arch.hvm_svm.guest_sysenter_cs = msr_content;
break;
@@ -1163,6 +1222,12 @@ static int svm_msr_write_intercept(unsig
break;
default:
+ ret = nsvm_wrmsr(v, msr, msr_content);
+ if ( ret < 0 )
+ goto gpf;
+ else if ( ret )
+ break;
+
if ( wrmsr_viridian_regs(msr, msr_content) )
break;
@@ -1242,6 +1307,104 @@ static void svm_vmexit_do_pause(struct c
do_sched_op_compat(SCHEDOP_yield, 0);
}
+/*
+ * Emulate an intercepted VMRUN: inject #UD unless nested HVM is enabled for
+ * the domain, otherwise pass the VMCB address to nestedhvm_vcpu_vmentry().
+ */
+static void svm_vmexit_do_vmrun(struct cpu_user_regs *regs,
+                                struct vcpu *v, uint64_t vmcbaddr)
+{
+    unsigned int inst_len;
+
+    if ( !nestedhvm_enabled(v->domain) ) {
+        hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+        return;
+    }
+
+    if ( (inst_len = __get_instruction_length(v, INSTR_VMRUN)) == 0 )
+        return;
+
+    /* Return value deliberately unused: on failure nestedhvm_vcpu_vmentry()
+     * has already injected an exception (usually #GP or #UD). */
+    nestedhvm_vcpu_vmentry(v, regs, vmcbaddr, inst_len);
+}
+
+/*
+ * Emulate an intercepted VMLOAD: validate the guest VMCB address, map the
+ * frame read-only and call svm_vmload() on it; inject an exception on failure.
+ */
+static void
+svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
+                     struct cpu_user_regs *regs,
+                     struct vcpu *v, uint64_t vmcbaddr)
+{
+    int trap;
+    unsigned int inst_len;
+    struct vmcb_struct *gvmcb;
+
+    if ( (inst_len = __get_instruction_length(v, INSTR_VMLOAD)) == 0 )
+        return;
+
+    trap = nestedhvm_vcpu_state_validate(v, vmcbaddr);
+    if ( trap != 0 ) {
+        gdprintk(XENLOG_ERR,
+            "nestedhvm_vcpu_state_validate failed, injecting 0x%x\n", trap);
+        hvm_inject_exception(trap, HVM_DELIVER_NO_ERROR_CODE, 0);
+        return;
+    }
+
+    gvmcb = hvm_map_guest_frame_ro(vmcbaddr >> PAGE_SHIFT);
+    if ( gvmcb == NULL ) {
+        gdprintk(XENLOG_ERR, "VMLOAD: VMCB mapping failed\n");
+        hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+        return;
+    }
+
+    svm_vmload(gvmcb);
+    hvm_unmap_guest_frame(gvmcb);
+    /* State in L1 VMCB is stale now */
+    v->arch.hvm_svm.vmcb_in_sync = 0;
+
+    __update_guest_eip(regs, inst_len);
+}
+
+/*
+ * Emulate an intercepted VMSAVE: validate the guest VMCB address, map the
+ * frame writable and call svm_vmsave() on it; inject an exception on failure.
+ */
+static void
+svm_vmexit_do_vmsave(struct vmcb_struct *vmcb,
+                     struct cpu_user_regs *regs,
+                     struct vcpu *v, uint64_t vmcbaddr)
+{
+    int trap;
+    unsigned int inst_len;
+    struct vmcb_struct *gvmcb;
+
+    if ( (inst_len = __get_instruction_length(v, INSTR_VMSAVE)) == 0 )
+        return;
+
+    trap = nestedhvm_vcpu_state_validate(v, vmcbaddr);
+    if ( trap != 0 ) {
+        gdprintk(XENLOG_ERR,
+            "nestedhvm_vcpu_state_validate failed, injecting 0x%x\n", trap);
+        hvm_inject_exception(trap, HVM_DELIVER_NO_ERROR_CODE, 0);
+        return;
+    }
+
+    gvmcb = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT);
+    if ( gvmcb == NULL ) {
+        gdprintk(XENLOG_ERR, "VMSAVE: VMCB mapping failed\n");
+        hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+        return;
+    }
+
+    svm_vmsave(gvmcb);
+    hvm_unmap_guest_frame(gvmcb);
+
+    __update_guest_eip(regs, inst_len);
+}
+
static void svm_vmexit_ud_intercept(struct cpu_user_regs *regs)
{
struct hvm_emulate_ctxt ctxt;
@@ -1372,20 +1535,37 @@ static struct hvm_function_table __read_
.msr_read_intercept = svm_msr_read_intercept,
.msr_write_intercept = svm_msr_write_intercept,
.invlpg_intercept = svm_invlpg_intercept,
- .set_rdtsc_exiting = svm_set_rdtsc_exiting
+ .set_rdtsc_exiting = svm_set_rdtsc_exiting,
+
+ .nhvm_vcpu_initialise = nsvm_vcpu_initialise,
+ .nhvm_vcpu_destroy = nsvm_vcpu_destroy,
+ .nhvm_vcpu_reset = nsvm_vcpu_reset,
+ .nhvm_vcpu_hostrestore = nsvm_vcpu_hostrestore,
+ .nhvm_vcpu_vmentry = nsvm_vcpu_vmrun,
+ .nhvm_vcpu_vmexit = nsvm_vcpu_vmexit,
+ .nhvm_vcpu_vmexit_trap = nsvm_vcpu_vmexit_trap,
+ .nhvm_vmcx_exitcode_native2generic = nsvm_vmcb_exitcode_native2generic,
+ .nhvm_vmcx_guest_intercepts_trap = nsvm_vmcb_guest_intercepts_trap,
+ .nhvm_vmcx_guest_intercepts_exitcode = nsvm_vmcb_guest_intercepts_exitcode,
+ .nhvm_vmcx_prepare4vmexit = nsvm_vmcb_prepare4vmexit,
+ .nhvm_vmcx_isvalid = nsvm_vmcb_isvalid,
};
asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
{
- unsigned int exit_reason;
+ uint64_t exit_reason;
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
eventinj_t eventinj;
int inst_len, rc;
+ bool_t vcpu_guestmode = 0;
if ( paging_mode_hap(v->domain) )
v->arch.hvm_vcpu.guest_cr[3] = v->arch.hvm_vcpu.hw_cr[3] = vmcb->cr3;
+ if ( nestedhvm_enabled(v->domain) && nestedhvm_vcpu_in_guestmode(v) )
+ vcpu_guestmode = 1;
+
/*
* Before doing anything else, we need to sync up the VLAPIC's TPR with
* SVM's vTPR. It's OK if the guest doesn't touch CR8 (e.g. 32-bit Windows)
@@ -1393,12 +1573,39 @@ asmlinkage void svm_vmexit_handler(struc
* NB. We need to preserve the low bits of the TPR to make checked builds
* of Windows work, even though they don't actually do anything.
*/
- vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
- ((vmcb->vintr.fields.tpr & 0x0F) << 4) |
- (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0x0F));
+ if ( !vcpu_guestmode ) {
+ vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
+ ((vmcb->vintr.fields.tpr & 0x0F) << 4) |
+ (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0x0F));
+ }
exit_reason = vmcb->exitcode;
+ if ( vcpu_guestmode ) {
+ enum nestedhvm_vmexits nsret;
+
+ nsret = nestedhvm_vcpu_vmexit(v, regs, exit_reason);
+ vcpu_nestedhvm(v).nh_hostflags.fields.forcevmexit = 0;
+ switch (nsret) {
+ case NESTEDHVM_VMEXIT_DONE:
+ goto out;
+ case NESTEDHVM_VMEXIT_ERROR:
+ gdprintk(XENLOG_ERR,
+ "nestedhvm_vcpu_vmexit() returned NESTEDHVM_VMEXIT_ERROR\n");
+ goto out;
+ case NESTEDHVM_VMEXIT_HOST:
+ case NESTEDHVM_VMEXIT_CONTINUE:
+ break;
+ case NESTEDHVM_VMEXIT_FATALERROR:
+ gdprintk(XENLOG_ERR, "unexpected nestedhvm error\n");
+ goto exit_and_crash;
+ default:
+ gdprintk(XENLOG_INFO, "nestedhvm_vcpu_vmexit returned %i\n",
+ nsret);
+ goto exit_and_crash;
+ }
+ }
+
if ( hvm_long_mode_enabled(v) )
HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason,
(uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32),
@@ -1410,7 +1617,7 @@ asmlinkage void svm_vmexit_handler(struc
if ( unlikely(exit_reason == VMEXIT_INVALID) )
{
- svm_dump_vmcb(__func__, vmcb);
+ svm_vmcb_dump(__func__, vmcb);
goto exit_and_crash;
}
@@ -1562,6 +1769,7 @@ asmlinkage void svm_vmexit_handler(struc
case VMEXIT_VMMCALL:
if ( (inst_len = __get_instruction_length(v, INSTR_VMCALL)) == 0 )
break;
+ BUG_ON(vcpu_guestmode);
HVMTRACE_1D(VMMCALL, regs->eax);
rc = hvm_do_hypercall(regs);
if ( rc != HVM_HCALL_preempted )
@@ -1594,9 +1802,19 @@ asmlinkage void svm_vmexit_handler(struc
case VMEXIT_MONITOR:
case VMEXIT_MWAIT:
+ hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+ break;
+
case VMEXIT_VMRUN:
+ svm_vmexit_do_vmrun(regs, v,
+ regs->eax);
+ break;
case VMEXIT_VMLOAD:
+ svm_vmexit_do_vmload(vmcb, regs, v, regs->eax);
+ break;
case VMEXIT_VMSAVE:
+ svm_vmexit_do_vmsave(vmcb, regs, v, regs->eax);
+ break;
case VMEXIT_STGI:
case VMEXIT_CLGI:
case VMEXIT_SKINIT:
@@ -1628,7 +1846,7 @@ asmlinkage void svm_vmexit_handler(struc
default:
exit_and_crash:
- gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
+ gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%"PRIx64", "
"exitinfo1 = %"PRIx64", exitinfo2 = %"PRIx64"\n",
exit_reason,
(u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
@@ -1636,6 +1854,11 @@ asmlinkage void svm_vmexit_handler(struc
break;
}
+ out:
+ if ( vcpu_guestmode )
+ /* Don't clobber TPR of the nested guest. */
+ return;
+
/* The exit may have updated the TPR: reflect this in the hardware vtpr */
vmcb->vintr.fields.tpr =
(vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
diff -r 584b6f1a0511 -r 87774fa72e28 xen/arch/x86/hvm/svm/svmdebug.c
--- /dev/null
+++ b/xen/arch/x86/hvm/svm/svmdebug.c
@@ -0,0 +1,189 @@
+/*
+ * svmdebug.c: debug functions
+ * Copyright (c) 2010, Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/msr-index.h>
+#include <asm/hvm/svm/svmdebug.h>
+
+/* Print one segment register (selector, attributes, limit, base) as @name. */
+static void svm_dump_sel(const char *name, const svm_segment_register_t *s)
+{
+    printk("%s: sel=0x%04x, attr=0x%04x, limit=0x%08x, base=0x%016llx\n",
+           name, s->sel, s->attr.bytes, s->limit, (unsigned long long)s->base);
+}
+
+/*
+ * Print the contents of @vmcb (intercepts, control fields, guest state and
+ * segment/descriptor-table registers).  @from names the caller in the output.
+ */
+void svm_vmcb_dump(const char *from, struct vmcb_struct *vmcb)
+{
+    printk("Dumping guest's current state at %s...\n", from);
+    printk("Size of VMCB = %d, address = %p\n", (int)sizeof(*vmcb), vmcb);
+
+    printk("cr_intercepts = 0x%08x dr_intercepts = 0x%08x "
+        "exception_intercepts = 0x%08x\n",
+        vmcb->cr_intercepts, vmcb->dr_intercepts, vmcb->exception_intercepts);
+    printk("general1_intercepts = 0x%08x general2_intercepts = 0x%08x\n",
+        vmcb->general1_intercepts, vmcb->general2_intercepts);
+    printk("iopm_base_pa = %016llx msrpm_base_pa = 0x%016llx tsc_offset = "
+        "0x%016llx\n",
+        (unsigned long long)vmcb->iopm_base_pa,
+        (unsigned long long)vmcb->msrpm_base_pa,
+        (unsigned long long)vmcb->tsc_offset);
+    printk("tlb_control = 0x%08x vintr = 0x%016llx interrupt_shadow = "
+        "0x%016llx\n", vmcb->tlb_control,
+        (unsigned long long)vmcb->vintr.bytes,
+        (unsigned long long)vmcb->interrupt_shadow);
+    printk("exitcode = 0x%016llx exitintinfo = 0x%016llx\n",
+        (unsigned long long)vmcb->exitcode,
+        (unsigned long long)vmcb->exitintinfo.bytes);
+    printk("exitinfo1 = 0x%016llx exitinfo2 = 0x%016llx \n",
+        (unsigned long long)vmcb->exitinfo1,
+        (unsigned long long)vmcb->exitinfo2);
+    printk("np_enable = 0x%016llx guest_asid = 0x%03x\n",
+        (unsigned long long)vmcb->np_enable, vmcb->guest_asid);
+    printk("cpl = %d efer = 0x%016llx star = 0x%016llx lstar = 0x%016llx\n",
+        vmcb->cpl, (unsigned long long)vmcb->efer,
+        (unsigned long long)vmcb->star, (unsigned long long)vmcb->lstar);
+    printk("CR0 = 0x%016llx CR2 = 0x%016llx\n",
+        (unsigned long long)vmcb->cr0, (unsigned long long)vmcb->cr2);
+    printk("CR3 = 0x%016llx CR4 = 0x%016llx\n",
+        (unsigned long long)vmcb->cr3, (unsigned long long)vmcb->cr4);
+    printk("RSP = 0x%016llx RIP = 0x%016llx\n",
+        (unsigned long long)vmcb->rsp, (unsigned long long)vmcb->rip);
+    printk("RAX = 0x%016llx RFLAGS=0x%016llx\n",
+        (unsigned long long)vmcb->rax, (unsigned long long)vmcb->rflags);
+    printk("DR6 = 0x%016llx, DR7 = 0x%016llx\n",
+        (unsigned long long)vmcb->dr6, (unsigned long long)vmcb->dr7);
+    printk("CSTAR = 0x%016llx SFMask = 0x%016llx\n",
+        (unsigned long long)vmcb->cstar, (unsigned long long)vmcb->sfmask);
+    printk("KernGSBase = 0x%016llx PAT = 0x%016llx \n",
+        (unsigned long long)vmcb->kerngsbase, (unsigned long long)vmcb->g_pat);
+    printk("H_CR3 = 0x%016llx\n", (unsigned long long)vmcb->h_cr3);
+
+    /* Segment registers first, then GDTR/LDTR/IDTR/TR. */
+    svm_dump_sel("CS", &vmcb->cs);
+    svm_dump_sel("DS", &vmcb->ds);
+    svm_dump_sel("SS", &vmcb->ss);
+    svm_dump_sel("ES", &vmcb->es);
+    svm_dump_sel("FS", &vmcb->fs);
+    svm_dump_sel("GS", &vmcb->gs);
+    svm_dump_sel("GDTR", &vmcb->gdtr);
+    svm_dump_sel("LDTR", &vmcb->ldtr);
+    svm_dump_sel("IDTR", &vmcb->idtr);
+    svm_dump_sel("TR", &vmcb->tr);
+}
+
+/*
+ * Sanity-check @vmcb: returns 0 if it passes every check, 1 otherwise.
+ * If @verbose, all failures are logged (prefixed by @from) before returning.
+ */
+bool_t
+svm_vmcb_isvalid(const char *from, struct vmcb_struct *vmcb,
+                 bool_t verbose)
+{
+    bool_t ret = 0; /* ok */
+
+/* Report one failed check: bail out at once unless running verbosely. */
+#define PRINTF(...) do { \
+        if ( !verbose ) return 1; \
+        ret = 1; printk("%s: ", from); printk(__VA_ARGS__); \
+    } while (0)
+
+    if ((vmcb->efer & EFER_SVME) == 0) {
+        PRINTF("EFER: SVME bit not set (0x%"PRIx64")\n", vmcb->efer);
+    }
+
+    if ((vmcb->cr0 & X86_CR0_CD) == 0 && (vmcb->cr0 & X86_CR0_NW) != 0) {
+        PRINTF("CR0: CD bit is zero and NW bit set (0x%"PRIx64")\n",
+               vmcb->cr0);
+    }
+
+    if ((vmcb->cr0 >> 32U) != 0) {
+        PRINTF("CR0: bits [63:32] are not zero (0x%"PRIx64")\n", vmcb->cr0);
+    }
+
+    if ((vmcb->cr3 & 0x7) != 0) {
+        PRINTF("CR3: MBZ bits are set (0x%"PRIx64")\n", vmcb->cr3);
+    }
+    if ((vmcb->efer & EFER_LMA) && (vmcb->cr3 & 0xfe) != 0) {
+        PRINTF("CR3: MBZ bits are set (0x%"PRIx64")\n", vmcb->cr3);
+    }
+
+    if ((vmcb->cr4 >> 11U) != 0) {
+        PRINTF("CR4: bits [63:11] are not zero (0x%"PRIx64")\n", vmcb->cr4);
+    }
+
+    if ((vmcb->dr6 >> 32U) != 0) {
+        PRINTF("DR6: bits [63:32] are not zero (0x%"PRIx64")\n", vmcb->dr6);
+    }
+
+    if ((vmcb->dr7 >> 32U) != 0) {
+        PRINTF("DR7: bits [63:32] are not zero (0x%"PRIx64")\n", vmcb->dr7);
+    }
+
+    if ((vmcb->efer >> 15U) != 0) {
+        PRINTF("EFER: bits [63:15] are not zero (0x%"PRIx64")\n", vmcb->efer);
+    }
+
+    if ((vmcb->efer & EFER_LME) != 0 && ((vmcb->cr0 & X86_CR0_PG) != 0)) {
+        if ((vmcb->cr4 & X86_CR4_PAE) == 0) {
+            PRINTF("EFER_LME and CR0.PG are both set and CR4.PAE is zero.\n");
+        }
+        if ((vmcb->cr0 & X86_CR0_PE) == 0) {
+            PRINTF("EFER_LME and CR0.PG are both set and CR0.PE is zero.\n");
+        }
+    }
+
+    if ((vmcb->efer & EFER_LME) != 0
+        && (vmcb->cr0 & X86_CR0_PG) != 0
+        && (vmcb->cr4 & X86_CR4_PAE) != 0
+        && (vmcb->cs.attr.fields.l != 0)
+        && (vmcb->cs.attr.fields.db != 0))
+    {
+        PRINTF("EFER_LME, CR0.PG, CR4.PAE, CS.L and CS.D are all non-zero.\n");
+    }
+
+    if ((vmcb->general2_intercepts & GENERAL2_INTERCEPT_VMRUN) == 0) {
+        PRINTF("GENERAL2_INTERCEPT: VMRUN intercept bit is clear (0x%"PRIx32")\n",
+               vmcb->general2_intercepts);
+    }
+
+    if (vmcb->eventinj.fields.resvd1 != 0) {
+        PRINTF("eventinj: MBZ bits are set (0x%"PRIx64")\n",
+               vmcb->eventinj.bytes);
+    }
+
+    if (vmcb->np_enable && vmcb->h_cr3 == 0) {
+        PRINTF("nested paging enabled but host cr3 is 0\n");
+    }
+
+#undef PRINTF
+    return ret;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 584b6f1a0511 -r 87774fa72e28 xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c
+++ b/xen/arch/x86/hvm/svm/vmcb.c
@@ -33,6 +33,7 @@
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/intr.h>
#include <asm/hvm/svm/asid.h>
+#include <asm/hvm/svm/svmdebug.h>
#include <xen/event.h>
#include <xen/kernel.h>
#include <xen/domain_page.h>
@@ -75,37 +76,6 @@ struct host_save_area *alloc_host_save_a
return hsa;
}
-void svm_intercept_msr(struct vcpu *v, uint32_t msr, int enable)
-{
- unsigned long *msr_bitmap = v->arch.hvm_svm.msrpm;
- unsigned long *msr_bit = NULL;
-
- /*
- * See AMD64 Programmers Manual, Vol 2, Section 15.10 (MSR-Bitmap Address).
- */
- if ( msr <= 0x1fff )
- msr_bit = msr_bitmap + 0x0000 / BYTES_PER_LONG;
- else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
- msr_bit = msr_bitmap + 0x0800 / BYTES_PER_LONG;
- else if ( (msr >= 0xc0010000) && (msr <= 0xc0011fff) )
- msr_bit = msr_bitmap + 0x1000 / BYTES_PER_LONG;
-
- BUG_ON(msr_bit == NULL);
-
- msr &= 0x1fff;
-
- if ( enable )
- {
- __set_bit(msr * 2, msr_bit);
- __set_bit(msr * 2 + 1, msr_bit);
- }
- else
- {
- __clear_bit(msr * 2, msr_bit);
- __clear_bit(msr * 2 + 1, msr_bit);
- }
-}
-
static int construct_vmcb(struct vcpu *v)
{
struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
@@ -250,7 +220,7 @@ static int construct_vmcb(struct vcpu *v
if ( cpu_has_pause_filter )
{
- vmcb->pause_filter_count = 3000;
+ vmcb->pause_filter_count = SVM_PAUSEFILTER_INIT;
vmcb->general1_intercepts |= GENERAL1_INTERCEPT_PAUSE;
}
@@ -298,76 +268,6 @@ void svm_destroy_vmcb(struct vcpu *v)
arch_svm->vmcb = NULL;
}
-static void svm_dump_sel(char *name, svm_segment_register_t *s)
-{
- printk("%s: sel=0x%04x, attr=0x%04x, limit=0x%08x, base=0x%016llx\n",
- name, s->sel, s->attr.bytes, s->limit,
- (unsigned long long)s->base);
-}
-
-void svm_dump_vmcb(const char *from, struct vmcb_struct *vmcb)
-{
- printk("Dumping guest's current state at %s...\n", from);
- printk("Size of VMCB = %d, address = %p\n",
- (int) sizeof(struct vmcb_struct), vmcb);
-
- printk("cr_intercepts = 0x%08x dr_intercepts = 0x%08x "
- "exception_intercepts = 0x%08x\n",
- vmcb->cr_intercepts, vmcb->dr_intercepts,
- vmcb->exception_intercepts);
- printk("general1_intercepts = 0x%08x general2_intercepts = 0x%08x\n",
- vmcb->general1_intercepts, vmcb->general2_intercepts);
- printk("iopm_base_pa = %016llx msrpm_base_pa = 0x%016llx tsc_offset = "
- "0x%016llx\n",
- (unsigned long long) vmcb->iopm_base_pa,
- (unsigned long long) vmcb->msrpm_base_pa,
- (unsigned long long) vmcb->tsc_offset);
- printk("tlb_control = 0x%08x vintr = 0x%016llx interrupt_shadow = "
- "0x%016llx\n", vmcb->tlb_control,
- (unsigned long long) vmcb->vintr.bytes,
- (unsigned long long) vmcb->interrupt_shadow);
- printk("exitcode = 0x%016llx exitintinfo = 0x%016llx\n",
- (unsigned long long) vmcb->exitcode,
- (unsigned long long) vmcb->exitintinfo.bytes);
- printk("exitinfo1 = 0x%016llx exitinfo2 = 0x%016llx \n",
- (unsigned long long) vmcb->exitinfo1,
- (unsigned long long) vmcb->exitinfo2);
- printk("np_enable = 0x%016llx guest_asid = 0x%03x\n",
- (unsigned long long) vmcb->np_enable, vmcb->guest_asid);
- printk("cpl = %d efer = 0x%016llx star = 0x%016llx lstar = 0x%016llx\n",
- vmcb->cpl, (unsigned long long) vmcb->efer,
- (unsigned long long) vmcb->star, (unsigned long long) vmcb->lstar);
- printk("CR0 = 0x%016llx CR2 = 0x%016llx\n",
- (unsigned long long) vmcb->cr0, (unsigned long long) vmcb->cr2);
- printk("CR3 = 0x%016llx CR4 = 0x%016llx\n",
- (unsigned long long) vmcb->cr3, (unsigned long long) vmcb->cr4);
- printk("RSP = 0x%016llx RIP = 0x%016llx\n",
- (unsigned long long) vmcb->rsp, (unsigned long long) vmcb->rip);
- printk("RAX = 0x%016llx RFLAGS=0x%016llx\n",
- (unsigned long long) vmcb->rax, (unsigned long long) vmcb->rflags);
- printk("DR6 = 0x%016llx, DR7 = 0x%016llx\n",
- (unsigned long long) vmcb->dr6, (unsigned long long) vmcb->dr7);
- printk("CSTAR = 0x%016llx SFMask = 0x%016llx\n",
- (unsigned long long) vmcb->cstar,
- (unsigned long long) vmcb->sfmask);
- printk("KernGSBase = 0x%016llx PAT = 0x%016llx \n",
- (unsigned long long) vmcb->kerngsbase,
- (unsigned long long) vmcb->g_pat);
- printk("H_CR3 = 0x%016llx\n", (unsigned long long)vmcb->h_cr3);
-
- /* print out all the selectors */
- svm_dump_sel("CS", &vmcb->cs);
- svm_dump_sel("DS", &vmcb->ds);
- svm_dump_sel("SS", &vmcb->ss);
- svm_dump_sel("ES", &vmcb->es);
- svm_dump_sel("FS", &vmcb->fs);
- svm_dump_sel("GS", &vmcb->gs);
- svm_dump_sel("GDTR", &vmcb->gdtr);
- svm_dump_sel("LDTR", &vmcb->ldtr);
- svm_dump_sel("IDTR", &vmcb->idtr);
- svm_dump_sel("TR", &vmcb->tr);
-}
-
static void vmcb_dump(unsigned char ch)
{
struct domain *d;
@@ -385,7 +285,7 @@ static void vmcb_dump(unsigned char ch)
for_each_vcpu ( d, v )
{
printk("\tVCPU %d\n", v->vcpu_id);
- svm_dump_vmcb("key_handler", v->arch.hvm_svm.vmcb);
+ svm_vmcb_dump("key_handler", v->arch.hvm_svm.vmcb);
}
}
diff -r 584b6f1a0511 -r 87774fa72e28 xen/include/asm-x86/hvm/svm/emulate.h
--- a/xen/include/asm-x86/hvm/svm/emulate.h
+++ b/xen/include/asm-x86/hvm/svm/emulate.h
@@ -32,6 +32,11 @@ enum instruction_index {
INSTR_INT3,
INSTR_RDTSC,
INSTR_PAUSE,
+ INSTR_VMRUN,
+ INSTR_VMLOAD,
+ INSTR_VMSAVE,
+ INSTR_STGI,
+ INSTR_CLGI,
INSTR_MAX_COUNT /* Must be last - Number of instructions supported */
};
diff -r 584b6f1a0511 -r 87774fa72e28 xen/include/asm-x86/hvm/svm/nestedsvm.h
--- /dev/null
+++ b/xen/include/asm-x86/hvm/svm/nestedsvm.h
@@ -0,0 +1,87 @@
+/*
+ * nestedsvm.h: Nested Virtualization
+ * Copyright (c) 2010, Advanced Micro Devices, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __ASM_X86_HVM_SVM_NESTEDSVM_H__
+#define __ASM_X86_HVM_SVM_NESTEDSVM_H__
+
+#include <asm/config.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/svm/vmcb.h>
+
+struct nestedsvm { /* SVM-specific nested virtualization state */
+ uint64_t ns_msr_hsavepa; /* MSR HSAVE_PA value */
+
+ /* Cached real intercepts of the nested guest */
+ uint32_t ns_cr_intercepts;
+ uint32_t ns_dr_intercepts;
+ uint32_t ns_exception_intercepts;
+ uint32_t ns_general1_intercepts;
+ uint32_t ns_general2_intercepts;
+
+ /* Cached real lbr of the nested guest */
+ lbrctrl_t ns_lbr_control;
+
+ void *ns_hostsave; /* NOTE(review): opaque here; owner/lifetime not visible in this header */
+
+ union {
+ uint32_t bytes; /* all host flags viewed as one 32-bit word */
+ struct {
+ uint32_t rflagsif: 1;
+ uint32_t vintrmask: 1;
+ uint32_t reserved: 30; /* pads the flag bits to the 32 bits of 'bytes' */
+ } fields;
+ } ns_hostflags;
+};
+
+/* True when l1 guest enabled SVM in EFER */
+#define hvm_svm_enabled(v) \
+ (!!((v)->arch.hvm_vcpu.guest_efer & EFER_SVME))
+
+/* Interface methods */
+int nsvm_vcpu_destroy(struct vcpu *v);
+int nsvm_vcpu_initialise(struct vcpu *v);
+int nsvm_vcpu_reset(struct vcpu *v);
+int nsvm_vcpu_hostrestore(struct vcpu *v, struct cpu_user_regs *regs);
+int nsvm_vcpu_vmrun(struct vcpu *v, struct cpu_user_regs *regs,
+ unsigned int inst_len);
+int nsvm_vcpu_vmexit(struct vcpu *v, struct cpu_user_regs *regs,
+ uint64_t exitcode);
+uint64_t nsvm_vmcb_exitcode_native2generic(struct vcpu *v, uint64_t exitcode);
+int nsvm_vmcb_guest_intercepts_exitcode(struct vcpu *v,
+ struct cpu_user_regs *regs, uint64_t exitcode);
+int nsvm_vmcb_guest_intercepts_trap(struct vcpu *v, unsigned int trapnr);
+int nsvm_vmcb_prepare4vmexit(struct vcpu *v);
+int nsvm_vmcb_isvalid(struct vcpu *v, uint64_t vmcxaddr);
+int nsvm_vcpu_vmexit_trap(struct vcpu *v, unsigned int trapnr,
+ int errcode, unsigned long cr2);
+
+/* MSRs */
+int nsvm_rdmsr(struct vcpu *v, unsigned int msr, uint64_t *msr_content);
+int nsvm_wrmsr(struct vcpu *v, unsigned int msr, uint64_t msr_content);
+
+#endif /* __ASM_X86_HVM_SVM_NESTEDSVM_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 584b6f1a0511 -r 87774fa72e28 xen/include/asm-x86/hvm/svm/svm.h
--- a/xen/include/asm-x86/hvm/svm/svm.h
+++ b/xen/include/asm-x86/hvm/svm/svm.h
@@ -29,8 +29,6 @@
#include <asm/i387.h>
#include <asm/hvm/vpmu.h>
-void svm_dump_vmcb(const char *from, struct vmcb_struct *vmcb);
-
#define SVM_REG_EAX (0)
#define SVM_REG_ECX (1)
#define SVM_REG_EDX (2)
@@ -62,6 +60,8 @@ static inline void svm_vmsave(void *vmcb
: : "a" (__pa(vmcb)) : "memory" );
}
+unsigned long *svm_msrbit(unsigned long *msr_bitmap, uint32_t msr);
+
extern u32 svm_feature_flags;
#define SVM_FEATURE_NPT 0
@@ -76,4 +76,6 @@ extern u32 svm_feature_flags;
#define cpu_has_svm_nrips test_bit(SVM_FEATURE_NRIPS, &svm_feature_flags)
#define cpu_has_pause_filter test_bit(SVM_FEATURE_PAUSEF, &svm_feature_flags)
+#define SVM_PAUSEFILTER_INIT 3000
+
#endif /* __ASM_X86_HVM_SVM_H__ */
diff -r 584b6f1a0511 -r 87774fa72e28 xen/include/asm-x86/hvm/svm/svmdebug.h
--- /dev/null
+++ b/xen/include/asm-x86/hvm/svm/svmdebug.h
@@ -0,0 +1,30 @@
+/*
+ * svmdebug.h: SVM related debug definitions
+ * Copyright (c) 2010, AMD Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef __ASM_X86_HVM_SVM_SVMDEBUG_H__
+#define __ASM_X86_HVM_SVM_SVMDEBUG_H__
+
+#include <asm/types.h>
+#include <asm/hvm/svm/vmcb.h>
+
+void svm_vmcb_dump(const char *from, struct vmcb_struct *vmcb);
+bool_t svm_vmcb_isvalid(const char *from, struct vmcb_struct *vmcb,
+ bool_t verbose);
+
+#endif /* __ASM_X86_HVM_SVM_SVMDEBUG_H__ */
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 09/13] Nested Virtualization: svm specific implementation
@ 2010-11-12 18:43 Christoph Egger
2010-11-16 14:54 ` Tim Deegan
0 siblings, 1 reply; 5+ messages in thread
From: Christoph Egger @ 2010-11-12 18:43 UTC (permalink / raw)
To: xen-devel
[-- Attachment #1: Type: text/plain, Size: 264 bytes --]
--
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632
[-- Attachment #2: xen_nh09_svm.diff --]
[-- Type: text/x-diff, Size: 72440 bytes --]
# HG changeset patch
# User cegger
# Date 1289584001 -3600
Implement SVM specific part for Nested Virtualization
Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
diff -r 54dd43df686b -r 3bfc06e2e41a xen/arch/x86/hvm/svm/Makefile
--- a/xen/arch/x86/hvm/svm/Makefile
+++ b/xen/arch/x86/hvm/svm/Makefile
@@ -2,6 +2,8 @@ obj-y += asid.o
obj-y += emulate.o
obj-y += entry.o
obj-y += intr.o
+obj-y += nestedsvm.o
obj-y += svm.o
+obj-y += svmdebug.o
obj-y += vmcb.o
obj-y += vpmu.o
diff -r 54dd43df686b -r 3bfc06e2e41a xen/arch/x86/hvm/svm/emulate.c
--- a/xen/arch/x86/hvm/svm/emulate.c
+++ b/xen/arch/x86/hvm/svm/emulate.c
@@ -101,6 +101,11 @@ MAKE_INSTR(HLT, 1, 0xf4);
MAKE_INSTR(INT3, 1, 0xcc);
MAKE_INSTR(RDTSC, 2, 0x0f, 0x31);
MAKE_INSTR(PAUSE, 1, 0x90);
+MAKE_INSTR(VMRUN, 3, 0x0f, 0x01, 0xd8);
+MAKE_INSTR(VMLOAD, 3, 0x0f, 0x01, 0xda);
+MAKE_INSTR(VMSAVE, 3, 0x0f, 0x01, 0xdb);
+MAKE_INSTR(STGI, 3, 0x0f, 0x01, 0xdc);
+MAKE_INSTR(CLGI, 3, 0x0f, 0x01, 0xdd);
static const u8 *opc_bytes[INSTR_MAX_COUNT] =
{
@@ -114,6 +119,11 @@ static const u8 *opc_bytes[INSTR_MAX_COU
[INSTR_INT3] = OPCODE_INT3,
[INSTR_RDTSC] = OPCODE_RDTSC,
[INSTR_PAUSE] = OPCODE_PAUSE,
+ [INSTR_VMRUN] = OPCODE_VMRUN,
+ [INSTR_VMLOAD] = OPCODE_VMLOAD,
+ [INSTR_VMSAVE] = OPCODE_VMSAVE,
+ [INSTR_STGI] = OPCODE_STGI,
+ [INSTR_CLGI] = OPCODE_CLGI,
};
static int fetch(struct vcpu *v, u8 *buf, unsigned long addr, int len)
diff -r 54dd43df686b -r 3bfc06e2e41a xen/arch/x86/hvm/svm/entry.S
--- a/xen/arch/x86/hvm/svm/entry.S
+++ b/xen/arch/x86/hvm/svm/entry.S
@@ -54,6 +54,7 @@
ENTRY(svm_asm_do_resume)
call svm_intr_assist
+ call_with_regs(nsvm_vcpu_switch)
get_current(bx)
CLGI
diff -r 54dd43df686b -r 3bfc06e2e41a xen/arch/x86/hvm/svm/nestedsvm.c
--- /dev/null
+++ b/xen/arch/x86/hvm/svm/nestedsvm.c
@@ -0,0 +1,1185 @@
+/*
+ * nestedsvm.c: Nested Virtualization
+ * Copyright (c) 2010, Advanced Micro Devices, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <asm/hvm/support.h>
+#include <asm/hvm/svm/emulate.h>
+#include <asm/hvm/svm/svm.h>
+#include <asm/hvm/nestedhvm.h>
+#include <asm/hvm/svm/nestedsvm.h>
+#include <asm/hvm/svm/svmdebug.h>
+#include <asm/paging.h> /* paging_mode_hap */
+
+/*
+ * Sanity-check a guest supplied physical address (used for the
+ * HSAVE_PA MSR value).
+ * Returns 1 if the address is accepted, 0 otherwise.
+ */
+static int
+nestedsvm_vmcb_isvalid(struct vcpu *v, uint64_t vmcxaddr)
+{
+    /* Reject unless SVM is enabled for this vcpu ... */
+    if ( !hvm_svm_enabled(v) )
+        return 0;
+
+    /* ... and hvm_guest_x86_mode() reports at least 2. */
+    if ( hvm_guest_x86_mode(v) < 2 )
+        return 0;
+
+    /* Upper bound of the valid physical address range.
+     * See AMD BKDG for HSAVE_PA MSR.
+     */
+    return (vmcxaddr <= 0xfd00000000ULL) ? 1 : 0;
+}
+
+/*
+ * Make sure the guest frame containing vmcbaddr is mapped at
+ * nv->nv_vmcx. A mapping of a different address is dropped first;
+ * a mapping of the same address is reused.
+ * Returns 1 on success, 0 if the guest frame could not be mapped.
+ */
+int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr)
+{
+    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
+
+    if (nv->nv_vmcx != NULL) {
+        /* Already mapped at the requested address: nothing to do. */
+        if (nv->nv_vmcxaddr == vmcbaddr)
+            return 1;
+
+        /* Stale mapping of another address: release it. */
+        ASSERT(nv->nv_vmcxaddr != 0);
+        hvm_unmap_guest_frame(nv->nv_vmcx);
+        nv->nv_vmcx = NULL;
+        nv->nv_vmcxaddr = 0;
+    }
+
+    nv->nv_vmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT);
+    if (nv->nv_vmcx == NULL)
+        return 0;
+
+    /* Only record the address once the mapping exists. */
+    nv->nv_vmcxaddr = vmcbaddr;
+    return 1;
+}
+
+/* Interface methods */
+/*
+ * Allocate the per-vcpu nested SVM resources: the cached and the
+ * merged MSR permission maps (both zero filled) and the host save
+ * VMCB.
+ * Returns 0 on success. On any allocation failure everything
+ * allocated so far is released via nsvm_vcpu_destroy() and -ENOMEM
+ * is returned.
+ */
+int nsvm_vcpu_initialise(struct vcpu *v)
+{
+    struct nestedsvm *svm = &vcpu_nestedsvm(v);
+
+    /* Copy of the l1 guest's MSR permission map. */
+    svm->ns_cached_msrpm =
+        alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0);
+    if (svm->ns_cached_msrpm == NULL)
+        goto err;
+    memset(svm->ns_cached_msrpm, 0x0, MSRPM_SIZE);
+
+    /* Host map OR'ed with the cached guest map. */
+    svm->ns_merged_msrpm =
+        alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0);
+    if (svm->ns_merged_msrpm == NULL)
+        goto err;
+    memset(svm->ns_merged_msrpm, 0x0, MSRPM_SIZE);
+
+    svm->ns_hostsave = alloc_vmcb();
+    if (svm->ns_hostsave != NULL)
+        return 0;
+
+err:
+    nsvm_vcpu_destroy(v);
+    return -ENOMEM;
+}
+
+/*
+ * Release everything nsvm_vcpu_initialise() allocated. Each pointer
+ * is checked and cleared afterwards, so the function copes with a
+ * partially initialised vcpu and with being called twice.
+ * Always returns 0.
+ */
+int nsvm_vcpu_destroy(struct vcpu *v)
+{
+    struct nestedsvm *svm = &vcpu_nestedsvm(v);
+
+    if (svm->ns_cached_msrpm != NULL) {
+        free_xenheap_pages(svm->ns_cached_msrpm,
+                           get_order_from_bytes(MSRPM_SIZE));
+        svm->ns_cached_msrpm = NULL;
+    }
+
+    if (svm->ns_merged_msrpm != NULL) {
+        free_xenheap_pages(svm->ns_merged_msrpm,
+                           get_order_from_bytes(MSRPM_SIZE));
+        svm->ns_merged_msrpm = NULL;
+    }
+
+    if (svm->ns_hostsave != NULL) {
+        free_vmcb(svm->ns_hostsave);
+        svm->ns_hostsave = NULL;
+    }
+
+    return 0;
+}
+
+/*
+ * Put the nested SVM bookkeeping of a vcpu back to its zero state.
+ * The allocated buffers (MSR permission maps, host save area) are
+ * left alone. Always returns 0.
+ */
+int nsvm_vcpu_reset(struct vcpu *v)
+{
+    struct nestedsvm *svm = &vcpu_nestedsvm(v);
+
+    /* Guest visible HSAVE_PA MSR value */
+    svm->ns_msr_hsavepa = 0;
+
+    /* Cached copies of the l1 guest's intercepts and LBR control */
+    svm->ns_cr_intercepts = 0;
+    svm->ns_dr_intercepts = 0;
+    svm->ns_exception_intercepts = 0;
+    svm->ns_general1_intercepts = 0;
+    svm->ns_general2_intercepts = 0;
+    svm->ns_lbr_control.bytes = 0;
+
+    /* Paging state and ASID remembered from the last VMRUN */
+    svm->ns_hap_enabled = 0;
+    svm->ns_vmcb_guestcr3 = 0;
+    svm->ns_vmcb_hostcr3 = 0;
+    svm->ns_guest_asid = 0;
+
+    /* Host flags and pending #VMEXIT information */
+    svm->ns_hostflags.bytes = 0;
+    svm->ns_vmexit.exitinfo1 = 0;
+    svm->ns_vmexit.exitinfo2 = 0;
+
+    return 0;
+}
+
+/*
+ * Copy the VMCB fields that are handled by VMSAVE/VMLOAD emulation
+ * from one VMCB to another.
+ */
+static void nsvm_vmcb_loadsave(struct vmcb_struct *from,
+    struct vmcb_struct *to)
+{
+    /* Segment registers */
+    to->fs = from->fs;
+    to->gs = from->gs;
+    to->tr = from->tr;
+    to->ldtr = from->ldtr;
+
+    /* SYSCALL related MSRs and the kernel GS base */
+    to->kerngsbase = from->kerngsbase;
+    to->star = from->star;
+    to->lstar = from->lstar;
+    to->cstar = from->cstar;
+    to->sfmask = from->sfmask;
+
+    /* SYSENTER MSRs */
+    to->sysenter_cs = from->sysenter_cs;
+    to->sysenter_esp = from->sysenter_esp;
+    to->sysenter_eip = from->sysenter_eip;
+}
+
+/*
+ * Snapshot the current (l1 guest) VMCB into the host save area so it
+ * can be brought back on #VMEXIT emulation. The saved rip is advanced
+ * by inst_len. Always returns 0.
+ */
+static int nsvm_vcpu_hostsave(struct vcpu *v, unsigned int inst_len)
+{
+    struct nestedsvm *svm = &vcpu_nestedsvm(v);
+    struct vmcb_struct *cur = v->arch.hvm_svm.vmcb;
+    struct vmcb_struct *save = svm->ns_hostsave;
+
+    memcpy(save, cur, sizeof(struct vmcb_struct));
+    save->rip += inst_len;
+
+    /* Remember the host interrupt flag */
+    if (save->rflags & X86_EFLAGS_IF)
+        svm->ns_hostflags.fields.rflagsif = 1;
+    else
+        svm->ns_hostflags.fields.rflagsif = 0;
+
+    if (nestedhvm_paging_mode_hap(v)) {
+        /* guest: nested paging mode */
+        save->cr3 = cur->cr3;
+        save->h_cr3 = cur->h_cr3;
+    } else if (paging_mode_hap(v->domain)) {
+        /* host: nested paging, guest: shadow paging */
+        save->cr3 = cur->cr3;
+    } else {
+        /* host: shadow paging, guest: shadow paging */
+        save->cr3 = v->arch.hvm_vcpu.guest_cr[3];
+    }
+
+    /* Store the guest view of EFER and the control registers. */
+    save->efer = v->arch.hvm_vcpu.guest_efer;
+    save->cr0 = v->arch.hvm_vcpu.guest_cr[0];
+    save->cr2 = v->arch.hvm_vcpu.guest_cr[2];
+    save->cr4 = v->arch.hvm_vcpu.guest_cr[4];
+
+    return 0;
+}
+
+/*
+ * Restore the state saved by nsvm_vcpu_hostsave() into the active
+ * VMCB and into regs.
+ *
+ * The fields handled by VMSAVE/VMLOAD are first copied from the
+ * active VMCB into the save area so the following memcpy() does not
+ * overwrite them with stale values. EFER, CR4, CR0, CR2 and CR3 are
+ * then re-applied through the hvm_set_*() / hvm_update_guest_cr()
+ * helpers.
+ *
+ * Failures of the hvm_set_*() helpers are only logged; the function
+ * always returns 0.
+ */
+int nsvm_vcpu_hostrestore(struct vcpu *v, struct cpu_user_regs *regs)
+{
+ struct nestedsvm *svm = &vcpu_nestedsvm(v);
+ struct vmcb_struct *hsave, *vmcb;
+ int rc;
+
+ hsave = svm->ns_hostsave;
+ vmcb = v->arch.hvm_svm.vmcb;
+
+ /* Must keep register values handled by VMSAVE/VMLOAD */
+ nsvm_vmcb_loadsave(vmcb, hsave);
+ memcpy(vmcb, hsave, sizeof(struct vmcb_struct));
+
+ /* EFER */
+ v->arch.hvm_vcpu.guest_efer = vmcb->efer;
+ rc = hvm_set_efer(vmcb->efer);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);
+
+ /* CR4 */
+ v->arch.hvm_vcpu.guest_cr[4] = vmcb->cr4;
+ rc = hvm_set_cr4(vmcb->cr4);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);
+
+ /* CR0 */
+ /* CR0.PE is forced on and EFLAGS.VM is cleared. */
+ v->arch.hvm_vcpu.guest_cr[0] = vmcb->cr0 | X86_CR0_PE;
+ vmcb->rflags &= ~X86_EFLAGS_VM;
+ rc = hvm_set_cr0(vmcb->cr0 | X86_CR0_PE);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);
+
+ /* CR2 */
+ v->arch.hvm_vcpu.guest_cr[2] = vmcb->cr2;
+ hvm_update_guest_cr(v, 2);
+
+ /* CR3 */
+ /* Nested paging mode */
+ if (nestedhvm_paging_mode_hap(v)) {
+ /* host nested paging + guest nested paging. */
+ /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+ } else if (paging_mode_hap(v->domain)) {
+ /* host nested paging + guest shadow paging. */
+ /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+ } else {
+ /* host shadow paging + guest shadow paging. */
+
+ /* Reset MMU context -- XXX (hostrestore) not yet working*/
+ /* Drop the reference on the current guest table, if any. */
+ if (!pagetable_is_null(v->arch.guest_table))
+ put_page(pagetable_get_page(v->arch.guest_table));
+ v->arch.guest_table = pagetable_null();
+ /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+ }
+ rc = hvm_set_cr3(vmcb->cr3);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
+
+ /* Hand the restored general purpose state to the caller's regs. */
+ regs->eax = vmcb->rax;
+ regs->esp = vmcb->rsp;
+ regs->eip = vmcb->rip;
+ regs->eflags = vmcb->rflags;
+ vmcb->dr7 = 0; /* disable all breakpoints */
+ vmcb->cpl = 0;
+
+ /* Clear exitintinfo to prevent a fault loop of re-injecting
+ * exceptions forever.
+ */
+ vmcb->exitintinfo.bytes = 0;
+
+ hvm_asid_flush_vcpu(v);
+
+ return 0;
+}
+
+/*
+ * Build the MSR permission map used while the nested guest runs:
+ * fetch the l1 guest's map into the cache, OR it with the host's map
+ * and point the active VMCB at the merged result.
+ * Returns 0 on success, 1 if the guest's map could not be copied.
+ */
+static int nsvm_vmrun_permissionmap(struct vcpu *v)
+{
+    struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
+    struct nestedsvm *svm = &vcpu_nestedsvm(v);
+    struct vmcb_struct *ns_vmcb = vcpu_nestedhvm(v).nv_vmcx;
+    struct vmcb_struct *host_vmcb = arch_svm->vmcb;
+    unsigned long *guest_map = (unsigned long *)svm->ns_cached_msrpm;
+    enum hvm_copy_result copied;
+    unsigned int idx;
+
+    copied = hvm_copy_from_guest_phys(svm->ns_cached_msrpm,
+                                      ns_vmcb->msrpm_base_pa, MSRPM_SIZE);
+    if (copied != HVMCOPY_okay) {
+        gdprintk(XENLOG_ERR, "hvm_copy_from_guest_phys msrpm %u\n", copied);
+        return 1;
+    }
+
+    /* Skip io bitmap merge since hvm_io_bitmap has all bits set but
+     * 0x80 and 0xed.
+     */
+
+    /* v->arch.hvm_svm.msrpm has type unsigned long, thus the map is
+     * merged in BYTES_PER_LONG sized steps.
+     */
+    for (idx = 0; idx < MSRPM_SIZE / BYTES_PER_LONG; idx++)
+        svm->ns_merged_msrpm[idx] = arch_svm->msrpm[idx] | guest_map[idx];
+
+    host_vmcb->iopm_base_pa = (uint64_t)virt_to_maddr(hvm_io_bitmap);
+    host_vmcb->msrpm_base_pa = (uint64_t)virt_to_maddr(svm->ns_merged_msrpm);
+
+    return 0;
+}
+
+/*
+ * Emulate the state load of VMRUN: merge the l1 guest's VMCB
+ * (ns_vmcb, mapped at nv_vmcx) into the VMCB that is actually run
+ * (host_vmcb) and load the nested guest's rax/rip/rsp/rflags into
+ * regs.
+ *
+ * The l1 guest's intercepts and LBR control are cached in the nested
+ * SVM state first so they can be handed back unmodified on #VMEXIT.
+ *
+ * Returns 0 on success. A non-zero value is returned when the MSR
+ * permission map cannot be set up or when svm_vmcb_isvalid() reports
+ * a problem with either VMCB. Failures of hvm_set_efer/cr0/cr3/cr4
+ * are only logged.
+ */
+static int nsvm_vmcb_prepare4vmrun(struct vcpu *v, struct cpu_user_regs *regs)
+{
+ struct nestedsvm *svm = &vcpu_nestedsvm(v);
+ struct vmcb_struct *ns_vmcb = vcpu_nestedhvm(v).nv_vmcx;
+ struct vmcb_struct *host_vmcb = v->arch.hvm_svm.vmcb;
+ int rc;
+
+ /* Enable nested guest intercepts */
+ svm->ns_cr_intercepts = ns_vmcb->cr_intercepts;
+ svm->ns_dr_intercepts = ns_vmcb->dr_intercepts;
+ svm->ns_exception_intercepts = ns_vmcb->exception_intercepts;
+ svm->ns_general1_intercepts = ns_vmcb->general1_intercepts;
+ svm->ns_general2_intercepts = ns_vmcb->general2_intercepts;
+
+ /* The running VMCB intercepts the union of host and l1 intercepts. */
+ host_vmcb->cr_intercepts |= ns_vmcb->cr_intercepts;
+ host_vmcb->dr_intercepts |= ns_vmcb->dr_intercepts;
+ host_vmcb->exception_intercepts |= ns_vmcb->exception_intercepts;
+ host_vmcb->general1_intercepts |= ns_vmcb->general1_intercepts;
+ host_vmcb->general2_intercepts |= ns_vmcb->general2_intercepts;
+
+ /* Nested Pause Filter */
+ /* Use the smaller count if the l1 guest intercepts PAUSE itself. */
+ if (ns_vmcb->general1_intercepts & GENERAL1_INTERCEPT_PAUSE)
+ host_vmcb->pause_filter_count =
+ min(ns_vmcb->pause_filter_count, host_vmcb->pause_filter_count);
+ else
+ host_vmcb->pause_filter_count = SVM_PAUSEFILTER_INIT;
+
+ /* Nested IO permission bitmaps */
+ rc = nsvm_vmrun_permissionmap(v);
+ if (rc)
+ return rc;
+
+ /* TSC offset */
+ hvm_set_guest_tsc(v, host_vmcb->tsc_offset + ns_vmcb->tsc_offset);
+
+ /* ASID */
+ hvm_asid_flush_vcpu(v);
+ /* host_vmcb->guest_asid = ns_vmcb->guest_asid; */
+
+ /* TLB control */
+ host_vmcb->tlb_control |= ns_vmcb->tlb_control;
+
+ /* Virtual Interrupts */
+ /* intr_masking is always forced on in the running VMCB. */
+ host_vmcb->vintr = ns_vmcb->vintr;
+ host_vmcb->vintr.fields.intr_masking = 1;
+
+ /* Shadow Mode */
+ host_vmcb->interrupt_shadow = ns_vmcb->interrupt_shadow;
+
+ /* Exit codes */
+ host_vmcb->exitcode = ns_vmcb->exitcode;
+ host_vmcb->exitinfo1 = ns_vmcb->exitinfo1;
+ host_vmcb->exitinfo2 = ns_vmcb->exitinfo2;
+ host_vmcb->exitintinfo = ns_vmcb->exitintinfo;
+
+ /* Pending Interrupts */
+ host_vmcb->eventinj = ns_vmcb->eventinj;
+
+ /* LBR virtualization */
+ svm->ns_lbr_control = ns_vmcb->lbr_control;
+ host_vmcb->lbr_control.bytes |= ns_vmcb->lbr_control.bytes;
+
+ /* NextRIP */
+ host_vmcb->nextrip = ns_vmcb->nextrip;
+
+ /*
+ * VMCB Save State Area
+ */
+
+ /* Segments */
+ host_vmcb->es = ns_vmcb->es;
+ host_vmcb->cs = ns_vmcb->cs;
+ host_vmcb->ss = ns_vmcb->ss;
+ host_vmcb->ds = ns_vmcb->ds;
+ host_vmcb->gdtr = ns_vmcb->gdtr;
+ host_vmcb->idtr = ns_vmcb->idtr;
+
+ /* CPL */
+ host_vmcb->cpl = ns_vmcb->cpl;
+
+ /* EFER */
+ v->arch.hvm_vcpu.guest_efer = ns_vmcb->efer;
+ rc = hvm_set_efer(ns_vmcb->efer);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);
+
+ /* CR4 */
+ v->arch.hvm_vcpu.guest_cr[4] = ns_vmcb->cr4;
+ rc = hvm_set_cr4(ns_vmcb->cr4);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);
+
+ /* CR0 */
+ v->arch.hvm_vcpu.guest_cr[0] = ns_vmcb->cr0;
+ rc = hvm_set_cr0(ns_vmcb->cr0);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);
+
+ /* CR2 */
+ v->arch.hvm_vcpu.guest_cr[2] = ns_vmcb->cr2;
+ hvm_update_guest_cr(v, 2);
+
+ /* Nested paging mode */
+ if (nestedhvm_paging_mode_hap(v)) {
+ /* host nested paging + guest nested paging. */
+
+ /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+ rc = hvm_set_cr3(ns_vmcb->cr3);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
+ } else if (paging_mode_hap(v->domain)) {
+ /* host nested paging + guest shadow paging. */
+ host_vmcb->np_enable = 1;
+ /* Keep h_cr3 as it is. */
+ /* Guest shadow paging: Must intercept pagefaults. */
+ host_vmcb->exception_intercepts |= (1U << TRAP_page_fault);
+ /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
+ rc = hvm_set_cr3(ns_vmcb->cr3);
+ if (rc != X86EMUL_OKAY)
+ gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
+ } else {
+ /* host shadow paging + guest shadow paging. */
+ host_vmcb->np_enable = 0;
+ host_vmcb->h_cr3 = 0x0;
+
+ /* TODO: Once shadow-shadow paging is in place come back to here
+ * and set host_vmcb->cr3 to the shadowed shadow table.
+ */
+ }
+
+ /* DRn */
+ host_vmcb->dr7 = ns_vmcb->dr7;
+ host_vmcb->dr6 = ns_vmcb->dr6;
+
+ /* RFLAGS */
+ host_vmcb->rflags = ns_vmcb->rflags;
+
+ /* RIP */
+ host_vmcb->rip = ns_vmcb->rip;
+
+ /* RSP */
+ host_vmcb->rsp = ns_vmcb->rsp;
+
+ /* RAX */
+ host_vmcb->rax = ns_vmcb->rax;
+
+ /* Keep the host values of the fs, gs, ldtr, tr, kerngsbase,
+ * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
+ * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
+ */
+
+ /* Page tables */
+ host_vmcb->pdpe0 = ns_vmcb->pdpe0;
+ host_vmcb->pdpe1 = ns_vmcb->pdpe1;
+ host_vmcb->pdpe2 = ns_vmcb->pdpe2;
+ host_vmcb->pdpe3 = ns_vmcb->pdpe3;
+
+ /* PAT */
+ host_vmcb->g_pat = ns_vmcb->g_pat;
+
+ /* Debug Control MSR */
+ host_vmcb->debugctlmsr = ns_vmcb->debugctlmsr;
+
+ /* LBR MSRs */
+ host_vmcb->lastbranchfromip = ns_vmcb->lastbranchfromip;
+ host_vmcb->lastbranchtoip = ns_vmcb->lastbranchtoip;
+ host_vmcb->lastintfromip = ns_vmcb->lastintfromip;
+ host_vmcb->lastinttoip = ns_vmcb->lastinttoip;
+
+ /* Validate both VMCBs; a non-zero result aborts the VMRUN emulation. */
+ rc = svm_vmcb_isvalid(__func__, ns_vmcb, 1);
+ if (rc) {
+ gdprintk(XENLOG_ERR, "nested vmcb invalid\n");
+ return rc;
+ }
+
+ rc = svm_vmcb_isvalid(__func__, host_vmcb, 1);
+ if (rc) {
+ gdprintk(XENLOG_ERR, "host vmcb invalid\n");
+ return rc;
+ }
+
+ /* Switch guest registers to nested guest */
+ regs->eax = ns_vmcb->rax;
+ regs->eip = ns_vmcb->rip;
+ regs->esp = ns_vmcb->rsp;
+ regs->eflags = ns_vmcb->rflags;
+
+ return 0;
+}
+
+/*
+ * Perform the world switch part of VMRUN emulation: remember the
+ * values of the l1 guest's VMCB that are needed later, save the l1
+ * guest state into the host save area and load the nested guest
+ * state into the running VMCB.
+ *
+ * inst_len is the length of the VMRUN instruction; it is passed on
+ * to nsvm_vcpu_hostsave() so the saved rip points behind VMRUN.
+ *
+ * Returns 0 on success or the non-zero error of the failing step.
+ */
+static int
+nsvm_vcpu_vmentry(struct vcpu *v, struct cpu_user_regs *regs,
+    unsigned int inst_len)
+{
+    int ret;
+    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
+    struct nestedsvm *svm = &vcpu_nestedsvm(v);
+    struct vmcb_struct *ns_vmcb = nv->nv_vmcx;
+
+    ASSERT(ns_vmcb != NULL);
+
+    /* Save values for later use. Needed for Nested-on-Nested and
+     * Shadow-on-Shadow paging.
+     */
+    svm->ns_vmcb_guestcr3 = ns_vmcb->cr3;
+    svm->ns_vmcb_hostcr3 = ns_vmcb->h_cr3;
+
+    /* Request a p2m flush if the l1 guest asked for a TLB flush or
+     * switched to a different ASID.
+     */
+    nv->nv_flushp2m = (ns_vmcb->tlb_control
+        || (svm->ns_guest_asid != ns_vmcb->guest_asid));
+    svm->ns_guest_asid = ns_vmcb->guest_asid;
+
+    /* nested paging for the guest */
+    svm->ns_hap_enabled = (ns_vmcb->np_enable) ? 1 : 0;
+
+    /* Remember the V_INTR_MASK in hostflags */
+    svm->ns_hostflags.fields.vintrmask =
+        (ns_vmcb->vintr.fields.intr_masking) ? 1 : 0;
+
+    /* Save l1 guest state (= host state) */
+    ret = nsvm_vcpu_hostsave(v, inst_len);
+    if (ret) {
+        gdprintk(XENLOG_ERR, "hostsave failed\n");
+        return ret;
+    }
+
+    /* Load the nested guest state into the running VMCB */
+    ret = nsvm_vmcb_prepare4vmrun(v, regs);
+    if (ret) {
+        gdprintk(XENLOG_ERR, "prepare4vmrun failed, ret = %i\n", ret);
+        return ret;
+    }
+
+    return 0;
+}
+
+/*
+ * Emulate VMRUN for the l1 guest.
+ *
+ * Returns 0 when the vcpu has been switched to guest mode, 1 when
+ * the vmentry failed and #UD has been injected, and -1 when the
+ * instruction length could not be determined (a VMEXIT_SHUTDOWN exit
+ * code is recorded in that case).
+ */
+int
+nsvm_vcpu_vmrun(struct vcpu *v, struct cpu_user_regs *regs)
+{
+    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
+    struct nestedsvm *svm = &vcpu_nestedsvm(v);
+    unsigned int insn_bytes = __get_instruction_length(v, INSTR_VMRUN);
+    int rc;
+
+    if (insn_bytes == 0) {
+        svm->ns_vmexit.exitcode = VMEXIT_SHUTDOWN;
+        return -1;
+    }
+
+    nv->nv_hostflags.fields.vmswitch_in_progress = 1;
+    ASSERT(nv->nv_vmcx != NULL);
+
+    /* save host state */
+    rc = nsvm_vcpu_vmentry(v, regs, insn_bytes);
+    if (rc == 0) {
+        /* Switch vcpu to guest mode */
+        nestedhvm_vcpu_enter_guestmode(v);
+        nv->nv_hostflags.fields.vmswitch_in_progress = 0;
+        return 0;
+    }
+
+    gdprintk(XENLOG_ERR,
+        "nsvm_vcpu_vmentry failed, injecting #UD\n");
+    hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+    nv->nv_hostflags.fields.vmswitch_in_progress = 0;
+    return 1;
+}
+
+/*
+ * Write the #VMEXIT information into the l1 guest's VMCB (nv_vmcx).
+ *
+ * If a forced vmexit is pending (vmexit_pending), the exit
+ * information recorded in svm->ns_vmexit is transferred into the
+ * VMCB (or, for VMEXIT_MSR, into regs) depending on the exit code.
+ * In all cases the exit code is stored and eventinj is cleared.
+ *
+ * Always returns 0.
+ */
+int
+nsvm_vcpu_vmexit(struct vcpu *v, struct cpu_user_regs *regs, uint64_t exitcode)
+{
+ struct nestedvcpu *nv = &vcpu_nestedhvm(v);
+ struct nestedsvm *svm = &vcpu_nestedsvm(v);
+ struct vmcb_struct *ns_vmcb;
+
+ ns_vmcb = nv->nv_vmcx;
+
+ if (nv->nv_hostflags.fields.vmexit_pending) {
+
+ switch (exitcode) {
+ case VMEXIT_INTR:
+ /* An event queued for injection that was not delivered because
+ * the vmentry is still pending is reported via exitintinfo.
+ */
+ if ( unlikely(ns_vmcb->eventinj.fields.v)
+ && nv->nv_hostflags.fields.vmentry_pending
+ && hvm_event_needs_reinjection(ns_vmcb->eventinj.fields.type,
+ ns_vmcb->eventinj.fields.vector) )
+ {
+ ns_vmcb->exitintinfo.bytes = ns_vmcb->eventinj.bytes;
+ }
+ break;
+ case VMEXIT_EXCEPTION_PF:
+ /* NOTE(review): cr2 is loaded from the VMCB's current exitinfo2,
+ * i.e. before exitinfo2 is replaced with the pending fault
+ * address below -- confirm this ordering is intended.
+ */
+ ns_vmcb->cr2 = ns_vmcb->exitinfo2;
+ /* fall through */
+ case VMEXIT_NPF:
+ /* PF error code */
+ ns_vmcb->exitinfo1 = svm->ns_vmexit.exitinfo1;
+ /* fault address */
+ ns_vmcb->exitinfo2 = svm->ns_vmexit.exitinfo2;
+ break;
+ case VMEXIT_MSR:
+ ASSERT(regs != NULL);
+ /* ecx receives exitinfo1 in both cases; for a write eax/edx
+ * additionally receive the low/high half of exitinfo2.
+ */
+ if (ns_vmcb->exitinfo1 == 0) { /* read */
+ regs->ecx = svm->ns_vmexit.exitinfo1;
+ } else {
+ regs->ecx = svm->ns_vmexit.exitinfo1;
+ regs->eax = (uint32_t)svm->ns_vmexit.exitinfo2;
+ regs->edx = (uint32_t)(svm->ns_vmexit.exitinfo2 >> 32);
+ }
+ break;
+ case VMEXIT_EXCEPTION_NP:
+ case VMEXIT_EXCEPTION_SS:
+ case VMEXIT_EXCEPTION_GP:
+ case VMEXIT_EXCEPTION_15:
+ case VMEXIT_EXCEPTION_MF:
+ case VMEXIT_EXCEPTION_AC:
+ /* For these exceptions exitinfo1 carries the recorded error code. */
+ ns_vmcb->exitinfo1 = svm->ns_vmexit.exitinfo1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ ns_vmcb->exitcode = exitcode;
+ ns_vmcb->eventinj.bytes = 0;
+ return 0;
+}
+
+/*
+ * Record a pending #VMEXIT caused by exception trapnr. The exit code
+ * is derived from the trap number; errcode and cr2 are stored as
+ * exitinfo1 and exitinfo2. The exit itself is emulated later, once
+ * the vmexit_pending flag is noticed.
+ * Returns NESTEDHVM_VMEXIT_DONE.
+ */
+int
+nsvm_vcpu_vmexit_trap(struct vcpu *v, unsigned int trapnr,
+    int errcode, unsigned long cr2)
+{
+    struct nestedsvm *svm = &vcpu_nestedsvm(v);
+    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
+
+    ASSERT(nv->nv_vmcx != NULL);
+
+    /* Exception exit codes are consecutive, starting at #DE. */
+    svm->ns_vmexit.exitcode = VMEXIT_EXCEPTION_DE + trapnr;
+    svm->ns_vmexit.exitinfo1 = errcode;
+    svm->ns_vmexit.exitinfo2 = cr2;
+
+    nv->nv_hostflags.fields.vmexit_pending = 1;
+
+    return NESTEDHVM_VMEXIT_DONE;
+}
+
+/* Return the cr3 value remembered from the l1 guest's VMCB at VMRUN. */
+uint64_t nsvm_vcpu_guestcr3(struct vcpu *v)
+{
+    struct nestedsvm *svm = &vcpu_nestedsvm(v);
+
+    return svm->ns_vmcb_guestcr3;
+}
+
+/* Return the h_cr3 value remembered from the l1 guest's VMCB at VMRUN. */
+uint64_t nsvm_vcpu_hostcr3(struct vcpu *v)
+{
+    struct nestedsvm *svm = &vcpu_nestedsvm(v);
+
+    return svm->ns_vmcb_hostcr3;
+}
+
+/* Return the ASID remembered from the l1 guest's VMCB at VMRUN. */
+uint32_t nsvm_vcpu_asid(struct vcpu *v)
+{
+    struct nestedsvm *svm = &vcpu_nestedsvm(v);
+
+    return svm->ns_guest_asid;
+}
+
+/*
+ * Look up whether an MSR access is intercepted according to the
+ * given MSR permission map.
+ *
+ * msr_bitmap: permission map to consult
+ * msr:        MSR number being accessed
+ * write:      non-zero for a write access, zero for a read access
+ *
+ * Returns NESTEDHVM_VMEXIT_INJECT if the MSR is not covered by the
+ * permission map, NESTEDHVM_VMEXIT_HOST if the map does not
+ * intercept the access and NESTEDHVM_VMEXIT_CONTINUE if it does.
+ */
+static int
+nsvm_vmcb_guest_intercepts_msr(unsigned long *msr_bitmap,
+    uint32_t msr, bool_t write)
+{
+    bool_t enabled;
+    unsigned long *msr_bit;
+
+    msr_bit = svm_msrbit(msr_bitmap, msr);
+
+    if (msr_bit == NULL)
+        /* MSR not in the permission map: Let the guest handle it. */
+        return NESTEDHVM_VMEXIT_INJECT;
+
+    /* Offset of the MSR within its range of the map. */
+    msr &= 0x1fff;
+
+    /* Two bits per MSR: the even bit covers reads, the odd bit writes. */
+    if (write)
+        enabled = test_bit(msr * 2 + 1, msr_bit);
+    else
+        enabled = test_bit(msr * 2, msr_bit);
+
+    if (!enabled)
+        return NESTEDHVM_VMEXIT_HOST;
+
+    return NESTEDHVM_VMEXIT_CONTINUE;
+}
+
+/*
+ * Decide whether the l1 guest intercepts the given exit code.
+ *
+ * The first switch maps the exit code onto a bit of the matching
+ * cached intercept vector (cr, dr, exception, general1, general2)
+ * and returns 0 if that bit is clear. VMEXIT_NPF and VMEXIT_INVALID
+ * are treated as always intercepted. An exit code outside the known
+ * ranges is logged and hits BUG().
+ *
+ * The second switch refines the answer: for VMEXIT_MSR the cached
+ * MSR permission map is consulted as well.
+ *
+ * Returns 1 if the exit is intercepted, 0 otherwise.
+ */
+int
+nsvm_vmcb_guest_intercepts_exitcode(struct vcpu *v,
+ struct cpu_user_regs *regs, uint64_t exitcode)
+{
+ uint64_t exit_bits;
+ struct nestedvcpu *nv = &vcpu_nestedhvm(v);
+ struct nestedsvm *svm = &vcpu_nestedsvm(v);
+ struct vmcb_struct *ns_vmcb = nv->nv_vmcx;
+ enum nestedhvm_vmexits vmexits;
+
+ switch (exitcode) {
+ case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
+ case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
+ exit_bits = 1ULL << (exitcode - VMEXIT_CR0_READ);
+ if (svm->ns_cr_intercepts & exit_bits)
+ break;
+ return 0;
+
+ case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
+ case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
+ exit_bits = 1ULL << (exitcode - VMEXIT_DR0_READ);
+ if (svm->ns_dr_intercepts & exit_bits)
+ break;
+ return 0;
+
+ case VMEXIT_EXCEPTION_DE ... VMEXIT_EXCEPTION_XF:
+ exit_bits = 1ULL << (exitcode - VMEXIT_EXCEPTION_DE);
+ if (svm->ns_exception_intercepts & exit_bits)
+ break;
+ return 0;
+
+ case VMEXIT_INTR ... VMEXIT_SHUTDOWN:
+ exit_bits = 1ULL << (exitcode - VMEXIT_INTR);
+ if (svm->ns_general1_intercepts & exit_bits)
+ break;
+ return 0;
+
+ case VMEXIT_VMRUN ... VMEXIT_MWAIT_CONDITIONAL:
+ exit_bits = 1ULL << (exitcode - VMEXIT_VMRUN);
+ if (svm->ns_general2_intercepts & exit_bits)
+ break;
+ return 0;
+
+ case VMEXIT_NPF:
+ case VMEXIT_INVALID:
+ /* Always intercepted */
+ break;
+
+ default:
+ gdprintk(XENLOG_ERR, "Illegal exitcode 0x%"PRIx64"\n", exitcode);
+ BUG();
+ break;
+ }
+
+ /* Special cases: Do more detailed checks */
+ switch (exitcode) {
+ case VMEXIT_MSR:
+ ASSERT(regs != NULL);
+ /* NOTE(review): the return value of nestedsvm_vmcb_map() is not
+ * checked; a failed mapping is only caught by the ASSERT below.
+ */
+ nestedsvm_vmcb_map(v, nv->nv_vmcxaddr);
+ ASSERT(nv->nv_vmcx != NULL);
+ ns_vmcb = nv->nv_vmcx;
+ /* A non-zero exitinfo1 is passed on as a write access. */
+ vmexits = nsvm_vmcb_guest_intercepts_msr(svm->ns_cached_msrpm,
+ regs->ecx, ns_vmcb->exitinfo1 != 0);
+ if (vmexits == NESTEDHVM_VMEXIT_HOST)
+ return 0;
+ break;
+
+ case VMEXIT_IOIO:
+ /* always intercepted */
+ break;
+ }
+
+ return 1;
+}
+
+/*
+ * Tell whether the l1 guest intercepts exception trapnr.
+ * Returns the result of the exit code check for the matching
+ * exception exit code.
+ */
+int
+nsvm_vmcb_guest_intercepts_trap(struct vcpu *v, unsigned int trapnr)
+{
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+
+    return nsvm_vmcb_guest_intercepts_exitcode(v, regs,
+        VMEXIT_EXCEPTION_DE + trapnr);
+}
+
+/*
+ * Emulate the state save of #VMEXIT: copy the nested guest's state
+ * from the running VMCB (vmcb) back into the l1 guest's VMCB
+ * (ns_vmcb, mapped at nv_vmcx).
+ *
+ * Intercepts and LBR control are restored from the values cached at
+ * VMRUN time rather than from the running VMCB, because the running
+ * VMCB holds them merged with the host's settings.
+ *
+ * Always returns 0.
+ */
+static int
+nsvm_vmcb_prepare4vmexit(struct vcpu *v)
+{
+ struct nestedsvm *svm = &vcpu_nestedsvm(v);
+ struct vmcb_struct *ns_vmcb = vcpu_nestedhvm(v).nv_vmcx;
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+ svm_vmsave(vmcb);
+
+ /* Intercepts */
+ /* Copy cached intercepts since they are the guest's original
+ * intercepts.
+ */
+ ns_vmcb->cr_intercepts = svm->ns_cr_intercepts;
+ ns_vmcb->dr_intercepts = svm->ns_dr_intercepts;
+ ns_vmcb->exception_intercepts = svm->ns_exception_intercepts;
+ ns_vmcb->general1_intercepts = svm->ns_general1_intercepts;
+ ns_vmcb->general2_intercepts = svm->ns_general2_intercepts;
+
+ /* Nested Pause Filter */
+ ns_vmcb->pause_filter_count = vmcb->pause_filter_count;
+
+ /* Nested IO permission bitmap */
+ /* Just keep the iopm_base_pa and msrpm_base_pa values.
+ * The guest must not see the virtualized values.
+ */
+
+ /* TSC offset */
+ ns_vmcb->tsc_offset = vmcb->tsc_offset;
+
+ /* ASID */
+ /* ns_vmcb->guest_asid = vmcb->guest_asid; */
+
+ /* TLB control */
+ ns_vmcb->tlb_control = 0;
+
+ /* Virtual Interrupts */
+ /* intr_masking was forced on for the run; clear it again unless the
+ * l1 guest had set it itself (remembered in hostflags.vintrmask).
+ */
+ ns_vmcb->vintr = vmcb->vintr;
+ if (!(svm->ns_hostflags.fields.vintrmask))
+ ns_vmcb->vintr.fields.intr_masking = 0;
+
+ /* Shadow mode */
+ ns_vmcb->interrupt_shadow = vmcb->interrupt_shadow;
+
+ /* Exit codes */
+ ns_vmcb->exitcode = vmcb->exitcode;
+ ns_vmcb->exitinfo1 = vmcb->exitinfo1;
+ ns_vmcb->exitinfo2 = vmcb->exitinfo2;
+ ns_vmcb->exitintinfo = vmcb->exitintinfo;
+
+ /* Interrupts */
+ /* If we emulate a VMRUN/#VMEXIT in the same host #VMEXIT cycle we have
+ * to make sure that we do not lose injected events. So check eventinj
+ * here and copy it to exitintinfo if it is valid.
+ * exitintinfo and eventinj can't be both valid because the case below
+ * only happens on a VMRUN instruction intercept which has no valid
+ * exitintinfo set.
+ */
+ if ( unlikely(vmcb->eventinj.fields.v) &&
+ hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
+ vmcb->eventinj.fields.vector) )
+ {
+ ns_vmcb->exitintinfo = vmcb->eventinj;
+ }
+
+ ns_vmcb->eventinj.bytes = 0;
+
+ /* Nested paging mode */
+ if (nestedhvm_paging_mode_hap(v)) {
+ /* host nested paging + guest nested paging. */
+ ns_vmcb->np_enable = vmcb->np_enable;
+ ns_vmcb->cr3 = vmcb->cr3;
+ /* The vmcb->h_cr3 is the shadowed h_cr3. The original
+ * unshadowed guest h_cr3 is kept in ns_vmcb->h_cr3,
+ * hence we keep the ns_vmcb->h_cr3 value. */
+ } else if (paging_mode_hap(v->domain)) {
+ /* host nested paging + guest shadow paging. */
+ ns_vmcb->np_enable = 0;
+ /* Throw h_cr3 away. Guest is not allowed to set it or
+ * it can break out, otherwise (security hole!) */
+ ns_vmcb->h_cr3 = 0x0;
+ /* Stop intercepting #PF (already done above
+ * by restoring cached intercepts). */
+ ns_vmcb->cr3 = vmcb->cr3;
+ } else {
+ /* host shadow paging + guest shadow paging. */
+ ns_vmcb->np_enable = 0;
+ ns_vmcb->h_cr3 = 0x0;
+ /* The vmcb->cr3 is the shadowed cr3. The original
+ * unshadowed guest cr3 is kept in ns_vmcb->cr3,
+ * hence we keep the ns_vmcb->cr3 value. */
+ }
+
+ /* LBR virtualization */
+ ns_vmcb->lbr_control = svm->ns_lbr_control;
+
+ /* NextRIP */
+ ns_vmcb->nextrip = vmcb->nextrip;
+
+ /*
+ * VMCB Save State Area
+ */
+
+ /* Segments */
+ ns_vmcb->es = vmcb->es;
+ ns_vmcb->cs = vmcb->cs;
+ ns_vmcb->ss = vmcb->ss;
+ ns_vmcb->ds = vmcb->ds;
+ ns_vmcb->gdtr = vmcb->gdtr;
+ ns_vmcb->idtr = vmcb->idtr;
+
+ /* CPL */
+ ns_vmcb->cpl = vmcb->cpl;
+
+ /* EFER */
+ ns_vmcb->efer = vmcb->efer;
+
+ /* CRn */
+ ns_vmcb->cr4 = vmcb->cr4;
+ ns_vmcb->cr0 = vmcb->cr0;
+
+ /* DRn */
+ ns_vmcb->dr7 = vmcb->dr7;
+ ns_vmcb->dr6 = vmcb->dr6;
+
+ /* RFLAGS */
+ ns_vmcb->rflags = vmcb->rflags;
+
+ /* RIP */
+ ns_vmcb->rip = vmcb->rip;
+
+ /* RSP */
+ ns_vmcb->rsp = vmcb->rsp;
+
+ /* RAX */
+ ns_vmcb->rax = vmcb->rax;
+
+ /* Keep the nested guest values of the fs, gs, ldtr, tr, kerngsbase,
+ * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
+ * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
+ */
+
+ /* CR2 */
+ ns_vmcb->cr2 = vmcb->cr2;
+
+ /* Page tables */
+ ns_vmcb->pdpe0 = vmcb->pdpe0;
+ ns_vmcb->pdpe1 = vmcb->pdpe1;
+ ns_vmcb->pdpe2 = vmcb->pdpe2;
+ ns_vmcb->pdpe3 = vmcb->pdpe3;
+
+ /* PAT */
+ ns_vmcb->g_pat = vmcb->g_pat;
+
+ /* Debug Control MSR */
+ ns_vmcb->debugctlmsr = vmcb->debugctlmsr;
+
+ /* LBR MSRs */
+ ns_vmcb->lastbranchfromip = vmcb->lastbranchfromip;
+ ns_vmcb->lastbranchtoip = vmcb->lastbranchtoip;
+ ns_vmcb->lastintfromip = vmcb->lastintfromip;
+ ns_vmcb->lastinttoip = vmcb->lastinttoip;
+
+ return 0;
+}
+
+/* Report whether the l1 guest enabled nested paging at the last VMRUN. */
+bool_t
+nsvm_vmcb_hap_enabled(struct vcpu *v)
+{
+    struct nestedsvm *svm = &vcpu_nestedsvm(v);
+
+    return svm->ns_hap_enabled;
+}
+
+/* MSR handling */
+/*
+ * Handle reads of the nested SVM MSRs.
+ * *msr_content is always written: VM_CR reads as 0, VM_HSAVE_PA
+ * returns the stored value, any other MSR leaves it at 0.
+ * Returns 1 if the MSR was handled here, 0 otherwise.
+ */
+int nsvm_rdmsr(struct vcpu *v, unsigned int msr, uint64_t *msr_content)
+{
+    struct nestedsvm *svm = &vcpu_nestedsvm(v);
+
+    *msr_content = 0;
+
+    if (msr == MSR_K8_VM_CR)
+        return 1;
+
+    if (msr == MSR_K8_VM_HSAVE_PA) {
+        *msr_content = svm->ns_msr_hsavepa;
+        return 1;
+    }
+
+    /* Not one of ours */
+    return 0;
+}
+
+/*
+ * Handle writes to the nested SVM MSRs.
+ * Returns 1 if the MSR was handled here, 0 if it is not a nested SVM
+ * MSR and -1 if the value is rejected (the caller is expected to
+ * inject #GP).
+ */
+int nsvm_wrmsr(struct vcpu *v, unsigned int msr, uint64_t msr_content)
+{
+    struct nestedsvm *svm = &vcpu_nestedsvm(v);
+
+    if (msr == MSR_K8_VM_CR)
+        /* ignore write. handle all bits as read-only. */
+        return 1;
+
+    if (msr != MSR_K8_VM_HSAVE_PA)
+        return 0;
+
+    if (!nestedsvm_vmcb_isvalid(v, msr_content)) {
+        gdprintk(XENLOG_ERR,
+            "MSR_K8_VM_HSAVE_PA value invalid 0x%"PRIx64"\n", msr_content);
+        return -1; /* inject #GP */
+    }
+
+    svm->ns_msr_hsavepa = msr_content;
+    return 1;
+}
+
+/* VMEXIT emulation */
+/*
+ * Decide who has to handle a #VMEXIT that occurred while the nested
+ * guest was running.
+ *
+ * Returns
+ *   NESTEDHVM_VMEXIT_HOST       - the host handles the exit,
+ *   NESTEDHVM_VMEXIT_INJECT /
+ *   NESTEDHVM_VMEXIT_CONTINUE   - the exit is forwarded to the l1 guest,
+ *   NESTEDHVM_VMEXIT_FATALERROR - the combination must not happen.
+ *
+ * A forced vmexit (vmexit_pending) is only legal if the l1 guest
+ * intercepts the exit code.
+ */
+static enum nestedhvm_vmexits
+nestedsvm_vmexit_intercepts(struct vcpu *v, struct cpu_user_regs *regs,
+ uint64_t exitcode)
+{
+ bool_t is_intercepted;
+ struct nestedvcpu *nv = &vcpu_nestedhvm(v);
+
+ is_intercepted = nsvm_vmcb_guest_intercepts_exitcode(v, regs, exitcode);
+
+ if (nv->nv_hostflags.fields.vmexit_pending) {
+ if (is_intercepted)
+ return NESTEDHVM_VMEXIT_INJECT;
+ gdprintk(XENLOG_ERR,
+ "forced VMEXIT can't happen as guest can't "
+ "handle the intercept\n");
+ return NESTEDHVM_VMEXIT_FATALERROR;
+ }
+
+ switch (exitcode) {
+ case VMEXIT_INVALID:
+ if (is_intercepted)
+ return NESTEDHVM_VMEXIT_INJECT;
+ return NESTEDHVM_VMEXIT_HOST;
+
+ case VMEXIT_INTR:
+ case VMEXIT_NMI:
+ /* Physical interrupts and NMIs always go to the host first. */
+ return NESTEDHVM_VMEXIT_HOST;
+ case VMEXIT_EXCEPTION_NM:
+ /* Host must handle lazy fpu context switching first.
+ * Then inject the VMEXIT if L1 guest intercepts this.
+ */
+ return NESTEDHVM_VMEXIT_HOST;
+
+ case VMEXIT_NPF:
+ if (nestedhvm_paging_mode_hap(v)) {
+ if (!is_intercepted)
+ return NESTEDHVM_VMEXIT_FATALERROR;
+ /* host nested paging + guest nested paging */
+ return NESTEDHVM_VMEXIT_HOST;
+ }
+ if (paging_mode_hap(v->domain)) {
+ if (is_intercepted)
+ return NESTEDHVM_VMEXIT_FATALERROR;
+ /* host nested paging + guest shadow paging */
+ return NESTEDHVM_VMEXIT_HOST;
+ }
+ /* host shadow paging + guest shadow paging */
+ /* Can this happen? */
+ BUG();
+ return NESTEDHVM_VMEXIT_FATALERROR;
+ case VMEXIT_EXCEPTION_PF:
+ if (nestedhvm_paging_mode_hap(v)) {
+ /* host nested paging + guest nested paging */
+ if (!is_intercepted)
+ /* l1 guest does not intercept #PF: the host handles it */
+ return NESTEDHVM_VMEXIT_HOST;
+ /* l1 guest intercepts #PF (unnecessarily, as it uses nested
+ * paging): forward the exit to it */
+ return NESTEDHVM_VMEXIT_INJECT;
+ }
+ if (!paging_mode_hap(v->domain)) {
+ /* host shadow paging + guest shadow paging */
+ return NESTEDHVM_VMEXIT_HOST;
+ }
+ /* host nested paging + guest shadow paging */
+ return NESTEDHVM_VMEXIT_INJECT;
+ case VMEXIT_VMMCALL:
+ /* Always let the guest handle VMMCALL/VMCALL */
+ return NESTEDHVM_VMEXIT_INJECT;
+ default:
+ break;
+ }
+
+ /* Everything else simply follows the l1 guest's intercept setting. */
+ if (is_intercepted)
+ return NESTEDHVM_VMEXIT_CONTINUE;
+ return NESTEDHVM_VMEXIT_HOST;
+}
+
+/*
+ * Leave guest mode for an exit that is not handled by the host.
+ *
+ * If the exit has to be handled by the host the function returns
+ * NESTEDHVM_VMEXIT_HOST right away and the vcpu stays in guest mode.
+ * Otherwise the nested guest state is written back to the l1 guest's
+ * VMCB (skipped on ERROR/FATALERROR), the l1 guest state is restored
+ * and the vcpu leaves guest mode.
+ *
+ * Returns the classification of the exit; it is replaced by
+ * NESTEDHVM_VMEXIT_ERROR if saving the nested state fails and by
+ * NESTEDHVM_VMEXIT_FATALERROR if restoring the host state fails.
+ */
+static enum nestedhvm_vmexits
+nestedsvm_vmexit(struct vcpu *v, struct cpu_user_regs *regs, uint64_t exitcode)
+{
+ int rc;
+ enum nestedhvm_vmexits ret;
+
+ ASSERT(nestedhvm_vcpu_in_guestmode(v));
+
+ ret = nestedsvm_vmexit_intercepts(v, regs, exitcode);
+ switch (ret) {
+ case NESTEDHVM_VMEXIT_CONTINUE:
+ case NESTEDHVM_VMEXIT_INJECT:
+ break;
+ case NESTEDHVM_VMEXIT_ERROR:
+ case NESTEDHVM_VMEXIT_FATALERROR:
+ /* Do not save the nested state, only restore the host state. */
+ goto out;
+ case NESTEDHVM_VMEXIT_HOST:
+ return ret;
+ default:
+ break;
+ }
+
+ rc = nsvm_vmcb_prepare4vmexit(v);
+ if (rc)
+ ret = NESTEDHVM_VMEXIT_ERROR;
+
+out:
+ rc = nhvm_vcpu_hostrestore(v, regs);
+ if (rc)
+ ret = NESTEDHVM_VMEXIT_FATALERROR;
+
+ nestedhvm_vcpu_exit_guestmode(v);
+ return ret;
+}
+
+/* The exitcode is in native SVM/VMX format. The forced exitcode
+ * is in generic format.
+ *
+ * Emulate a #VMEXIT towards the l1 guest. If the vcpu is in guest
+ * mode the nested guest is left first; an exit that belongs to the
+ * host (or a fatal error) is reported back to the caller without
+ * touching the l1 guest's VMCB. Otherwise the exit information is
+ * written to the l1 guest's VMCB via nhvm_vcpu_vmexit().
+ *
+ * vmswitch_in_progress is set for the duration of the call.
+ *
+ * Returns NESTEDHVM_VMEXIT_DONE on success, NESTEDHVM_VMEXIT_HOST if
+ * the host has to handle the exit and NESTEDHVM_VMEXIT_FATALERROR on
+ * failure.
+ */
+enum nestedhvm_vmexits
+nestedsvm_vcpu_vmexit(struct vcpu *v, struct cpu_user_regs *regs,
+ uint64_t exitcode)
+{
+ int rc;
+ struct nestedvcpu *nv = &vcpu_nestedhvm(v);
+
+ nv->nv_hostflags.fields.vmswitch_in_progress = 1;
+
+ ASSERT(nv->nv_vmcx != NULL);
+
+ if (nestedhvm_vcpu_in_guestmode(v)) {
+ enum nestedhvm_vmexits ret;
+
+ ret = nestedsvm_vmexit(v, regs, exitcode);
+ switch (ret) {
+ case NESTEDHVM_VMEXIT_FATALERROR:
+ gdprintk(XENLOG_ERR, "VMEXIT: fatal error\n");
+ /* fall through */
+ case NESTEDHVM_VMEXIT_HOST:
+ nv->nv_hostflags.fields.vmswitch_in_progress = 0;
+ ASSERT(nv->nv_hostflags.fields.vmexit_pending == 0);
+ return ret;
+ case NESTEDHVM_VMEXIT_ERROR:
+ /* Report the failure to the l1 guest as an invalid VMCB. */
+ exitcode = VMEXIT_INVALID;
+ break;
+ default:
+ ASSERT(!nestedhvm_vcpu_in_guestmode(v));
+ break;
+ }
+
+ /* host state has been restored */
+ }
+
+ ASSERT(!nestedhvm_vcpu_in_guestmode(v));
+
+ /* Prepare for running the l1 guest. Make the actual
+ * modifications to the virtual VMCB/VMCS.
+ */
+ rc = nhvm_vcpu_vmexit(v, regs, exitcode);
+
+ nv->nv_hostflags.fields.vmswitch_in_progress = 0;
+
+ if (rc)
+ return NESTEDHVM_VMEXIT_FATALERROR;
+
+ return NESTEDHVM_VMEXIT_DONE;
+}
+
+/* VCPU switch
+ *
+ * Called on the resume path before entering the guest. Carries out
+ * a pending #VMEXIT or VMRUN emulation for the current vcpu. A
+ * pending vmexit takes precedence over a pending vmentry, and a
+ * VMRUN emulation returning a negative value is turned into a vmexit
+ * with the exit code recorded in ns_vmexit.
+ */
+asmlinkage void nsvm_vcpu_switch(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
+    struct nestedsvm *svm = &vcpu_nestedsvm(v);
+
+    if (!nestedhvm_enabled(v->domain))
+        return;
+
+    if (!nv->nv_hostflags.fields.vmexit_pending) {
+        int ret;
+
+        /* Nothing pending at all: done. */
+        if (!nv->nv_hostflags.fields.vmentry_pending)
+            return;
+
+        ASSERT(!nv->nv_hostflags.fields.vmexit_pending);
+        ret = nsvm_vcpu_vmrun(v, regs);
+        if (ret >= 0) {
+            nv->nv_hostflags.fields.vmentry_pending = 0;
+            return;
+        }
+        /* VMRUN emulation failed hard: emulate a #VMEXIT instead. */
+    }
+
+    nestedsvm_vcpu_vmexit(v, regs, svm->ns_vmexit.exitcode);
+    nv->nv_hostflags.fields.vmexit_pending = 0;
+    nv->nv_hostflags.fields.vmentry_pending = 0;
+}
+
+
diff -r 54dd43df686b -r 3bfc06e2e41a xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -49,6 +49,9 @@
#include <asm/hvm/svm/vmcb.h>
#include <asm/hvm/svm/emulate.h>
#include <asm/hvm/svm/intr.h>
+#include <asm/hvm/svm/svmdebug.h>
+#include <asm/hvm/svm/nestedsvm.h>
+#include <asm/hvm/nestedhvm.h>
#include <asm/x86_emulate.h>
#include <public/sched.h>
#include <asm/hvm/vpt.h>
@@ -106,6 +109,44 @@ static void svm_cpu_down(void)
write_efer(read_efer() & ~EFER_SVME);
}
+unsigned long *
+svm_msrbit(unsigned long *msr_bitmap, uint32_t msr)
+{
+    /*
+     * See AMD64 Programmers Manual, Vol 2, Section 15.10 (MSR-Bitmap Address).
+     * Returns the start of the bitmap slice for msr's range (0x800 bytes apart).
+     */
+    if ( msr <= 0x1fff )
+        return msr_bitmap + 0x0000 / BYTES_PER_LONG;
+    if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+        return msr_bitmap + 0x0800 / BYTES_PER_LONG;
+    if ( (msr >= 0xc0010000) && (msr <= 0xc0011fff) )
+        return msr_bitmap + 0x1000 / BYTES_PER_LONG;
+
+    /* msr lies outside all three ranges handled above. */
+    return NULL;
+}
+
+void svm_intercept_msr(struct vcpu *v, uint32_t msr, int enable)
+{
+    unsigned long *range = svm_msrbit(v->arch.hvm_svm.msrpm, msr);
+    uint32_t bit = (msr & 0x1fff) * 2;
+
+    /* An msr that svm_msrbit() cannot place in the bitmap is a caller bug. */
+    BUG_ON(range == NULL);
+    /* Each MSR owns two consecutive bits; both are switched together. */
+    if ( enable )
+    {
+        __set_bit(bit, range);
+        __set_bit(bit + 1, range);
+    }
+    else
+    {
+        __clear_bit(bit, range);
+        __clear_bit(bit + 1, range);
+    }
+}
+
static void svm_save_dr(struct vcpu *v)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
@@ -294,7 +335,7 @@ static int svm_load_vmcb_ctxt(struct vcp
{
svm_load_cpu_state(v, ctxt);
if (svm_vmcb_restore(v, ctxt)) {
- printk("svm_vmcb restore failed!\n");
+ gdprintk(XENLOG_ERR, "svm_vmcb restore failed!\n");
domain_crash(v->domain);
return -EINVAL;
}
@@ -661,8 +702,10 @@ static void svm_ctxt_switch_to(struct vc
static void svm_do_resume(struct vcpu *v)
{
bool_t debug_state = v->domain->debugger_attached;
+ bool_t guestmode = nestedhvm_vcpu_in_guestmode(v);
- if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
+ if ( !guestmode &&
+ unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
{
uint32_t mask = (1U << TRAP_debug) | (1U << TRAP_int3);
v->arch.hvm_vcpu.debug_state_latch = debug_state;
@@ -681,11 +724,14 @@ static void svm_do_resume(struct vcpu *v
hvm_asid_flush_vcpu(v);
}
- /* Reflect the vlapic's TPR in the hardware vtpr */
- v->arch.hvm_svm.vmcb->vintr.fields.tpr =
- (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
+ if ( !guestmode )
+ {
+ /* Reflect the vlapic's TPR in the hardware vtpr */
+ v->arch.hvm_svm.vmcb->vintr.fields.tpr =
+ (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
- hvm_do_resume(v);
+ hvm_do_resume(v);
+ }
reset_stack_and_jump(svm_asm_do_resume);
}
@@ -924,8 +970,8 @@ static void svm_do_nested_pgfault(paddr_
struct {
uint64_t gpa;
uint64_t mfn;
- u32 qualification;
- u32 p2mt;
+ uint32_t qualification;
+ uint32_t p2mt;
} _d;
_d.gpa = gpa;
@@ -947,12 +993,21 @@ static void svm_do_nested_pgfault(paddr_
static void svm_fpu_dirty_intercept(void)
{
- struct vcpu *curr = current;
- struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
+ struct vcpu *v = current;
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- svm_fpu_enter(curr);
+ svm_fpu_enter(v);
- if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
+ if ( nestedhvm_enabled(v->domain) && nestedhvm_vcpu_in_guestmode(v) ) {
+ /* Check if guest must make FPU ready for the nested guest */
+ if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS )
+ hvm_inject_exception(TRAP_no_device, HVM_DELIVER_NO_ERROR_CODE, 0);
+ else
+ vmcb->cr0 &= ~X86_CR0_TS;
+ return;
+ }
+
+ if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
vmcb->cr0 &= ~X86_CR0_TS;
}
@@ -966,11 +1021,14 @@ static void svm_cpuid_intercept(
hvm_cpuid(input, eax, ebx, ecx, edx);
- if ( input == 0x80000001 )
- {
+ switch (input) {
+ case 0x80000001:
/* Fix up VLAPIC details. */
if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
__clear_bit(X86_FEATURE_APIC & 31, edx);
+ break;
+ default:
+ break;
}
HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
@@ -1006,6 +1064,7 @@ static void svm_dr_access(struct vcpu *v
static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
{
+ int ret;
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
@@ -1039,9 +1098,6 @@ static int svm_msr_read_intercept(unsign
*msr_content = 0;
break;
- case MSR_K8_VM_HSAVE_PA:
- goto gpf;
-
case MSR_IA32_DEBUGCTLMSR:
*msr_content = vmcb->debugctlmsr;
break;
@@ -1074,6 +1130,11 @@ static int svm_msr_read_intercept(unsign
break;
default:
+ ret = nsvm_rdmsr(v, msr, msr_content);
+ if ( ret < 0 )
+ goto gpf;
+ else if ( ret )
+ break;
if ( rdmsr_viridian_regs(msr, msr_content) ||
rdmsr_hypervisor_regs(msr, msr_content) )
@@ -1096,14 +1157,12 @@ static int svm_msr_read_intercept(unsign
static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
{
+ int ret;
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
switch ( msr )
{
- case MSR_K8_VM_HSAVE_PA:
- goto gpf;
-
case MSR_IA32_SYSENTER_CS:
v->arch.hvm_svm.guest_sysenter_cs = msr_content;
break;
@@ -1163,6 +1222,12 @@ static int svm_msr_write_intercept(unsig
break;
default:
+ ret = nsvm_wrmsr(v, msr, msr_content);
+ if ( ret < 0 )
+ goto gpf;
+ else if ( ret )
+ break;
+
if ( wrmsr_viridian_regs(msr, msr_content) )
break;
@@ -1242,6 +1307,96 @@ static void svm_vmexit_do_pause(struct c
do_sched_op_compat(SCHEDOP_yield, 0);
}
+static void
+svm_vmexit_do_vmrun(struct cpu_user_regs *regs,
+                    struct vcpu *v, uint64_t vmcbaddr)
+{
+    /* Both failure cases below end in the same #UD injection. */
+    if (!nestedhvm_enabled(v->domain)) {
+        gdprintk(XENLOG_ERR, "nestedhvm disabled, injecting #UD\n");
+        goto inject_ud;
+    }
+    if (!nestedsvm_vmcb_map(v, vmcbaddr)) {
+        gdprintk(XENLOG_ERR, "VMRUN: mapping vmcb failed, injecting #UD\n");
+        goto inject_ud;
+    }
+    /* Only flag the entry here; nsvm_vcpu_switch() acts on vmentry_pending. */
+    vcpu_nestedhvm(v).nv_hostflags.fields.vmentry_pending = 1;
+    return;
+ inject_ud:
+    hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+}
+
+/*
+ * Emulate an intercepted VMLOAD; the caller passes regs->eax as vmcbaddr.
+ * #UD is injected if nested HVM is off or the vmcb cannot be mapped.
+ */
+static void
+svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
+                     struct cpu_user_regs *regs,
+                     struct vcpu *v, uint64_t vmcbaddr)
+{
+    unsigned int inst_len = __get_instruction_length(v, INSTR_VMLOAD);
+
+    /* Give up when no instruction length is returned. */
+    if ( inst_len == 0 )
+        return;
+
+    if (!nestedhvm_enabled(v->domain)) {
+        gdprintk(XENLOG_ERR, "nestedhvm disabled, injecting #UD\n");
+        hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+        return;
+    }
+
+    if (!nestedsvm_vmcb_map(v, vmcbaddr)) {
+        gdprintk(XENLOG_ERR, "VMLOAD: mapping vmcb failed, injecting #UD\n");
+        hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+        return;
+    }
+
+    /* NOTE(review): nv_vmcx is presumably what nestedsvm_vmcb_map() set up. */
+    svm_vmload(vcpu_nestedhvm(v).nv_vmcx);
+    /* State in L1 VMCB is stale now */
+    v->arch.hvm_svm.vmcb_in_sync = 0;
+
+    /* Advance the guest instruction pointer past VMLOAD. */
+    __update_guest_eip(regs, inst_len);
+}
+
+/*
+ * Emulate an intercepted VMSAVE; the caller passes regs->eax as vmcbaddr.
+ * #UD is injected if nested HVM is off or the vmcb cannot be mapped.
+ */
+static void
+svm_vmexit_do_vmsave(struct vmcb_struct *vmcb,
+                     struct cpu_user_regs *regs,
+                     struct vcpu *v, uint64_t vmcbaddr)
+{
+    unsigned int inst_len = __get_instruction_length(v, INSTR_VMSAVE);
+
+    /* Give up when no instruction length is returned. */
+    if ( inst_len == 0 )
+        return;
+
+    if (!nestedhvm_enabled(v->domain)) {
+        gdprintk(XENLOG_ERR, "nestedhvm disabled, injecting #UD\n");
+        hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+        return;
+    }
+
+    if (!nestedsvm_vmcb_map(v, vmcbaddr)) {
+        gdprintk(XENLOG_ERR, "VMSAVE: mapping vmcb failed, injecting #UD\n");
+        hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+        return;
+    }
+
+    /* NOTE(review): nv_vmcx is presumably what nestedsvm_vmcb_map() set up. */
+    svm_vmsave(vcpu_nestedhvm(v).nv_vmcx);
+
+    /* Advance the guest instruction pointer past VMSAVE. */
+    __update_guest_eip(regs, inst_len);
+}
+
static void svm_vmexit_ud_intercept(struct cpu_user_regs *regs)
{
struct hvm_emulate_ctxt ctxt;
@@ -1372,20 +1527,36 @@ static struct hvm_function_table __read_
.msr_read_intercept = svm_msr_read_intercept,
.msr_write_intercept = svm_msr_write_intercept,
.invlpg_intercept = svm_invlpg_intercept,
- .set_rdtsc_exiting = svm_set_rdtsc_exiting
+ .set_rdtsc_exiting = svm_set_rdtsc_exiting,
+
+ .nhvm_vcpu_initialise = nsvm_vcpu_initialise,
+ .nhvm_vcpu_destroy = nsvm_vcpu_destroy,
+ .nhvm_vcpu_reset = nsvm_vcpu_reset,
+ .nhvm_vcpu_hostrestore = nsvm_vcpu_hostrestore,
+ .nhvm_vcpu_vmexit = nsvm_vcpu_vmexit,
+ .nhvm_vcpu_vmexit_trap = nsvm_vcpu_vmexit_trap,
+ .nhvm_vcpu_guestcr3 = nsvm_vcpu_guestcr3,
+ .nhvm_vcpu_hostcr3 = nsvm_vcpu_hostcr3,
+ .nhvm_vcpu_asid = nsvm_vcpu_asid,
+ .nhvm_vmcx_guest_intercepts_trap = nsvm_vmcb_guest_intercepts_trap,
+ .nhvm_vmcx_hap_enabled = nsvm_vmcb_hap_enabled,
};
asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
{
- unsigned int exit_reason;
+ uint64_t exit_reason;
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
eventinj_t eventinj;
int inst_len, rc;
+ bool_t vcpu_guestmode = 0;
if ( paging_mode_hap(v->domain) )
v->arch.hvm_vcpu.guest_cr[3] = v->arch.hvm_vcpu.hw_cr[3] = vmcb->cr3;
+ if ( nestedhvm_enabled(v->domain) && nestedhvm_vcpu_in_guestmode(v) )
+ vcpu_guestmode = 1;
+
/*
* Before doing anything else, we need to sync up the VLAPIC's TPR with
* SVM's vTPR. It's OK if the guest doesn't touch CR8 (e.g. 32-bit Windows)
@@ -1393,12 +1564,38 @@ asmlinkage void svm_vmexit_handler(struc
* NB. We need to preserve the low bits of the TPR to make checked builds
* of Windows work, even though they don't actually do anything.
*/
- vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
- ((vmcb->vintr.fields.tpr & 0x0F) << 4) |
- (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0x0F));
+ if ( !vcpu_guestmode ) {
+ vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
+ ((vmcb->vintr.fields.tpr & 0x0F) << 4) |
+ (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0x0F));
+ }
exit_reason = vmcb->exitcode;
+ if ( vcpu_guestmode ) {
+ enum nestedhvm_vmexits nsret;
+
+ nsret = nestedsvm_vcpu_vmexit(v, regs, exit_reason);
+ switch (nsret) {
+ case NESTEDHVM_VMEXIT_DONE:
+ goto out;
+ case NESTEDHVM_VMEXIT_ERROR:
+ gdprintk(XENLOG_ERR,
+ "nestedsvm_vcpu_vmexit() returned NESTEDHVM_VMEXIT_ERROR\n");
+ goto out;
+ case NESTEDHVM_VMEXIT_HOST:
+ case NESTEDHVM_VMEXIT_CONTINUE:
+ break;
+ case NESTEDHVM_VMEXIT_FATALERROR:
+ gdprintk(XENLOG_ERR, "unexpected nestedhvm error\n");
+ goto exit_and_crash;
+ default:
+ gdprintk(XENLOG_INFO, "nestedhvm_vcpu_vmexit returned %i\n",
+ nsret);
+ goto exit_and_crash;
+ }
+ }
+
if ( hvm_long_mode_enabled(v) )
HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason,
(uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32),
@@ -1410,7 +1607,7 @@ asmlinkage void svm_vmexit_handler(struc
if ( unlikely(exit_reason == VMEXIT_INVALID) )
{
- svm_dump_vmcb(__func__, vmcb);
+ svm_vmcb_dump(__func__, vmcb);
goto exit_and_crash;
}
@@ -1562,6 +1759,7 @@ asmlinkage void svm_vmexit_handler(struc
case VMEXIT_VMMCALL:
if ( (inst_len = __get_instruction_length(v, INSTR_VMCALL)) == 0 )
break;
+ BUG_ON(vcpu_guestmode);
HVMTRACE_1D(VMMCALL, regs->eax);
rc = hvm_do_hypercall(regs);
if ( rc != HVM_HCALL_preempted )
@@ -1594,9 +1792,18 @@ asmlinkage void svm_vmexit_handler(struc
case VMEXIT_MONITOR:
case VMEXIT_MWAIT:
+ hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
+ break;
+
case VMEXIT_VMRUN:
+ svm_vmexit_do_vmrun(regs, v, regs->eax);
+ break;
case VMEXIT_VMLOAD:
+ svm_vmexit_do_vmload(vmcb, regs, v, regs->eax);
+ break;
case VMEXIT_VMSAVE:
+ svm_vmexit_do_vmsave(vmcb, regs, v, regs->eax);
+ break;
case VMEXIT_STGI:
case VMEXIT_CLGI:
case VMEXIT_SKINIT:
@@ -1628,7 +1835,7 @@ asmlinkage void svm_vmexit_handler(struc
default:
exit_and_crash:
- gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
+ gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%"PRIx64", "
"exitinfo1 = %"PRIx64", exitinfo2 = %"PRIx64"\n",
exit_reason,
(u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
@@ -1636,6 +1843,11 @@ asmlinkage void svm_vmexit_handler(struc
break;
}
+ out:
+ if ( vcpu_guestmode )
+ /* Don't clobber TPR of the nested guest. */
+ return;
+
/* The exit may have updated the TPR: reflect this in the hardware vtpr */
vmcb->vintr.fields.tpr =
(vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
diff -r 54dd43df686b -r 3bfc06e2e41a xen/arch/x86/hvm/svm/svmdebug.c
--- /dev/null
+++ b/xen/arch/x86/hvm/svm/svmdebug.c
@@ -0,0 +1,189 @@
+/*
+ * svmdebug.c: debug functions
+ * Copyright (c) 2010, Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/msr-index.h>
+#include <asm/hvm/svm/svmdebug.h>
+
+static void svm_dump_sel(const char *name, svm_segment_register_t *s)
+{
+    unsigned long long base = s->base; /* widened to match %llx below */
+    printk("%s: sel=0x%04x, attr=0x%04x, limit=0x%08x, base=0x%016llx\n",
+           name, s->sel, s->attr.bytes, s->limit, base);
+}
+
+void svm_vmcb_dump(const char *from, struct vmcb_struct *vmcb)
+{
+ printk("Dumping guest's current state at %s...\n", from);
+ printk("Size of VMCB = %d, address = %p\n",
+ (int) sizeof(struct vmcb_struct), vmcb);
+ /* 'from' only labels the banner above; every value below is read from vmcb. */
+ printk("cr_intercepts = 0x%08x dr_intercepts = 0x%08x "
+ "exception_intercepts = 0x%08x\n",
+ vmcb->cr_intercepts, vmcb->dr_intercepts,
+ vmcb->exception_intercepts);
+ printk("general1_intercepts = 0x%08x general2_intercepts = 0x%08x\n",
+ vmcb->general1_intercepts, vmcb->general2_intercepts);
+ printk("iopm_base_pa = %016llx msrpm_base_pa = 0x%016llx tsc_offset = "
+ "0x%016llx\n",
+ (unsigned long long) vmcb->iopm_base_pa,
+ (unsigned long long) vmcb->msrpm_base_pa,
+ (unsigned long long) vmcb->tsc_offset);
+ printk("tlb_control = 0x%08x vintr = 0x%016llx interrupt_shadow = "
+ "0x%016llx\n", vmcb->tlb_control,
+ (unsigned long long) vmcb->vintr.bytes,
+ (unsigned long long) vmcb->interrupt_shadow);
+ printk("exitcode = 0x%016llx exitintinfo = 0x%016llx\n",
+ (unsigned long long) vmcb->exitcode,
+ (unsigned long long) vmcb->exitintinfo.bytes);
+ printk("exitinfo1 = 0x%016llx exitinfo2 = 0x%016llx \n",
+ (unsigned long long) vmcb->exitinfo1,
+ (unsigned long long) vmcb->exitinfo2);
+ printk("np_enable = 0x%016llx guest_asid = 0x%03x\n",
+ (unsigned long long) vmcb->np_enable, vmcb->guest_asid);
+ printk("cpl = %d efer = 0x%016llx star = 0x%016llx lstar = 0x%016llx\n",
+ vmcb->cpl, (unsigned long long) vmcb->efer,
+ (unsigned long long) vmcb->star, (unsigned long long) vmcb->lstar);
+ printk("CR0 = 0x%016llx CR2 = 0x%016llx\n",
+ (unsigned long long) vmcb->cr0, (unsigned long long) vmcb->cr2);
+ printk("CR3 = 0x%016llx CR4 = 0x%016llx\n",
+ (unsigned long long) vmcb->cr3, (unsigned long long) vmcb->cr4);
+ printk("RSP = 0x%016llx RIP = 0x%016llx\n",
+ (unsigned long long) vmcb->rsp, (unsigned long long) vmcb->rip);
+ printk("RAX = 0x%016llx RFLAGS=0x%016llx\n",
+ (unsigned long long) vmcb->rax, (unsigned long long) vmcb->rflags);
+ printk("DR6 = 0x%016llx, DR7 = 0x%016llx\n",
+ (unsigned long long) vmcb->dr6, (unsigned long long) vmcb->dr7);
+ printk("CSTAR = 0x%016llx SFMask = 0x%016llx\n",
+ (unsigned long long) vmcb->cstar,
+ (unsigned long long) vmcb->sfmask);
+ printk("KernGSBase = 0x%016llx PAT = 0x%016llx \n",
+ (unsigned long long) vmcb->kerngsbase,
+ (unsigned long long) vmcb->g_pat);
+ printk("H_CR3 = 0x%016llx\n", (unsigned long long)vmcb->h_cr3);
+
+ /* Print each segment and descriptor-table register via svm_dump_sel(). */
+ svm_dump_sel("CS", &vmcb->cs);
+ svm_dump_sel("DS", &vmcb->ds);
+ svm_dump_sel("SS", &vmcb->ss);
+ svm_dump_sel("ES", &vmcb->es);
+ svm_dump_sel("FS", &vmcb->fs);
+ svm_dump_sel("GS", &vmcb->gs);
+ svm_dump_sel("GDTR", &vmcb->gdtr);
+ svm_dump_sel("LDTR", &vmcb->ldtr);
+ svm_dump_sel("IDTR", &vmcb->idtr);
+ svm_dump_sel("TR", &vmcb->tr);
+}
+
+/*
+ * Returns 0 if the VMCB passes every check, 1 otherwise. If verbose, all
+ * failures are printed (prefixed by 'from'); else the first one returns.
+ */
+bool_t
+svm_vmcb_isvalid(const char *from, struct vmcb_struct *vmcb,
+                 bool_t verbose)
+{
+    bool_t ret = 0; /* ok */
+/* do { } while (0) keeps "PRINTF(...);" one statement in any context. */
+#define PRINTF(...) do { \
+    if (!verbose) return 1; \
+    ret = 1; printk("%s: ", from); printk(__VA_ARGS__); } while (0)
+
+    if ((vmcb->efer & EFER_SVME) == 0) {
+        PRINTF("EFER: SVME bit not set (0x%"PRIx64")\n", vmcb->efer);
+    }
+
+    if ((vmcb->cr0 & X86_CR0_CD) == 0 && (vmcb->cr0 & X86_CR0_NW) != 0) {
+        PRINTF("CR0: CD bit is zero and NW bit set (0x%"PRIx64")\n",
+               vmcb->cr0);
+    }
+    if ((vmcb->cr0 >> 32U) != 0) {
+        PRINTF("CR0: bits [63:32] are not zero (0x%"PRIx64")\n",
+               vmcb->cr0);
+    }
+
+    if ((vmcb->cr3 & 0x7) != 0) {
+        PRINTF("CR3: MBZ bits are set (0x%"PRIx64")\n", vmcb->cr3);
+    }
+    if ((vmcb->efer & EFER_LMA) && (vmcb->cr3 & 0xfe) != 0) {
+        PRINTF("CR3: MBZ bits are set (0x%"PRIx64")\n", vmcb->cr3);
+    }
+
+    if ((vmcb->cr4 >> 11U) != 0) {
+        PRINTF("CR4: bits [63:11] are not zero (0x%"PRIx64")\n",
+               vmcb->cr4);
+    }
+
+    if ((vmcb->dr6 >> 32U) != 0) {
+        PRINTF("DR6: bits [63:32] are not zero (0x%"PRIx64")\n",
+               vmcb->dr6);
+    }
+    if ((vmcb->dr7 >> 32U) != 0) {
+        PRINTF("DR7: bits [63:32] are not zero (0x%"PRIx64")\n",
+               vmcb->dr7);
+    }
+
+    if ((vmcb->efer >> 15U) != 0) {
+        PRINTF("EFER: bits [63:15] are not zero (0x%"PRIx64")\n",
+               vmcb->efer);
+    }
+
+    if ((vmcb->efer & EFER_LME) != 0 && ((vmcb->cr0 & X86_CR0_PG) != 0)) {
+        if ((vmcb->cr4 & X86_CR4_PAE) == 0) {
+            PRINTF("EFER_LME and CR0.PG are both set and CR4.PAE is zero.\n");
+        }
+        if ((vmcb->cr0 & X86_CR0_PE) == 0) {
+            PRINTF("EFER_LME and CR0.PG are both set and CR0.PE is zero.\n");
+        }
+    }
+    if ((vmcb->efer & EFER_LME) != 0
+        && (vmcb->cr0 & X86_CR0_PG) != 0
+        && (vmcb->cr4 & X86_CR4_PAE) != 0
+        && (vmcb->cs.attr.fields.l != 0)
+        && (vmcb->cs.attr.fields.db != 0))
+    {
+        PRINTF("EFER_LME, CR0.PG, CR4.PAE, CS.L and CS.D are all non-zero.\n");
+    }
+
+    if ((vmcb->general2_intercepts & GENERAL2_INTERCEPT_VMRUN) == 0) {
+        PRINTF("GENERAL2_INTERCEPT: VMRUN intercept bit is clear (0x%"PRIx32")\n",
+               vmcb->general2_intercepts);
+    }
+
+    if (vmcb->eventinj.fields.resvd1 != 0) {
+        PRINTF("eventinj: MBZ bits are set (0x%"PRIx64")\n",
+               vmcb->eventinj.bytes);
+    }
+
+    if (vmcb->np_enable && vmcb->h_cr3 == 0) {
+        PRINTF("nested paging enabled but host cr3 is 0\n");
+    }
+#undef PRINTF
+    return ret;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 54dd43df686b -r 3bfc06e2e41a xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c
+++ b/xen/arch/x86/hvm/svm/vmcb.c
@@ -33,6 +33,7 @@
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/intr.h>
#include <asm/hvm/svm/asid.h>
+#include <asm/hvm/svm/svmdebug.h>
#include <xen/event.h>
#include <xen/kernel.h>
#include <xen/domain_page.h>
@@ -40,9 +41,6 @@
extern int svm_dbg_on;
-#define IOPM_SIZE (12 * 1024)
-#define MSRPM_SIZE (8 * 1024)
-
struct vmcb_struct *alloc_vmcb(void)
{
struct vmcb_struct *vmcb;
@@ -78,37 +76,6 @@ struct host_save_area *alloc_host_save_a
return hsa;
}
-void svm_intercept_msr(struct vcpu *v, uint32_t msr, int enable)
-{
- unsigned long *msr_bitmap = v->arch.hvm_svm.msrpm;
- unsigned long *msr_bit = NULL;
-
- /*
- * See AMD64 Programmers Manual, Vol 2, Section 15.10 (MSR-Bitmap Address).
- */
- if ( msr <= 0x1fff )
- msr_bit = msr_bitmap + 0x0000 / BYTES_PER_LONG;
- else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
- msr_bit = msr_bitmap + 0x0800 / BYTES_PER_LONG;
- else if ( (msr >= 0xc0010000) && (msr <= 0xc0011fff) )
- msr_bit = msr_bitmap + 0x1000 / BYTES_PER_LONG;
-
- BUG_ON(msr_bit == NULL);
-
- msr &= 0x1fff;
-
- if ( enable )
- {
- __set_bit(msr * 2, msr_bit);
- __set_bit(msr * 2 + 1, msr_bit);
- }
- else
- {
- __clear_bit(msr * 2, msr_bit);
- __clear_bit(msr * 2 + 1, msr_bit);
- }
-}
-
static int construct_vmcb(struct vcpu *v)
{
struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
@@ -253,7 +220,7 @@ static int construct_vmcb(struct vcpu *v
if ( cpu_has_pause_filter )
{
- vmcb->pause_filter_count = 3000;
+ vmcb->pause_filter_count = SVM_PAUSEFILTER_INIT;
vmcb->general1_intercepts |= GENERAL1_INTERCEPT_PAUSE;
}
@@ -301,76 +268,6 @@ void svm_destroy_vmcb(struct vcpu *v)
arch_svm->vmcb = NULL;
}
-static void svm_dump_sel(char *name, svm_segment_register_t *s)
-{
- printk("%s: sel=0x%04x, attr=0x%04x, limit=0x%08x, base=0x%016llx\n",
- name, s->sel, s->attr.bytes, s->limit,
- (unsigned long long)s->base);
-}
-
-void svm_dump_vmcb(const char *from, struct vmcb_struct *vmcb)
-{
- printk("Dumping guest's current state at %s...\n", from);
- printk("Size of VMCB = %d, address = %p\n",
- (int) sizeof(struct vmcb_struct), vmcb);
-
- printk("cr_intercepts = 0x%08x dr_intercepts = 0x%08x "
- "exception_intercepts = 0x%08x\n",
- vmcb->cr_intercepts, vmcb->dr_intercepts,
- vmcb->exception_intercepts);
- printk("general1_intercepts = 0x%08x general2_intercepts = 0x%08x\n",
- vmcb->general1_intercepts, vmcb->general2_intercepts);
- printk("iopm_base_pa = %016llx msrpm_base_pa = 0x%016llx tsc_offset = "
- "0x%016llx\n",
- (unsigned long long) vmcb->iopm_base_pa,
- (unsigned long long) vmcb->msrpm_base_pa,
- (unsigned long long) vmcb->tsc_offset);
- printk("tlb_control = 0x%08x vintr = 0x%016llx interrupt_shadow = "
- "0x%016llx\n", vmcb->tlb_control,
- (unsigned long long) vmcb->vintr.bytes,
- (unsigned long long) vmcb->interrupt_shadow);
- printk("exitcode = 0x%016llx exitintinfo = 0x%016llx\n",
- (unsigned long long) vmcb->exitcode,
- (unsigned long long) vmcb->exitintinfo.bytes);
- printk("exitinfo1 = 0x%016llx exitinfo2 = 0x%016llx \n",
- (unsigned long long) vmcb->exitinfo1,
- (unsigned long long) vmcb->exitinfo2);
- printk("np_enable = 0x%016llx guest_asid = 0x%03x\n",
- (unsigned long long) vmcb->np_enable, vmcb->guest_asid);
- printk("cpl = %d efer = 0x%016llx star = 0x%016llx lstar = 0x%016llx\n",
- vmcb->cpl, (unsigned long long) vmcb->efer,
- (unsigned long long) vmcb->star, (unsigned long long) vmcb->lstar);
- printk("CR0 = 0x%016llx CR2 = 0x%016llx\n",
- (unsigned long long) vmcb->cr0, (unsigned long long) vmcb->cr2);
- printk("CR3 = 0x%016llx CR4 = 0x%016llx\n",
- (unsigned long long) vmcb->cr3, (unsigned long long) vmcb->cr4);
- printk("RSP = 0x%016llx RIP = 0x%016llx\n",
- (unsigned long long) vmcb->rsp, (unsigned long long) vmcb->rip);
- printk("RAX = 0x%016llx RFLAGS=0x%016llx\n",
- (unsigned long long) vmcb->rax, (unsigned long long) vmcb->rflags);
- printk("DR6 = 0x%016llx, DR7 = 0x%016llx\n",
- (unsigned long long) vmcb->dr6, (unsigned long long) vmcb->dr7);
- printk("CSTAR = 0x%016llx SFMask = 0x%016llx\n",
- (unsigned long long) vmcb->cstar,
- (unsigned long long) vmcb->sfmask);
- printk("KernGSBase = 0x%016llx PAT = 0x%016llx \n",
- (unsigned long long) vmcb->kerngsbase,
- (unsigned long long) vmcb->g_pat);
- printk("H_CR3 = 0x%016llx\n", (unsigned long long)vmcb->h_cr3);
-
- /* print out all the selectors */
- svm_dump_sel("CS", &vmcb->cs);
- svm_dump_sel("DS", &vmcb->ds);
- svm_dump_sel("SS", &vmcb->ss);
- svm_dump_sel("ES", &vmcb->es);
- svm_dump_sel("FS", &vmcb->fs);
- svm_dump_sel("GS", &vmcb->gs);
- svm_dump_sel("GDTR", &vmcb->gdtr);
- svm_dump_sel("LDTR", &vmcb->ldtr);
- svm_dump_sel("IDTR", &vmcb->idtr);
- svm_dump_sel("TR", &vmcb->tr);
-}
-
static void vmcb_dump(unsigned char ch)
{
struct domain *d;
@@ -388,7 +285,7 @@ static void vmcb_dump(unsigned char ch)
for_each_vcpu ( d, v )
{
printk("\tVCPU %d\n", v->vcpu_id);
- svm_dump_vmcb("key_handler", v->arch.hvm_svm.vmcb);
+ svm_vmcb_dump("key_handler", v->arch.hvm_svm.vmcb);
}
}
diff -r 54dd43df686b -r 3bfc06e2e41a xen/include/asm-x86/hvm/svm/emulate.h
--- a/xen/include/asm-x86/hvm/svm/emulate.h
+++ b/xen/include/asm-x86/hvm/svm/emulate.h
@@ -32,6 +32,11 @@ enum instruction_index {
INSTR_INT3,
INSTR_RDTSC,
INSTR_PAUSE,
+ INSTR_VMRUN,
+ INSTR_VMLOAD,
+ INSTR_VMSAVE,
+ INSTR_STGI,
+ INSTR_CLGI,
INSTR_MAX_COUNT /* Must be last - Number of instructions supported */
};
diff -r 54dd43df686b -r 3bfc06e2e41a xen/include/asm-x86/hvm/svm/nestedsvm.h
--- /dev/null
+++ b/xen/include/asm-x86/hvm/svm/nestedsvm.h
@@ -0,0 +1,119 @@
+/*
+ * nestedsvm.h: Nested Virtualization
+ * Copyright (c) 2010, Advanced Micro Devices, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __ASM_X86_HVM_SVM_NESTEDSVM_H__
+#define __ASM_X86_HVM_SVM_NESTEDSVM_H__
+
+#include <asm/config.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/svm/vmcb.h>
+
+struct nestedsvm { /* SVM nested-virt state; reached via vcpu_nestedsvm(v) */
+ uint64_t ns_msr_hsavepa; /* MSR HSAVE_PA value */
+
+ /* Cached real intercepts of the nested guest */
+ uint32_t ns_cr_intercepts;
+ uint32_t ns_dr_intercepts;
+ uint32_t ns_exception_intercepts;
+ uint32_t ns_general1_intercepts;
+ uint32_t ns_general2_intercepts;
+
+ /* Cached real lbr of the nested guest */
+ lbrctrl_t ns_lbr_control;
+
+ /* Cached real MSR permission bitmaps of the nested guest */
+ unsigned long *ns_cached_msrpm;
+ /* Merged MSR permission bitmap */
+ unsigned long *ns_merged_msrpm;
+
+ void *ns_hostsave; /* NOTE(review): presumably a host state save area - confirm */
+
+ /* Cache guest cr3/host cr3 the guest sets up for the nested guest.
+ * Used by Shadow-on-Shadow and Nested-on-Nested.
+ * ns_vmcb_guestcr3: in l2 guest physical address space and points to
+ * the l2 guest page table
+ * ns_vmcb_hostcr3: in l1 guest physical address space and points to
+ * the l1 guest nested page table
+ */
+ uint64_t ns_vmcb_guestcr3, ns_vmcb_hostcr3;
+ uint32_t ns_guest_asid; /* NOTE(review): presumably the ASID l1 chose for l2 - confirm */
+
+ bool_t ns_hap_enabled; /* NOTE(review): presumably l1 enabled nested paging - confirm */
+
+ /* Only meaningful when vmexit_pending flag is set */
+ struct {
+ uint64_t exitcode; /* native exitcode to inject into l1 guest */
+ uint64_t exitinfo1; /* additional information to the exitcode */
+ uint64_t exitinfo2; /* additional information to the exitcode */
+ } ns_vmexit;
+ union { /* flag bits, accessible as a whole through 'bytes' */
+ uint32_t bytes;
+ struct {
+ uint32_t rflagsif: 1; /* NOTE(review): presumably a saved RFLAGS.IF - confirm */
+ uint32_t vintrmask: 1; /* NOTE(review): presumably V_INTR_MASKING state - confirm */
+ uint32_t reserved: 30;
+ } fields;
+ } ns_hostflags;
+};
+
+#define vcpu_nestedsvm(v) (vcpu_nestedhvm(v).u.nsvm)
+
+/* True when l1 guest enabled SVM in EFER */
+#define hvm_svm_enabled(v) \
+ (!!((v)->arch.hvm_vcpu.guest_efer & EFER_SVME))
+
+int
+nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr);
+enum nestedhvm_vmexits
+nestedsvm_vcpu_vmexit(struct vcpu *v, struct cpu_user_regs *regs,
+ uint64_t exitcode);
+
+/* Interface methods */
+int nsvm_vcpu_destroy(struct vcpu *v);
+int nsvm_vcpu_initialise(struct vcpu *v);
+int nsvm_vcpu_reset(struct vcpu *v);
+int nsvm_vcpu_hostrestore(struct vcpu *v, struct cpu_user_regs *regs);
+int nsvm_vcpu_vmrun(struct vcpu *v, struct cpu_user_regs *regs);
+int nsvm_vcpu_vmexit(struct vcpu *v, struct cpu_user_regs *regs,
+ uint64_t exitcode);
+int nsvm_vcpu_vmexit_trap(struct vcpu *v, unsigned int trapnr,
+ int errcode, unsigned long cr2);
+uint64_t nsvm_vcpu_guestcr3(struct vcpu *v);
+uint64_t nsvm_vcpu_hostcr3(struct vcpu *v);
+uint32_t nsvm_vcpu_asid(struct vcpu *v);
+uint64_t nsvm_vmcb_exitcode_native2generic(struct vcpu *v, uint64_t exitcode);
+int nsvm_vmcb_guest_intercepts_exitcode(struct vcpu *v,
+ struct cpu_user_regs *regs, uint64_t exitcode);
+int nsvm_vmcb_guest_intercepts_trap(struct vcpu *v, unsigned int trapnr);
+bool_t nsvm_vmcb_hap_enabled(struct vcpu *v);
+
+/* MSRs */
+int nsvm_rdmsr(struct vcpu *v, unsigned int msr, uint64_t *msr_content);
+int nsvm_wrmsr(struct vcpu *v, unsigned int msr, uint64_t msr_content);
+
+#endif /* __ASM_X86_HVM_SVM_NESTEDSVM_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 54dd43df686b -r 3bfc06e2e41a xen/include/asm-x86/hvm/svm/svm.h
--- a/xen/include/asm-x86/hvm/svm/svm.h
+++ b/xen/include/asm-x86/hvm/svm/svm.h
@@ -29,8 +29,6 @@
#include <asm/i387.h>
#include <asm/hvm/vpmu.h>
-void svm_dump_vmcb(const char *from, struct vmcb_struct *vmcb);
-
#define SVM_REG_EAX (0)
#define SVM_REG_ECX (1)
#define SVM_REG_EDX (2)
@@ -62,6 +60,8 @@ static inline void svm_vmsave(void *vmcb
: : "a" (__pa(vmcb)) : "memory" );
}
+unsigned long *svm_msrbit(unsigned long *msr_bitmap, uint32_t msr);
+
extern u32 svm_feature_flags;
#define SVM_FEATURE_NPT 0
@@ -76,4 +76,6 @@ extern u32 svm_feature_flags;
#define cpu_has_svm_nrips test_bit(SVM_FEATURE_NRIPS, &svm_feature_flags)
#define cpu_has_pause_filter test_bit(SVM_FEATURE_PAUSEF, &svm_feature_flags)
+#define SVM_PAUSEFILTER_INIT 3000
+
#endif /* __ASM_X86_HVM_SVM_H__ */
diff -r 54dd43df686b -r 3bfc06e2e41a xen/include/asm-x86/hvm/svm/svmdebug.h
--- /dev/null
+++ b/xen/include/asm-x86/hvm/svm/svmdebug.h
@@ -0,0 +1,30 @@
+/*
+ * svmdebug.h: SVM related debug definitions
+ * Copyright (c) 2010, AMD Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef __ASM_X86_HVM_SVM_SVMDEBUG_H__
+#define __ASM_X86_HVM_SVM_SVMDEBUG_H__
+
+#include <asm/types.h>
+#include <asm/hvm/svm/vmcb.h>
+
+void svm_vmcb_dump(const char *from, struct vmcb_struct *vmcb);
+bool_t svm_vmcb_isvalid(const char *from, struct vmcb_struct *vmcb,
+ bool_t verbose);
+
+#endif /* __ASM_X86_HVM_SVM_SVMDEBUG_H__ */
diff -r 54dd43df686b -r 3bfc06e2e41a xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -364,6 +364,9 @@ typedef union
} fields;
} __attribute__ ((packed)) lbrctrl_t;
+#define IOPM_SIZE (12 * 1024)
+#define MSRPM_SIZE (8 * 1024)
+
struct vmcb_struct {
u32 cr_intercepts; /* offset 0x00 */
u32 dr_intercepts; /* offset 0x04 */
diff -r 54dd43df686b -r 3bfc06e2e41a xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -25,6 +25,7 @@
#include <asm/hvm/vlapic.h>
#include <asm/hvm/vmx/vmcs.h>
#include <asm/hvm/svm/vmcb.h>
+#include <asm/hvm/svm/nestedsvm.h>
#include <asm/mtrr.h>
enum hvm_io_state {
@@ -45,6 +46,7 @@ struct nestedvcpu {
/* SVM/VMX arch specific */
union {
+ struct nestedsvm nsvm;
} u;
bool_t nv_flushp2m; /* True, when p2m table must be flushed */
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 09/13] Nested Virtualization: svm specific implementation
2010-11-12 18:43 [PATCH 09/13] Nested Virtualization: svm specific implementation Christoph Egger
@ 2010-11-16 14:54 ` Tim Deegan
2010-12-02 17:44 ` Christoph Egger
0 siblings, 1 reply; 5+ messages in thread
From: Tim Deegan @ 2010-11-16 14:54 UTC (permalink / raw)
To: Christoph Egger; +Cc: xen-devel@lists.xensource.com
Hi,
At 18:43 +0000 on 12 Nov (1289587425), Christoph Egger wrote:
> +static int nsvm_vmrun_permissionmap(struct vcpu *v)
> +{
> + struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
> + struct nestedsvm *svm = &vcpu_nestedsvm(v);
> + struct vmcb_struct *ns_vmcb = vcpu_nestedhvm(v).nv_vmcx;
> + struct vmcb_struct *host_vmcb = arch_svm->vmcb;
> + unsigned long *ns_msrpm_ptr;
> + unsigned int i;
> + enum hvm_copy_result ret;
> +
> + ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm;
> +
> + ret = hvm_copy_from_guest_phys(svm->ns_cached_msrpm,
> + ns_vmcb->msrpm_base_pa, MSRPM_SIZE);
> + if (ret != HVMCOPY_okay) {
> + gdprintk(XENLOG_ERR, "hvm_copy_from_guest_phys msrpm %u\n", ret);
> + return 1;
> + }
> +
> + /* Skip io bitmap merge since hvm_io_bitmap has all bits set but
> + * 0x80 and 0xed.
> + */
What if the L1 hypervisor wants to intercept port 0x80 or port 0xed?
> + /* v->arch.hvm_svm.msrpm has type unsigned long, thus
> + * BYTES_PER_LONG.
> + */
> + for (i = 0; i < MSRPM_SIZE / BYTES_PER_LONG; i++)
> + svm->ns_merged_msrpm[i] = arch_svm->msrpm[i] | ns_msrpm_ptr[i];
> +
> + host_vmcb->iopm_base_pa =
> + (uint64_t)virt_to_maddr(hvm_io_bitmap);
> + host_vmcb->msrpm_base_pa =
> + (uint64_t)virt_to_maddr(svm->ns_merged_msrpm);
> +
> + return 0;
> +}
> +
> +static int nsvm_vmcb_prepare4vmrun(struct vcpu *v, struct cpu_user_regs *regs)
> +{
> + struct nestedsvm *svm = &vcpu_nestedsvm(v);
> + struct vmcb_struct *ns_vmcb = vcpu_nestedhvm(v).nv_vmcx;
> + struct vmcb_struct *host_vmcb = v->arch.hvm_svm.vmcb;
> + int rc;
> +
> + /* Enable nested guest intercepts */
> + svm->ns_cr_intercepts = ns_vmcb->cr_intercepts;
> + svm->ns_dr_intercepts = ns_vmcb->dr_intercepts;
> + svm->ns_exception_intercepts = ns_vmcb->exception_intercepts;
> + svm->ns_general1_intercepts = ns_vmcb->general1_intercepts;
> + svm->ns_general2_intercepts = ns_vmcb->general2_intercepts;
> +
> + host_vmcb->cr_intercepts |= ns_vmcb->cr_intercepts;
> + host_vmcb->dr_intercepts |= ns_vmcb->dr_intercepts;
> + host_vmcb->exception_intercepts |= ns_vmcb->exception_intercepts;
> + host_vmcb->general1_intercepts |= ns_vmcb->general1_intercepts;
> + host_vmcb->general2_intercepts |= ns_vmcb->general2_intercepts;
> +
> + /* Nested Pause Filter */
> + if (ns_vmcb->general1_intercepts & GENERAL1_INTERCEPT_PAUSE)
> + host_vmcb->pause_filter_count =
> + min(ns_vmcb->pause_filter_count, host_vmcb->pause_filter_count);
> + else
> + host_vmcb->pause_filter_count = SVM_PAUSEFILTER_INIT;
Why ignore the L0 count if the L1 doesn't intercept PAUSE?
> +
> + /* Nested IO permission bitmaps */
> + rc = nsvm_vmrun_permissionmap(v);
> + if (rc)
> + return rc;
> +
> + /* TSC offset */
> + hvm_set_guest_tsc(v, host_vmcb->tsc_offset + ns_vmcb->tsc_offset);
hvm_set_guest_tsc takes an absolute value, not an offset. I think you mean
to call the hvm_funcs pointer directly.
> +
> + /* ASID */
> + hvm_asid_flush_vcpu(v);
> + /* host_vmcb->guest_asid = ns_vmcb->guest_asid; */
> +
> + /* TLB control */
> + host_vmcb->tlb_control |= ns_vmcb->tlb_control;
> +
> + /* Virtual Interrupts */
> + host_vmcb->vintr = ns_vmcb->vintr;
> + host_vmcb->vintr.fields.intr_masking = 1;
> +
> + /* Shadow Mode */
> + host_vmcb->interrupt_shadow = ns_vmcb->interrupt_shadow;
> +
> + /* Exit codes */
> + host_vmcb->exitcode = ns_vmcb->exitcode;
> + host_vmcb->exitinfo1 = ns_vmcb->exitinfo1;
> + host_vmcb->exitinfo2 = ns_vmcb->exitinfo2;
> + host_vmcb->exitintinfo = ns_vmcb->exitintinfo;
> +
> + /* Pending Interrupts */
> + host_vmcb->eventinj = ns_vmcb->eventinj;
> +
> + /* LBR virtualization */
> + svm->ns_lbr_control = ns_vmcb->lbr_control;
> + host_vmcb->lbr_control.bytes |= ns_vmcb->lbr_control.bytes;
> +
> + /* NextRIP */
> + host_vmcb->nextrip = ns_vmcb->nextrip;
> +
> + /*
> + * VMCB Save State Area
> + */
> +
> + /* Segments */
> + host_vmcb->es = ns_vmcb->es;
> + host_vmcb->cs = ns_vmcb->cs;
> + host_vmcb->ss = ns_vmcb->ss;
> + host_vmcb->ds = ns_vmcb->ds;
> + host_vmcb->gdtr = ns_vmcb->gdtr;
> + host_vmcb->idtr = ns_vmcb->idtr;
> +
> + /* CPL */
> + host_vmcb->cpl = ns_vmcb->cpl;
> +
> + /* EFER */
> + v->arch.hvm_vcpu.guest_efer = ns_vmcb->efer;
> + rc = hvm_set_efer(ns_vmcb->efer);
> + if (rc != X86EMUL_OKAY)
> + gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);
> +
> + /* CR4 */
> + v->arch.hvm_vcpu.guest_cr[4] = ns_vmcb->cr4;
> + rc = hvm_set_cr4(ns_vmcb->cr4);
> + if (rc != X86EMUL_OKAY)
> + gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);
> +
> + /* CR0 */
> + v->arch.hvm_vcpu.guest_cr[0] = ns_vmcb->cr0;
> + rc = hvm_set_cr0(ns_vmcb->cr0);
> + if (rc != X86EMUL_OKAY)
> + gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);
> +
> + /* CR2 */
> + v->arch.hvm_vcpu.guest_cr[2] = ns_vmcb->cr2;
> + hvm_update_guest_cr(v, 2);
> +
> + /* Nested paging mode */
> + if (nestedhvm_paging_mode_hap(v)) {
> + /* host nested paging + guest nested paging. */
> +
> + /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
> + rc = hvm_set_cr3(ns_vmcb->cr3);
> + if (rc != X86EMUL_OKAY)
> + gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
> + } else if (paging_mode_hap(v->domain)) {
> + /* host nested paging + guest shadow paging. */
> + host_vmcb->np_enable = 1;
> + /* Keep h_cr3 as it is. */
> + /* Guest shadow paging: Must intercept pagefaults. */
> + host_vmcb->exception_intercepts |= (1U << TRAP_page_fault);
No - it's the L1 hypervisor's job to intercept #PF if it wants to use
shadow paging. We shouldn't special-case it here.
> + /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
> + rc = hvm_set_cr3(ns_vmcb->cr3);
> + if (rc != X86EMUL_OKAY)
> + gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
> + } else {
> + /* host shadow paging + guest shadow paging. */
> + host_vmcb->np_enable = 0;
> + host_vmcb->h_cr3 = 0x0;
> +
> + /* TODO: Once shadow-shadow paging is in place come back to here
> + * and set host_vmcb->cr3 to the shadowed shadow table.
> + */
> + }
> +
> + /* DRn */
> + host_vmcb->dr7 = ns_vmcb->dr7;
> + host_vmcb->dr6 = ns_vmcb->dr6;
> +
> + /* RFLAGS */
> + host_vmcb->rflags = ns_vmcb->rflags;
> +
> + /* RIP */
> + host_vmcb->rip = ns_vmcb->rip;
> +
> + /* RSP */
> + host_vmcb->rsp = ns_vmcb->rsp;
> +
> + /* RAX */
> + host_vmcb->rax = ns_vmcb->rax;
> +
> + /* Keep the host values of the fs, gs, ldtr, tr, kerngsbase,
> + * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
> + * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
> + */
> +
> + /* Page tables */
> + host_vmcb->pdpe0 = ns_vmcb->pdpe0;
> + host_vmcb->pdpe1 = ns_vmcb->pdpe1;
> + host_vmcb->pdpe2 = ns_vmcb->pdpe2;
> + host_vmcb->pdpe3 = ns_vmcb->pdpe3;
> +
> + /* PAT */
> + host_vmcb->g_pat = ns_vmcb->g_pat;
> +
> + /* Debug Control MSR */
> + host_vmcb->debugctlmsr = ns_vmcb->debugctlmsr;
> +
> + /* LBR MSRs */
> + host_vmcb->lastbranchfromip = ns_vmcb->lastbranchfromip;
> + host_vmcb->lastbranchtoip = ns_vmcb->lastbranchtoip;
> + host_vmcb->lastintfromip = ns_vmcb->lastintfromip;
> + host_vmcb->lastinttoip = ns_vmcb->lastinttoip;
> +
> + rc = svm_vmcb_isvalid(__func__, ns_vmcb, 1);
> + if (rc) {
> + gdprintk(XENLOG_ERR, "nested vmcb invalid\n");
> + return rc;
> + }
> +
> + rc = svm_vmcb_isvalid(__func__, host_vmcb, 1);
> + if (rc) {
> + gdprintk(XENLOG_ERR, "host vmcb invalid\n");
> + return rc;
> + }
> +
> + /* Switch guest registers to nested guest */
> + regs->eax = ns_vmcb->rax;
> + regs->eip = ns_vmcb->rip;
> + regs->esp = ns_vmcb->rsp;
> + regs->eflags = ns_vmcb->rflags;
> +
> + return 0;
> +}
> +
> +int
> +nsvm_vmcb_guest_intercepts_exitcode(struct vcpu *v,
> + struct cpu_user_regs *regs, uint64_t exitcode)
> +{
> + uint64_t exit_bits;
> + struct nestedvcpu *nv = &vcpu_nestedhvm(v);
> + struct nestedsvm *svm = &vcpu_nestedsvm(v);
> + struct vmcb_struct *ns_vmcb = nv->nv_vmcx;
> + enum nestedhvm_vmexits vmexits;
> +
> + switch (exitcode) {
> + case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
> + case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
> + exit_bits = 1ULL << (exitcode - VMEXIT_CR0_READ);
> + if (svm->ns_cr_intercepts & exit_bits)
> + break;
> + return 0;
> +
> + case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
> + case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
> + exit_bits = 1ULL << (exitcode - VMEXIT_DR0_READ);
> + if (svm->ns_dr_intercepts & exit_bits)
> + break;
> + return 0;
> +
> + case VMEXIT_EXCEPTION_DE ... VMEXIT_EXCEPTION_XF:
> + exit_bits = 1ULL << (exitcode - VMEXIT_EXCEPTION_DE);
> + if (svm->ns_exception_intercepts & exit_bits)
> + break;
> + return 0;
> +
> + case VMEXIT_INTR ... VMEXIT_SHUTDOWN:
> + exit_bits = 1ULL << (exitcode - VMEXIT_INTR);
> + if (svm->ns_general1_intercepts & exit_bits)
> + break;
> + return 0;
> +
> + case VMEXIT_VMRUN ... VMEXIT_MWAIT_CONDITIONAL:
> + exit_bits = 1ULL << (exitcode - VMEXIT_VMRUN);
> + if (svm->ns_general2_intercepts & exit_bits)
> + break;
> + return 0;
> +
> + case VMEXIT_NPF:
> + case VMEXIT_INVALID:
> + /* Always intercepted */
> + break;
> +
> + default:
> + gdprintk(XENLOG_ERR, "Illegal exitcode 0x%"PRIx64"\n", exitcode);
> + BUG();
> + break;
> + }
> +
> + /* Special cases: Do more detailed checks */
> + switch (exitcode) {
> + case VMEXIT_MSR:
> + ASSERT(regs != NULL);
> + nestedsvm_vmcb_map(v, nv->nv_vmcxaddr);
> + ASSERT(nv->nv_vmcx != NULL);
> + ns_vmcb = nv->nv_vmcx;
> + vmexits = nsvm_vmcb_guest_intercepts_msr(svm->ns_cached_msrpm,
> + regs->ecx, ns_vmcb->exitinfo1 != 0);
> + if (vmexits == NESTEDHVM_VMEXIT_HOST)
> + return 0;
> + break;
> +
> + case VMEXIT_IOIO:
> + /* always intercepted */
> + break;
What if the guest doesn't want to intercept it? Won't that make it
crash or misbehave?
Tim.
--
Tim Deegan <Tim.Deegan@citrix.com>
Principal Software Engineer, Xen Platform Team
Citrix Systems UK Ltd. (Company #02937203, SL9 0BG)
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 09/13] Nested Virtualization: svm specific implementation
2010-11-16 14:54 ` Tim Deegan
@ 2010-12-02 17:44 ` Christoph Egger
0 siblings, 0 replies; 5+ messages in thread
From: Christoph Egger @ 2010-12-02 17:44 UTC (permalink / raw)
To: Tim Deegan; +Cc: xen-devel@lists.xensource.com
On Tuesday 16 November 2010 15:54:11 Tim Deegan wrote:
> Hi,
>
> At 18:43 +0000 on 12 Nov (1289587425), Christoph Egger wrote:
> > +static int nsvm_vmrun_permissionmap(struct vcpu *v)
> > +{
> > + struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
> > + struct nestedsvm *svm = &vcpu_nestedsvm(v);
> > + struct vmcb_struct *ns_vmcb = vcpu_nestedhvm(v).nv_vmcx;
> > + struct vmcb_struct *host_vmcb = arch_svm->vmcb;
> > + unsigned long *ns_msrpm_ptr;
> > + unsigned int i;
> > + enum hvm_copy_result ret;
> > +
> > + ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm;
> > +
> > + ret = hvm_copy_from_guest_phys(svm->ns_cached_msrpm,
> > + ns_vmcb->msrpm_base_pa, MSRPM_SIZE);
> > + if (ret != HVMCOPY_okay) {
> > + gdprintk(XENLOG_ERR, "hvm_copy_from_guest_phys msrpm %u\n",
> > ret); + return 1;
> > + }
> > +
> > + /* Skip io bitmap merge since hvm_io_bitmap has all bits set but
> > + * 0x80 and 0xed.
> > + */
>
> What if the L1 hypervisor wants to intercept port 0x80 or port 0xed?
Thanks for the pointer. This item is work in progress.
>
> > + /* v->arch.hvm_svm.msrpm has type unsigned long, thus
> > + * BYTES_PER_LONG.
> > + */
> > + for (i = 0; i < MSRPM_SIZE / BYTES_PER_LONG; i++)
> > + svm->ns_merged_msrpm[i] = arch_svm->msrpm[i] | ns_msrpm_ptr[i];
> > +
> > + host_vmcb->iopm_base_pa =
> > + (uint64_t)virt_to_maddr(hvm_io_bitmap);
> > + host_vmcb->msrpm_base_pa =
> > + (uint64_t)virt_to_maddr(svm->ns_merged_msrpm);
> > +
> > + return 0;
> > +}
> > +
> > +static int nsvm_vmcb_prepare4vmrun(struct vcpu *v, struct cpu_user_regs
> > *regs) +{
> > + struct nestedsvm *svm = &vcpu_nestedsvm(v);
> > + struct vmcb_struct *ns_vmcb = vcpu_nestedhvm(v).nv_vmcx;
> > + struct vmcb_struct *host_vmcb = v->arch.hvm_svm.vmcb;
> > + int rc;
> > +
> > + /* Enable nested guest intercepts */
> > + svm->ns_cr_intercepts = ns_vmcb->cr_intercepts;
> > + svm->ns_dr_intercepts = ns_vmcb->dr_intercepts;
> > + svm->ns_exception_intercepts = ns_vmcb->exception_intercepts;
> > + svm->ns_general1_intercepts = ns_vmcb->general1_intercepts;
> > + svm->ns_general2_intercepts = ns_vmcb->general2_intercepts;
> > +
> > + host_vmcb->cr_intercepts |= ns_vmcb->cr_intercepts;
> > + host_vmcb->dr_intercepts |= ns_vmcb->dr_intercepts;
> > + host_vmcb->exception_intercepts |= ns_vmcb->exception_intercepts;
> > + host_vmcb->general1_intercepts |= ns_vmcb->general1_intercepts;
> > + host_vmcb->general2_intercepts |= ns_vmcb->general2_intercepts;
> > +
> > + /* Nested Pause Filter */
> > + if (ns_vmcb->general1_intercepts & GENERAL1_INTERCEPT_PAUSE)
> > + host_vmcb->pause_filter_count =
> > + min(ns_vmcb->pause_filter_count,
> > host_vmcb->pause_filter_count); + else
> > + host_vmcb->pause_filter_count = SVM_PAUSEFILTER_INIT;
>
> Why ignore the L0 count if the L1 doesn't intercept PAUSE?
SVM_PAUSEFILTER_INIT is what is also used in construct_vmcb().
>
> > +
> > + /* Nested IO permission bitmaps */
> > + rc = nsvm_vmrun_permissionmap(v);
> > + if (rc)
> > + return rc;
> > +
> > + /* TSC offset */
> > + hvm_set_guest_tsc(v, host_vmcb->tsc_offset + ns_vmcb->tsc_offset);
>
> hvm_set_guest_tsc takes an absolute value, not an offset. ITYM to call
> the hvm_funcs pointer directly.
Thanks. Fixed.
>
> > +
> > + /* ASID */
> > + hvm_asid_flush_vcpu(v);
> > + /* host_vmcb->guest_asid = ns_vmcb->guest_asid; */
> > +
> > + /* TLB control */
> > + host_vmcb->tlb_control |= ns_vmcb->tlb_control;
> > +
> > + /* Virtual Interrupts */
> > + host_vmcb->vintr = ns_vmcb->vintr;
> > + host_vmcb->vintr.fields.intr_masking = 1;
> > +
> > + /* Shadow Mode */
> > + host_vmcb->interrupt_shadow = ns_vmcb->interrupt_shadow;
> > +
> > + /* Exit codes */
> > + host_vmcb->exitcode = ns_vmcb->exitcode;
> > + host_vmcb->exitinfo1 = ns_vmcb->exitinfo1;
> > + host_vmcb->exitinfo2 = ns_vmcb->exitinfo2;
> > + host_vmcb->exitintinfo = ns_vmcb->exitintinfo;
> > +
> > + /* Pending Interrupts */
> > + host_vmcb->eventinj = ns_vmcb->eventinj;
> > +
> > + /* LBR virtualization */
> > + svm->ns_lbr_control = ns_vmcb->lbr_control;
> > + host_vmcb->lbr_control.bytes |= ns_vmcb->lbr_control.bytes;
> > +
> > + /* NextRIP */
> > + host_vmcb->nextrip = ns_vmcb->nextrip;
> > +
> > + /*
> > + * VMCB Save State Area
> > + */
> > +
> > + /* Segments */
> > + host_vmcb->es = ns_vmcb->es;
> > + host_vmcb->cs = ns_vmcb->cs;
> > + host_vmcb->ss = ns_vmcb->ss;
> > + host_vmcb->ds = ns_vmcb->ds;
> > + host_vmcb->gdtr = ns_vmcb->gdtr;
> > + host_vmcb->idtr = ns_vmcb->idtr;
> > +
> > + /* CPL */
> > + host_vmcb->cpl = ns_vmcb->cpl;
> > +
> > + /* EFER */
> > + v->arch.hvm_vcpu.guest_efer = ns_vmcb->efer;
> > + rc = hvm_set_efer(ns_vmcb->efer);
> > + if (rc != X86EMUL_OKAY)
> > + gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);
> > +
> > + /* CR4 */
> > + v->arch.hvm_vcpu.guest_cr[4] = ns_vmcb->cr4;
> > + rc = hvm_set_cr4(ns_vmcb->cr4);
> > + if (rc != X86EMUL_OKAY)
> > + gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);
> > +
> > + /* CR0 */
> > + v->arch.hvm_vcpu.guest_cr[0] = ns_vmcb->cr0;
> > + rc = hvm_set_cr0(ns_vmcb->cr0);
> > + if (rc != X86EMUL_OKAY)
> > + gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);
> > +
> > + /* CR2 */
> > + v->arch.hvm_vcpu.guest_cr[2] = ns_vmcb->cr2;
> > + hvm_update_guest_cr(v, 2);
> > +
> > + /* Nested paging mode */
> > + if (nestedhvm_paging_mode_hap(v)) {
> > + /* host nested paging + guest nested paging. */
> > +
> > + /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us.
> > */ + rc = hvm_set_cr3(ns_vmcb->cr3);
> > + if (rc != X86EMUL_OKAY)
> > + gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
> > + } else if (paging_mode_hap(v->domain)) {
> > + /* host nested paging + guest shadow paging. */
> > + host_vmcb->np_enable = 1;
> > + /* Keep h_cr3 as it is. */
> > + /* Guest shadow paging: Must intercept pagefaults. */
> > + host_vmcb->exception_intercepts |= (1U << TRAP_page_fault);
>
> No - it's the L1 hypervisor's job to intercept #PF if it wants to use
> shadow paging. We shouldn't special-case it here.
Fixed.
>
> > + /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us.
> > */ + rc = hvm_set_cr3(ns_vmcb->cr3);
> > + if (rc != X86EMUL_OKAY)
> > + gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
> > + } else {
> > + /* host shadow paging + guest shadow paging. */
> > + host_vmcb->np_enable = 0;
> > + host_vmcb->h_cr3 = 0x0;
> > +
> > + /* TODO: Once shadow-shadow paging is in place come back to here
> > + * and set host_vmcb->cr3 to the shadowed shadow table.
> > + */
> > + }
> > +
> > + /* DRn */
> > + host_vmcb->dr7 = ns_vmcb->dr7;
> > + host_vmcb->dr6 = ns_vmcb->dr6;
> > +
> > + /* RFLAGS */
> > + host_vmcb->rflags = ns_vmcb->rflags;
> > +
> > + /* RIP */
> > + host_vmcb->rip = ns_vmcb->rip;
> > +
> > + /* RSP */
> > + host_vmcb->rsp = ns_vmcb->rsp;
> > +
> > + /* RAX */
> > + host_vmcb->rax = ns_vmcb->rax;
> > +
> > + /* Keep the host values of the fs, gs, ldtr, tr, kerngsbase,
> > + * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
> > + * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
> > + */
> > +
> > + /* Page tables */
> > + host_vmcb->pdpe0 = ns_vmcb->pdpe0;
> > + host_vmcb->pdpe1 = ns_vmcb->pdpe1;
> > + host_vmcb->pdpe2 = ns_vmcb->pdpe2;
> > + host_vmcb->pdpe3 = ns_vmcb->pdpe3;
> > +
> > + /* PAT */
> > + host_vmcb->g_pat = ns_vmcb->g_pat;
> > +
> > + /* Debug Control MSR */
> > + host_vmcb->debugctlmsr = ns_vmcb->debugctlmsr;
> > +
> > + /* LBR MSRs */
> > + host_vmcb->lastbranchfromip = ns_vmcb->lastbranchfromip;
> > + host_vmcb->lastbranchtoip = ns_vmcb->lastbranchtoip;
> > + host_vmcb->lastintfromip = ns_vmcb->lastintfromip;
> > + host_vmcb->lastinttoip = ns_vmcb->lastinttoip;
> > +
> > + rc = svm_vmcb_isvalid(__func__, ns_vmcb, 1);
> > + if (rc) {
> > + gdprintk(XENLOG_ERR, "nested vmcb invalid\n");
> > + return rc;
> > + }
> > +
> > + rc = svm_vmcb_isvalid(__func__, host_vmcb, 1);
> > + if (rc) {
> > + gdprintk(XENLOG_ERR, "host vmcb invalid\n");
> > + return rc;
> > + }
> > +
> > + /* Switch guest registers to nested guest */
> > + regs->eax = ns_vmcb->rax;
> > + regs->eip = ns_vmcb->rip;
> > + regs->esp = ns_vmcb->rsp;
> > + regs->eflags = ns_vmcb->rflags;
> > +
> > + return 0;
> > +}
> > +
> >
> > +int
> > +nsvm_vmcb_guest_intercepts_exitcode(struct vcpu *v,
> > + struct cpu_user_regs *regs, uint64_t exitcode)
> > +{
> > + uint64_t exit_bits;
> > + struct nestedvcpu *nv = &vcpu_nestedhvm(v);
> > + struct nestedsvm *svm = &vcpu_nestedsvm(v);
> > + struct vmcb_struct *ns_vmcb = nv->nv_vmcx;
> > + enum nestedhvm_vmexits vmexits;
> > +
> > + switch (exitcode) {
> > + case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
> > + case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
> > + exit_bits = 1ULL << (exitcode - VMEXIT_CR0_READ);
> > + if (svm->ns_cr_intercepts & exit_bits)
> > + break;
> > + return 0;
> > +
> > + case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
> > + case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
> > + exit_bits = 1ULL << (exitcode - VMEXIT_DR0_READ);
> > + if (svm->ns_dr_intercepts & exit_bits)
> > + break;
> > + return 0;
> > +
> > + case VMEXIT_EXCEPTION_DE ... VMEXIT_EXCEPTION_XF:
> > + exit_bits = 1ULL << (exitcode - VMEXIT_EXCEPTION_DE);
> > + if (svm->ns_exception_intercepts & exit_bits)
> > + break;
> > + return 0;
> > +
> > + case VMEXIT_INTR ... VMEXIT_SHUTDOWN:
> > + exit_bits = 1ULL << (exitcode - VMEXIT_INTR);
> > + if (svm->ns_general1_intercepts & exit_bits)
> > + break;
> > + return 0;
> > +
> > + case VMEXIT_VMRUN ... VMEXIT_MWAIT_CONDITIONAL:
> > + exit_bits = 1ULL << (exitcode - VMEXIT_VMRUN);
> > + if (svm->ns_general2_intercepts & exit_bits)
> > + break;
> > + return 0;
> > +
> > + case VMEXIT_NPF:
> > + case VMEXIT_INVALID:
> > + /* Always intercepted */
> > + break;
> > +
> > + default:
> > + gdprintk(XENLOG_ERR, "Illegal exitcode 0x%"PRIx64"\n",
> > exitcode); + BUG();
> > + break;
> > + }
> > +
> > + /* Special cases: Do more detailed checks */
> > + switch (exitcode) {
> > + case VMEXIT_MSR:
> > + ASSERT(regs != NULL);
> > + nestedsvm_vmcb_map(v, nv->nv_vmcxaddr);
> > + ASSERT(nv->nv_vmcx != NULL);
> > + ns_vmcb = nv->nv_vmcx;
> > + vmexits = nsvm_vmcb_guest_intercepts_msr(svm->ns_cached_msrpm,
> > + regs->ecx, ns_vmcb->exitinfo1 != 0);
> > + if (vmexits == NESTEDHVM_VMEXIT_HOST)
> > + return 0;
> > + break;
> > +
> > + case VMEXIT_IOIO:
> > + /* always intercepted */
> > + break;
>
> What if the guest doesn't want to intercept it? Won't that make it
> crash or misbehave?
Yes, right. Fixed.
Christoph
--
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2010-12-02 17:44 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-11-12 18:43 [PATCH 09/13] Nested Virtualization: svm specific implementation Christoph Egger
2010-11-16 14:54 ` Tim Deegan
2010-12-02 17:44 ` Christoph Egger
-- strict thread matches above, loose matches on Subject: below --
2010-10-15 13:07 Christoph Egger
2010-09-01 15:14 Christoph Egger
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.