From mboxrd@z Thu Jan 1 00:00:00 1970 From: Alexander Graf Subject: [PATCH 6/9] Add VMRUN handler v5 Date: Mon, 20 Oct 2008 19:04:47 +0200 Message-ID: <1224522290-11740-7-git-send-email-agraf@suse.de> References: <1224522290-11740-1-git-send-email-agraf@suse.de> <1224522290-11740-2-git-send-email-agraf@suse.de> <1224522290-11740-3-git-send-email-agraf@suse.de> <1224522290-11740-4-git-send-email-agraf@suse.de> <1224522290-11740-5-git-send-email-agraf@suse.de> <1224522290-11740-6-git-send-email-agraf@suse.de> Cc: joro@8bytes.org, anthony@codemonkey.ws, avi@redhat.com To: kvm@vger.kernel.org Return-path: Received: from mail.suse.de ([195.135.220.2]:53941 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754545AbYJTREd (ORCPT ); Mon, 20 Oct 2008 13:04:33 -0400 In-Reply-To: <1224522290-11740-6-git-send-email-agraf@suse.de> Sender: kvm-owner@vger.kernel.org List-ID: This patch implements VMRUN. VMRUN enters a virtual CPU and runs it in the same context in which the normal guest CPU would run. So basically it is implemented the same way a normal CPU would do it. We also prepare all intercepts that get OR'ed with the original intercepts, as we do not allow a level 2 guest to be intercepted less than the first level guest. 
v2 implements the following improvements: - fixes the CPL check - does not allocate iopm when not used - remembers the host's IF in the HIF bit in the hflags v3: - make use of the new permission checking - add support for V_INTR_MASKING_MASK v4: - use host page backed hsave v5: - remove IOPM merging code Signed-off-by: Alexander Graf --- arch/x86/kvm/kvm_svm.h | 8 ++ arch/x86/kvm/svm.c | 156 +++++++++++++++++++++++++++++++++++++++++++- include/asm-x86/kvm_host.h | 2 + 3 files changed, 164 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h index 40cb128..c5854e8 100644 --- a/arch/x86/kvm/kvm_svm.h +++ b/arch/x86/kvm/kvm_svm.h @@ -42,6 +42,14 @@ struct vcpu_svm { u32 *msrpm; struct vmcb *hsave; + + u64 nested_vmcb; + + /* These are the merged vectors */ + u32 *nested_msrpm; + + /* gpa pointers to the real vectors */ + u64 nested_vmcb_msrpm; }; #endif diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 10ad02b..c3831b3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -76,6 +76,11 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) return container_of(vcpu, struct vcpu_svm, vcpu); } +static inline bool is_nested(struct vcpu_svm *svm) +{ + return svm->nested_vmcb; +} + static unsigned long iopm_base; struct kvm_ldttss_desc { @@ -615,6 +620,7 @@ static void init_vmcb(struct vcpu_svm *svm) } force_new_asid(&svm->vcpu); + svm->nested_vmcb = 0; svm->vcpu.arch.hflags = HF_GIF_MASK; } @@ -641,6 +647,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) struct page *page; struct page *msrpm_pages; struct page *hsave_page; + struct page *nested_msrpm_pages; int err; svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); @@ -663,6 +670,11 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); if (!msrpm_pages) goto uninit; + + nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); + if 
(!nested_msrpm_pages) + goto uninit; + svm->msrpm = page_address(msrpm_pages); svm_vcpu_init_msrpm(svm->msrpm); @@ -671,6 +683,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) goto uninit; svm->hsave = page_address(hsave_page); + svm->nested_msrpm = page_address(nested_msrpm_pages); + svm->vmcb = page_address(page); clear_page(svm->vmcb); svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; @@ -701,6 +715,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); __free_page(virt_to_page(svm->hsave)); + __free_pages(virt_to_page(svm->nested_msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); } @@ -1242,6 +1257,122 @@ static int nested_svm_do(struct vcpu_svm *svm, return retval; } + +static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1, + void *arg2, void *opaque) +{ + int i; + u32 *nested_msrpm = (u32*)arg1; + for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++) + svm->nested_msrpm[i] = svm->msrpm[i] | nested_msrpm[i]; + svm->vmcb->control.msrpm_base_pa = __pa(svm->nested_msrpm); + + return 0; +} + +static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1, + void *arg2, void *opaque) +{ + struct vmcb *nested_vmcb = (struct vmcb *)arg1; + struct vmcb *hsave = svm->hsave; + + /* nested_vmcb is our indicator if nested SVM is activated */ + svm->nested_vmcb = svm->vmcb->save.rax; + + /* Clear internal status */ + svm->vcpu.arch.exception.pending = false; + + /* Save the old vmcb, so we don't need to pick what we save, but + can restore everything when a VMEXIT occurs */ + memcpy(hsave, svm->vmcb, sizeof(struct vmcb)); + /* We need to remember the original CR3 in the SPT case */ + if (!npt_enabled) + hsave->save.cr3 = svm->vcpu.arch.cr3; + hsave->save.rip = svm->next_rip; + + if (svm->vmcb->save.rflags & X86_EFLAGS_IF) + svm->vcpu.arch.hflags |= HF_HIF_MASK; + else + 
svm->vcpu.arch.hflags &= ~HF_HIF_MASK; + + /* Load the nested guest state */ + svm->vmcb->save.es = nested_vmcb->save.es; + svm->vmcb->save.cs = nested_vmcb->save.cs; + svm->vmcb->save.ss = nested_vmcb->save.ss; + svm->vmcb->save.ds = nested_vmcb->save.ds; + svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; + svm->vmcb->save.idtr = nested_vmcb->save.idtr; + svm->vmcb->save.rflags = nested_vmcb->save.rflags; + svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); + svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); + svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); + if (npt_enabled) { + svm->vmcb->save.cr3 = nested_vmcb->save.cr3; + svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; + } else { + kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); + kvm_mmu_reset_context(&svm->vcpu); + } + svm->vmcb->save.cr2 = nested_vmcb->save.cr2; + kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); + kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); + kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); + /* In case we don't even reach vcpu_run, the fields are not updated */ + svm->vmcb->save.rax = nested_vmcb->save.rax; + svm->vmcb->save.rsp = nested_vmcb->save.rsp; + svm->vmcb->save.rip = nested_vmcb->save.rip; + svm->vmcb->save.dr7 = nested_vmcb->save.dr7; + svm->vmcb->save.dr6 = nested_vmcb->save.dr6; + svm->vmcb->save.cpl = nested_vmcb->save.cpl; + + /* We don't want a nested guest to be more powerful than the guest, + so all intercepts are ORed */ + svm->vmcb->control.intercept_cr_read |= + nested_vmcb->control.intercept_cr_read; + svm->vmcb->control.intercept_cr_write |= + nested_vmcb->control.intercept_cr_write; + svm->vmcb->control.intercept_dr_read |= + nested_vmcb->control.intercept_dr_read; + svm->vmcb->control.intercept_dr_write |= + nested_vmcb->control.intercept_dr_write; + svm->vmcb->control.intercept_exceptions |= + nested_vmcb->control.intercept_exceptions; + + svm->vmcb->control.intercept |= nested_vmcb->control.intercept; + + 
svm->nested_vmcb_msrpm = nested_vmcb->control.msrpm_base_pa; + + force_new_asid(&svm->vcpu); + svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info; + svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err; + svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; + if (nested_vmcb->control.int_ctl & V_IRQ_MASK) { + nsvm_printk("nSVM Injecting Interrupt: 0x%x\n", + nested_vmcb->control.int_ctl); + } + if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) + svm->vcpu.arch.hflags |= HF_VINTR_MASK; + else + svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; + + nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n", + nested_vmcb->control.exit_int_info, + nested_vmcb->control.int_state); + + svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; + svm->vmcb->control.int_state = nested_vmcb->control.int_state; + svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; + if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID) + nsvm_printk("Injecting Event: 0x%x\n", + nested_vmcb->control.event_inj); + svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; + svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; + + svm->vcpu.arch.hflags |= HF_GIF_MASK; + + return 1; +} + static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) { to_vmcb->save.fs = from_vmcb->save.fs; @@ -1298,6 +1429,26 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } +static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + nsvm_printk("VMrun\n"); + if (nested_svm_check_permissions(svm)) + return 1; + + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + + if (nested_svm_do(svm, svm->vmcb->save.rax, 0, + NULL, nested_svm_vmrun)) + return 1; + + if (nested_svm_do(svm, svm->vmcb->control.msrpm_base_pa, 0, + NULL, nested_svm_vmrun_msrpm)) + return 1; + + return 1; +} + static 
int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { if (nested_svm_check_permissions(svm)) @@ -1630,7 +1781,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_MSR] = msr_interception, [SVM_EXIT_TASK_SWITCH] = task_switch_interception, [SVM_EXIT_SHUTDOWN] = shutdown_interception, - [SVM_EXIT_VMRUN] = invalid_op_interception, + [SVM_EXIT_VMRUN] = vmrun_interception, [SVM_EXIT_VMMCALL] = vmmcall_interception, [SVM_EXIT_VMLOAD] = vmload_interception, [SVM_EXIT_VMSAVE] = vmsave_interception, @@ -1937,7 +2088,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) svm->host_cr2 = kvm_read_cr2(); svm->host_dr6 = read_dr6(); svm->host_dr7 = read_dr7(); - svm->vmcb->save.cr2 = vcpu->arch.cr2; + if (!is_nested(svm)) + svm->vmcb->save.cr2 = vcpu->arch.cr2; /* required for live migration with NPT */ if (npt_enabled) svm->vmcb->save.cr3 = vcpu->arch.cr3; diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 421abf7..7d28894 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -736,6 +736,8 @@ enum { }; #define HF_GIF_MASK (1 << 0) +#define HF_HIF_MASK (1 << 1) +#define HF_VINTR_MASK (1 << 2) /* * Hardware virtualization extension instructions may fault if a -- 1.5.6