From: Alexander Graf <agraf@suse.de>
To: kvm@vger.kernel.org
Cc: joro@8bytes.org, anthony@codemonkey.ws, avi@qumranet.com,
Alexander Graf <agraf@suse.de>
Subject: [PATCH 07/10] Add VMRUN handler v4
Date: Wed, 15 Oct 2008 21:31:30 +0200 [thread overview]
Message-ID: <1224099093-10985-8-git-send-email-agraf@suse.de> (raw)
In-Reply-To: <1224099093-10985-7-git-send-email-agraf@suse.de>
This patch implements VMRUN. VMRUN enters a virtual CPU and runs that
in the same context as the normal guest CPU would run.
So basically it is implemented the same way, a normal CPU would do it.
We also prepare all intercepts that get OR'ed with the original
intercepts, as we do not allow a level 2 guest to be intercepted less
than the first level guest.
v2 implements the following improvements:
- fixes the CPL check
- does not allocate iopm when not used
- remembers the host's IF in the HIF bit in the hflags
v3:
- make use of the new permission checking
- add support for V_INTR_MASKING_MASK
v4:
- use host page backed hsave
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/x86/kvm/kvm_svm.h | 10 ++
arch/x86/kvm/svm.c | 198 +++++++++++++++++++++++++++++++++++++++++++-
include/asm-x86/kvm_host.h | 2 +
3 files changed, 208 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 40cb128..a647602 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -42,6 +42,16 @@ struct vcpu_svm {
u32 *msrpm;
struct vmcb *hsave;
+
+ u64 nested_vmcb;
+
+ /* These are the merged vectors */
+ u32 *nested_msrpm;
+ u32 *nested_iopm;
+
+ /* gpa pointers to the real vectors */
+ u64 nested_vmcb_msrpm;
+ u64 nested_vmcb_iopm;
};
#endif
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 10ad02b..6520bff 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -51,6 +51,9 @@ MODULE_LICENSE("GPL");
/* Turn on to get debugging output*/
/* #define NESTED_DEBUG */
+/* Not needed until device passthrough */
+/* #define NESTED_KVM_MERGE_IOPM */
+
#ifdef NESTED_DEBUG
#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
#else
@@ -76,6 +79,11 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct vcpu_svm, vcpu);
}
+static inline bool is_nested(struct vcpu_svm *svm)
+{
+ return svm->nested_vmcb;
+}
+
static unsigned long iopm_base;
struct kvm_ldttss_desc {
@@ -615,6 +623,7 @@ static void init_vmcb(struct vcpu_svm *svm)
}
force_new_asid(&svm->vcpu);
+ svm->nested_vmcb = 0;
svm->vcpu.arch.hflags = HF_GIF_MASK;
}
@@ -641,6 +650,10 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
struct page *page;
struct page *msrpm_pages;
struct page *hsave_page;
+ struct page *nested_msrpm_pages;
+#ifdef NESTED_KVM_MERGE_IOPM
+ struct page *nested_iopm_pages;
+#endif
int err;
svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -663,6 +676,17 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
if (!msrpm_pages)
goto uninit;
+
+ nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+ if (!nested_msrpm_pages)
+ goto uninit;
+
+#ifdef NESTED_KVM_MERGE_IOPM
+ nested_iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
+ if (!nested_iopm_pages)
+ goto uninit;
+#endif
+
svm->msrpm = page_address(msrpm_pages);
svm_vcpu_init_msrpm(svm->msrpm);
@@ -671,6 +695,11 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
goto uninit;
svm->hsave = page_address(hsave_page);
+ svm->nested_msrpm = page_address(nested_msrpm_pages);
+#ifdef NESTED_KVM_MERGE_IOPM
+ svm->nested_iopm = page_address(nested_iopm_pages);
+#endif
+
svm->vmcb = page_address(page);
clear_page(svm->vmcb);
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
@@ -701,6 +730,10 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
__free_page(virt_to_page(svm->hsave));
+ __free_pages(virt_to_page(svm->nested_msrpm), MSRPM_ALLOC_ORDER);
+#ifdef NESTED_KVM_MERGE_IOPM
+ __free_pages(virt_to_page(svm->nested_iopm), IOPM_ALLOC_ORDER);
+#endif
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
}
@@ -1242,6 +1275,138 @@ static int nested_svm_do(struct vcpu_svm *svm,
return retval;
}
+
+static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
+ void *arg2, void *opaque)
+{
+ int i;
+ u32 *nested_msrpm = (u32*)arg1;
+ for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
+ svm->nested_msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
+ svm->vmcb->control.msrpm_base_pa = __pa(svm->nested_msrpm);
+
+ return 0;
+}
+
+#ifdef NESTED_KVM_MERGE_IOPM
+static int nested_svm_vmrun_iopm(struct vcpu_svm *svm, void *arg1,
+ void *arg2, void *opaque)
+{
+ int i;
+ u32 *nested_iopm = (u32*)arg1;
+ u32 *iopm = (u32*)__va(iopm_base);
+ for (i=0; i< PAGE_SIZE * (1 << IOPM_ALLOC_ORDER) / 4; i++)
+ svm->nested_iopm[i] = iopm[i] | nested_iopm[i];
+ svm->vmcb->control.iopm_base_pa = __pa(svm->nested_iopm);
+
+ return 0;
+}
+#endif
+
+static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
+ void *arg2, void *opaque)
+{
+ struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+ struct vmcb *hsave = svm->hsave;
+
+ /* nested_vmcb is our indicator if nested SVM is activated */
+ svm->nested_vmcb = svm->vmcb->save.rax;
+
+ /* Clear internal status */
+ svm->vcpu.arch.exception.pending = false;
+
+ /* Save the old vmcb, so we don't need to pick what we save, but
+ can restore everything when a VMEXIT occurs */
+ memcpy(hsave, svm->vmcb, sizeof(struct vmcb));
+ /* We need to remember the original CR3 in the SPT case */
+ if (!npt_enabled)
+ hsave->save.cr3 = svm->vcpu.arch.cr3;
+ hsave->save.rip = svm->next_rip;
+
+ if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
+ svm->vcpu.arch.hflags |= HF_HIF_MASK;
+ else
+ svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+
+ /* Load the nested guest state */
+ svm->vmcb->save.es = nested_vmcb->save.es;
+ svm->vmcb->save.cs = nested_vmcb->save.cs;
+ svm->vmcb->save.ss = nested_vmcb->save.ss;
+ svm->vmcb->save.ds = nested_vmcb->save.ds;
+ svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
+ svm->vmcb->save.idtr = nested_vmcb->save.idtr;
+ svm->vmcb->save.rflags = nested_vmcb->save.rflags;
+ svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
+ svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
+ svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
+ if (npt_enabled) {
+ svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
+ svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
+ } else {
+ kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
+ kvm_mmu_reset_context(&svm->vcpu);
+ }
+ svm->vmcb->save.cr2 = nested_vmcb->save.cr2;
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
+ /* In case we don't even reach vcpu_run, the fields are not updated */
+ svm->vmcb->save.rax = nested_vmcb->save.rax;
+ svm->vmcb->save.rsp = nested_vmcb->save.rsp;
+ svm->vmcb->save.rip = nested_vmcb->save.rip;
+ svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
+ svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
+ svm->vmcb->save.cpl = nested_vmcb->save.cpl;
+
+ /* We don't want a nested guest to be more powerful than the guest,
+ so all intercepts are ORed */
+ svm->vmcb->control.intercept_cr_read |=
+ nested_vmcb->control.intercept_cr_read;
+ svm->vmcb->control.intercept_cr_write |=
+ nested_vmcb->control.intercept_cr_write;
+ svm->vmcb->control.intercept_dr_read |=
+ nested_vmcb->control.intercept_dr_read;
+ svm->vmcb->control.intercept_dr_write |=
+ nested_vmcb->control.intercept_dr_write;
+ svm->vmcb->control.intercept_exceptions |=
+ nested_vmcb->control.intercept_exceptions;
+
+ svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
+
+ svm->nested_vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
+ svm->nested_vmcb_iopm = nested_vmcb->control.iopm_base_pa;
+
+ force_new_asid(&svm->vcpu);
+ svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
+ svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
+ svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
+ if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
+ nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
+ nested_vmcb->control.int_ctl);
+ }
+ if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
+ svm->vcpu.arch.hflags |= HF_VINTR_MASK;
+ else
+ svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
+
+ nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
+ nested_vmcb->control.exit_int_info,
+ nested_vmcb->control.int_state);
+
+ svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
+ svm->vmcb->control.int_state = nested_vmcb->control.int_state;
+ svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
+ if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
+ nsvm_printk("Injecting Event: 0x%x\n",
+ nested_vmcb->control.event_inj);
+ svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
+ svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
+
+ svm->vcpu.arch.hflags |= HF_GIF_MASK;
+
+ return 1;
+}
+
static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
to_vmcb->save.fs = from_vmcb->save.fs;
@@ -1298,6 +1463,34 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
+static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ nsvm_printk("VMrun\n");
+ if (nested_svm_check_permissions(svm))
+ return 1;
+
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
+ if (nested_svm_do(svm, svm->vmcb->save.rax, 0,
+ NULL, nested_svm_vmrun))
+ return 1;
+
+ /* Right now SVM always intecepts all IOs except for the DEBUG port.
+ This is fine for the nested VM too IMHO. */
+#ifdef NESTED_KVM_MERGE_IOPM
+ if (nested_svm_do(svm, svm->vmcb->control.iopm_base_pa, 0,
+ NULL, nested_svm_vmrun_iopm))
+ return 1;
+#endif
+
+ if (nested_svm_do(svm, svm->vmcb->control.msrpm_base_pa, 0,
+ NULL, nested_svm_vmrun_msrpm))
+ return 1;
+
+ return 1;
+}
+
static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
if (nested_svm_check_permissions(svm))
@@ -1630,7 +1823,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_MSR] = msr_interception,
[SVM_EXIT_TASK_SWITCH] = task_switch_interception,
[SVM_EXIT_SHUTDOWN] = shutdown_interception,
- [SVM_EXIT_VMRUN] = invalid_op_interception,
+ [SVM_EXIT_VMRUN] = vmrun_interception,
[SVM_EXIT_VMMCALL] = vmmcall_interception,
[SVM_EXIT_VMLOAD] = vmload_interception,
[SVM_EXIT_VMSAVE] = vmsave_interception,
@@ -1937,7 +2130,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
svm->host_cr2 = kvm_read_cr2();
svm->host_dr6 = read_dr6();
svm->host_dr7 = read_dr7();
- svm->vmcb->save.cr2 = vcpu->arch.cr2;
+ if (!is_nested(svm))
+ svm->vmcb->save.cr2 = vcpu->arch.cr2;
/* required for live migration with NPT */
if (npt_enabled)
svm->vmcb->save.cr3 = vcpu->arch.cr3;
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index dc30b7b..7469d96 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -736,6 +736,8 @@ enum {
};
#define HF_GIF_MASK (1 << 0)
+#define HF_HIF_MASK (1 << 1)
+#define HF_VINTR_MASK (1 << 2)
/*
* Hardware virtualization extension instructions may fault if a
--
1.5.6
next prev parent reply other threads:[~2008-10-15 19:30 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-10-15 19:31 [PATCH 00/10] Add support for nested SVM (kernel) v4 Alexander Graf
2008-10-15 19:31 ` [PATCH 01/10] Add CPUID feature flag for SVM v4 Alexander Graf
2008-10-15 19:31 ` [PATCH 02/10] Clean up VINTR setting v4 Alexander Graf
2008-10-15 19:31 ` [PATCH 03/10] Add helper functions for nested SVM v4 Alexander Graf
2008-10-15 19:31 ` [PATCH 04/10] Implement GIF, clgi and stgi v4 Alexander Graf
2008-10-15 19:31 ` [PATCH 05/10] Implement hsave v4 Alexander Graf
2008-10-15 19:31 ` [PATCH 06/10] Add VMLOAD and VMSAVE handlers v4 Alexander Graf
2008-10-15 19:31 ` Alexander Graf [this message]
2008-10-15 19:31 ` [PATCH 08/10] Add VMEXIT handler and intercepts v4 Alexander Graf
2008-10-15 19:31 ` [PATCH 09/10] allow read access to MSR_VM_VR Alexander Graf
2008-10-15 19:31 ` [PATCH 10/10] Allow setting the SVME bit v4 Alexander Graf
2008-10-19 9:51 ` [PATCH 07/10] Add VMRUN handler v4 Avi Kivity
2008-10-19 9:58 ` Alexander Graf
2008-10-19 10:06 ` Avi Kivity
2008-10-19 10:13 ` Alexander Graf
2008-10-19 10:27 ` Avi Kivity
2008-10-19 10:33 ` Alexander Graf
2008-10-19 10:54 ` Avi Kivity
2008-10-19 9:46 ` [PATCH 04/10] Implement GIF, clgi and stgi v4 Avi Kivity
2008-10-19 9:57 ` Alexander Graf
2008-10-19 10:04 ` Avi Kivity
2008-10-16 11:25 ` [PATCH 01/10] Add CPUID feature flag for SVM v4 Alexander Graf
2008-10-19 9:56 ` [PATCH 00/10] Add support for nested SVM (kernel) v4 Avi Kivity
2008-10-19 10:01 ` Alexander Graf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1224099093-10985-8-git-send-email-agraf@suse.de \
--to=agraf@suse.de \
--cc=anthony@codemonkey.ws \
--cc=avi@qumranet.com \
--cc=joro@8bytes.org \
--cc=kvm@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.