* [PATCH 0/3] Arch cleanup v3
@ 2007-07-26 18:45 Gregory Haskins
From: Gregory Haskins @ 2007-07-26 18:45 UTC (permalink / raw)
To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Changes from v2
Patch #1: Fixed bad indent
I split the original #2 up into two patches:
Patch #2: Contains only VMX/VMCS cleanup
Patch #3: Contains a fix for the VMCS race condition previously discussed.
Patch #3 is optional given the recent discovery that the race should not cause
actual problems.
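The refactoring in patch #1 boils down to replacing the arch-specific union in struct kvm_vcpu with an opaque _priv pointer, a valid flag, and a per-backend accessor. A minimal, self-contained sketch of that pattern follows (field set heavily simplified; the -1 return stands in for the kernel's -ENOMEM):

```c
#include <assert.h>
#include <stdlib.h>
#include <string.h>

/* Sketch only: the real structures carry far more state. */
struct kvm_vcpu {
	int valid;	/* replaces the old "is vmcs/svm set?" checks */
	void *_priv;	/* arch-specific state (vcpu_svm or vcpu_vmx) */
};

/* Arch-private state, owned entirely by the SVM backend. */
struct vcpu_svm {
	struct kvm_vcpu *vcpu;	/* back-pointer to the generic vcpu */
	int asid_generation;
};

/* Accessor mirroring the patch's svm() helper: one cast, one place. */
static inline struct vcpu_svm *svm(struct kvm_vcpu *vcpu)
{
	return (struct vcpu_svm *)vcpu->_priv;
}

static int svm_create_vcpu(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *s = calloc(1, sizeof(*s));

	if (!s)
		return -1;	/* -ENOMEM in the kernel */
	s->vcpu = vcpu;
	s->asid_generation = 0;
	vcpu->_priv = s;
	vcpu->valid = 1;
	return 0;
}

static void svm_free_vcpu(struct kvm_vcpu *vcpu)
{
	if (!svm(vcpu))
		return;
	free(svm(vcpu));
	vcpu->_priv = NULL;
	vcpu->valid = 0;
}
```

The accessor keeps every arch-specific dereference behind a single cast, so the generic code only ever tests vcpu->valid and never needs to know which backend owns the vcpu.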
Signed-off-by: Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
* [PATCH 1/3] KVM: Remove arch specific components from the general code
@ 2007-07-26 18:45 ` Gregory Haskins
From: Gregory Haskins @ 2007-07-26 18:45 UTC (permalink / raw)
To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Signed-off-by: Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
---

 drivers/kvm/kvm.h      |   31 ----
 drivers/kvm/kvm_main.c |   26 +---
 drivers/kvm/kvm_svm.h  |    3 
 drivers/kvm/svm.c      |  322 +++++++++++++++++++++++++-----------------------
 drivers/kvm/vmx.c      |  236 +++++++++++++++++++++--------------
 5 files changed, 320 insertions(+), 298 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index fc27c2f..6cbf087 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -15,7 +15,6 @@
 #include <linux/mm.h>
 #include <asm/signal.h>
 
-#include "vmx.h"
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
 
@@ -140,14 +139,6 @@ struct kvm_mmu_page {
 	};
 };
 
-struct vmcs {
-	u32 revision_id;
-	u32 abort;
-	char data[0];
-};
-
-#define vmx_msr_entry kvm_msr_entry
-
 struct kvm_vcpu;
 
 /*
@@ -309,15 +300,12 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
 			     struct kvm_io_device *dev);
 
 struct kvm_vcpu {
+	int valid;
 	struct kvm *kvm;
 	int vcpu_id;
-	union {
-		struct vmcs *vmcs;
-		struct vcpu_svm *svm;
-	};
+	void *_priv;
 	struct mutex mutex;
 	int cpu;
-	int launched;
 	u64 host_tsc;
 	struct kvm_run *run;
 	int interrupt_window_open;
@@ -340,14 +328,6 @@ struct kvm_vcpu {
 	u64 shadow_efer;
 	u64 apic_base;
 	u64 ia32_misc_enable_msr;
-	int nmsrs;
-	int save_nmsrs;
-	int msr_offset_efer;
-#ifdef CONFIG_X86_64
-	int msr_offset_kernel_gs_base;
-#endif
-	struct vmx_msr_entry *guest_msrs;
-	struct vmx_msr_entry *host_msrs;
 
 	struct kvm_mmu mmu;
 
@@ -366,11 +346,6 @@ struct kvm_vcpu {
 	char *guest_fx_image;
 	int fpu_active;
 	int guest_fpu_loaded;
-	struct vmx_host_state {
-		int loaded;
-		u16 fs_sel, gs_sel, ldt_sel;
-		int fs_gs_ldt_reload_needed;
-	} vmx_host_state;
 
 	int mmio_needed;
 	int mmio_read_completed;
@@ -579,8 +554,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 
 void fx_init(struct kvm_vcpu *vcpu);
 
-void load_msrs(struct vmx_msr_entry *e, int n);
-void save_msrs(struct vmx_msr_entry *e, int n);
 void kvm_resched(struct kvm_vcpu *vcpu);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index bc11c2d..9cc16b8 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -367,7 +367,7 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
 
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->vmcs)
+	if (!vcpu->valid)
 		return;
 
 	vcpu_load(vcpu);
@@ -377,7 +377,7 @@ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 
 static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->vmcs)
+	if (!vcpu->valid)
 		return;
 
 	vcpu_load(vcpu);
@@ -1646,24 +1646,6 @@ void kvm_resched(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_resched);
 
-void load_msrs(struct vmx_msr_entry *e, int n)
-{
-	int i;
-
-	for (i = 0; i < n; ++i)
-		wrmsrl(e[i].index, e[i].data);
-}
-EXPORT_SYMBOL_GPL(load_msrs);
-
-void save_msrs(struct vmx_msr_entry *e, int n)
-{
-	int i;
-
-	for (i = 0; i < n; ++i)
-		rdmsrl(e[i].index, e[i].data);
-}
-EXPORT_SYMBOL_GPL(save_msrs);
-
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
 	int i;
@@ -2402,7 +2384,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 
 	mutex_lock(&vcpu->mutex);
 
-	if (vcpu->vmcs) {
+	if (vcpu->valid) {
 		mutex_unlock(&vcpu->mutex);
 		return -EEXIST;
 	}
@@ -2450,6 +2432,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 	kvm->nvcpus = n + 1;
 	spin_unlock(&kvm_lock);
 
+	vcpu->valid = 1;
+
 	return r;
 
 out_free_vcpus:
diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h
index a869983..82e5d77 100644
--- a/drivers/kvm/kvm_svm.h
+++ b/drivers/kvm/kvm_svm.h
@@ -20,7 +20,10 @@ static const u32 host_save_user_msrs[] = {
 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
 #define NUM_DB_REGS 4
 
+struct kvm_vcpu;
+
 struct vcpu_svm {
+	struct kvm_vcpu *vcpu;
 	struct vmcb *vmcb;
 	unsigned long vmcb_pa;
 	struct svm_cpu_data *svm_data;
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 850a1b1..0c12e9e 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -49,6 +49,11 @@ MODULE_LICENSE("GPL");
 #define SVM_FEATURE_LBRV (1 << 1)
 #define SVM_DEATURE_SVML (1 << 2)
 
+static inline struct vcpu_svm* svm(struct kvm_vcpu *vcpu)
+{
+	return (struct vcpu_svm*)vcpu->_priv;
+}
+
 unsigned long iopm_base;
 unsigned long msrpm_base;
 
@@ -95,7 +100,7 @@ static inline u32 svm_has(u32 feat)
 
 static unsigned get_addr_size(struct kvm_vcpu *vcpu)
 {
-	struct vmcb_save_area *sa = &vcpu->svm->vmcb->save;
+	struct vmcb_save_area *sa = &svm(vcpu)->vmcb->save;
 	u16 cs_attrib;
 
 	if (!(sa->cr0 & X86_CR0_PE) || (sa->rflags & X86_EFLAGS_VM))
@@ -181,7 +186,7 @@ static inline void write_dr7(unsigned long val)
 
 static inline void force_new_asid(struct kvm_vcpu *vcpu)
 {
-	vcpu->svm->asid_generation--;
+	svm(vcpu)->asid_generation--;
 }
 
 static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
@@ -194,22 +199,22 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 	if (!(efer & KVM_EFER_LMA))
 		efer &= ~KVM_EFER_LME;
 
-	vcpu->svm->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
+	svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
 	vcpu->shadow_efer = efer;
 }
 
 static void svm_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
 {
-	vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
+	svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID |
 						SVM_EVTINJ_VALID_ERR |
 						SVM_EVTINJ_TYPE_EXEPT |
 						GP_VECTOR;
-	vcpu->svm->vmcb->control.event_inj_err = error_code;
+	svm(vcpu)->vmcb->control.event_inj_err = error_code;
 }
 
 static void inject_ud(struct kvm_vcpu *vcpu)
 {
-	vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
+	svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID |
 						SVM_EVTINJ_TYPE_EXEPT |
 						UD_VECTOR;
 }
@@ -228,19 +233,19 @@ static int is_external_interrupt(u32 info)
 
 static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->svm->next_rip) {
+	if (!svm(vcpu)->next_rip) {
 		printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__);
 		return;
 	}
-	if (vcpu->svm->next_rip - vcpu->svm->vmcb->save.rip > 15) {
+	if (svm(vcpu)->next_rip - svm(vcpu)->vmcb->save.rip > 15) {
 		printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n",
 		       __FUNCTION__,
-		       vcpu->svm->vmcb->save.rip,
-		       vcpu->svm->next_rip);
+		       svm(vcpu)->vmcb->save.rip,
+		       svm(vcpu)->next_rip);
 	}
 
-	vcpu->rip = vcpu->svm->vmcb->save.rip = vcpu->svm->next_rip;
-	vcpu->svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
+	vcpu->rip = svm(vcpu)->vmcb->save.rip = svm(vcpu)->next_rip;
+	svm(vcpu)->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
 
 	vcpu->interrupt_window_open = 1;
 }
@@ -569,23 +574,27 @@ static void init_vmcb(struct vmcb *vmcb)
 
 static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_svm *svm;
 	struct page *page;
 	int r;
 
 	r = -ENOMEM;
-	vcpu->svm = kzalloc(sizeof *vcpu->svm, GFP_KERNEL);
-	if (!vcpu->svm)
+	svm = kzalloc(sizeof *svm, GFP_KERNEL);
+	if (!svm)
 		goto out1;
 	page = alloc_page(GFP_KERNEL);
 	if (!page)
 		goto out2;
 
-	vcpu->svm->vmcb = page_address(page);
-	clear_page(vcpu->svm->vmcb);
-	vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
-	vcpu->svm->asid_generation = 0;
-	memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs));
-	init_vmcb(vcpu->svm->vmcb);
+	svm->vmcb = page_address(page);
+	clear_page(svm->vmcb);
+	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
+	svm->asid_generation = 0;
+	memset(svm->db_regs, 0, sizeof(svm->db_regs));
+	init_vmcb(svm->vmcb);
+
+	svm->vcpu = vcpu;
+	vcpu->_priv = svm;
 
 	fx_init(vcpu);
 	vcpu->fpu_active = 1;
@@ -596,18 +605,19 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 	return 0;
 
 out2:
-	kfree(vcpu->svm);
+	kfree(svm);
 out1:
 	return r;
 }
 
 static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->svm)
+	if (!svm(vcpu))
 		return;
-	if (vcpu->svm->vmcb)
-		__free_page(pfn_to_page(vcpu->svm->vmcb_pa >> PAGE_SHIFT));
-	kfree(vcpu->svm);
+	if (svm(vcpu)->vmcb)
+		__free_page(pfn_to_page(svm(vcpu)->vmcb_pa >> PAGE_SHIFT));
+	kfree(svm(vcpu));
+	vcpu->_priv = NULL;
 }
 
 static void svm_vcpu_load(struct kvm_vcpu *vcpu)
@@ -624,12 +634,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu)
 		 */
 		rdtscll(tsc_this);
 		delta = vcpu->host_tsc - tsc_this;
-		vcpu->svm->vmcb->control.tsc_offset += delta;
+		svm(vcpu)->vmcb->control.tsc_offset += delta;
 		vcpu->cpu = cpu;
 	}
 
 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
-		rdmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]);
+		rdmsrl(host_save_user_msrs[i], svm(vcpu)->host_user_msrs[i]);
 }
 
 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
@@ -637,7 +647,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 	int i;
 
 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
-		wrmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]);
+		wrmsrl(host_save_user_msrs[i], svm(vcpu)->host_user_msrs[i]);
 
 	rdtscll(vcpu->host_tsc);
 	put_cpu();
@@ -649,31 +659,31 @@ static void svm_vcpu_decache(struct kvm_vcpu *vcpu)
 
 static void svm_cache_regs(struct kvm_vcpu *vcpu)
 {
-	vcpu->regs[VCPU_REGS_RAX] = vcpu->svm->vmcb->save.rax;
-	vcpu->regs[VCPU_REGS_RSP] = vcpu->svm->vmcb->save.rsp;
-	vcpu->rip = vcpu->svm->vmcb->save.rip;
+	vcpu->regs[VCPU_REGS_RAX] = svm(vcpu)->vmcb->save.rax;
+	vcpu->regs[VCPU_REGS_RSP] = svm(vcpu)->vmcb->save.rsp;
+	vcpu->rip = svm(vcpu)->vmcb->save.rip;
 }
 
 static void svm_decache_regs(struct kvm_vcpu *vcpu)
 {
-	vcpu->svm->vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX];
-	vcpu->svm->vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP];
-	vcpu->svm->vmcb->save.rip = vcpu->rip;
+	svm(vcpu)->vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX];
+	svm(vcpu)->vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP];
+	svm(vcpu)->vmcb->save.rip = vcpu->rip;
 }
 
 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
 {
-	return vcpu->svm->vmcb->save.rflags;
+	return svm(vcpu)->vmcb->save.rflags;
 }
 
 static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
-	vcpu->svm->vmcb->save.rflags = rflags;
+	svm(vcpu)->vmcb->save.rflags = rflags;
 }
 
 static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
 {
-	struct vmcb_save_area *save = &vcpu->svm->vmcb->save;
+	struct vmcb_save_area *save = &svm(vcpu)->vmcb->save;
 
 	switch (seg) {
 	case VCPU_SREG_CS: return &save->cs;
@@ -725,26 +735,26 @@ static void svm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
 
 static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
 {
-	dt->limit = vcpu->svm->vmcb->save.idtr.limit;
-	dt->base = vcpu->svm->vmcb->save.idtr.base;
+	dt->limit = svm(vcpu)->vmcb->save.idtr.limit;
+	dt->base = svm(vcpu)->vmcb->save.idtr.base;
 }
 
 static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
 {
-	vcpu->svm->vmcb->save.idtr.limit = dt->limit;
-	vcpu->svm->vmcb->save.idtr.base = dt->base ;
+	svm(vcpu)->vmcb->save.idtr.limit = dt->limit;
+	svm(vcpu)->vmcb->save.idtr.base = dt->base ;
 }
 
 static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
 {
-	dt->limit = vcpu->svm->vmcb->save.gdtr.limit;
-	dt->base = vcpu->svm->vmcb->save.gdtr.base;
+	dt->limit = svm(vcpu)->vmcb->save.gdtr.limit;
+	dt->base = svm(vcpu)->vmcb->save.gdtr.base;
 }
 
 static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
 {
-	vcpu->svm->vmcb->save.gdtr.limit = dt->limit;
-	vcpu->svm->vmcb->save.gdtr.base = dt->base ;
+	svm(vcpu)->vmcb->save.gdtr.limit = dt->limit;
+	svm(vcpu)->vmcb->save.gdtr.base = dt->base ;
 }
 
 static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
@@ -757,30 +767,30 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	if (vcpu->shadow_efer & KVM_EFER_LME) {
 		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
 			vcpu->shadow_efer |= KVM_EFER_LMA;
-			vcpu->svm->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME;
+			svm(vcpu)->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME;
 		}
 
 		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG) ) {
 			vcpu->shadow_efer &= ~KVM_EFER_LMA;
-			vcpu->svm->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME);
+			svm(vcpu)->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME);
 		}
 	}
 #endif
 	if ((vcpu->cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
-		vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
+		svm(vcpu)->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
 		vcpu->fpu_active = 1;
 	}
 
 	vcpu->cr0 = cr0;
 	cr0 |= X86_CR0_PG | X86_CR0_WP;
 	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
-	vcpu->svm->vmcb->save.cr0 = cr0;
+	svm(vcpu)->vmcb->save.cr0 = cr0;
 }
 
 static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
 	vcpu->cr4 = cr4;
-	vcpu->svm->vmcb->save.cr4 = cr4 | X86_CR4_PAE;
+	svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE;
 }
 
 static void svm_set_segment(struct kvm_vcpu *vcpu,
@@ -804,16 +814,16 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 		s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
 	}
 	if (seg == VCPU_SREG_CS)
-		vcpu->svm->vmcb->save.cpl
-			= (vcpu->svm->vmcb->save.cs.attrib
+		svm(vcpu)->vmcb->save.cpl
+			= (svm(vcpu)->vmcb->save.cs.attrib
 				>> SVM_SELECTOR_DPL_SHIFT) & 3;
 
 }
 
 /* FIXME:
 
-	vcpu->svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
-	vcpu->svm->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK);
+	svm(vcpu)->vmcb->control.int_ctl &= ~V_TPR_MASK;
+	svm(vcpu)->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK);
 
 */
 
@@ -825,14 +835,14 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
 
 static void load_host_msrs(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_X86_64
-	wrmsrl(MSR_GS_BASE, vcpu->svm->host_gs_base);
+	wrmsrl(MSR_GS_BASE, svm(vcpu)->host_gs_base);
 #endif
 }
 
 static void save_host_msrs(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_X86_64
-	rdmsrl(MSR_GS_BASE, vcpu->svm->host_gs_base);
+	rdmsrl(MSR_GS_BASE, svm(vcpu)->host_gs_base);
 #endif
 }
 
@@ -841,22 +851,22 @@ static void new_asid(struct kvm_vcpu *vcpu, struct svm_cpu_data *svm_data)
 	if (svm_data->next_asid > svm_data->max_asid) {
 		++svm_data->asid_generation;
 		svm_data->next_asid = 1;
-		vcpu->svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
+		svm(vcpu)->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
 	}
 
 	vcpu->cpu = svm_data->cpu;
-	vcpu->svm->asid_generation = svm_data->asid_generation;
-	vcpu->svm->vmcb->control.asid = svm_data->next_asid++;
+	svm(vcpu)->asid_generation = svm_data->asid_generation;
+	svm(vcpu)->vmcb->control.asid = svm_data->next_asid++;
 }
 
 static void svm_invlpg(struct kvm_vcpu *vcpu, gva_t address)
 {
-	invlpga(address, vcpu->svm->vmcb->control.asid); // is needed?
+	invlpga(address, svm(vcpu)->vmcb->control.asid); // is needed?
 }
 
 static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
 {
-	return vcpu->svm->db_regs[dr];
+	return svm(vcpu)->db_regs[dr];
 }
 
 static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
@@ -864,16 +874,16 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
 {
 	*exception = 0;
 
-	if (vcpu->svm->vmcb->save.dr7 & DR7_GD_MASK) {
-		vcpu->svm->vmcb->save.dr7 &= ~DR7_GD_MASK;
-		vcpu->svm->vmcb->save.dr6 |= DR6_BD_MASK;
+	if (svm(vcpu)->vmcb->save.dr7 & DR7_GD_MASK) {
+		svm(vcpu)->vmcb->save.dr7 &= ~DR7_GD_MASK;
+		svm(vcpu)->vmcb->save.dr6 |= DR6_BD_MASK;
 		*exception = DB_VECTOR;
 		return;
 	}
 
 	switch (dr) {
 	case 0 ... 3:
-		vcpu->svm->db_regs[dr] = value;
+		svm(vcpu)->db_regs[dr] = value;
 		return;
 	case 4 ... 5:
 		if (vcpu->cr4 & X86_CR4_DE) {
@@ -885,7 +895,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
 			*exception = GP_VECTOR;
 			return;
 		}
-		vcpu->svm->vmcb->save.dr7 = value;
+		svm(vcpu)->vmcb->save.dr7 = value;
 		return;
 	}
 	default:
@@ -898,7 +908,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
 
 static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	u32 exit_int_info = vcpu->svm->vmcb->control.exit_int_info;
+	u32 exit_int_info = svm(vcpu)->vmcb->control.exit_int_info;
 	u64 fault_address;
 	u32 error_code;
 	enum emulation_result er;
@@ -909,8 +919,8 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	spin_lock(&vcpu->kvm->lock);
 
-	fault_address = vcpu->svm->vmcb->control.exit_info_2;
-	error_code = vcpu->svm->vmcb->control.exit_info_1;
+	fault_address = svm(vcpu)->vmcb->control.exit_info_2;
+	error_code = svm(vcpu)->vmcb->control.exit_info_1;
 	r = kvm_mmu_page_fault(vcpu, fault_address, error_code);
 	if (r < 0) {
 		spin_unlock(&vcpu->kvm->lock);
@@ -942,9 +952,9 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 static int nm_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
+	svm(vcpu)->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
 	if (!(vcpu->cr0 & X86_CR0_TS))
-		vcpu->svm->vmcb->save.cr0 &= ~X86_CR0_TS;
+		svm(vcpu)->vmcb->save.cr0 &= ~X86_CR0_TS;
 	vcpu->fpu_active = 1;
 
 	return 1;
@@ -956,8 +966,8 @@ static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	 * VMCB is undefined after a SHUTDOWN intercept
 	 * so reinitialize it.
 	 */
-	clear_page(vcpu->svm->vmcb);
-	init_vmcb(vcpu->svm->vmcb);
+	clear_page(svm(vcpu)->vmcb);
+	init_vmcb(svm(vcpu)->vmcb);
 
 	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
 	return 0;
@@ -972,18 +982,18 @@ static int io_get_override(struct kvm_vcpu *vcpu,
 	gva_t rip;
 	int i;
 
-	rip = vcpu->svm->vmcb->save.rip;
-	ins_length = vcpu->svm->next_rip - rip;
-	rip += vcpu->svm->vmcb->save.cs.base;
+	rip = svm(vcpu)->vmcb->save.rip;
+	ins_length = svm(vcpu)->next_rip - rip;
+	rip += svm(vcpu)->vmcb->save.cs.base;
 
 	if (ins_length > MAX_INST_SIZE)
 		printk(KERN_DEBUG
 		       "%s: inst length err, cs base 0x%llx rip 0x%llx "
 		       "next rip 0x%llx ins_length %u\n",
 		       __FUNCTION__,
-		       vcpu->svm->vmcb->save.cs.base,
-		       vcpu->svm->vmcb->save.rip,
-		       vcpu->svm->vmcb->control.exit_info_2,
+		       svm(vcpu)->vmcb->save.cs.base,
+		       svm(vcpu)->vmcb->save.rip,
+		       svm(vcpu)->vmcb->control.exit_info_2,
 		       ins_length);
 
 	if (kvm_read_guest(vcpu, rip, ins_length, inst) != ins_length)
@@ -1003,22 +1013,22 @@ static int io_get_override(struct kvm_vcpu *vcpu,
 			*addr_override = 1;
 			continue;
 		case 0x2e:
-			*seg = &vcpu->svm->vmcb->save.cs;
+			*seg = &svm(vcpu)->vmcb->save.cs;
 			continue;
 		case 0x36:
-			*seg = &vcpu->svm->vmcb->save.ss;
+			*seg = &svm(vcpu)->vmcb->save.ss;
 			continue;
 		case 0x3e:
-			*seg = &vcpu->svm->vmcb->save.ds;
+			*seg = &svm(vcpu)->vmcb->save.ds;
 			continue;
 		case 0x26:
-			*seg = &vcpu->svm->vmcb->save.es;
+			*seg = &svm(vcpu)->vmcb->save.es;
 			continue;
 		case 0x64:
-			*seg = &vcpu->svm->vmcb->save.fs;
+			*seg = &svm(vcpu)->vmcb->save.fs;
 			continue;
 		case 0x65:
-			*seg = &vcpu->svm->vmcb->save.gs;
+			*seg = &svm(vcpu)->vmcb->save.gs;
 			continue;
 		default:
 			return 1;
@@ -1033,7 +1043,7 @@ static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address)
 	unsigned long *reg;
 	struct vmcb_seg *seg;
 	int addr_override;
-	struct vmcb_save_area *save_area = &vcpu->svm->vmcb->save;
+	struct vmcb_save_area *save_area = &svm(vcpu)->vmcb->save;
 	u16 cs_attrib = save_area->cs.attrib;
 	unsigned addr_size = get_addr_size(vcpu);
 
@@ -1045,16 +1055,16 @@ static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address)
 
 	if (ins) {
 		reg = &vcpu->regs[VCPU_REGS_RDI];
-		seg = &vcpu->svm->vmcb->save.es;
+		seg = &svm(vcpu)->vmcb->save.es;
 	} else {
 		reg = &vcpu->regs[VCPU_REGS_RSI];
-		seg = (seg) ? seg : &vcpu->svm->vmcb->save.ds;
+		seg = (seg) ? seg : &svm(vcpu)->vmcb->save.ds;
 	}
 
 	addr_mask = ~0ULL >> (64 - (addr_size * 8));
 
 	if ((cs_attrib & SVM_SELECTOR_L_MASK) &&
-	    !(vcpu->svm->vmcb->save.rflags & X86_EFLAGS_VM)) {
+	    !(svm(vcpu)->vmcb->save.rflags & X86_EFLAGS_VM)) {
 		*address = (*reg & addr_mask);
 		return addr_mask;
 	}
@@ -1070,7 +1080,7 @@ static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address)
 
 static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	u32 io_info = vcpu->svm->vmcb->control.exit_info_1; //address size bug?
+	u32 io_info = svm(vcpu)->vmcb->control.exit_info_1; //address size bug?
 	int size, down, in, string, rep;
 	unsigned port;
 	unsigned long count;
@@ -1078,7 +1088,7 @@ static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	++vcpu->stat.io_exits;
 
-	vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2;
+	svm(vcpu)->next_rip = svm(vcpu)->vmcb->control.exit_info_2;
 
 	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
 	port = io_info >> 16;
@@ -1086,7 +1096,7 @@ static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	string = (io_info & SVM_IOIO_STR_MASK) != 0;
 	rep = (io_info & SVM_IOIO_REP_MASK) != 0;
 	count = 1;
-	down = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
+	down = (svm(vcpu)->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
 
 	if (string) {
 		unsigned addr_mask;
@@ -1112,14 +1122,14 @@ static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1;
+	svm(vcpu)->next_rip = svm(vcpu)->vmcb->save.rip + 1;
 	skip_emulated_instruction(vcpu);
 	return kvm_emulate_halt(vcpu);
 }
 
 static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 3;
+	svm(vcpu)->next_rip = svm(vcpu)->vmcb->save.rip + 3;
 	skip_emulated_instruction(vcpu);
 	return kvm_hypercall(vcpu, kvm_run);
 }
@@ -1139,7 +1149,7 @@ static int task_switch_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_r
 
 static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2;
+	svm(vcpu)->next_rip = svm(vcpu)->vmcb->save.rip + 2;
 	kvm_emulate_cpuid(vcpu);
 	return 1;
 }
@@ -1158,34 +1168,34 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 		u64 tsc;
 
 		rdtscll(tsc);
-		*data = vcpu->svm->vmcb->control.tsc_offset + tsc;
+		*data = svm(vcpu)->vmcb->control.tsc_offset + tsc;
 		break;
 	}
 	case MSR_K6_STAR:
-		*data = vcpu->svm->vmcb->save.star;
+		*data = svm(vcpu)->vmcb->save.star;
 		break;
 #ifdef CONFIG_X86_64
 	case MSR_LSTAR:
-		*data = vcpu->svm->vmcb->save.lstar;
+		*data = svm(vcpu)->vmcb->save.lstar;
 		break;
 	case MSR_CSTAR:
-		*data = vcpu->svm->vmcb->save.cstar;
+		*data = svm(vcpu)->vmcb->save.cstar;
 		break;
 	case MSR_KERNEL_GS_BASE:
-		*data = vcpu->svm->vmcb->save.kernel_gs_base;
+		*data = svm(vcpu)->vmcb->save.kernel_gs_base;
 		break;
 	case MSR_SYSCALL_MASK:
-		*data = vcpu->svm->vmcb->save.sfmask;
+		*data = svm(vcpu)->vmcb->save.sfmask;
 		break;
 #endif
 	case MSR_IA32_SYSENTER_CS:
-		*data = vcpu->svm->vmcb->save.sysenter_cs;
+		*data = svm(vcpu)->vmcb->save.sysenter_cs;
 		break;
 	case MSR_IA32_SYSENTER_EIP:
-		*data = vcpu->svm->vmcb->save.sysenter_eip;
+		*data = svm(vcpu)->vmcb->save.sysenter_eip;
 		break;
 	case MSR_IA32_SYSENTER_ESP:
-		*data = vcpu->svm->vmcb->save.sysenter_esp;
+		*data = svm(vcpu)->vmcb->save.sysenter_esp;
 		break;
 	default:
 		return kvm_get_msr_common(vcpu, ecx, data);
@@ -1201,9 +1211,9 @@ static int rdmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (svm_get_msr(vcpu, ecx, &data))
 		svm_inject_gp(vcpu, 0);
 	else {
-		vcpu->svm->vmcb->save.rax = data & 0xffffffff;
+		svm(vcpu)->vmcb->save.rax = data & 0xffffffff;
 		vcpu->regs[VCPU_REGS_RDX] = data >> 32;
-		vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2;
+		svm(vcpu)->next_rip = svm(vcpu)->vmcb->save.rip + 2;
 		skip_emulated_instruction(vcpu);
 	}
 	return 1;
@@ -1216,34 +1226,34 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 		u64 tsc;
 
 		rdtscll(tsc);
-		vcpu->svm->vmcb->control.tsc_offset = data - tsc;
+		svm(vcpu)->vmcb->control.tsc_offset = data - tsc;
 		break;
 	}
 	case MSR_K6_STAR:
-		vcpu->svm->vmcb->save.star = data;
+		svm(vcpu)->vmcb->save.star = data;
 		break;
 #ifdef CONFIG_X86_64
	case MSR_LSTAR:
-		vcpu->svm->vmcb->save.lstar = data;
+		svm(vcpu)->vmcb->save.lstar = data;
 		break;
 	case MSR_CSTAR:
-		vcpu->svm->vmcb->save.cstar = data;
+		svm(vcpu)->vmcb->save.cstar = data;
 		break;
 	case MSR_KERNEL_GS_BASE:
-		vcpu->svm->vmcb->save.kernel_gs_base = data;
+		svm(vcpu)->vmcb->save.kernel_gs_base = data;
 		break;
 	case MSR_SYSCALL_MASK:
-		vcpu->svm->vmcb->save.sfmask = data;
+		svm(vcpu)->vmcb->save.sfmask = data;
 		break;
 #endif
 	case MSR_IA32_SYSENTER_CS:
-		vcpu->svm->vmcb->save.sysenter_cs = data;
+		svm(vcpu)->vmcb->save.sysenter_cs = data;
 		break;
 	case MSR_IA32_SYSENTER_EIP:
-		vcpu->svm->vmcb->save.sysenter_eip = data;
+		svm(vcpu)->vmcb->save.sysenter_eip = data;
 		break;
 	case MSR_IA32_SYSENTER_ESP:
-		vcpu->svm->vmcb->save.sysenter_esp = data;
+		svm(vcpu)->vmcb->save.sysenter_esp = data;
 		break;
 	default:
 		return kvm_set_msr_common(vcpu, ecx, data);
@@ -1254,9 +1264,9 @@ static int wrmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	u32 ecx = vcpu->regs[VCPU_REGS_RCX];
-	u64 data = (vcpu->svm->vmcb->save.rax & -1u)
+	u64 data = (svm(vcpu)->vmcb->save.rax & -1u)
 		| ((u64)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32);
-	vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2;
+	svm(vcpu)->next_rip = svm(vcpu)->vmcb->save.rip + 2;
 	if (svm_set_msr(vcpu, ecx, data))
 		svm_inject_gp(vcpu, 0);
 	else
@@ -1266,7 +1276,7 @@ static int wrmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 static int msr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	if (vcpu->svm->vmcb->control.exit_info_1)
+	if (svm(vcpu)->vmcb->control.exit_info_1)
 		return wrmsr_interception(vcpu, kvm_run);
 	else
 		return rdmsr_interception(vcpu, kvm_run);
@@ -1338,13 +1348,13 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
 
 static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	u32 exit_code = vcpu->svm->vmcb->control.exit_code;
+	u32 exit_code = svm(vcpu)->vmcb->control.exit_code;
 
-	if (is_external_interrupt(vcpu->svm->vmcb->control.exit_int_info) &&
+	if (is_external_interrupt(svm(vcpu)->vmcb->control.exit_int_info) &&
 	    exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR)
 		printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
 		       "exit_code 0x%x\n",
-		       __FUNCTION__, vcpu->svm->vmcb->control.exit_int_info,
+		       __FUNCTION__, svm(vcpu)->vmcb->control.exit_int_info,
 		       exit_code);
 
 	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
@@ -1372,9 +1382,9 @@ static void pre_svm_run(struct kvm_vcpu *vcpu)
 
 	struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
 
-	vcpu->svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
+	svm(vcpu)->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
 	if (vcpu->cpu != cpu ||
-	    vcpu->svm->asid_generation != svm_data->asid_generation)
+	    svm(vcpu)->asid_generation != svm_data->asid_generation)
 		new_asid(vcpu, svm_data);
 }
 
@@ -1383,7 +1393,7 @@ static inline void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 {
 	struct vmcb_control_area *control;
 
-	control = &vcpu->svm->vmcb->control;
+	control = &svm(vcpu)->vmcb->control;
 	control->int_vector = pop_irq(vcpu);
 	control->int_ctl &= ~V_INTR_PRIO_MASK;
 	control->int_ctl |= V_IRQ_MASK |
@@ -1392,7 +1402,7 @@ static inline void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 
 static void kvm_reput_irq(struct kvm_vcpu *vcpu)
 {
-	struct vmcb_control_area *control = &vcpu->svm->vmcb->control;
+	struct vmcb_control_area *control = &svm(vcpu)->vmcb->control;
 
 	if (control->int_ctl & V_IRQ_MASK) {
 		control->int_ctl &= ~V_IRQ_MASK;
@@ -1406,11 +1416,11 @@ static void kvm_reput_irq(struct kvm_vcpu *vcpu)
 static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 				       struct kvm_run *kvm_run)
 {
-	struct vmcb_control_area *control = &vcpu->svm->vmcb->control;
+	struct vmcb_control_area *control = &svm(vcpu)->vmcb->control;
 
 	vcpu->interrupt_window_open =
 		(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
-		 (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
+		 (svm(vcpu)->vmcb->save.rflags & X86_EFLAGS_IF));
 
 	if (vcpu->interrupt_window_open && vcpu->irq_summary)
 		/*
@@ -1433,7 +1443,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 {
 	kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
 						  vcpu->irq_summary == 0);
-	kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0;
+	kvm_run->if_flag = (svm(vcpu)->vmcb->save.rflags & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = vcpu->cr8;
 	kvm_run->apic_base = vcpu->apic_base;
 }
@@ -1450,7 +1460,7 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 	return (!vcpu->irq_summary &&
 		kvm_run->request_interrupt_window &&
 		vcpu->interrupt_window_open &&
-		(vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
+		(svm(vcpu)->vmcb->save.rflags & X86_EFLAGS_IF));
 }
 
 static void save_db_regs(unsigned long *db_regs)
@@ -1502,15 +1512,15 @@ again:
 	fs_selector = read_fs();
 	gs_selector = read_gs();
 	ldt_selector = read_ldt();
-	vcpu->svm->host_cr2 = kvm_read_cr2();
-	vcpu->svm->host_dr6 = read_dr6();
-	vcpu->svm->host_dr7 = read_dr7();
-	vcpu->svm->vmcb->save.cr2 = vcpu->cr2;
+	svm(vcpu)->host_cr2 = kvm_read_cr2();
+	svm(vcpu)->host_dr6 = read_dr6();
+	svm(vcpu)->host_dr7 = read_dr7();
+	svm(vcpu)->vmcb->save.cr2 = vcpu->cr2;
 
-	if (vcpu->svm->vmcb->save.dr7 & 0xff) {
+	if (svm(vcpu)->vmcb->save.dr7 & 0xff) {
 		write_dr7(0);
-		save_db_regs(vcpu->svm->host_db_regs);
-		load_db_regs(vcpu->svm->db_regs);
+		save_db_regs(svm(vcpu)->host_db_regs);
+		load_db_regs(svm(vcpu)->db_regs);
 	}
 
 	if (vcpu->fpu_active) {
@@ -1607,7 +1617,7 @@ again:
 #endif
 		:
 		: [vcpu]"a"(vcpu),
-		  [svm]"i"(offsetof(struct kvm_vcpu, svm)),
+		  [svm]"i"(offsetof(struct kvm_vcpu, _priv)),
 		  [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
 		  [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])),
 		  [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])),
@@ -1634,14 +1644,14 @@ again:
 		fx_restore(vcpu->host_fx_image);
 	}
 
-	if ((vcpu->svm->vmcb->save.dr7 & 0xff))
-		load_db_regs(vcpu->svm->host_db_regs);
+	if ((svm(vcpu)->vmcb->save.dr7 & 0xff))
+		load_db_regs(svm(vcpu)->host_db_regs);
 
-	vcpu->cr2 = vcpu->svm->vmcb->save.cr2;
+	vcpu->cr2 = svm(vcpu)->vmcb->save.cr2;
 
-	write_dr6(vcpu->svm->host_dr6);
-	write_dr7(vcpu->svm->host_dr7);
-	kvm_write_cr2(vcpu->svm->host_cr2);
+	write_dr6(svm(vcpu)->host_dr6);
+	write_dr7(svm(vcpu)->host_dr7);
+	kvm_write_cr2(svm(vcpu)->host_cr2);
 
 	load_fs(fs_selector);
 	load_gs(gs_selector);
@@ -1655,18 +1665,18 @@ again:
 	 */
 	if (unlikely(prof_on == KVM_PROFILING))
 		profile_hit(KVM_PROFILING,
-			(void *)(unsigned long)vcpu->svm->vmcb->save.rip);
+			(void *)(unsigned long)svm(vcpu)->vmcb->save.rip);
 
 	stgi();
 
 	kvm_reput_irq(vcpu);
 
-	vcpu->svm->next_rip = 0;
+	svm(vcpu)->next_rip = 0;
 
-	if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
+	if (svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_ERR) {
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 		kvm_run->fail_entry.hardware_entry_failure_reason
-			= vcpu->svm->vmcb->control.exit_code;
+			= svm(vcpu)->vmcb->control.exit_code;
 		post_kvm_run_save(vcpu, kvm_run);
 		return 0;
 	}
@@ -1695,12 +1705,12 @@ again:
 
 static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 {
-	vcpu->svm->vmcb->save.cr3 = root;
+	svm(vcpu)->vmcb->save.cr3 = root;
 	force_new_asid(vcpu);
 
 	if (vcpu->fpu_active) {
-		vcpu->svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
-		vcpu->svm->vmcb->save.cr0 |= X86_CR0_TS;
+		svm(vcpu)->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
+		svm(vcpu)->vmcb->save.cr0 |= X86_CR0_TS;
 		vcpu->fpu_active = 0;
 	}
 }
@@ -1709,26 +1719,26 @@ static void svm_inject_page_fault(struct kvm_vcpu *vcpu,
 				  unsigned long addr,
 				  uint32_t err_code)
 {
-	uint32_t exit_int_info = vcpu->svm->vmcb->control.exit_int_info;
+	uint32_t exit_int_info = svm(vcpu)->vmcb->control.exit_int_info;
 
 	++vcpu->stat.pf_guest;
 
 	if (is_page_fault(exit_int_info)) {
-		vcpu->svm->vmcb->control.event_inj_err = 0;
-		vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
+		svm(vcpu)->vmcb->control.event_inj_err = 0;
+		svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID |
 						     SVM_EVTINJ_VALID_ERR |
 						     SVM_EVTINJ_TYPE_EXEPT |
 						     DF_VECTOR;
 		return;
 	}
 	vcpu->cr2 = addr;
-	vcpu->svm->vmcb->save.cr2 = addr;
-	vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
+	svm(vcpu)->vmcb->save.cr2 = addr;
+	svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID |
 					     SVM_EVTINJ_VALID_ERR |
 					     SVM_EVTINJ_TYPE_EXEPT |
 					     PF_VECTOR;
-	vcpu->svm->vmcb->control.event_inj_err = err_code;
+	svm(vcpu)->vmcb->control.event_inj_err = err_code;
 }
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index dac2f93..5f0a7fd 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -32,6 +32,37 @@ MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
+struct vmcs {
+	u32 revision_id;
+	u32 abort;
+	char data[0];
+};
+
+struct vcpu_vmx {
+	struct kvm_vcpu *vcpu;
+	int launched;
+	struct kvm_msr_entry *guest_msrs;
+	struct kvm_msr_entry *host_msrs;
+	int nmsrs;
+	int save_nmsrs;
+	int msr_offset_efer;
+#ifdef CONFIG_X86_64
+	int msr_offset_kernel_gs_base;
+#endif
+	struct vmcs *vmcs;
+	struct {
+		int loaded;
+		u16 fs_sel, gs_sel, ldt_sel;
+		int fs_gs_ldt_reload_needed;
+	}host_state;
+
+};
+
+static inline struct vcpu_vmx* vmx(struct kvm_vcpu *vcpu)
+{
+	return (struct vcpu_vmx*)vcpu->_priv;
+}
+
 static int init_rmode_tss(struct kvm *kvm);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
@@ -89,16 +120,32 @@ static const u32 vmx_msr_index[] =
{ }; #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) -static inline u64 msr_efer_save_restore_bits(struct vmx_msr_entry msr) +static void load_msrs(struct kvm_msr_entry *e, int n) +{ + int i; + + for (i = 0; i < n; ++i) + wrmsrl(e[i].index, e[i].data); +} + +static void save_msrs(struct kvm_msr_entry *e, int n) +{ + int i; + + for (i = 0; i < n; ++i) + rdmsrl(e[i].index, e[i].data); +} + +static inline u64 msr_efer_save_restore_bits(struct kvm_msr_entry msr) { return (u64)msr.data & EFER_SAVE_RESTORE_BITS; } static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu) { - int efer_offset = vcpu->msr_offset_efer; - return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) != - msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]); + int efer_offset = vmx(vcpu)->msr_offset_efer; + return msr_efer_save_restore_bits(vmx(vcpu)->host_msrs[efer_offset]) != + msr_efer_save_restore_bits(vmx(vcpu)->guest_msrs[efer_offset]); } static inline int is_page_fault(u32 intr_info) @@ -125,19 +172,19 @@ static int __find_msr_index(struct kvm_vcpu *vcpu, u32 msr) { int i; - for (i = 0; i < vcpu->nmsrs; ++i) - if (vcpu->guest_msrs[i].index == msr) + for (i = 0; i < vmx(vcpu)->nmsrs; ++i) + if (vmx(vcpu)->guest_msrs[i].index == msr) return i; return -1; } -static struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr) +static struct kvm_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr) { int i; i = __find_msr_index(vcpu, msr); if (i >= 0) - return &vcpu->guest_msrs[i]; + return &vmx(vcpu)->guest_msrs[i]; return NULL; } @@ -160,8 +207,8 @@ static void __vcpu_clear(void *arg) int cpu = raw_smp_processor_id(); if (vcpu->cpu == cpu) - vmcs_clear(vcpu->vmcs); - if (per_cpu(current_vmcs, cpu) == vcpu->vmcs) + vmcs_clear(vmx(vcpu)->vmcs); + if (per_cpu(current_vmcs, cpu) == vmx(vcpu)->vmcs) per_cpu(current_vmcs, cpu) = NULL; rdtscll(vcpu->host_tsc); } @@ -172,7 +219,7 @@ static void vcpu_clear(struct kvm_vcpu *vcpu) smp_call_function_single(vcpu->cpu, 
__vcpu_clear, vcpu, 0, 1); else __vcpu_clear(vcpu); - vcpu->launched = 0; + vmx(vcpu)->launched = 0; } static unsigned long vmcs_readl(unsigned long field) @@ -285,80 +332,77 @@ static void reload_tss(void) static void load_transition_efer(struct kvm_vcpu *vcpu) { u64 trans_efer; - int efer_offset = vcpu->msr_offset_efer; + int efer_offset = vmx(vcpu)->msr_offset_efer; - trans_efer = vcpu->host_msrs[efer_offset].data; + trans_efer = vmx(vcpu)->host_msrs[efer_offset].data; trans_efer &= ~EFER_SAVE_RESTORE_BITS; trans_efer |= msr_efer_save_restore_bits( - vcpu->guest_msrs[efer_offset]); + vmx(vcpu)->guest_msrs[efer_offset]); wrmsrl(MSR_EFER, trans_efer); vcpu->stat.efer_reload++; } static void vmx_save_host_state(struct kvm_vcpu *vcpu) { - struct vmx_host_state *hs = &vcpu->vmx_host_state; - - if (hs->loaded) + if (vmx(vcpu)->host_state.loaded) return; - hs->loaded = 1; + vmx(vcpu)->host_state.loaded = 1; /* * Set host fs and gs selectors. Unfortunately, 22.2.3 does not * allow segment selectors with cpl > 0 or ti == 1. 
*/ - hs->ldt_sel = read_ldt(); - hs->fs_gs_ldt_reload_needed = hs->ldt_sel; - hs->fs_sel = read_fs(); - if (!(hs->fs_sel & 7)) - vmcs_write16(HOST_FS_SELECTOR, hs->fs_sel); + vmx(vcpu)->host_state.ldt_sel = read_ldt(); + vmx(vcpu)->host_state.fs_gs_ldt_reload_needed = vmx(vcpu)->host_state.ldt_sel; + vmx(vcpu)->host_state.fs_sel = read_fs(); + if (!(vmx(vcpu)->host_state.fs_sel & 7)) + vmcs_write16(HOST_FS_SELECTOR, vmx(vcpu)->host_state.fs_sel); else { vmcs_write16(HOST_FS_SELECTOR, 0); - hs->fs_gs_ldt_reload_needed = 1; + vmx(vcpu)->host_state.fs_gs_ldt_reload_needed = 1; } - hs->gs_sel = read_gs(); - if (!(hs->gs_sel & 7)) - vmcs_write16(HOST_GS_SELECTOR, hs->gs_sel); + vmx(vcpu)->host_state.gs_sel = read_gs(); + if (!(vmx(vcpu)->host_state.gs_sel & 7)) + vmcs_write16(HOST_GS_SELECTOR, vmx(vcpu)->host_state.gs_sel); else { vmcs_write16(HOST_GS_SELECTOR, 0); - hs->fs_gs_ldt_reload_needed = 1; + vmx(vcpu)->host_state.fs_gs_ldt_reload_needed = 1; } #ifdef CONFIG_X86_64 vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); #else - vmcs_writel(HOST_FS_BASE, segment_base(hs->fs_sel)); - vmcs_writel(HOST_GS_BASE, segment_base(hs->gs_sel)); + vmcs_writel(HOST_FS_BASE, segment_base(vmx(vcpu)->host_state.fs_sel)); + vmcs_writel(HOST_GS_BASE, segment_base(vmx(vcpu)->host_state.gs_sel)); #endif #ifdef CONFIG_X86_64 if (is_long_mode(vcpu)) { - save_msrs(vcpu->host_msrs + vcpu->msr_offset_kernel_gs_base, 1); + save_msrs(vmx(vcpu)->host_msrs + + vmx(vcpu)->msr_offset_kernel_gs_base, 1); } #endif - load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + load_msrs(vmx(vcpu)->guest_msrs, vmx(vcpu)->save_nmsrs); if (msr_efer_need_save_restore(vcpu)) load_transition_efer(vcpu); } static void vmx_load_host_state(struct kvm_vcpu *vcpu) { - struct vmx_host_state *hs = &vcpu->vmx_host_state; - - if (!hs->loaded) + if (!vmx(vcpu)->host_state.loaded) return; - hs->loaded = 0; - if (hs->fs_gs_ldt_reload_needed) { - load_ldt(hs->ldt_sel); - 
load_fs(hs->fs_sel); + vmx(vcpu)->host_state.loaded = 0; + if (vmx(vcpu)->host_state.fs_gs_ldt_reload_needed) { + load_ldt(vmx(vcpu)->host_state.ldt_sel); + load_fs(vmx(vcpu)->host_state.fs_sel); /* * If we have to reload gs, we must take care to * preserve our gs base. */ local_irq_disable(); - load_gs(hs->gs_sel); + load_gs(vmx(vcpu)->host_state.gs_sel); #ifdef CONFIG_X86_64 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); #endif @@ -366,10 +410,11 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) reload_tss(); } - save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); - load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); + save_msrs(vmx(vcpu)->guest_msrs, vmx(vcpu)->save_nmsrs); + load_msrs(vmx(vcpu)->host_msrs, vmx(vcpu)->save_nmsrs); if (msr_efer_need_save_restore(vcpu)) - load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1); + load_msrs(vmx(vcpu)->host_msrs + + vmx(vcpu)->msr_offset_efer, 1); } /* @@ -378,7 +423,7 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) */ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) { - u64 phys_addr = __pa(vcpu->vmcs); + u64 phys_addr = __pa(vmx(vcpu)->vmcs); int cpu; u64 tsc_this, delta; @@ -387,16 +432,16 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) if (vcpu->cpu != cpu) vcpu_clear(vcpu); - if (per_cpu(current_vmcs, cpu) != vcpu->vmcs) { + if (per_cpu(current_vmcs, cpu) != vmx(vcpu)->vmcs) { u8 error; - per_cpu(current_vmcs, cpu) = vcpu->vmcs; + per_cpu(current_vmcs, cpu) = vmx(vcpu)->vmcs; asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0" : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) : "cc"); if (error) printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", - vcpu->vmcs, phys_addr); + vmx(vcpu)->vmcs, phys_addr); } if (vcpu->cpu != cpu) { @@ -503,13 +548,13 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) */ void move_msr_up(struct kvm_vcpu *vcpu, int from, int to) { - struct vmx_msr_entry tmp; - tmp = vcpu->guest_msrs[to]; - vcpu->guest_msrs[to] = vcpu->guest_msrs[from]; - vcpu->guest_msrs[from] = 
tmp; - tmp = vcpu->host_msrs[to]; - vcpu->host_msrs[to] = vcpu->host_msrs[from]; - vcpu->host_msrs[from] = tmp; + struct kvm_msr_entry tmp; + tmp = vmx(vcpu)->guest_msrs[to]; + vmx(vcpu)->guest_msrs[to] = vmx(vcpu)->guest_msrs[from]; + vmx(vcpu)->guest_msrs[from] = tmp; + tmp = vmx(vcpu)->host_msrs[to]; + vmx(vcpu)->host_msrs[to] = vmx(vcpu)->host_msrs[from]; + vmx(vcpu)->host_msrs[from] = tmp; } /* @@ -547,13 +592,13 @@ static void setup_msrs(struct kvm_vcpu *vcpu) move_msr_up(vcpu, index, save_nmsrs++); } #endif - vcpu->save_nmsrs = save_nmsrs; + vmx(vcpu)->save_nmsrs = save_nmsrs; #ifdef CONFIG_X86_64 - vcpu->msr_offset_kernel_gs_base = + vmx(vcpu)->msr_offset_kernel_gs_base = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); #endif - vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); + vmx(vcpu)->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); } /* @@ -589,7 +634,7 @@ static void guest_write_tsc(u64 guest_tsc) static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) { u64 data; - struct vmx_msr_entry *msr; + struct kvm_msr_entry *msr; if (!pdata) { printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); @@ -639,14 +684,14 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) */ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { - struct vmx_msr_entry *msr; + struct kvm_msr_entry *msr; int ret = 0; switch (msr_index) { #ifdef CONFIG_X86_64 case MSR_EFER: ret = kvm_set_msr_common(vcpu, msr_index, data); - if (vcpu->vmx_host_state.loaded) + if (vmx(vcpu)->host_state.loaded) load_transition_efer(vcpu); break; case MSR_FS_BASE: @@ -672,8 +717,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) msr = find_msr_entry(vcpu, msr_index); if (msr) { msr->data = data; - if (vcpu->vmx_host_state.loaded) - load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + if (vmx(vcpu)->host_state.loaded) + load_msrs(vmx(vcpu)->guest_msrs, vmx(vcpu)->save_nmsrs); break; } ret = kvm_set_msr_common(vcpu, 
msr_index, data); @@ -1053,7 +1098,7 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) { - struct vmx_msr_entry *msr = find_msr_entry(vcpu, MSR_EFER); + struct kvm_msr_entry *msr = find_msr_entry(vcpu, MSR_EFER); vcpu->shadow_efer = efer; if (efer & EFER_LMA) { @@ -1385,18 +1430,18 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) u32 index = vmx_msr_index[i]; u32 data_low, data_high; u64 data; - int j = vcpu->nmsrs; + int j = vmx(vcpu)->nmsrs; if (rdmsr_safe(index, &data_low, &data_high) < 0) continue; if (wrmsr_safe(index, data_low, data_high) < 0) continue; data = data_low | ((u64)data_high << 32); - vcpu->host_msrs[j].index = index; - vcpu->host_msrs[j].reserved = 0; - vcpu->host_msrs[j].data = data; - vcpu->guest_msrs[j] = vcpu->host_msrs[j]; - ++vcpu->nmsrs; + vmx(vcpu)->host_msrs[j].index = index; + vmx(vcpu)->host_msrs[j].reserved = 0; + vmx(vcpu)->host_msrs[j].data = data; + vmx(vcpu)->guest_msrs[j] = vmx(vcpu)->host_msrs[j]; + ++vmx(vcpu)->nmsrs; } setup_msrs(vcpu); @@ -2123,7 +2168,7 @@ again: #endif "setbe %0 \n\t" : "=q" (fail) - : "r"(vcpu->launched), "d"((unsigned long)HOST_RSP), + : "r"(vmx(vcpu)->launched), "d"((unsigned long)HOST_RSP), "c"(vcpu), [rax]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RAX])), [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])), @@ -2167,7 +2212,7 @@ again: if (unlikely(prof_on == KVM_PROFILING)) profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); - vcpu->launched = 1; + vmx(vcpu)->launched = 1; r = kvm_handle_exit(kvm_run, vcpu); if (r > 0) { /* Give scheduler a change to reschedule. 
*/ @@ -2232,10 +2277,11 @@ static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, static void vmx_free_vmcs(struct kvm_vcpu *vcpu) { - if (vcpu->vmcs) { + + if (vmx(vcpu)->vmcs) { on_each_cpu(__vcpu_clear, vcpu, 0, 1); - free_vmcs(vcpu->vmcs); - vcpu->vmcs = NULL; + free_vmcs(vmx(vcpu)->vmcs); + vmx(vcpu)->vmcs = NULL; } } @@ -2246,33 +2292,39 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) static int vmx_create_vcpu(struct kvm_vcpu *vcpu) { - struct vmcs *vmcs; + struct vcpu_vmx *vmx; - vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!vcpu->guest_msrs) + vmx = kzalloc(sizeof(*vmx), GFP_KERNEL); + if (!vmx) return -ENOMEM; - vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!vcpu->host_msrs) - goto out_free_guest_msrs; + vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!vmx->guest_msrs) + goto out_free; - vmcs = alloc_vmcs(); - if (!vmcs) - goto out_free_msrs; + vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!vmx->host_msrs) + goto out_free; - vmcs_clear(vmcs); - vcpu->vmcs = vmcs; - vcpu->launched = 0; + vmx->vmcs = alloc_vmcs(); + if (!vmx->vmcs) + goto out_free; + + vmcs_clear(vmx->vmcs); + + vmx->vcpu = vcpu; + vcpu->_priv = vmx; return 0; -out_free_msrs: - kfree(vcpu->host_msrs); - vcpu->host_msrs = NULL; +out_free: + if (vmx->host_msrs) + kfree(vmx->host_msrs); + + if (vmx->guest_msrs) + kfree(vmx->guest_msrs); -out_free_guest_msrs: - kfree(vcpu->guest_msrs); - vcpu->guest_msrs = NULL; + kfree(vmx); return -ENOMEM; } ^ permalink raw reply related [flat|nested] 12+ messages in thread
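[Editor's note] The core idea of the patch above — the arch-neutral `struct kvm_vcpu` keeps only an opaque `_priv` pointer, and the arch module recovers its private state through a typed accessor — can be sketched in user space. The types below are illustrative stand-ins, not the real KVM structures:

```c
#include <assert.h>
#include <stdlib.h>

/* Arch-neutral vcpu: the core code no longer knows about VMX/SVM state. */
struct kvm_vcpu {
	int vcpu_id;
	void *_priv;		/* opaque arch-specific payload */
};

/* Arch-private state, visible only inside the arch module. */
struct vcpu_vmx {
	struct kvm_vcpu *vcpu;	/* back-pointer to the generic vcpu */
	int launched;
};

/* Typed accessor, mirroring the patch's vmx() helper. */
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
{
	return (struct vcpu_vmx *)vcpu->_priv;
}

/* Mirrors the shape of vmx_create_vcpu(): allocate the private
 * state, link it both ways, and hand it to the generic vcpu. */
static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = calloc(1, sizeof(*vmx));

	if (!vmx)
		return -1;
	vmx->vcpu = vcpu;
	vcpu->_priv = vmx;
	return 0;
}
```

The benefit, as the diffstat shows, is that `kvm.h` loses the `union { vmcs; svm; }` and all VMX-specific MSR bookkeeping; each backend owns its state behind one `void *`.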
[parent not found: <20070726184525.9119.73573.stgit-sLgBBP33vUGnsjUZhwzVf9HuzzzSOjJt@public.gmane.org>]
* [PATCH 1/2] Rename svm() and vmx() to to_svm() and to_vmx(). [not found] ` <20070726184525.9119.73573.stgit-sLgBBP33vUGnsjUZhwzVf9HuzzzSOjJt@public.gmane.org> @ 2007-07-27 6:53 ` Rusty Russell [not found] ` <1185519205.12151.47.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org> 0 siblings, 1 reply; 12+ messages in thread From: Rusty Russell @ 2007-07-27 6:53 UTC (permalink / raw) To: Gregory Haskins; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f On Thu, 2007-07-26 at 14:45 -0400, Gregory Haskins wrote: > Signed-off-by: Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org> OK, in anticipation that you would do it, I've done a trivial s/svm()/to_svm()/ and s/vmx()/to_vmx()/ patch and put my patch on top of it. I think the result is quite nice (there are some potential cleanups of the now-gratuitous to-and-fro conversions, but this is simple). Probably easiest to fold this one straight into yours and post as one patch. Cheers, Rusty. == This goes on top of "[PATCH 1/3] KVM: Remove arch specific components from the general code" and changes svm() to to_svm() and vmx() to to_vmx(). It uses a tmp var where multiple calls would be needed, and fixes up some linewrap issues. It can be simply folded into the previous patch. 
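[Editor's note] The "tmp var where multiple calls would be needed" convention in Rusty's patch can be shown in miniature. The structures below are toy stand-ins for the KVM types, kept only to make the two shapes compile:

```c
#include <assert.h>

/* Toy stand-ins, not the real vmcb/vcpu definitions. */
struct vmcb_save_area { unsigned long rip; unsigned long rflags; };
struct vmcb { struct vmcb_save_area save; };
struct vcpu_svm { struct vmcb *vmcb; unsigned long next_rip; };
struct kvm_vcpu { void *_priv; unsigned long rip; };

static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
	return (struct vcpu_svm *)vcpu->_priv;
}

/* Single use: call the accessor inline, no local needed. */
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
{
	return to_svm(vcpu)->vmcb->save.rflags;
}

/* Multiple uses: cache the accessor result in a local once,
 * which also keeps the lines short enough to avoid wrapping. */
static void skip_instruction(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	vcpu->rip = svm->vmcb->save.rip = svm->next_rip;
}
```

The local costs nothing (the accessor is a trivial inline cast) but reads better than repeating `to_svm(vcpu)->` on every line, which is exactly the trade the patch below makes in functions like `skip_emulated_instruction()`.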
Signed-off-by: Rusty Russell <rusty-8n+1lVoiYb80n/F98K4Iww@public.gmane.org> diff -r b318edfbdb7d drivers/kvm/svm.c --- a/drivers/kvm/svm.c Fri Jul 27 15:55:31 2007 +1000 +++ b/drivers/kvm/svm.c Fri Jul 27 16:09:24 2007 +1000 @@ -49,7 +49,7 @@ MODULE_LICENSE("GPL"); #define SVM_FEATURE_LBRV (1 << 1) #define SVM_DEATURE_SVML (1 << 2) -static inline struct vcpu_svm* svm(struct kvm_vcpu *vcpu) +static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) { return (struct vcpu_svm*)vcpu->_priv; } @@ -100,7 +100,7 @@ static inline u32 svm_has(u32 feat) static unsigned get_addr_size(struct kvm_vcpu *vcpu) { - struct vmcb_save_area *sa = &svm(vcpu)->vmcb->save; + struct vmcb_save_area *sa = &to_svm(vcpu)->vmcb->save; u16 cs_attrib; if (!(sa->cr0 & X86_CR0_PE) || (sa->rflags & X86_EFLAGS_VM)) @@ -186,7 +186,7 @@ static inline void write_dr7(unsigned lo static inline void force_new_asid(struct kvm_vcpu *vcpu) { - svm(vcpu)->asid_generation--; + to_svm(vcpu)->asid_generation--; } static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) @@ -199,22 +199,24 @@ static void svm_set_efer(struct kvm_vcpu if (!(efer & KVM_EFER_LMA)) efer &= ~KVM_EFER_LME; - svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; + to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; vcpu->shadow_efer = efer; } static void svm_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) { - svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID | + struct vcpu_svm *svm = to_svm(vcpu); + + svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_VALID_ERR | SVM_EVTINJ_TYPE_EXEPT | GP_VECTOR; - svm(vcpu)->vmcb->control.event_inj_err = error_code; + svm->vmcb->control.event_inj_err = error_code; } static void inject_ud(struct kvm_vcpu *vcpu) { - svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID | + to_svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT | UD_VECTOR; } @@ -233,19 +235,21 @@ static int is_external_interrupt(u32 inf static void 
skip_emulated_instruction(struct kvm_vcpu *vcpu) { - if (!svm(vcpu)->next_rip) { + struct vcpu_svm *svm = to_svm(vcpu); + + if (!svm->next_rip) { printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__); return; } - if (svm(vcpu)->next_rip - svm(vcpu)->vmcb->save.rip > 15) { + if (svm->next_rip - svm->vmcb->save.rip > 15) { printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n", __FUNCTION__, - svm(vcpu)->vmcb->save.rip, - svm(vcpu)->next_rip); - } - - vcpu->rip = svm(vcpu)->vmcb->save.rip = svm(vcpu)->next_rip; - svm(vcpu)->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; + svm->vmcb->save.rip, + svm->next_rip); + } + + vcpu->rip = svm->vmcb->save.rip = svm->next_rip; + svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; vcpu->interrupt_window_open = 1; } @@ -612,16 +616,19 @@ out1: static void svm_free_vcpu(struct kvm_vcpu *vcpu) { - if (!svm(vcpu)) + struct vcpu_svm *svm = to_svm(vcpu); + + if (!svm) return; - if (svm(vcpu)->vmcb) - __free_page(pfn_to_page(svm(vcpu)->vmcb_pa >> PAGE_SHIFT)); - kfree(svm(vcpu)); + if (svm->vmcb) + __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); + kfree(svm); vcpu->_priv = NULL; } static void svm_vcpu_load(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); int cpu, i; cpu = get_cpu(); @@ -634,20 +641,21 @@ static void svm_vcpu_load(struct kvm_vcp */ rdtscll(tsc_this); delta = vcpu->host_tsc - tsc_this; - svm(vcpu)->vmcb->control.tsc_offset += delta; + svm->vmcb->control.tsc_offset += delta; vcpu->cpu = cpu; } for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) - rdmsrl(host_save_user_msrs[i], svm(vcpu)->host_user_msrs[i]); + rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); } static void svm_vcpu_put(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); int i; for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) - wrmsrl(host_save_user_msrs[i], svm(vcpu)->host_user_msrs[i]); + wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); rdtscll(vcpu->host_tsc); put_cpu(); @@ -659,31 +667,34 @@ static void 
svm_vcpu_decache(struct kvm_ static void svm_cache_regs(struct kvm_vcpu *vcpu) { - vcpu->regs[VCPU_REGS_RAX] = svm(vcpu)->vmcb->save.rax; - vcpu->regs[VCPU_REGS_RSP] = svm(vcpu)->vmcb->save.rsp; - vcpu->rip = svm(vcpu)->vmcb->save.rip; + struct vcpu_svm *svm = to_svm(vcpu); + + vcpu->regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; + vcpu->regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; + vcpu->rip = svm->vmcb->save.rip; } static void svm_decache_regs(struct kvm_vcpu *vcpu) { - svm(vcpu)->vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX]; - svm(vcpu)->vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP]; - svm(vcpu)->vmcb->save.rip = vcpu->rip; + struct vcpu_svm *svm = to_svm(vcpu); + svm->vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX]; + svm->vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP]; + svm->vmcb->save.rip = vcpu->rip; } static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) { - return svm(vcpu)->vmcb->save.rflags; + return to_svm(vcpu)->vmcb->save.rflags; } static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { - svm(vcpu)->vmcb->save.rflags = rflags; + to_svm(vcpu)->vmcb->save.rflags = rflags; } static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) { - struct vmcb_save_area *save = &svm(vcpu)->vmcb->save; + struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; switch (seg) { case VCPU_SREG_CS: return &save->cs; @@ -735,26 +746,34 @@ static void svm_get_cs_db_l_bits(struct static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { - dt->limit = svm(vcpu)->vmcb->save.idtr.limit; - dt->base = svm(vcpu)->vmcb->save.idtr.base; + struct vcpu_svm *svm = to_svm(vcpu); + + dt->limit = svm->vmcb->save.idtr.limit; + dt->base = svm->vmcb->save.idtr.base; } static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { - svm(vcpu)->vmcb->save.idtr.limit = dt->limit; - svm(vcpu)->vmcb->save.idtr.base = dt->base ; + struct vcpu_svm *svm = to_svm(vcpu); + + svm->vmcb->save.idtr.limit = dt->limit; + svm->vmcb->save.idtr.base = dt->base ; } 
static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { - dt->limit = svm(vcpu)->vmcb->save.gdtr.limit; - dt->base = svm(vcpu)->vmcb->save.gdtr.base; + struct vcpu_svm *svm = to_svm(vcpu); + + dt->limit = svm->vmcb->save.gdtr.limit; + dt->base = svm->vmcb->save.gdtr.base; } static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { - svm(vcpu)->vmcb->save.gdtr.limit = dt->limit; - svm(vcpu)->vmcb->save.gdtr.base = dt->base ; + struct vcpu_svm *svm = to_svm(vcpu); + + svm->vmcb->save.gdtr.limit = dt->limit; + svm->vmcb->save.gdtr.base = dt->base ; } static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) @@ -763,39 +782,42 @@ static void svm_decache_cr4_guest_bits(s static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { + struct vcpu_svm *svm = to_svm(vcpu); + #ifdef CONFIG_X86_64 if (vcpu->shadow_efer & KVM_EFER_LME) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { vcpu->shadow_efer |= KVM_EFER_LMA; - svm(vcpu)->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME; + svm->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME; } if (is_paging(vcpu) && !(cr0 & X86_CR0_PG) ) { vcpu->shadow_efer &= ~KVM_EFER_LMA; - svm(vcpu)->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME); + svm->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME); } } #endif if ((vcpu->cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { - svm(vcpu)->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); + svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); vcpu->fpu_active = 1; } vcpu->cr0 = cr0; cr0 |= X86_CR0_PG | X86_CR0_WP; cr0 &= ~(X86_CR0_CD | X86_CR0_NW); - svm(vcpu)->vmcb->save.cr0 = cr0; + svm->vmcb->save.cr0 = cr0; } static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { vcpu->cr4 = cr4; - svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE; + to_svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE; } static void svm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { + struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_seg 
*s = svm_seg(vcpu, seg); s->base = var->base; @@ -814,8 +836,8 @@ static void svm_set_segment(struct kvm_v s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; } if (seg == VCPU_SREG_CS) - svm(vcpu)->vmcb->save.cpl - = (svm(vcpu)->vmcb->save.cs.attrib + svm->vmcb->save.cpl + = (svm->vmcb->save.cs.attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; } @@ -835,55 +857,59 @@ static void load_host_msrs(struct kvm_vc static void load_host_msrs(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, svm(vcpu)->host_gs_base); + wrmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base); #endif } static void save_host_msrs(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 - rdmsrl(MSR_GS_BASE, svm(vcpu)->host_gs_base); + rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base); #endif } static void new_asid(struct kvm_vcpu *vcpu, struct svm_cpu_data *svm_data) { + struct vcpu_svm *svm = to_svm(vcpu); + if (svm_data->next_asid > svm_data->max_asid) { ++svm_data->asid_generation; svm_data->next_asid = 1; - svm(vcpu)->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; + svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; } vcpu->cpu = svm_data->cpu; - svm(vcpu)->asid_generation = svm_data->asid_generation; - svm(vcpu)->vmcb->control.asid = svm_data->next_asid++; + svm->asid_generation = svm_data->asid_generation; + svm->vmcb->control.asid = svm_data->next_asid++; } static void svm_invlpg(struct kvm_vcpu *vcpu, gva_t address) { - invlpga(address, svm(vcpu)->vmcb->control.asid); // is needed? + invlpga(address, to_svm(vcpu)->vmcb->control.asid); // is needed? 
} static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) { - return svm(vcpu)->db_regs[dr]; + return to_svm(vcpu)->db_regs[dr]; } static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, int *exception) { + struct vcpu_svm *svm = to_svm(vcpu); + *exception = 0; - if (svm(vcpu)->vmcb->save.dr7 & DR7_GD_MASK) { - svm(vcpu)->vmcb->save.dr7 &= ~DR7_GD_MASK; - svm(vcpu)->vmcb->save.dr6 |= DR6_BD_MASK; + if (svm->vmcb->save.dr7 & DR7_GD_MASK) { + svm->vmcb->save.dr7 &= ~DR7_GD_MASK; + svm->vmcb->save.dr6 |= DR6_BD_MASK; *exception = DB_VECTOR; return; } switch (dr) { case 0 ... 3: - svm(vcpu)->db_regs[dr] = value; + svm->db_regs[dr] = value; return; case 4 ... 5: if (vcpu->cr4 & X86_CR4_DE) { @@ -895,7 +921,7 @@ static void svm_set_dr(struct kvm_vcpu * *exception = GP_VECTOR; return; } - svm(vcpu)->vmcb->save.dr7 = value; + svm->vmcb->save.dr7 = value; return; } default: @@ -908,7 +934,8 @@ static void svm_set_dr(struct kvm_vcpu * static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - u32 exit_int_info = svm(vcpu)->vmcb->control.exit_int_info; + struct vcpu_svm *svm = to_svm(vcpu); + u32 exit_int_info = svm->vmcb->control.exit_int_info; u64 fault_address; u32 error_code; enum emulation_result er; @@ -919,8 +946,8 @@ static int pf_interception(struct kvm_vc spin_lock(&vcpu->kvm->lock); - fault_address = svm(vcpu)->vmcb->control.exit_info_2; - error_code = svm(vcpu)->vmcb->control.exit_info_1; + fault_address = svm->vmcb->control.exit_info_2; + error_code = svm->vmcb->control.exit_info_1; r = kvm_mmu_page_fault(vcpu, fault_address, error_code); if (r < 0) { spin_unlock(&vcpu->kvm->lock); @@ -952,22 +979,25 @@ static int pf_interception(struct kvm_vc static int nm_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - svm(vcpu)->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); - if (!(vcpu->cr0 & X86_CR0_TS)) - svm(vcpu)->vmcb->save.cr0 &= ~X86_CR0_TS; - vcpu->fpu_active = 1; - - return 1; + struct vcpu_svm 
*svm = to_svm(vcpu); + + svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); + if (!(vcpu->cr0 & X86_CR0_TS)) + svm->vmcb->save.cr0 &= ~X86_CR0_TS; + vcpu->fpu_active = 1; + + return 1; } static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct vcpu_svm *svm = to_svm(vcpu); /* * VMCB is undefined after a SHUTDOWN intercept * so reinitialize it. */ - clear_page(svm(vcpu)->vmcb); - init_vmcb(svm(vcpu)->vmcb); + clear_page(svm->vmcb); + init_vmcb(svm->vmcb); kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; return 0; @@ -977,23 +1007,24 @@ static int io_get_override(struct kvm_vc struct vmcb_seg **seg, int *addr_override) { + struct vcpu_svm *svm = to_svm(vcpu); u8 inst[MAX_INST_SIZE]; unsigned ins_length; gva_t rip; int i; - rip = svm(vcpu)->vmcb->save.rip; - ins_length = svm(vcpu)->next_rip - rip; - rip += svm(vcpu)->vmcb->save.cs.base; + rip = svm->vmcb->save.rip; + ins_length = svm->next_rip - rip; + rip += svm->vmcb->save.cs.base; if (ins_length > MAX_INST_SIZE) printk(KERN_DEBUG "%s: inst length err, cs base 0x%llx rip 0x%llx " "next rip 0x%llx ins_length %u\n", __FUNCTION__, - svm(vcpu)->vmcb->save.cs.base, - svm(vcpu)->vmcb->save.rip, - svm(vcpu)->vmcb->control.exit_info_2, + svm->vmcb->save.cs.base, + svm->vmcb->save.rip, + svm->vmcb->control.exit_info_2, ins_length); if (kvm_read_guest(vcpu, rip, ins_length, inst) != ins_length) @@ -1013,22 +1044,22 @@ static int io_get_override(struct kvm_vc *addr_override = 1; continue; case 0x2e: - *seg = &svm(vcpu)->vmcb->save.cs; + *seg = &svm->vmcb->save.cs; continue; case 0x36: - *seg = &svm(vcpu)->vmcb->save.ss; + *seg = &svm->vmcb->save.ss; continue; case 0x3e: - *seg = &svm(vcpu)->vmcb->save.ds; + *seg = &svm->vmcb->save.ds; continue; case 0x26: - *seg = &svm(vcpu)->vmcb->save.es; + *seg = &svm->vmcb->save.es; continue; case 0x64: - *seg = &svm(vcpu)->vmcb->save.fs; + *seg = &svm->vmcb->save.fs; continue; case 0x65: - *seg = &svm(vcpu)->vmcb->save.gs; + *seg = 
&svm->vmcb->save.gs; continue; default: return 1; @@ -1043,7 +1074,8 @@ static unsigned long io_adress(struct kv unsigned long *reg; struct vmcb_seg *seg; int addr_override; - struct vmcb_save_area *save_area = &svm(vcpu)->vmcb->save; + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb_save_area *save_area = &svm->vmcb->save; u16 cs_attrib = save_area->cs.attrib; unsigned addr_size = get_addr_size(vcpu); @@ -1055,16 +1087,16 @@ static unsigned long io_adress(struct kv if (ins) { reg = &vcpu->regs[VCPU_REGS_RDI]; - seg = &svm(vcpu)->vmcb->save.es; + seg = &svm->vmcb->save.es; } else { reg = &vcpu->regs[VCPU_REGS_RSI]; - seg = (seg) ? seg : &svm(vcpu)->vmcb->save.ds; + seg = (seg) ? seg : &svm->vmcb->save.ds; } addr_mask = ~0ULL >> (64 - (addr_size * 8)); if ((cs_attrib & SVM_SELECTOR_L_MASK) && - !(svm(vcpu)->vmcb->save.rflags & X86_EFLAGS_VM)) { + !(svm->vmcb->save.rflags & X86_EFLAGS_VM)) { *address = (*reg & addr_mask); return addr_mask; } @@ -1080,7 +1112,8 @@ static unsigned long io_adress(struct kv static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - u32 io_info = svm(vcpu)->vmcb->control.exit_info_1; //address size bug? + struct vcpu_svm *svm = to_svm(vcpu); + u32 io_info = svm->vmcb->control.exit_info_1; //address size bug? 
int size, down, in, string, rep; unsigned port; unsigned long count; @@ -1088,7 +1121,7 @@ static int io_interception(struct kvm_vc ++vcpu->stat.io_exits; - svm(vcpu)->next_rip = svm(vcpu)->vmcb->control.exit_info_2; + svm->next_rip = svm->vmcb->control.exit_info_2; in = (io_info & SVM_IOIO_TYPE_MASK) != 0; port = io_info >> 16; @@ -1096,7 +1129,7 @@ static int io_interception(struct kvm_vc string = (io_info & SVM_IOIO_STR_MASK) != 0; rep = (io_info & SVM_IOIO_REP_MASK) != 0; count = 1; - down = (svm(vcpu)->vmcb->save.rflags & X86_EFLAGS_DF) != 0; + down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; if (string) { unsigned addr_mask; @@ -1122,14 +1155,18 @@ static int nop_on_interception(struct kv static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - svm(vcpu)->next_rip = svm(vcpu)->vmcb->save.rip + 1; + struct vcpu_svm *svm = to_svm(vcpu); + + svm->next_rip = svm->vmcb->save.rip + 1; skip_emulated_instruction(vcpu); return kvm_emulate_halt(vcpu); } static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - svm(vcpu)->next_rip = svm(vcpu)->vmcb->save.rip + 3; + struct vcpu_svm *svm = to_svm(vcpu); + + svm->next_rip = svm->vmcb->save.rip + 3; skip_emulated_instruction(vcpu); return kvm_hypercall(vcpu, kvm_run); } @@ -1149,7 +1186,9 @@ static int task_switch_interception(stru static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - svm(vcpu)->next_rip = svm(vcpu)->vmcb->save.rip + 2; + struct vcpu_svm *svm = to_svm(vcpu); + + svm->next_rip = svm->vmcb->save.rip + 2; kvm_emulate_cpuid(vcpu); return 1; } @@ -1163,39 +1202,41 @@ static int emulate_on_interception(struc static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) { + struct vcpu_svm *svm = to_svm(vcpu); + switch (ecx) { case MSR_IA32_TIME_STAMP_COUNTER: { u64 tsc; rdtscll(tsc); - *data = svm(vcpu)->vmcb->control.tsc_offset + tsc; + *data = svm->vmcb->control.tsc_offset + tsc; break; } case MSR_K6_STAR: - *data = 
svm(vcpu)->vmcb->save.star; + *data = svm->vmcb->save.star; break; #ifdef CONFIG_X86_64 case MSR_LSTAR: - *data = svm(vcpu)->vmcb->save.lstar; + *data = svm->vmcb->save.lstar; break; case MSR_CSTAR: - *data = svm(vcpu)->vmcb->save.cstar; + *data = svm->vmcb->save.cstar; break; case MSR_KERNEL_GS_BASE: - *data = svm(vcpu)->vmcb->save.kernel_gs_base; + *data = svm->vmcb->save.kernel_gs_base; break; case MSR_SYSCALL_MASK: - *data = svm(vcpu)->vmcb->save.sfmask; + *data = svm->vmcb->save.sfmask; break; #endif case MSR_IA32_SYSENTER_CS: - *data = svm(vcpu)->vmcb->save.sysenter_cs; + *data = svm->vmcb->save.sysenter_cs; break; case MSR_IA32_SYSENTER_EIP: - *data = svm(vcpu)->vmcb->save.sysenter_eip; + *data = svm->vmcb->save.sysenter_eip; break; case MSR_IA32_SYSENTER_ESP: - *data = svm(vcpu)->vmcb->save.sysenter_esp; + *data = svm->vmcb->save.sysenter_esp; break; default: return kvm_get_msr_common(vcpu, ecx, data); @@ -1205,15 +1246,16 @@ static int svm_get_msr(struct kvm_vcpu * static int rdmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct vcpu_svm *svm = to_svm(vcpu); u32 ecx = vcpu->regs[VCPU_REGS_RCX]; u64 data; if (svm_get_msr(vcpu, ecx, &data)) svm_inject_gp(vcpu, 0); else { - svm(vcpu)->vmcb->save.rax = data & 0xffffffff; + svm->vmcb->save.rax = data & 0xffffffff; vcpu->regs[VCPU_REGS_RDX] = data >> 32; - svm(vcpu)->next_rip = svm(vcpu)->vmcb->save.rip + 2; + svm->next_rip = svm->vmcb->save.rip + 2; skip_emulated_instruction(vcpu); } return 1; @@ -1221,39 +1263,41 @@ static int rdmsr_interception(struct kvm static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) { + struct vcpu_svm *svm = to_svm(vcpu); + switch (ecx) { case MSR_IA32_TIME_STAMP_COUNTER: { u64 tsc; rdtscll(tsc); - svm(vcpu)->vmcb->control.tsc_offset = data - tsc; + svm->vmcb->control.tsc_offset = data - tsc; break; } case MSR_K6_STAR: - svm(vcpu)->vmcb->save.star = data; + svm->vmcb->save.star = data; break; #ifdef CONFIG_X86_64 case MSR_LSTAR: - 
svm(vcpu)->vmcb->save.lstar = data; + svm->vmcb->save.lstar = data; break; case MSR_CSTAR: - svm(vcpu)->vmcb->save.cstar = data; + svm->vmcb->save.cstar = data; break; case MSR_KERNEL_GS_BASE: - svm(vcpu)->vmcb->save.kernel_gs_base = data; + svm->vmcb->save.kernel_gs_base = data; break; case MSR_SYSCALL_MASK: - svm(vcpu)->vmcb->save.sfmask = data; + svm->vmcb->save.sfmask = data; break; #endif case MSR_IA32_SYSENTER_CS: - svm(vcpu)->vmcb->save.sysenter_cs = data; + svm->vmcb->save.sysenter_cs = data; break; case MSR_IA32_SYSENTER_EIP: - svm(vcpu)->vmcb->save.sysenter_eip = data; + svm->vmcb->save.sysenter_eip = data; break; case MSR_IA32_SYSENTER_ESP: - svm(vcpu)->vmcb->save.sysenter_esp = data; + svm->vmcb->save.sysenter_esp = data; break; default: return kvm_set_msr_common(vcpu, ecx, data); @@ -1263,10 +1307,11 @@ static int svm_set_msr(struct kvm_vcpu * static int wrmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct vcpu_svm *svm = to_svm(vcpu); u32 ecx = vcpu->regs[VCPU_REGS_RCX]; - u64 data = (svm(vcpu)->vmcb->save.rax & -1u) + u64 data = (svm->vmcb->save.rax & -1u) | ((u64)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32); - svm(vcpu)->next_rip = svm(vcpu)->vmcb->save.rip + 2; + svm->next_rip = svm->vmcb->save.rip + 2; if (svm_set_msr(vcpu, ecx, data)) svm_inject_gp(vcpu, 0); else @@ -1276,7 +1321,7 @@ static int wrmsr_interception(struct kvm static int msr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - if (svm(vcpu)->vmcb->control.exit_info_1) + if (to_svm(vcpu)->vmcb->control.exit_info_1) return wrmsr_interception(vcpu, kvm_run); else return rdmsr_interception(vcpu, kvm_run); @@ -1348,13 +1393,14 @@ static int (*svm_exit_handlers[])(struct static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - u32 exit_code = svm(vcpu)->vmcb->control.exit_code; - - if (is_external_interrupt(svm(vcpu)->vmcb->control.exit_int_info) && + struct vcpu_svm *svm = to_svm(vcpu); + u32 exit_code = svm->vmcb->control.exit_code; + 
+ if (is_external_interrupt(svm->vmcb->control.exit_int_info) && exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " "exit_code 0x%x\n", - __FUNCTION__, svm(vcpu)->vmcb->control.exit_int_info, + __FUNCTION__, svm->vmcb->control.exit_int_info, exit_code); if (exit_code >= ARRAY_SIZE(svm_exit_handlers) @@ -1378,13 +1424,14 @@ static void reload_tss(struct kvm_vcpu * static void pre_svm_run(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); int cpu = raw_smp_processor_id(); struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); - svm(vcpu)->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; + svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; if (vcpu->cpu != cpu || - svm(vcpu)->asid_generation != svm_data->asid_generation) + svm->asid_generation != svm_data->asid_generation) new_asid(vcpu, svm_data); } @@ -1393,7 +1440,7 @@ static inline void kvm_do_inject_irq(str { struct vmcb_control_area *control; - control = &svm(vcpu)->vmcb->control; + control = &to_svm(vcpu)->vmcb->control; control->int_vector = pop_irq(vcpu); control->int_ctl &= ~V_INTR_PRIO_MASK; control->int_ctl |= V_IRQ_MASK | @@ -1402,7 +1449,7 @@ static inline void kvm_do_inject_irq(str static void kvm_reput_irq(struct kvm_vcpu *vcpu) { - struct vmcb_control_area *control = &svm(vcpu)->vmcb->control; + struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; if (control->int_ctl & V_IRQ_MASK) { control->int_ctl &= ~V_IRQ_MASK; @@ -1416,11 +1463,12 @@ static void do_interrupt_requests(struct static void do_interrupt_requests(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - struct vmcb_control_area *control = &svm(vcpu)->vmcb->control; + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb_control_area *control = &svm->vmcb->control; vcpu->interrupt_window_open = (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && - (svm(vcpu)->vmcb->save.rflags & X86_EFLAGS_IF)); + (svm->vmcb->save.rflags & X86_EFLAGS_IF)); if 
(vcpu->interrupt_window_open && vcpu->irq_summary) /* @@ -1441,9 +1489,11 @@ static void post_kvm_run_save(struct kvm static void post_kvm_run_save(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct vcpu_svm *svm = to_svm(vcpu); + kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open && vcpu->irq_summary == 0); - kvm_run->if_flag = (svm(vcpu)->vmcb->save.rflags & X86_EFLAGS_IF) != 0; + kvm_run->if_flag = (svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0; kvm_run->cr8 = vcpu->cr8; kvm_run->apic_base = vcpu->apic_base; } @@ -1460,7 +1510,7 @@ static int dm_request_for_irq_injection( return (!vcpu->irq_summary && kvm_run->request_interrupt_window && vcpu->interrupt_window_open && - (svm(vcpu)->vmcb->save.rflags & X86_EFLAGS_IF)); + (to_svm(vcpu)->vmcb->save.rflags & X86_EFLAGS_IF)); } static void save_db_regs(unsigned long *db_regs) @@ -1486,6 +1536,7 @@ static void svm_flush_tlb(struct kvm_vcp static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct vcpu_svm *svm = to_svm(vcpu); u16 fs_selector; u16 gs_selector; u16 ldt_selector; @@ -1512,15 +1563,15 @@ again: fs_selector = read_fs(); gs_selector = read_gs(); ldt_selector = read_ldt(); - svm(vcpu)->host_cr2 = kvm_read_cr2(); - svm(vcpu)->host_dr6 = read_dr6(); - svm(vcpu)->host_dr7 = read_dr7(); - svm(vcpu)->vmcb->save.cr2 = vcpu->cr2; - - if (svm(vcpu)->vmcb->save.dr7 & 0xff) { + svm->host_cr2 = kvm_read_cr2(); + svm->host_dr6 = read_dr6(); + svm->host_dr7 = read_dr7(); + svm->vmcb->save.cr2 = vcpu->cr2; + + if (svm->vmcb->save.dr7 & 0xff) { write_dr7(0); - save_db_regs(svm(vcpu)->host_db_regs); - load_db_regs(svm(vcpu)->db_regs); + save_db_regs(svm->host_db_regs); + load_db_regs(svm->db_regs); } if (vcpu->fpu_active) { @@ -1644,14 +1695,14 @@ again: fx_restore(vcpu->host_fx_image); } - if ((svm(vcpu)->vmcb->save.dr7 & 0xff)) - load_db_regs(svm(vcpu)->host_db_regs); - - vcpu->cr2 = svm(vcpu)->vmcb->save.cr2; - - write_dr6(svm(vcpu)->host_dr6); - 
write_dr7(svm(vcpu)->host_dr7); - kvm_write_cr2(svm(vcpu)->host_cr2); + if ((svm->vmcb->save.dr7 & 0xff)) + load_db_regs(svm->host_db_regs); + + vcpu->cr2 = svm->vmcb->save.cr2; + + write_dr6(svm->host_dr6); + write_dr7(svm->host_dr7); + kvm_write_cr2(svm->host_cr2); load_fs(fs_selector); load_gs(gs_selector); @@ -1665,18 +1716,18 @@ again: */ if (unlikely(prof_on == KVM_PROFILING)) profile_hit(KVM_PROFILING, - (void *)(unsigned long)svm(vcpu)->vmcb->save.rip); + (void *)(unsigned long)svm->vmcb->save.rip); stgi(); kvm_reput_irq(vcpu); - svm(vcpu)->next_rip = 0; - - if (svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_ERR) { + svm->next_rip = 0; + + if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason - = svm(vcpu)->vmcb->control.exit_code; + = svm->vmcb->control.exit_code; post_kvm_run_save(vcpu, kvm_run); return 0; } @@ -1705,12 +1756,14 @@ again: static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) { - svm(vcpu)->vmcb->save.cr3 = root; + struct vcpu_svm *svm = to_svm(vcpu); + + svm->vmcb->save.cr3 = root; force_new_asid(vcpu); if (vcpu->fpu_active) { - svm(vcpu)->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); - svm(vcpu)->vmcb->save.cr0 |= X86_CR0_TS; + svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); + svm->vmcb->save.cr0 |= X86_CR0_TS; vcpu->fpu_active = 0; } } @@ -1719,26 +1772,27 @@ static void svm_inject_page_fault(struct unsigned long addr, uint32_t err_code) { - uint32_t exit_int_info = svm(vcpu)->vmcb->control.exit_int_info; + struct vcpu_svm *svm = to_svm(vcpu); + uint32_t exit_int_info = svm->vmcb->control.exit_int_info; ++vcpu->stat.pf_guest; if (is_page_fault(exit_int_info)) { - svm(vcpu)->vmcb->control.event_inj_err = 0; - svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID | - SVM_EVTINJ_VALID_ERR | - SVM_EVTINJ_TYPE_EXEPT | - DF_VECTOR; - return; - } - vcpu->cr2 = addr; - svm(vcpu)->vmcb->save.cr2 = addr; - 
svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID | + svm->vmcb->control.event_inj_err = 0; + svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_VALID_ERR | SVM_EVTINJ_TYPE_EXEPT | - PF_VECTOR; - svm(vcpu)->vmcb->control.event_inj_err = err_code; + DF_VECTOR; + return; + } + vcpu->cr2 = addr; + svm->vmcb->save.cr2 = addr; + svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | + SVM_EVTINJ_VALID_ERR | + SVM_EVTINJ_TYPE_EXEPT | + PF_VECTOR; + svm->vmcb->control.event_inj_err = err_code; } diff -r b318edfbdb7d drivers/kvm/vmx.c --- a/drivers/kvm/vmx.c Fri Jul 27 15:55:31 2007 +1000 +++ b/drivers/kvm/vmx.c Fri Jul 27 16:14:52 2007 +1000 @@ -58,7 +58,7 @@ struct vcpu_vmx { }; -static inline struct vcpu_vmx* vmx(struct kvm_vcpu *vcpu) +static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) { return (struct vcpu_vmx*)vcpu->_priv; } @@ -143,9 +143,10 @@ static inline u64 msr_efer_save_restore_ static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu) { - int efer_offset = vmx(vcpu)->msr_offset_efer; - return msr_efer_save_restore_bits(vmx(vcpu)->host_msrs[efer_offset]) != - msr_efer_save_restore_bits(vmx(vcpu)->guest_msrs[efer_offset]); + struct vcpu_vmx *vmx = to_vmx(vcpu); + int efer_offset = vmx->msr_offset_efer; + return msr_efer_save_restore_bits(vmx->host_msrs[efer_offset]) != + msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]); } static inline int is_page_fault(u32 intr_info) @@ -170,21 +171,23 @@ static inline int is_external_interrupt( static int __find_msr_index(struct kvm_vcpu *vcpu, u32 msr) { + struct vcpu_vmx *vmx = to_vmx(vcpu); int i; - for (i = 0; i < vmx(vcpu)->nmsrs; ++i) - if (vmx(vcpu)->guest_msrs[i].index == msr) + for (i = 0; i < vmx->nmsrs; ++i) + if (vmx->guest_msrs[i].index == msr) return i; return -1; } static struct kvm_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr) { + struct vcpu_vmx *vmx = to_vmx(vcpu); int i; i = __find_msr_index(vcpu, msr); if (i >= 0) - return &vmx(vcpu)->guest_msrs[i]; + return 
&vmx->guest_msrs[i]; return NULL; } @@ -204,11 +207,12 @@ static void __vcpu_clear(void *arg) static void __vcpu_clear(void *arg) { struct kvm_vcpu *vcpu = arg; + struct vcpu_vmx *vmx = to_vmx(vcpu); int cpu = raw_smp_processor_id(); if (vcpu->cpu == cpu) - vmcs_clear(vmx(vcpu)->vmcs); - if (per_cpu(current_vmcs, cpu) == vmx(vcpu)->vmcs) + vmcs_clear(vmx->vmcs); + if (per_cpu(current_vmcs, cpu) == vmx->vmcs) per_cpu(current_vmcs, cpu) = NULL; rdtscll(vcpu->host_tsc); } @@ -219,7 +223,7 @@ static void vcpu_clear(struct kvm_vcpu * smp_call_function_single(vcpu->cpu, __vcpu_clear, vcpu, 0, 1); else __vcpu_clear(vcpu); - vmx(vcpu)->launched = 0; + to_vmx(vcpu)->launched = 0; } static unsigned long vmcs_readl(unsigned long field) @@ -332,77 +336,81 @@ static void load_transition_efer(struct static void load_transition_efer(struct kvm_vcpu *vcpu) { u64 trans_efer; - int efer_offset = vmx(vcpu)->msr_offset_efer; - - trans_efer = vmx(vcpu)->host_msrs[efer_offset].data; + struct vcpu_vmx *vmx = to_vmx(vcpu); + int efer_offset = vmx->msr_offset_efer; + + trans_efer = vmx->host_msrs[efer_offset].data; trans_efer &= ~EFER_SAVE_RESTORE_BITS; - trans_efer |= msr_efer_save_restore_bits( - vmx(vcpu)->guest_msrs[efer_offset]); + trans_efer |= msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]); wrmsrl(MSR_EFER, trans_efer); vcpu->stat.efer_reload++; } static void vmx_save_host_state(struct kvm_vcpu *vcpu) { - if (vmx(vcpu)->host_state.loaded) + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (vmx->host_state.loaded) return; - vmx(vcpu)->host_state.loaded = 1; + vmx->host_state.loaded = 1; /* * Set host fs and gs selectors. Unfortunately, 22.2.3 does not * allow segment selectors with cpl > 0 or ti == 1. 
*/ - vmx(vcpu)->host_state.ldt_sel = read_ldt(); - vmx(vcpu)->host_state.fs_gs_ldt_reload_needed = vmx(vcpu)->host_state.ldt_sel; - vmx(vcpu)->host_state.fs_sel = read_fs(); - if (!(vmx(vcpu)->host_state.fs_sel & 7)) - vmcs_write16(HOST_FS_SELECTOR, vmx(vcpu)->host_state.fs_sel); + vmx->host_state.ldt_sel = read_ldt(); + vmx->host_state.fs_gs_ldt_reload_needed = vmx->host_state.ldt_sel; + vmx->host_state.fs_sel = read_fs(); + if (!(vmx->host_state.fs_sel & 7)) + vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); else { vmcs_write16(HOST_FS_SELECTOR, 0); - vmx(vcpu)->host_state.fs_gs_ldt_reload_needed = 1; - } - vmx(vcpu)->host_state.gs_sel = read_gs(); - if (!(vmx(vcpu)->host_state.gs_sel & 7)) - vmcs_write16(HOST_GS_SELECTOR, vmx(vcpu)->host_state.gs_sel); + vmx->host_state.fs_gs_ldt_reload_needed = 1; + } + vmx->host_state.gs_sel = read_gs(); + if (!(vmx->host_state.gs_sel & 7)) + vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); else { vmcs_write16(HOST_GS_SELECTOR, 0); - vmx(vcpu)->host_state.fs_gs_ldt_reload_needed = 1; + vmx->host_state.fs_gs_ldt_reload_needed = 1; } #ifdef CONFIG_X86_64 vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); #else - vmcs_writel(HOST_FS_BASE, segment_base(vmx(vcpu)->host_state.fs_sel)); - vmcs_writel(HOST_GS_BASE, segment_base(vmx(vcpu)->host_state.gs_sel)); + vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel)); + vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel)); #endif #ifdef CONFIG_X86_64 if (is_long_mode(vcpu)) { - save_msrs(vmx(vcpu)->host_msrs + - vmx(vcpu)->msr_offset_kernel_gs_base, 1); + save_msrs(vmx->host_msrs + + vmx->msr_offset_kernel_gs_base, 1); } #endif - load_msrs(vmx(vcpu)->guest_msrs, vmx(vcpu)->save_nmsrs); + load_msrs(vmx->guest_msrs, vmx->save_nmsrs); if (msr_efer_need_save_restore(vcpu)) load_transition_efer(vcpu); } static void vmx_load_host_state(struct kvm_vcpu *vcpu) { - if (!vmx(vcpu)->host_state.loaded) + struct 
vcpu_vmx *vmx = to_vmx(vcpu); + + if (!vmx->host_state.loaded) return; - vmx(vcpu)->host_state.loaded = 0; - if (vmx(vcpu)->host_state.fs_gs_ldt_reload_needed) { - load_ldt(vmx(vcpu)->host_state.ldt_sel); - load_fs(vmx(vcpu)->host_state.fs_sel); + vmx->host_state.loaded = 0; + if (vmx->host_state.fs_gs_ldt_reload_needed) { + load_ldt(vmx->host_state.ldt_sel); + load_fs(vmx->host_state.fs_sel); /* * If we have to reload gs, we must take care to * preserve our gs base. */ local_irq_disable(); - load_gs(vmx(vcpu)->host_state.gs_sel); + load_gs(vmx->host_state.gs_sel); #ifdef CONFIG_X86_64 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); #endif @@ -410,11 +418,10 @@ static void vmx_load_host_state(struct k reload_tss(); } - save_msrs(vmx(vcpu)->guest_msrs, vmx(vcpu)->save_nmsrs); - load_msrs(vmx(vcpu)->host_msrs, vmx(vcpu)->save_nmsrs); + save_msrs(vmx->guest_msrs, vmx->save_nmsrs); + load_msrs(vmx->host_msrs, vmx->save_nmsrs); if (msr_efer_need_save_restore(vcpu)) - load_msrs(vmx(vcpu)->host_msrs + - vmx(vcpu)->msr_offset_efer, 1); + load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1); } /* @@ -423,7 +430,8 @@ static void vmx_load_host_state(struct k */ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) { - u64 phys_addr = __pa(vmx(vcpu)->vmcs); + struct vcpu_vmx *vmx = to_vmx(vcpu); + u64 phys_addr = __pa(vmx->vmcs); int cpu; u64 tsc_this, delta; @@ -432,16 +440,16 @@ static void vmx_vcpu_load(struct kvm_vcp if (vcpu->cpu != cpu) vcpu_clear(vcpu); - if (per_cpu(current_vmcs, cpu) != vmx(vcpu)->vmcs) { + if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { u8 error; - per_cpu(current_vmcs, cpu) = vmx(vcpu)->vmcs; + per_cpu(current_vmcs, cpu) = vmx->vmcs; asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0" : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) : "cc"); if (error) printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", - vmx(vcpu)->vmcs, phys_addr); + vmx->vmcs, phys_addr); } if (vcpu->cpu != cpu) { @@ -548,13 +556,15 @@ static void vmx_inject_gp(struct kvm_vcp */ void 
move_msr_up(struct kvm_vcpu *vcpu, int from, int to) { + struct vcpu_vmx *vmx = to_vmx(vcpu); struct kvm_msr_entry tmp; - tmp = vmx(vcpu)->guest_msrs[to]; - vmx(vcpu)->guest_msrs[to] = vmx(vcpu)->guest_msrs[from]; - vmx(vcpu)->guest_msrs[from] = tmp; - tmp = vmx(vcpu)->host_msrs[to]; - vmx(vcpu)->host_msrs[to] = vmx(vcpu)->host_msrs[from]; - vmx(vcpu)->host_msrs[from] = tmp; + + tmp = vmx->guest_msrs[to]; + vmx->guest_msrs[to] = vmx->guest_msrs[from]; + vmx->guest_msrs[from] = tmp; + tmp = vmx->host_msrs[to]; + vmx->host_msrs[to] = vmx->host_msrs[from]; + vmx->host_msrs[from] = tmp; } /* @@ -564,6 +574,7 @@ void move_msr_up(struct kvm_vcpu *vcpu, */ static void setup_msrs(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); int save_nmsrs; save_nmsrs = 0; @@ -592,13 +603,13 @@ static void setup_msrs(struct kvm_vcpu * move_msr_up(vcpu, index, save_nmsrs++); } #endif - vmx(vcpu)->save_nmsrs = save_nmsrs; + vmx->save_nmsrs = save_nmsrs; #ifdef CONFIG_X86_64 - vmx(vcpu)->msr_offset_kernel_gs_base = + vmx->msr_offset_kernel_gs_base = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); #endif - vmx(vcpu)->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); + vmx->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); } /* @@ -684,6 +695,7 @@ static int vmx_get_msr(struct kvm_vcpu * */ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { + struct vcpu_vmx *vmx = to_vmx(vcpu); struct kvm_msr_entry *msr; int ret = 0; @@ -691,7 +703,7 @@ static int vmx_set_msr(struct kvm_vcpu * #ifdef CONFIG_X86_64 case MSR_EFER: ret = kvm_set_msr_common(vcpu, msr_index, data); - if (vmx(vcpu)->host_state.loaded) + if (vmx->host_state.loaded) load_transition_efer(vcpu); break; case MSR_FS_BASE: @@ -717,8 +729,8 @@ static int vmx_set_msr(struct kvm_vcpu * msr = find_msr_entry(vcpu, msr_index); if (msr) { msr->data = data; - if (vmx(vcpu)->host_state.loaded) - load_msrs(vmx(vcpu)->guest_msrs, vmx(vcpu)->save_nmsrs); + if (vmx->host_state.loaded) + load_msrs(vmx->guest_msrs, 
vmx->save_nmsrs); break; } ret = kvm_set_msr_common(vcpu, msr_index, data); @@ -1281,6 +1293,7 @@ static void seg_setup(int seg) */ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); u32 host_sysenter_cs; u32 junk; unsigned long a; @@ -1422,18 +1435,18 @@ static int vmx_vcpu_setup(struct kvm_vcp u32 index = vmx_msr_index[i]; u32 data_low, data_high; u64 data; - int j = vmx(vcpu)->nmsrs; + int j = vmx->nmsrs; if (rdmsr_safe(index, &data_low, &data_high) < 0) continue; if (wrmsr_safe(index, data_low, data_high) < 0) continue; data = data_low | ((u64)data_high << 32); - vmx(vcpu)->host_msrs[j].index = index; - vmx(vcpu)->host_msrs[j].reserved = 0; - vmx(vcpu)->host_msrs[j].data = data; - vmx(vcpu)->guest_msrs[j] = vmx(vcpu)->host_msrs[j]; - ++vmx(vcpu)->nmsrs; + vmx->host_msrs[j].index = index; + vmx->host_msrs[j].reserved = 0; + vmx->host_msrs[j].data = data; + vmx->guest_msrs[j] = vmx->host_msrs[j]; + ++vmx->nmsrs; } setup_msrs(vcpu); @@ -2036,6 +2049,7 @@ static void vmx_flush_tlb(struct kvm_vcp static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct vcpu_vmx *vmx = to_vmx(vcpu); u8 fail; int r; @@ -2160,7 +2174,7 @@ again: #endif "setbe %0 \n\t" : "=q" (fail) - : "r"(vmx(vcpu)->launched), "d"((unsigned long)HOST_RSP), + : "r"(vmx->launched), "d"((unsigned long)HOST_RSP), "c"(vcpu), [rax]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RAX])), [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])), @@ -2204,7 +2218,7 @@ again: if (unlikely(prof_on == KVM_PROFILING)) profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); - vmx(vcpu)->launched = 1; + vmx->launched = 1; r = kvm_handle_exit(kvm_run, vcpu); if (r > 0) { /* Give scheduler a change to reschedule. 
*/ @@ -2269,11 +2283,12 @@ static void vmx_inject_page_fault(struct static void vmx_free_vmcs(struct kvm_vcpu *vcpu) { - - if (vmx(vcpu)->vmcs) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (vmx->vmcs) { on_each_cpu(__vcpu_clear, vcpu, 0, 1); - free_vmcs(vmx(vcpu)->vmcs); - vmx(vcpu)->vmcs = NULL; + free_vmcs(vmx->vmcs); + vmx->vmcs = NULL; } } ^ permalink raw reply [flat|nested] 12+ messages in thread
[parent not found: <1185519205.12151.47.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>]
* [PATCH 2/2] Dynamically allocate vcpus [not found] ` <1185519205.12151.47.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org> @ 2007-07-27 6:55 ` Rusty Russell [not found] ` <1185519330.12151.50.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org> 0 siblings, 1 reply; 12+ messages in thread From: Rusty Russell @ 2007-07-27 6:55 UTC (permalink / raw) To: Gregory Haskins; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f This patch converts the vcpus array in "struct kvm" to a linked list of VCPUs, and changes the "vcpu_create" and "vcpu_setup" hooks into one "vcpu_create" call which does the allocation and initialization of the vcpu (calling back into the kvm_vcpu_init core helper). It is untested on SMP or SVM, but the idea is that SVM and VMX can enclose the common "kvm_vcpu" in separate structures. More cleanup can follow this patch. Signed-off-by: Rusty Russell <rusty-8n+1lVoiYb80n/F98K4Iww@public.gmane.org> diff -r d7a87af170b0 drivers/kvm/kvm.h --- a/drivers/kvm/kvm.h Fri Jul 27 16:15:19 2007 +1000 +++ b/drivers/kvm/kvm.h Fri Jul 27 16:18:36 2007 +1000 @@ -300,10 +300,9 @@ void kvm_io_bus_register_dev(struct kvm_ struct kvm_io_device *dev); struct kvm_vcpu { - int valid; + struct list_head list; struct kvm *kvm; int vcpu_id; - void *_priv; struct mutex mutex; int cpu; u64 host_tsc; @@ -404,8 +403,7 @@ struct kvm { struct list_head active_mmu_pages; int n_free_mmu_pages; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; - int nvcpus; - struct kvm_vcpu vcpus[KVM_MAX_VCPUS]; + struct list_head vcpus; int memory_config_version; int busy; unsigned long rmap_overflow; @@ -428,7 +426,8 @@ struct kvm_arch_ops { int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ - int (*vcpu_create)(struct kvm_vcpu *vcpu); + /* Create, but do not attach this VCPU */ + struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); void (*vcpu_free)(struct kvm_vcpu *vcpu); void (*vcpu_load)(struct kvm_vcpu *vcpu); @@ -470,7 +469,6 @@ struct 
kvm_arch_ops { void (*inject_gp)(struct kvm_vcpu *vcpu, unsigned err_code); int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); - int (*vcpu_setup)(struct kvm_vcpu *vcpu); void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*patch_hypercall)(struct kvm_vcpu *vcpu, unsigned char *hypercall_addr); @@ -480,6 +478,9 @@ extern struct kvm_arch_ops *kvm_arch_ops #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt) + +int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); +void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module); void kvm_exit_arch(void); diff -r d7a87af170b0 drivers/kvm/kvm_main.c --- a/drivers/kvm/kvm_main.c Fri Jul 27 16:15:19 2007 +1000 +++ b/drivers/kvm/kvm_main.c Fri Jul 27 16:18:36 2007 +1000 @@ -139,11 +139,6 @@ unsigned long segment_base(u16 selector) } EXPORT_SYMBOL_GPL(segment_base); -static inline int valid_vcpu(int n) -{ - return likely(n >= 0 && n < KVM_MAX_VCPUS); -} - int kvm_read_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size, void *dest) { @@ -258,7 +253,7 @@ static void ack_flush(void *_completed) void kvm_flush_remote_tlbs(struct kvm *kvm) { - int i, cpu, needed; + int cpu, needed; cpumask_t cpus; struct kvm_vcpu *vcpu; atomic_t completed; @@ -266,8 +261,7 @@ void kvm_flush_remote_tlbs(struct kvm *k atomic_set(&completed, 0); cpus_clear(cpus); needed = 0; - for (i = 0; i < kvm->nvcpus; ++i) { - vcpu = &kvm->vcpus[i]; + list_for_each_entry(vcpu, &kvm->vcpus, list) { if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests)) continue; cpu = vcpu->cpu; @@ -291,10 +285,62 @@ void kvm_flush_remote_tlbs(struct kvm *k } } +int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) +{ + struct page *page; + int r; + + mutex_init(&vcpu->mutex); + vcpu->cpu = -1; + vcpu->mmu.root_hpa = INVALID_PAGE; + vcpu->kvm = kvm; + vcpu->vcpu_id = id; + + page = alloc_page(GFP_KERNEL | 
__GFP_ZERO); + if (!page) { + r = -ENOMEM; + goto fail; + } + vcpu->run = page_address(page); + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) { + r = -ENOMEM; + goto fail_free_run; + } + vcpu->pio_data = page_address(page); + + vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, + FX_IMAGE_ALIGN); + vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; + + r = kvm_mmu_create(vcpu); + if (r < 0) + goto fail_free_pio_data; + + return 0; + +fail_free_pio_data: + free_page((unsigned long)vcpu->pio_data); +fail_free_run: + free_page((unsigned long)vcpu->run); +fail: + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(kvm_vcpu_init); + +void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + kvm_mmu_destroy(vcpu); + free_page((unsigned long)vcpu->pio_data); + free_page((unsigned long)vcpu->run); + +} +EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); + static struct kvm *kvm_create_vm(void) { struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); - int i; if (!kvm) return ERR_PTR(-ENOMEM); @@ -303,14 +349,7 @@ static struct kvm *kvm_create_vm(void) spin_lock_init(&kvm->lock); INIT_LIST_HEAD(&kvm->active_mmu_pages); kvm_io_bus_init(&kvm->mmio_bus); - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - struct kvm_vcpu *vcpu = &kvm->vcpus[i]; - - mutex_init(&vcpu->mutex); - vcpu->cpu = -1; - vcpu->kvm = kvm; - vcpu->mmu.root_hpa = INVALID_PAGE; - } + INIT_LIST_HEAD(&kvm->vcpus); spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); @@ -367,41 +406,32 @@ static void free_pio_guest_pages(struct static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { - if (!vcpu->valid) - return; - vcpu_load(vcpu); kvm_mmu_unload(vcpu); vcpu_put(vcpu); } -static void kvm_free_vcpu(struct kvm_vcpu *vcpu) -{ - if (!vcpu->valid) - return; - - vcpu_load(vcpu); - kvm_mmu_destroy(vcpu); - vcpu_put(vcpu); - kvm_arch_ops->vcpu_free(vcpu); - free_page((unsigned long)vcpu->run); - vcpu->run = NULL; - free_page((unsigned long)vcpu->pio_data); - vcpu->pio_data = NULL; - 
free_pio_guest_pages(vcpu); -} - static void kvm_free_vcpus(struct kvm *kvm) { - unsigned int i; + struct kvm_vcpu *vcpu; /* * Unpin any mmu pages first. */ - for (i = 0; i < KVM_MAX_VCPUS; ++i) - kvm_unload_vcpu_mmu(&kvm->vcpus[i]); - for (i = 0; i < KVM_MAX_VCPUS; ++i) - kvm_free_vcpu(&kvm->vcpus[i]); + list_for_each_entry(vcpu, &kvm->vcpus, list) + kvm_unload_vcpu_mmu(vcpu); + + spin_lock(&kvm->lock); + while (!list_empty(&kvm->vcpus)) { + vcpu = list_first_entry(&kvm->vcpus, struct kvm_vcpu, list); + list_del(&vcpu->list); + + /* Drop lock to free it, now it's detached. */ + spin_unlock(&kvm->lock); + kvm_arch_ops->vcpu_free(vcpu); + spin_lock(&kvm->lock); + } + spin_unlock(&kvm->lock); } static int kvm_dev_release(struct inode *inode, struct file *filp) @@ -2371,78 +2401,48 @@ static int kvm_vm_ioctl_create_vcpu(stru static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) { int r; - struct kvm_vcpu *vcpu; - struct page *page; - - r = -EINVAL; - if (!valid_vcpu(n)) - goto out; - - vcpu = &kvm->vcpus[n]; - vcpu->vcpu_id = n; - - mutex_lock(&vcpu->mutex); - - if (vcpu->valid) { - mutex_unlock(&vcpu->mutex); - return -EEXIST; - } - - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - r = -ENOMEM; - if (!page) - goto out_unlock; - vcpu->run = page_address(page); - - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - r = -ENOMEM; - if (!page) - goto out_free_run; - vcpu->pio_data = page_address(page); - - vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, - FX_IMAGE_ALIGN); - vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; - vcpu->cr0 = 0x10; - - r = kvm_arch_ops->vcpu_create(vcpu); + struct kvm_vcpu *vcpu, *i; + + vcpu = kvm_arch_ops->vcpu_create(kvm, n); + if (IS_ERR(vcpu)) + return PTR_ERR(vcpu); + + vcpu_load(vcpu); + r = kvm_mmu_setup(vcpu); + vcpu_put(vcpu); if (r < 0) - goto out_free_vcpus; - - r = kvm_mmu_create(vcpu); - if (r < 0) - goto out_free_vcpus; - - kvm_arch_ops->vcpu_load(vcpu); - r = kvm_mmu_setup(vcpu); - if (r >= 0) - r = 
kvm_arch_ops->vcpu_setup(vcpu); - vcpu_put(vcpu); - - if (r < 0) - goto out_free_vcpus; - + goto free_vcpu; + + spin_lock(&kvm->lock); + /* What do we care if they create duplicate CPU ids? But be nice. */ + list_for_each_entry(i, &kvm->vcpus, list) { + if (i->vcpu_id == n) { + r = -EEXIST; + spin_unlock(&kvm->lock); + goto mmu_unload; + } + } + list_add_tail(&vcpu->list, &kvm->vcpus); + spin_unlock(&kvm->lock); + + /* Now it's all set up, let userspace reach it */ r = create_vcpu_fd(vcpu); if (r < 0) - goto out_free_vcpus; - - spin_lock(&kvm_lock); - if (n >= kvm->nvcpus) - kvm->nvcpus = n + 1; - spin_unlock(&kvm_lock); - - vcpu->valid = 1; - + goto unlink; return r; -out_free_vcpus: - kvm_free_vcpu(vcpu); -out_free_run: - free_page((unsigned long)vcpu->run); - vcpu->run = NULL; -out_unlock: - mutex_unlock(&vcpu->mutex); -out: +unlink: + spin_lock(&kvm->lock); + list_del(&vcpu->list); + spin_unlock(&kvm->lock); + +mmu_unload: + vcpu_load(vcpu); + kvm_mmu_unload(vcpu); + vcpu_put(vcpu); + +free_vcpu: + kvm_arch_ops->vcpu_free(vcpu); return r; } @@ -2932,12 +2932,11 @@ static void decache_vcpus_on_cpu(int cpu { struct kvm *vm; struct kvm_vcpu *vcpu; - int i; spin_lock(&kvm_lock); - list_for_each_entry(vm, &vm_list, vm_list) - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = &vm->vcpus[i]; + list_for_each_entry(vm, &vm_list, vm_list) { + spin_lock(&vm->lock); + list_for_each_entry(vcpu, &vm->vcpus, list) { /* * If the vcpu is locked, then it is running on some * other cpu and therefore it is not cached on the @@ -2954,6 +2953,8 @@ static void decache_vcpus_on_cpu(int cpu mutex_unlock(&vcpu->mutex); } } + spin_unlock(&vm->lock); + } spin_unlock(&kvm_lock); } @@ -3069,14 +3070,14 @@ static u64 stat_get(void *_offset) u64 total = 0; struct kvm *kvm; struct kvm_vcpu *vcpu; - int i; spin_lock(&kvm_lock); - list_for_each_entry(kvm, &vm_list, vm_list) - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = &kvm->vcpus[i]; + list_for_each_entry(kvm, &vm_list, vm_list) { + 
spin_lock(&kvm->lock); + list_for_each_entry(vcpu, &kvm->vcpus, list) total += *(u32 *)((void *)vcpu + offset); - } + spin_unlock(&kvm->lock); + } spin_unlock(&kvm_lock); return total; } diff -r d7a87af170b0 drivers/kvm/kvm_svm.h --- a/drivers/kvm/kvm_svm.h Fri Jul 27 16:15:19 2007 +1000 +++ b/drivers/kvm/kvm_svm.h Fri Jul 27 16:18:36 2007 +1000 @@ -23,7 +23,7 @@ struct kvm_vcpu; struct kvm_vcpu; struct vcpu_svm { - struct kvm_vcpu *vcpu; + struct kvm_vcpu vcpu; struct vmcb *vmcb; unsigned long vmcb_pa; struct svm_cpu_data *svm_data; diff -r d7a87af170b0 drivers/kvm/svm.c --- a/drivers/kvm/svm.c Fri Jul 27 16:15:19 2007 +1000 +++ b/drivers/kvm/svm.c Fri Jul 27 16:18:36 2007 +1000 @@ -51,7 +51,7 @@ MODULE_LICENSE("GPL"); static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) { - return (struct vcpu_svm*)vcpu->_priv; + return container_of(vcpu, struct vcpu_svm, vcpu); } unsigned long iopm_base; @@ -464,11 +464,6 @@ static void init_sys_seg(struct vmcb_seg seg->attrib = SVM_SELECTOR_P_MASK | type; seg->limit = 0xffff; seg->base = 0; -} - -static int svm_vcpu_setup(struct kvm_vcpu *vcpu) -{ - return 0; } static void init_vmcb(struct vmcb *vmcb) @@ -576,19 +571,27 @@ static void init_vmcb(struct vmcb *vmcb) /* rdx = ?? 
*/ } -static int svm_create_vcpu(struct kvm_vcpu *vcpu) +static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) { struct vcpu_svm *svm; struct page *page; - int r; - - r = -ENOMEM; + int err; + svm = kzalloc(sizeof *svm, GFP_KERNEL); - if (!svm) - goto out1; + if (!svm) { + err = -ENOMEM; + goto out; + } + + err = kvm_vcpu_init(&svm->vcpu, kvm, id); + if (err) + goto free_svm; + page = alloc_page(GFP_KERNEL); - if (!page) - goto out2; + if (!page) { + err = -ENOMEM; + goto uninit; + } svm->vmcb = page_address(page); clear_page(svm->vmcb); @@ -597,33 +600,29 @@ static int svm_create_vcpu(struct kvm_vc memset(svm->db_regs, 0, sizeof(svm->db_regs)); init_vmcb(svm->vmcb); - svm->vcpu = vcpu; - vcpu->_priv = svm; - - fx_init(vcpu); - vcpu->fpu_active = 1; - vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; - if (vcpu->vcpu_id == 0) - vcpu->apic_base |= MSR_IA32_APICBASE_BSP; - - return 0; - -out2: + fx_init(&svm->vcpu); + svm->vcpu.fpu_active = 1; + svm->vcpu.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; + if (svm->vcpu.vcpu_id == 0) + svm->vcpu.apic_base |= MSR_IA32_APICBASE_BSP; + + return &svm->vcpu; + +uninit: + kvm_vcpu_uninit(&svm->vcpu); +free_svm: kfree(svm); -out1: - return r; +out: + return ERR_PTR(err); } static void svm_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - if (!svm) - return; - if (svm->vmcb) - __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); + __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); + kvm_vcpu_uninit(vcpu); kfree(svm); - vcpu->_priv = NULL; } static void svm_vcpu_load(struct kvm_vcpu *vcpu) @@ -1591,34 +1590,33 @@ again: #endif #ifdef CONFIG_X86_64 - "mov %c[rbx](%[vcpu]), %%rbx \n\t" - "mov %c[rcx](%[vcpu]), %%rcx \n\t" - "mov %c[rdx](%[vcpu]), %%rdx \n\t" - "mov %c[rsi](%[vcpu]), %%rsi \n\t" - "mov %c[rdi](%[vcpu]), %%rdi \n\t" - "mov %c[rbp](%[vcpu]), %%rbp \n\t" - "mov %c[r8](%[vcpu]), %%r8 \n\t" - "mov %c[r9](%[vcpu]), %%r9 \n\t" - "mov %c[r10](%[vcpu]), %%r10 \n\t" 
- "mov %c[r11](%[vcpu]), %%r11 \n\t" - "mov %c[r12](%[vcpu]), %%r12 \n\t" - "mov %c[r13](%[vcpu]), %%r13 \n\t" - "mov %c[r14](%[vcpu]), %%r14 \n\t" - "mov %c[r15](%[vcpu]), %%r15 \n\t" + "mov %c[rbx](%[svm]), %%rbx \n\t" + "mov %c[rcx](%[svm]), %%rcx \n\t" + "mov %c[rdx](%[svm]), %%rdx \n\t" + "mov %c[rsi](%[svm]), %%rsi \n\t" + "mov %c[rdi](%[svm]), %%rdi \n\t" + "mov %c[rbp](%[svm]), %%rbp \n\t" + "mov %c[r8](%[svm]), %%r8 \n\t" + "mov %c[r9](%[svm]), %%r9 \n\t" + "mov %c[r10](%[svm]), %%r10 \n\t" + "mov %c[r11](%[svm]), %%r11 \n\t" + "mov %c[r12](%[svm]), %%r12 \n\t" + "mov %c[r13](%[svm]), %%r13 \n\t" + "mov %c[r14](%[svm]), %%r14 \n\t" + "mov %c[r15](%[svm]), %%r15 \n\t" #else - "mov %c[rbx](%[vcpu]), %%ebx \n\t" - "mov %c[rcx](%[vcpu]), %%ecx \n\t" - "mov %c[rdx](%[vcpu]), %%edx \n\t" - "mov %c[rsi](%[vcpu]), %%esi \n\t" - "mov %c[rdi](%[vcpu]), %%edi \n\t" - "mov %c[rbp](%[vcpu]), %%ebp \n\t" + "mov %c[rbx](%[svm]), %%ebx \n\t" + "mov %c[rcx](%[svm]), %%ecx \n\t" + "mov %c[rdx](%[svm]), %%edx \n\t" + "mov %c[rsi](%[svm]), %%esi \n\t" + "mov %c[rdi](%[svm]), %%edi \n\t" + "mov %c[rbp](%[svm]), %%ebp \n\t" #endif #ifdef CONFIG_X86_64 /* Enter guest mode */ "push %%rax \n\t" - "mov %c[svm](%[vcpu]), %%rax \n\t" - "mov %c[vmcb](%%rax), %%rax \n\t" + "mov %c[vmcb](%[svm]), %%rax \n\t" SVM_VMLOAD "\n\t" SVM_VMRUN "\n\t" SVM_VMSAVE "\n\t" @@ -1626,8 +1624,7 @@ again: #else /* Enter guest mode */ "push %%eax \n\t" - "mov %c[svm](%[vcpu]), %%eax \n\t" - "mov %c[vmcb](%%eax), %%eax \n\t" + "mov %c[vmcb](%[svm]), %%eax \n\t" SVM_VMLOAD "\n\t" SVM_VMRUN "\n\t" SVM_VMSAVE "\n\t" @@ -1636,55 +1633,54 @@ again: /* Save guest registers, load host registers */ #ifdef CONFIG_X86_64 - "mov %%rbx, %c[rbx](%[vcpu]) \n\t" - "mov %%rcx, %c[rcx](%[vcpu]) \n\t" - "mov %%rdx, %c[rdx](%[vcpu]) \n\t" - "mov %%rsi, %c[rsi](%[vcpu]) \n\t" - "mov %%rdi, %c[rdi](%[vcpu]) \n\t" - "mov %%rbp, %c[rbp](%[vcpu]) \n\t" - "mov %%r8, %c[r8](%[vcpu]) \n\t" - "mov %%r9, %c[r9](%[vcpu]) \n\t" - "mov 
%%r10, %c[r10](%[vcpu]) \n\t" - "mov %%r11, %c[r11](%[vcpu]) \n\t" - "mov %%r12, %c[r12](%[vcpu]) \n\t" - "mov %%r13, %c[r13](%[vcpu]) \n\t" - "mov %%r14, %c[r14](%[vcpu]) \n\t" - "mov %%r15, %c[r15](%[vcpu]) \n\t" + "mov %%rbx, %c[rbx](%[svm]) \n\t" + "mov %%rcx, %c[rcx](%[svm]) \n\t" + "mov %%rdx, %c[rdx](%[svm]) \n\t" + "mov %%rsi, %c[rsi](%[svm]) \n\t" + "mov %%rdi, %c[rdi](%[svm]) \n\t" + "mov %%rbp, %c[rbp](%[svm]) \n\t" + "mov %%r8, %c[r8](%[svm]) \n\t" + "mov %%r9, %c[r9](%[svm]) \n\t" + "mov %%r10, %c[r10](%[svm]) \n\t" + "mov %%r11, %c[r11](%[svm]) \n\t" + "mov %%r12, %c[r12](%[svm]) \n\t" + "mov %%r13, %c[r13](%[svm]) \n\t" + "mov %%r14, %c[r14](%[svm]) \n\t" + "mov %%r15, %c[r15](%[svm]) \n\t" "pop %%r15; pop %%r14; pop %%r13; pop %%r12;" "pop %%r11; pop %%r10; pop %%r9; pop %%r8;" "pop %%rbp; pop %%rdi; pop %%rsi;" "pop %%rdx; pop %%rcx; pop %%rbx; \n\t" #else - "mov %%ebx, %c[rbx](%[vcpu]) \n\t" - "mov %%ecx, %c[rcx](%[vcpu]) \n\t" - "mov %%edx, %c[rdx](%[vcpu]) \n\t" - "mov %%esi, %c[rsi](%[vcpu]) \n\t" - "mov %%edi, %c[rdi](%[vcpu]) \n\t" - "mov %%ebp, %c[rbp](%[vcpu]) \n\t" + "mov %%ebx, %c[rbx](%[svm]) \n\t" + "mov %%ecx, %c[rcx](%[svm]) \n\t" + "mov %%edx, %c[rdx](%[svm]) \n\t" + "mov %%esi, %c[rsi](%[svm]) \n\t" + "mov %%edi, %c[rdi](%[svm]) \n\t" + "mov %%ebp, %c[rbp](%[svm]) \n\t" "pop %%ebp; pop %%edi; pop %%esi;" "pop %%edx; pop %%ecx; pop %%ebx; \n\t" #endif : - : [vcpu]"a"(vcpu), - [svm]"i"(offsetof(struct kvm_vcpu, _priv)), + : [svm]"a"(svm), [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), - [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])), - [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])), - [rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])), - [rsi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RSI])), - [rdi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDI])), - [rbp]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBP])) + [rbx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBX])), + [rcx]"i"(offsetof(struct 
vcpu_svm,vcpu.regs[VCPU_REGS_RCX])), + [rdx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDX])), + [rsi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RSI])), + [rdi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDI])), + [rbp]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBP])) #ifdef CONFIG_X86_64 - ,[r8 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R8 ])), - [r9 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R9 ])), - [r10]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R10])), - [r11]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R11])), - [r12]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R12])), - [r13]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R13])), - [r14]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R14])), - [r15]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R15])) + ,[r8 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R8])), + [r9 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R9 ])), + [r10]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R10])), + [r11]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R11])), + [r12]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R12])), + [r13]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R13])), + [r14]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R14])), + [r15]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R15])) #endif : "cc", "memory" ); @@ -1865,7 +1861,6 @@ static struct kvm_arch_ops svm_arch_ops .run = svm_vcpu_run, .skip_emulated_instruction = skip_emulated_instruction, - .vcpu_setup = svm_vcpu_setup, .patch_hypercall = svm_patch_hypercall, }; diff -r d7a87af170b0 drivers/kvm/vmx.c --- a/drivers/kvm/vmx.c Fri Jul 27 16:15:19 2007 +1000 +++ b/drivers/kvm/vmx.c Fri Jul 27 16:18:36 2007 +1000 @@ -39,7 +39,7 @@ struct vmcs { }; struct vcpu_vmx { - struct kvm_vcpu *vcpu; + struct kvm_vcpu vcpu; int launched; struct kvm_msr_entry *guest_msrs; struct kvm_msr_entry *host_msrs; @@ -60,7 +60,7 @@ struct vcpu_vmx { static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) { - return (struct 
vcpu_vmx*)vcpu->_priv; + return container_of(vcpu, struct vcpu_vmx, vcpu); } static int init_rmode_tss(struct kvm *kvm); @@ -2294,46 +2294,62 @@ static void vmx_free_vmcs(struct kvm_vcp static void vmx_free_vcpu(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + vmx_free_vmcs(vcpu); -} - -static int vmx_create_vcpu(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx; - - vmx = kzalloc(sizeof(*vmx), GFP_KERNEL); + kfree(vmx->host_msrs); + kfree(vmx->guest_msrs); + kvm_vcpu_uninit(vcpu); + kfree(vmx); +} + +static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) +{ + int err; + struct vcpu_vmx *vmx = kzalloc(sizeof(*vmx), GFP_KERNEL); + if (!vmx) - return -ENOMEM; + return ERR_PTR(-ENOMEM); + + err = kvm_vcpu_init(&vmx->vcpu, kvm, id); + if (err) + goto free_vcpu; vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!vmx->guest_msrs) - goto out_free; + if (!vmx->guest_msrs) { + err = -ENOMEM; + goto uninit_vcpu; + } vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!vmx->host_msrs) - goto out_free; + goto free_guest_msrs; vmx->vmcs = alloc_vmcs(); if (!vmx->vmcs) - goto out_free; + goto free_msrs; vmcs_clear(vmx->vmcs); - vmx->vcpu = vcpu; - vcpu->_priv = vmx; - - return 0; - -out_free: - if (vmx->host_msrs) - kfree(vmx->host_msrs); - - if (vmx->guest_msrs) - kfree(vmx->guest_msrs); - + vmx_vcpu_load(&vmx->vcpu); + err = vmx_vcpu_setup(&vmx->vcpu); + vmx_vcpu_put(&vmx->vcpu); + if (err) + goto free_vmcs; + + return &vmx->vcpu; + +free_vmcs: + free_vmcs(vmx->vmcs); +free_msrs: + kfree(vmx->host_msrs); +free_guest_msrs: + kfree(vmx->guest_msrs); +uninit_vcpu: + kvm_vcpu_uninit(&vmx->vcpu); +free_vcpu: kfree(vmx); - - return -ENOMEM; + return ERR_PTR(err); } static struct kvm_arch_ops vmx_arch_ops = { @@ -2381,7 +2397,6 @@ static struct kvm_arch_ops vmx_arch_ops .run = vmx_vcpu_run, .skip_emulated_instruction = skip_emulated_instruction, - .vcpu_setup = vmx_vcpu_setup, .patch_hypercall = vmx_patch_hypercall, }; 
^ permalink raw reply [flat|nested] 12+ messages in thread
[parent not found: <1185519330.12151.50.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>]
* Re: [PATCH 2/2] Dynamically allocate vcpus [not found] ` <1185519330.12151.50.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org> @ 2007-07-27 7:16 ` Rusty Russell [not found] ` <1185520617.12151.53.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org> 0 siblings, 1 reply; 12+ messages in thread From: Rusty Russell @ 2007-07-27 7:16 UTC (permalink / raw) To: Gregory Haskins; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f On Fri, 2007-07-27 at 16:56 +1000, Rusty Russell wrote: > This patch converts the vcpus array in "struct kvm" to a linked list > of VCPUs ... and here's the version which leaves it as an array (of pointers). Oops. == Dynamically allocate vcpus This patch converts the vcpus array in "struct kvm" to a pointer array, and changes the "vcpu_create" and "vcpu_setup" hooks into one "vcpu_create" call which does the allocation and initialization of the vcpu (calling back into the kvm_vcpu_init core helper). It is untested on SMP or SVM, and there are more possible cleanups in svm.c and vmx.c, but I was being cautious. 
Signed-off-by: Rusty Russell <rusty-8n+1lVoiYb80n/F98K4Iww@public.gmane.org> diff -r 52331a1d5569 drivers/kvm/kvm.h --- a/drivers/kvm/kvm.h Fri Jul 27 16:50:49 2007 +1000 +++ b/drivers/kvm/kvm.h Fri Jul 27 17:09:03 2007 +1000 @@ -300,10 +300,8 @@ void kvm_io_bus_register_dev(struct kvm_ struct kvm_io_device *dev); struct kvm_vcpu { - int valid; struct kvm *kvm; int vcpu_id; - void *_priv; struct mutex mutex; int cpu; u64 host_tsc; @@ -404,8 +402,7 @@ struct kvm { struct list_head active_mmu_pages; int n_free_mmu_pages; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; - int nvcpus; - struct kvm_vcpu vcpus[KVM_MAX_VCPUS]; + struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; int memory_config_version; int busy; unsigned long rmap_overflow; @@ -428,7 +425,8 @@ struct kvm_arch_ops { int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ - int (*vcpu_create)(struct kvm_vcpu *vcpu); + /* Create, but do not attach this VCPU */ + struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); void (*vcpu_free)(struct kvm_vcpu *vcpu); void (*vcpu_load)(struct kvm_vcpu *vcpu); @@ -470,7 +468,6 @@ struct kvm_arch_ops { void (*inject_gp)(struct kvm_vcpu *vcpu, unsigned err_code); int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); - int (*vcpu_setup)(struct kvm_vcpu *vcpu); void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*patch_hypercall)(struct kvm_vcpu *vcpu, unsigned char *hypercall_addr); @@ -480,6 +477,9 @@ extern struct kvm_arch_ops *kvm_arch_ops #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) #define vcpu_printf(vcpu, fmt...) 
kvm_printf(vcpu->kvm, fmt) + +int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); +void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module); void kvm_exit_arch(void); diff -r 52331a1d5569 drivers/kvm/kvm_main.c --- a/drivers/kvm/kvm_main.c Fri Jul 27 16:50:49 2007 +1000 +++ b/drivers/kvm/kvm_main.c Fri Jul 27 17:13:45 2007 +1000 @@ -266,8 +266,10 @@ void kvm_flush_remote_tlbs(struct kvm *k atomic_set(&completed, 0); cpus_clear(cpus); needed = 0; - for (i = 0; i < kvm->nvcpus; ++i) { - vcpu = &kvm->vcpus[i]; + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = kvm->vcpus[i]; + if (!vcpu) + continue; if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests)) continue; cpu = vcpu->cpu; @@ -291,10 +293,62 @@ void kvm_flush_remote_tlbs(struct kvm *k } } +int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) +{ + struct page *page; + int r; + + mutex_init(&vcpu->mutex); + vcpu->cpu = -1; + vcpu->mmu.root_hpa = INVALID_PAGE; + vcpu->kvm = kvm; + vcpu->vcpu_id = id; + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) { + r = -ENOMEM; + goto fail; + } + vcpu->run = page_address(page); + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) { + r = -ENOMEM; + goto fail_free_run; + } + vcpu->pio_data = page_address(page); + + vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, + FX_IMAGE_ALIGN); + vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; + + r = kvm_mmu_create(vcpu); + if (r < 0) + goto fail_free_pio_data; + + return 0; + +fail_free_pio_data: + free_page((unsigned long)vcpu->pio_data); +fail_free_run: + free_page((unsigned long)vcpu->run); +fail: + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(kvm_vcpu_init); + +void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + kvm_mmu_destroy(vcpu); + free_page((unsigned long)vcpu->pio_data); + free_page((unsigned long)vcpu->run); + +} +EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); + static struct kvm *kvm_create_vm(void) { struct kvm *kvm = 
kzalloc(sizeof(struct kvm), GFP_KERNEL); - int i; if (!kvm) return ERR_PTR(-ENOMEM); @@ -303,14 +357,6 @@ static struct kvm *kvm_create_vm(void) spin_lock_init(&kvm->lock); INIT_LIST_HEAD(&kvm->active_mmu_pages); kvm_io_bus_init(&kvm->mmio_bus); - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - struct kvm_vcpu *vcpu = &kvm->vcpus[i]; - - mutex_init(&vcpu->mutex); - vcpu->cpu = -1; - vcpu->kvm = kvm; - vcpu->mmu.root_hpa = INVALID_PAGE; - } spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); @@ -367,28 +413,9 @@ static void free_pio_guest_pages(struct static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { - if (!vcpu->valid) - return; - vcpu_load(vcpu); kvm_mmu_unload(vcpu); vcpu_put(vcpu); -} - -static void kvm_free_vcpu(struct kvm_vcpu *vcpu) -{ - if (!vcpu->valid) - return; - - vcpu_load(vcpu); - kvm_mmu_destroy(vcpu); - vcpu_put(vcpu); - kvm_arch_ops->vcpu_free(vcpu); - free_page((unsigned long)vcpu->run); - vcpu->run = NULL; - free_page((unsigned long)vcpu->pio_data); - vcpu->pio_data = NULL; - free_pio_guest_pages(vcpu); } static void kvm_free_vcpus(struct kvm *kvm) @@ -399,9 +426,15 @@ static void kvm_free_vcpus(struct kvm *k * Unpin any mmu pages first. 
*/ for (i = 0; i < KVM_MAX_VCPUS; ++i) - kvm_unload_vcpu_mmu(&kvm->vcpus[i]); - for (i = 0; i < KVM_MAX_VCPUS; ++i) - kvm_free_vcpu(&kvm->vcpus[i]); + if (kvm->vcpus[i]) + kvm_unload_vcpu_mmu(kvm->vcpus[i]); + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm_arch_ops->vcpu_free(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } + } + } static int kvm_dev_release(struct inode *inode, struct file *filp) @@ -2373,77 +2406,47 @@ static int kvm_vm_ioctl_create_vcpu(stru { int r; struct kvm_vcpu *vcpu; - struct page *page; - - r = -EINVAL; + if (!valid_vcpu(n)) - goto out; - - vcpu = &kvm->vcpus[n]; - vcpu->vcpu_id = n; - - mutex_lock(&vcpu->mutex); - - if (vcpu->valid) { - mutex_unlock(&vcpu->mutex); - return -EEXIST; - } - - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - r = -ENOMEM; - if (!page) - goto out_unlock; - vcpu->run = page_address(page); - - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - r = -ENOMEM; - if (!page) - goto out_free_run; - vcpu->pio_data = page_address(page); - - vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, - FX_IMAGE_ALIGN); - vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; - vcpu->cr0 = 0x10; - - r = kvm_arch_ops->vcpu_create(vcpu); + return -EINVAL; + + vcpu = kvm_arch_ops->vcpu_create(kvm, n); + if (IS_ERR(vcpu)) + return PTR_ERR(vcpu); + + vcpu_load(vcpu); + r = kvm_mmu_setup(vcpu); + vcpu_put(vcpu); if (r < 0) - goto out_free_vcpus; - - r = kvm_mmu_create(vcpu); - if (r < 0) - goto out_free_vcpus; - - kvm_arch_ops->vcpu_load(vcpu); - r = kvm_mmu_setup(vcpu); - if (r >= 0) - r = kvm_arch_ops->vcpu_setup(vcpu); - vcpu_put(vcpu); - - if (r < 0) - goto out_free_vcpus; - + goto free_vcpu; + + spin_lock(&kvm->lock); + if (kvm->vcpus[n]) { + r = -EEXIST; + spin_unlock(&kvm->lock); + goto mmu_unload; + } + kvm->vcpus[n] = vcpu; + spin_unlock(&kvm->lock); + + /* Now it's all set up, let userspace reach it */ r = create_vcpu_fd(vcpu); if (r < 0) - goto out_free_vcpus; - - spin_lock(&kvm_lock); - if (n >= kvm->nvcpus) 
- kvm->nvcpus = n + 1; - spin_unlock(&kvm_lock); - - vcpu->valid = 1; - + goto unlink; return r; -out_free_vcpus: - kvm_free_vcpu(vcpu); -out_free_run: - free_page((unsigned long)vcpu->run); - vcpu->run = NULL; -out_unlock: - mutex_unlock(&vcpu->mutex); -out: +unlink: + spin_lock(&kvm->lock); + kvm->vcpus[n] = NULL; + spin_unlock(&kvm->lock); + +mmu_unload: + vcpu_load(vcpu); + kvm_mmu_unload(vcpu); + vcpu_put(vcpu); + +free_vcpu: + kvm_arch_ops->vcpu_free(vcpu); return r; } @@ -2936,9 +2939,12 @@ static void decache_vcpus_on_cpu(int cpu int i; spin_lock(&kvm_lock); - list_for_each_entry(vm, &vm_list, vm_list) + list_for_each_entry(vm, &vm_list, vm_list) { + spin_lock(&vm->lock); for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = &vm->vcpus[i]; + vcpu = vm->vcpus[i]; + if (!vcpu) + continue; /* * If the vcpu is locked, then it is running on some * other cpu and therefore it is not cached on the @@ -2955,6 +2961,8 @@ static void decache_vcpus_on_cpu(int cpu mutex_unlock(&vcpu->mutex); } } + spin_unlock(&vm->lock); + } spin_unlock(&kvm_lock); } @@ -3075,8 +3083,9 @@ static u64 stat_get(void *_offset) spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = &kvm->vcpus[i]; - total += *(u32 *)((void *)vcpu + offset); + vcpu = kvm->vcpus[i]; + if (vcpu) + total += *(u32 *)((void *)vcpu + offset); } spin_unlock(&kvm_lock); return total; diff -r 52331a1d5569 drivers/kvm/kvm_svm.h --- a/drivers/kvm/kvm_svm.h Fri Jul 27 16:50:49 2007 +1000 +++ b/drivers/kvm/kvm_svm.h Fri Jul 27 17:09:03 2007 +1000 @@ -23,7 +23,7 @@ struct kvm_vcpu; struct kvm_vcpu; struct vcpu_svm { - struct kvm_vcpu *vcpu; + struct kvm_vcpu vcpu; struct vmcb *vmcb; unsigned long vmcb_pa; struct svm_cpu_data *svm_data; diff -r 52331a1d5569 drivers/kvm/svm.c --- a/drivers/kvm/svm.c Fri Jul 27 16:50:49 2007 +1000 +++ b/drivers/kvm/svm.c Fri Jul 27 17:09:03 2007 +1000 @@ -51,7 +51,7 @@ MODULE_LICENSE("GPL"); static inline struct vcpu_svm *to_svm(struct 
kvm_vcpu *vcpu) { - return (struct vcpu_svm*)vcpu->_priv; + return container_of(vcpu, struct vcpu_svm, vcpu); } unsigned long iopm_base; @@ -464,11 +464,6 @@ static void init_sys_seg(struct vmcb_seg seg->attrib = SVM_SELECTOR_P_MASK | type; seg->limit = 0xffff; seg->base = 0; -} - -static int svm_vcpu_setup(struct kvm_vcpu *vcpu) -{ - return 0; } static void init_vmcb(struct vmcb *vmcb) @@ -576,19 +571,27 @@ static void init_vmcb(struct vmcb *vmcb) /* rdx = ?? */ } -static int svm_create_vcpu(struct kvm_vcpu *vcpu) +static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) { struct vcpu_svm *svm; struct page *page; - int r; - - r = -ENOMEM; + int err; + svm = kzalloc(sizeof *svm, GFP_KERNEL); - if (!svm) - goto out1; + if (!svm) { + err = -ENOMEM; + goto out; + } + + err = kvm_vcpu_init(&svm->vcpu, kvm, id); + if (err) + goto free_svm; + page = alloc_page(GFP_KERNEL); - if (!page) - goto out2; + if (!page) { + err = -ENOMEM; + goto uninit; + } svm->vmcb = page_address(page); clear_page(svm->vmcb); @@ -597,33 +600,29 @@ static int svm_create_vcpu(struct kvm_vc memset(svm->db_regs, 0, sizeof(svm->db_regs)); init_vmcb(svm->vmcb); - svm->vcpu = vcpu; - vcpu->_priv = svm; - - fx_init(vcpu); - vcpu->fpu_active = 1; - vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; - if (vcpu->vcpu_id == 0) - vcpu->apic_base |= MSR_IA32_APICBASE_BSP; - - return 0; - -out2: + fx_init(&svm->vcpu); + svm->vcpu.fpu_active = 1; + svm->vcpu.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; + if (svm->vcpu.vcpu_id == 0) + svm->vcpu.apic_base |= MSR_IA32_APICBASE_BSP; + + return &svm->vcpu; + +uninit: + kvm_vcpu_uninit(&svm->vcpu); +free_svm: kfree(svm); -out1: - return r; +out: + return ERR_PTR(err); } static void svm_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - if (!svm) - return; - if (svm->vmcb) - __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); + __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); + kvm_vcpu_uninit(vcpu); 
kfree(svm); - vcpu->_priv = NULL; } static void svm_vcpu_load(struct kvm_vcpu *vcpu) @@ -1591,34 +1590,33 @@ again: #endif #ifdef CONFIG_X86_64 - "mov %c[rbx](%[vcpu]), %%rbx \n\t" - "mov %c[rcx](%[vcpu]), %%rcx \n\t" - "mov %c[rdx](%[vcpu]), %%rdx \n\t" - "mov %c[rsi](%[vcpu]), %%rsi \n\t" - "mov %c[rdi](%[vcpu]), %%rdi \n\t" - "mov %c[rbp](%[vcpu]), %%rbp \n\t" - "mov %c[r8](%[vcpu]), %%r8 \n\t" - "mov %c[r9](%[vcpu]), %%r9 \n\t" - "mov %c[r10](%[vcpu]), %%r10 \n\t" - "mov %c[r11](%[vcpu]), %%r11 \n\t" - "mov %c[r12](%[vcpu]), %%r12 \n\t" - "mov %c[r13](%[vcpu]), %%r13 \n\t" - "mov %c[r14](%[vcpu]), %%r14 \n\t" - "mov %c[r15](%[vcpu]), %%r15 \n\t" + "mov %c[rbx](%[svm]), %%rbx \n\t" + "mov %c[rcx](%[svm]), %%rcx \n\t" + "mov %c[rdx](%[svm]), %%rdx \n\t" + "mov %c[rsi](%[svm]), %%rsi \n\t" + "mov %c[rdi](%[svm]), %%rdi \n\t" + "mov %c[rbp](%[svm]), %%rbp \n\t" + "mov %c[r8](%[svm]), %%r8 \n\t" + "mov %c[r9](%[svm]), %%r9 \n\t" + "mov %c[r10](%[svm]), %%r10 \n\t" + "mov %c[r11](%[svm]), %%r11 \n\t" + "mov %c[r12](%[svm]), %%r12 \n\t" + "mov %c[r13](%[svm]), %%r13 \n\t" + "mov %c[r14](%[svm]), %%r14 \n\t" + "mov %c[r15](%[svm]), %%r15 \n\t" #else - "mov %c[rbx](%[vcpu]), %%ebx \n\t" - "mov %c[rcx](%[vcpu]), %%ecx \n\t" - "mov %c[rdx](%[vcpu]), %%edx \n\t" - "mov %c[rsi](%[vcpu]), %%esi \n\t" - "mov %c[rdi](%[vcpu]), %%edi \n\t" - "mov %c[rbp](%[vcpu]), %%ebp \n\t" + "mov %c[rbx](%[svm]), %%ebx \n\t" + "mov %c[rcx](%[svm]), %%ecx \n\t" + "mov %c[rdx](%[svm]), %%edx \n\t" + "mov %c[rsi](%[svm]), %%esi \n\t" + "mov %c[rdi](%[svm]), %%edi \n\t" + "mov %c[rbp](%[svm]), %%ebp \n\t" #endif #ifdef CONFIG_X86_64 /* Enter guest mode */ "push %%rax \n\t" - "mov %c[svm](%[vcpu]), %%rax \n\t" - "mov %c[vmcb](%%rax), %%rax \n\t" + "mov %c[vmcb](%[svm]), %%rax \n\t" SVM_VMLOAD "\n\t" SVM_VMRUN "\n\t" SVM_VMSAVE "\n\t" @@ -1626,8 +1624,7 @@ again: #else /* Enter guest mode */ "push %%eax \n\t" - "mov %c[svm](%[vcpu]), %%eax \n\t" - "mov %c[vmcb](%%eax), %%eax \n\t" + "mov 
%c[vmcb](%[svm]), %%eax \n\t" SVM_VMLOAD "\n\t" SVM_VMRUN "\n\t" SVM_VMSAVE "\n\t" @@ -1636,55 +1633,54 @@ again: /* Save guest registers, load host registers */ #ifdef CONFIG_X86_64 - "mov %%rbx, %c[rbx](%[vcpu]) \n\t" - "mov %%rcx, %c[rcx](%[vcpu]) \n\t" - "mov %%rdx, %c[rdx](%[vcpu]) \n\t" - "mov %%rsi, %c[rsi](%[vcpu]) \n\t" - "mov %%rdi, %c[rdi](%[vcpu]) \n\t" - "mov %%rbp, %c[rbp](%[vcpu]) \n\t" - "mov %%r8, %c[r8](%[vcpu]) \n\t" - "mov %%r9, %c[r9](%[vcpu]) \n\t" - "mov %%r10, %c[r10](%[vcpu]) \n\t" - "mov %%r11, %c[r11](%[vcpu]) \n\t" - "mov %%r12, %c[r12](%[vcpu]) \n\t" - "mov %%r13, %c[r13](%[vcpu]) \n\t" - "mov %%r14, %c[r14](%[vcpu]) \n\t" - "mov %%r15, %c[r15](%[vcpu]) \n\t" + "mov %%rbx, %c[rbx](%[svm]) \n\t" + "mov %%rcx, %c[rcx](%[svm]) \n\t" + "mov %%rdx, %c[rdx](%[svm]) \n\t" + "mov %%rsi, %c[rsi](%[svm]) \n\t" + "mov %%rdi, %c[rdi](%[svm]) \n\t" + "mov %%rbp, %c[rbp](%[svm]) \n\t" + "mov %%r8, %c[r8](%[svm]) \n\t" + "mov %%r9, %c[r9](%[svm]) \n\t" + "mov %%r10, %c[r10](%[svm]) \n\t" + "mov %%r11, %c[r11](%[svm]) \n\t" + "mov %%r12, %c[r12](%[svm]) \n\t" + "mov %%r13, %c[r13](%[svm]) \n\t" + "mov %%r14, %c[r14](%[svm]) \n\t" + "mov %%r15, %c[r15](%[svm]) \n\t" "pop %%r15; pop %%r14; pop %%r13; pop %%r12;" "pop %%r11; pop %%r10; pop %%r9; pop %%r8;" "pop %%rbp; pop %%rdi; pop %%rsi;" "pop %%rdx; pop %%rcx; pop %%rbx; \n\t" #else - "mov %%ebx, %c[rbx](%[vcpu]) \n\t" - "mov %%ecx, %c[rcx](%[vcpu]) \n\t" - "mov %%edx, %c[rdx](%[vcpu]) \n\t" - "mov %%esi, %c[rsi](%[vcpu]) \n\t" - "mov %%edi, %c[rdi](%[vcpu]) \n\t" - "mov %%ebp, %c[rbp](%[vcpu]) \n\t" + "mov %%ebx, %c[rbx](%[svm]) \n\t" + "mov %%ecx, %c[rcx](%[svm]) \n\t" + "mov %%edx, %c[rdx](%[svm]) \n\t" + "mov %%esi, %c[rsi](%[svm]) \n\t" + "mov %%edi, %c[rdi](%[svm]) \n\t" + "mov %%ebp, %c[rbp](%[svm]) \n\t" "pop %%ebp; pop %%edi; pop %%esi;" "pop %%edx; pop %%ecx; pop %%ebx; \n\t" #endif : - : [vcpu]"a"(vcpu), - [svm]"i"(offsetof(struct kvm_vcpu, _priv)), + : [svm]"a"(svm), 
[vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), - [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])), - [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])), - [rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])), - [rsi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RSI])), - [rdi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDI])), - [rbp]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBP])) + [rbx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBX])), + [rcx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RCX])), + [rdx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDX])), + [rsi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RSI])), + [rdi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDI])), + [rbp]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBP])) #ifdef CONFIG_X86_64 - ,[r8 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R8 ])), - [r9 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R9 ])), - [r10]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R10])), - [r11]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R11])), - [r12]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R12])), - [r13]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R13])), - [r14]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R14])), - [r15]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R15])) + ,[r8 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R8])), + [r9 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R9 ])), + [r10]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R10])), + [r11]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R11])), + [r12]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R12])), + [r13]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R13])), + [r14]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R14])), + [r15]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R15])) #endif : "cc", "memory" ); @@ -1865,7 +1861,6 @@ static struct kvm_arch_ops svm_arch_ops .run = svm_vcpu_run, .skip_emulated_instruction = skip_emulated_instruction, - .vcpu_setup = svm_vcpu_setup, 
.patch_hypercall = svm_patch_hypercall, }; diff -r 52331a1d5569 drivers/kvm/vmx.c --- a/drivers/kvm/vmx.c Fri Jul 27 16:50:49 2007 +1000 +++ b/drivers/kvm/vmx.c Fri Jul 27 17:09:03 2007 +1000 @@ -39,7 +39,7 @@ struct vmcs { }; struct vcpu_vmx { - struct kvm_vcpu *vcpu; + struct kvm_vcpu vcpu; int launched; struct kvm_msr_entry *guest_msrs; struct kvm_msr_entry *host_msrs; @@ -60,7 +60,7 @@ struct vcpu_vmx { static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) { - return (struct vcpu_vmx*)vcpu->_priv; + return container_of(vcpu, struct vcpu_vmx, vcpu); } static int init_rmode_tss(struct kvm *kvm); @@ -2302,46 +2302,62 @@ static void vmx_free_vmcs(struct kvm_vcp static void vmx_free_vcpu(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + vmx_free_vmcs(vcpu); -} - -static int vmx_create_vcpu(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx; - - vmx = kzalloc(sizeof(*vmx), GFP_KERNEL); + kfree(vmx->host_msrs); + kfree(vmx->guest_msrs); + kvm_vcpu_uninit(vcpu); + kfree(vmx); +} + +static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) +{ + int err; + struct vcpu_vmx *vmx = kzalloc(sizeof(*vmx), GFP_KERNEL); + if (!vmx) - return -ENOMEM; + return ERR_PTR(-ENOMEM); + + err = kvm_vcpu_init(&vmx->vcpu, kvm, id); + if (err) + goto free_vcpu; vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!vmx->guest_msrs) - goto out_free; + if (!vmx->guest_msrs) { + err = -ENOMEM; + goto uninit_vcpu; + } vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!vmx->host_msrs) - goto out_free; + goto free_guest_msrs; vmx->vmcs = alloc_vmcs(); if (!vmx->vmcs) - goto out_free; + goto free_msrs; vmcs_clear(vmx->vmcs); - vmx->vcpu = vcpu; - vcpu->_priv = vmx; - - return 0; - -out_free: - if (vmx->host_msrs) - kfree(vmx->host_msrs); - - if (vmx->guest_msrs) - kfree(vmx->guest_msrs); - + vmx_vcpu_load(&vmx->vcpu); + err = vmx_vcpu_setup(&vmx->vcpu); + vmx_vcpu_put(&vmx->vcpu); + if (err) + goto free_vmcs; + + return &vmx->vcpu; + +free_vmcs: + 
free_vmcs(vmx->vmcs); +free_msrs: + kfree(vmx->host_msrs); +free_guest_msrs: + kfree(vmx->guest_msrs); +uninit_vcpu: + kvm_vcpu_uninit(&vmx->vcpu); +free_vcpu: kfree(vmx); - - return -ENOMEM; + return ERR_PTR(err); } static struct kvm_arch_ops vmx_arch_ops = { @@ -2389,7 +2405,6 @@ static struct kvm_arch_ops vmx_arch_ops .run = vmx_vcpu_run, .skip_emulated_instruction = skip_emulated_instruction, - .vcpu_setup = vmx_vcpu_setup, .patch_hypercall = vmx_patch_hypercall, }; ^ permalink raw reply [flat|nested] 12+ messages in thread
[parent not found: <1185520617.12151.53.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>]
* Re: [PATCH 2/2] Dynamically allocate vcpus [not found] ` <1185520617.12151.53.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org> @ 2007-07-27 7:39 ` Jun Koi [not found] ` <fdaac4d50707270039v57024d36ma20f0e4895258c08-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 2007-07-29 8:03 ` Avi Kivity 1 sibling, 1 reply; 12+ messages in thread From: Jun Koi @ 2007-07-27 7:39 UTC (permalink / raw) To: Rusty Russell; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f Hi Rusty, So with this patch, we no longer have limitation on the number of vcpus running in a VM? Thanks, Jun On 7/27/07, Rusty Russell <rusty-8n+1lVoiYb80n/F98K4Iww@public.gmane.org> wrote: > On Fri, 2007-07-27 at 16:56 +1000, Rusty Russell wrote: > > This patch converts the vcpus array in "struct kvm" to a linked list > > of VCPUs > > ... and here's the version which leaves it as an array (of pointers). > Oops. > > == > Dynamically allocate vcpus > > This patch converts the vcpus array in "struct kvm" to a pointer > array, and changes the "vcpu_create" and "vcpu_setup" hooks into one > "vcpu_create" call which does the allocation and initialization of the > vcpu (calling back into the kvm_vcpu_init core helper). > > It is untested on SMP or SVM, and there are more possible cleanups in > svm.c and vmx.c, but I was being cautious. 
> > Signed-off-by: Rusty Russell <rusty-8n+1lVoiYb80n/F98K4Iww@public.gmane.org> > > diff -r 52331a1d5569 drivers/kvm/kvm.h > --- a/drivers/kvm/kvm.h Fri Jul 27 16:50:49 2007 +1000 > +++ b/drivers/kvm/kvm.h Fri Jul 27 17:09:03 2007 +1000 > @@ -300,10 +300,8 @@ void kvm_io_bus_register_dev(struct kvm_ > struct kvm_io_device *dev); > > struct kvm_vcpu { > - int valid; > struct kvm *kvm; > int vcpu_id; > - void *_priv; > struct mutex mutex; > int cpu; > u64 host_tsc; > @@ -404,8 +402,7 @@ struct kvm { > struct list_head active_mmu_pages; > int n_free_mmu_pages; > struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; > - int nvcpus; > - struct kvm_vcpu vcpus[KVM_MAX_VCPUS]; > + struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; > int memory_config_version; > int busy; > unsigned long rmap_overflow; > @@ -428,7 +425,8 @@ struct kvm_arch_ops { > int (*hardware_setup)(void); /* __init */ > void (*hardware_unsetup)(void); /* __exit */ > > - int (*vcpu_create)(struct kvm_vcpu *vcpu); > + /* Create, but do not attach this VCPU */ > + struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); > void (*vcpu_free)(struct kvm_vcpu *vcpu); > > void (*vcpu_load)(struct kvm_vcpu *vcpu); > @@ -470,7 +468,6 @@ struct kvm_arch_ops { > void (*inject_gp)(struct kvm_vcpu *vcpu, unsigned err_code); > > int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); > - int (*vcpu_setup)(struct kvm_vcpu *vcpu); > void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); > void (*patch_hypercall)(struct kvm_vcpu *vcpu, > unsigned char *hypercall_addr); > @@ -480,6 +477,9 @@ extern struct kvm_arch_ops *kvm_arch_ops > > #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) > #define vcpu_printf(vcpu, fmt...) 
kvm_printf(vcpu->kvm, fmt) > + > +int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); > +void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); > > int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module); > void kvm_exit_arch(void); > diff -r 52331a1d5569 drivers/kvm/kvm_main.c > --- a/drivers/kvm/kvm_main.c Fri Jul 27 16:50:49 2007 +1000 > +++ b/drivers/kvm/kvm_main.c Fri Jul 27 17:13:45 2007 +1000 > @@ -266,8 +266,10 @@ void kvm_flush_remote_tlbs(struct kvm *k > atomic_set(&completed, 0); > cpus_clear(cpus); > needed = 0; > - for (i = 0; i < kvm->nvcpus; ++i) { > - vcpu = &kvm->vcpus[i]; > + for (i = 0; i < KVM_MAX_VCPUS; ++i) { > + vcpu = kvm->vcpus[i]; > + if (!vcpu) > + continue; > if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests)) > continue; > cpu = vcpu->cpu; > @@ -291,10 +293,62 @@ void kvm_flush_remote_tlbs(struct kvm *k > } > } > > +int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) > +{ > + struct page *page; > + int r; > + > + mutex_init(&vcpu->mutex); > + vcpu->cpu = -1; > + vcpu->mmu.root_hpa = INVALID_PAGE; > + vcpu->kvm = kvm; > + vcpu->vcpu_id = id; > + > + page = alloc_page(GFP_KERNEL | __GFP_ZERO); > + if (!page) { > + r = -ENOMEM; > + goto fail; > + } > + vcpu->run = page_address(page); > + > + page = alloc_page(GFP_KERNEL | __GFP_ZERO); > + if (!page) { > + r = -ENOMEM; > + goto fail_free_run; > + } > + vcpu->pio_data = page_address(page); > + > + vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, > + FX_IMAGE_ALIGN); > + vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; > + > + r = kvm_mmu_create(vcpu); > + if (r < 0) > + goto fail_free_pio_data; > + > + return 0; > + > +fail_free_pio_data: > + free_page((unsigned long)vcpu->pio_data); > +fail_free_run: > + free_page((unsigned long)vcpu->run); > +fail: > + return -ENOMEM; > +} > +EXPORT_SYMBOL_GPL(kvm_vcpu_init); > + > +void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) > +{ > + kvm_mmu_destroy(vcpu); > + free_page((unsigned 
long)vcpu->pio_data); > + free_page((unsigned long)vcpu->run); > + > +} > +EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); > + > static struct kvm *kvm_create_vm(void) > { > struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); > - int i; > > if (!kvm) > return ERR_PTR(-ENOMEM); > @@ -303,14 +357,6 @@ static struct kvm *kvm_create_vm(void) > spin_lock_init(&kvm->lock); > INIT_LIST_HEAD(&kvm->active_mmu_pages); > kvm_io_bus_init(&kvm->mmio_bus); > - for (i = 0; i < KVM_MAX_VCPUS; ++i) { > - struct kvm_vcpu *vcpu = &kvm->vcpus[i]; > - > - mutex_init(&vcpu->mutex); > - vcpu->cpu = -1; > - vcpu->kvm = kvm; > - vcpu->mmu.root_hpa = INVALID_PAGE; > - } > spin_lock(&kvm_lock); > list_add(&kvm->vm_list, &vm_list); > spin_unlock(&kvm_lock); > @@ -367,28 +413,9 @@ static void free_pio_guest_pages(struct > > static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) > { > - if (!vcpu->valid) > - return; > - > vcpu_load(vcpu); > kvm_mmu_unload(vcpu); > vcpu_put(vcpu); > -} > - > -static void kvm_free_vcpu(struct kvm_vcpu *vcpu) > -{ > - if (!vcpu->valid) > - return; > - > - vcpu_load(vcpu); > - kvm_mmu_destroy(vcpu); > - vcpu_put(vcpu); > - kvm_arch_ops->vcpu_free(vcpu); > - free_page((unsigned long)vcpu->run); > - vcpu->run = NULL; > - free_page((unsigned long)vcpu->pio_data); > - vcpu->pio_data = NULL; > - free_pio_guest_pages(vcpu); > } > > static void kvm_free_vcpus(struct kvm *kvm) > @@ -399,9 +426,15 @@ static void kvm_free_vcpus(struct kvm *k > * Unpin any mmu pages first. 
> */ > for (i = 0; i < KVM_MAX_VCPUS; ++i) > - kvm_unload_vcpu_mmu(&kvm->vcpus[i]); > - for (i = 0; i < KVM_MAX_VCPUS; ++i) > - kvm_free_vcpu(&kvm->vcpus[i]); > + if (kvm->vcpus[i]) > + kvm_unload_vcpu_mmu(kvm->vcpus[i]); > + for (i = 0; i < KVM_MAX_VCPUS; ++i) { > + if (kvm->vcpus[i]) { > + kvm_arch_ops->vcpu_free(kvm->vcpus[i]); > + kvm->vcpus[i] = NULL; > + } > + } > + > } > > static int kvm_dev_release(struct inode *inode, struct file *filp) > @@ -2373,77 +2406,47 @@ static int kvm_vm_ioctl_create_vcpu(stru > { > int r; > struct kvm_vcpu *vcpu; > - struct page *page; > - > - r = -EINVAL; > + > if (!valid_vcpu(n)) > - goto out; > - > - vcpu = &kvm->vcpus[n]; > - vcpu->vcpu_id = n; > - > - mutex_lock(&vcpu->mutex); > - > - if (vcpu->valid) { > - mutex_unlock(&vcpu->mutex); > - return -EEXIST; > - } > - > - page = alloc_page(GFP_KERNEL | __GFP_ZERO); > - r = -ENOMEM; > - if (!page) > - goto out_unlock; > - vcpu->run = page_address(page); > - > - page = alloc_page(GFP_KERNEL | __GFP_ZERO); > - r = -ENOMEM; > - if (!page) > - goto out_free_run; > - vcpu->pio_data = page_address(page); > - > - vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, > - FX_IMAGE_ALIGN); > - vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; > - vcpu->cr0 = 0x10; > - > - r = kvm_arch_ops->vcpu_create(vcpu); > + return -EINVAL; > + > + vcpu = kvm_arch_ops->vcpu_create(kvm, n); > + if (IS_ERR(vcpu)) > + return PTR_ERR(vcpu); > + > + vcpu_load(vcpu); > + r = kvm_mmu_setup(vcpu); > + vcpu_put(vcpu); > if (r < 0) > - goto out_free_vcpus; > - > - r = kvm_mmu_create(vcpu); > - if (r < 0) > - goto out_free_vcpus; > - > - kvm_arch_ops->vcpu_load(vcpu); > - r = kvm_mmu_setup(vcpu); > - if (r >= 0) > - r = kvm_arch_ops->vcpu_setup(vcpu); > - vcpu_put(vcpu); > - > - if (r < 0) > - goto out_free_vcpus; > - > + goto free_vcpu; > + > + spin_lock(&kvm->lock); > + if (kvm->vcpus[n]) { > + r = -EEXIST; > + spin_unlock(&kvm->lock); > + goto mmu_unload; > + } > + kvm->vcpus[n] = vcpu; > + 
spin_unlock(&kvm->lock); > + > + /* Now it's all set up, let userspace reach it */ > r = create_vcpu_fd(vcpu); > if (r < 0) > - goto out_free_vcpus; > - > - spin_lock(&kvm_lock); > - if (n >= kvm->nvcpus) > - kvm->nvcpus = n + 1; > - spin_unlock(&kvm_lock); > - > - vcpu->valid = 1; > - > + goto unlink; > return r; > > -out_free_vcpus: > - kvm_free_vcpu(vcpu); > -out_free_run: > - free_page((unsigned long)vcpu->run); > - vcpu->run = NULL; > -out_unlock: > - mutex_unlock(&vcpu->mutex); > -out: > +unlink: > + spin_lock(&kvm->lock); > + kvm->vcpus[n] = NULL; > + spin_unlock(&kvm->lock); > + > +mmu_unload: > + vcpu_load(vcpu); > + kvm_mmu_unload(vcpu); > + vcpu_put(vcpu); > + > +free_vcpu: > + kvm_arch_ops->vcpu_free(vcpu); > return r; > } > > @@ -2936,9 +2939,12 @@ static void decache_vcpus_on_cpu(int cpu > int i; > > spin_lock(&kvm_lock); > - list_for_each_entry(vm, &vm_list, vm_list) > + list_for_each_entry(vm, &vm_list, vm_list) { > + spin_lock(&vm->lock); > for (i = 0; i < KVM_MAX_VCPUS; ++i) { > - vcpu = &vm->vcpus[i]; > + vcpu = vm->vcpus[i]; > + if (!vcpu) > + continue; > /* > * If the vcpu is locked, then it is running on some > * other cpu and therefore it is not cached on the > @@ -2955,6 +2961,8 @@ static void decache_vcpus_on_cpu(int cpu > mutex_unlock(&vcpu->mutex); > } > } > + spin_unlock(&vm->lock); > + } > spin_unlock(&kvm_lock); > } > > @@ -3075,8 +3083,9 @@ static u64 stat_get(void *_offset) > spin_lock(&kvm_lock); > list_for_each_entry(kvm, &vm_list, vm_list) > for (i = 0; i < KVM_MAX_VCPUS; ++i) { > - vcpu = &kvm->vcpus[i]; > - total += *(u32 *)((void *)vcpu + offset); > + vcpu = kvm->vcpus[i]; > + if (vcpu) > + total += *(u32 *)((void *)vcpu + offset); > } > spin_unlock(&kvm_lock); > return total; > diff -r 52331a1d5569 drivers/kvm/kvm_svm.h > --- a/drivers/kvm/kvm_svm.h Fri Jul 27 16:50:49 2007 +1000 > +++ b/drivers/kvm/kvm_svm.h Fri Jul 27 17:09:03 2007 +1000 > @@ -23,7 +23,7 @@ struct kvm_vcpu; > struct kvm_vcpu; > > struct vcpu_svm { > - struct 
kvm_vcpu *vcpu; > + struct kvm_vcpu vcpu; > struct vmcb *vmcb; > unsigned long vmcb_pa; > struct svm_cpu_data *svm_data; > diff -r 52331a1d5569 drivers/kvm/svm.c > --- a/drivers/kvm/svm.c Fri Jul 27 16:50:49 2007 +1000 > +++ b/drivers/kvm/svm.c Fri Jul 27 17:09:03 2007 +1000 > @@ -51,7 +51,7 @@ MODULE_LICENSE("GPL"); > > static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) > { > - return (struct vcpu_svm*)vcpu->_priv; > + return container_of(vcpu, struct vcpu_svm, vcpu); > } > > unsigned long iopm_base; > @@ -464,11 +464,6 @@ static void init_sys_seg(struct vmcb_seg > seg->attrib = SVM_SELECTOR_P_MASK | type; > seg->limit = 0xffff; > seg->base = 0; > -} > - > -static int svm_vcpu_setup(struct kvm_vcpu *vcpu) > -{ > - return 0; > } > > static void init_vmcb(struct vmcb *vmcb) > @@ -576,19 +571,27 @@ static void init_vmcb(struct vmcb *vmcb) > /* rdx = ?? */ > } > > -static int svm_create_vcpu(struct kvm_vcpu *vcpu) > +static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) > { > struct vcpu_svm *svm; > struct page *page; > - int r; > - > - r = -ENOMEM; > + int err; > + > svm = kzalloc(sizeof *svm, GFP_KERNEL); > - if (!svm) > - goto out1; > + if (!svm) { > + err = -ENOMEM; > + goto out; > + } > + > + err = kvm_vcpu_init(&svm->vcpu, kvm, id); > + if (err) > + goto free_svm; > + > page = alloc_page(GFP_KERNEL); > - if (!page) > - goto out2; > + if (!page) { > + err = -ENOMEM; > + goto uninit; > + } > > svm->vmcb = page_address(page); > clear_page(svm->vmcb); > @@ -597,33 +600,29 @@ static int svm_create_vcpu(struct kvm_vc > memset(svm->db_regs, 0, sizeof(svm->db_regs)); > init_vmcb(svm->vmcb); > > - svm->vcpu = vcpu; > - vcpu->_priv = svm; > - > - fx_init(vcpu); > - vcpu->fpu_active = 1; > - vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; > - if (vcpu->vcpu_id == 0) > - vcpu->apic_base |= MSR_IA32_APICBASE_BSP; > - > - return 0; > - > -out2: > + fx_init(&svm->vcpu); > + svm->vcpu.fpu_active = 1; > + svm->vcpu.apic_base = 0xfee00000 | 
MSR_IA32_APICBASE_ENABLE; > + if (svm->vcpu.vcpu_id == 0) > + svm->vcpu.apic_base |= MSR_IA32_APICBASE_BSP; > + > + return &svm->vcpu; > + > +uninit: > + kvm_vcpu_uninit(&svm->vcpu); > +free_svm: > kfree(svm); > -out1: > - return r; > +out: > + return ERR_PTR(err); > } > > static void svm_free_vcpu(struct kvm_vcpu *vcpu) > { > struct vcpu_svm *svm = to_svm(vcpu); > > - if (!svm) > - return; > - if (svm->vmcb) > - __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); > + __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); > + kvm_vcpu_uninit(vcpu); > kfree(svm); > - vcpu->_priv = NULL; > } > > static void svm_vcpu_load(struct kvm_vcpu *vcpu) > @@ -1591,34 +1590,33 @@ again: > #endif > > #ifdef CONFIG_X86_64 > - "mov %c[rbx](%[vcpu]), %%rbx \n\t" > - "mov %c[rcx](%[vcpu]), %%rcx \n\t" > - "mov %c[rdx](%[vcpu]), %%rdx \n\t" > - "mov %c[rsi](%[vcpu]), %%rsi \n\t" > - "mov %c[rdi](%[vcpu]), %%rdi \n\t" > - "mov %c[rbp](%[vcpu]), %%rbp \n\t" > - "mov %c[r8](%[vcpu]), %%r8 \n\t" > - "mov %c[r9](%[vcpu]), %%r9 \n\t" > - "mov %c[r10](%[vcpu]), %%r10 \n\t" > - "mov %c[r11](%[vcpu]), %%r11 \n\t" > - "mov %c[r12](%[vcpu]), %%r12 \n\t" > - "mov %c[r13](%[vcpu]), %%r13 \n\t" > - "mov %c[r14](%[vcpu]), %%r14 \n\t" > - "mov %c[r15](%[vcpu]), %%r15 \n\t" > + "mov %c[rbx](%[svm]), %%rbx \n\t" > + "mov %c[rcx](%[svm]), %%rcx \n\t" > + "mov %c[rdx](%[svm]), %%rdx \n\t" > + "mov %c[rsi](%[svm]), %%rsi \n\t" > + "mov %c[rdi](%[svm]), %%rdi \n\t" > + "mov %c[rbp](%[svm]), %%rbp \n\t" > + "mov %c[r8](%[svm]), %%r8 \n\t" > + "mov %c[r9](%[svm]), %%r9 \n\t" > + "mov %c[r10](%[svm]), %%r10 \n\t" > + "mov %c[r11](%[svm]), %%r11 \n\t" > + "mov %c[r12](%[svm]), %%r12 \n\t" > + "mov %c[r13](%[svm]), %%r13 \n\t" > + "mov %c[r14](%[svm]), %%r14 \n\t" > + "mov %c[r15](%[svm]), %%r15 \n\t" > #else > - "mov %c[rbx](%[vcpu]), %%ebx \n\t" > - "mov %c[rcx](%[vcpu]), %%ecx \n\t" > - "mov %c[rdx](%[vcpu]), %%edx \n\t" > - "mov %c[rsi](%[vcpu]), %%esi \n\t" > - "mov %c[rdi](%[vcpu]), %%edi \n\t" > - "mov 
%c[rbp](%[vcpu]), %%ebp \n\t" > + "mov %c[rbx](%[svm]), %%ebx \n\t" > + "mov %c[rcx](%[svm]), %%ecx \n\t" > + "mov %c[rdx](%[svm]), %%edx \n\t" > + "mov %c[rsi](%[svm]), %%esi \n\t" > + "mov %c[rdi](%[svm]), %%edi \n\t" > + "mov %c[rbp](%[svm]), %%ebp \n\t" > #endif > > #ifdef CONFIG_X86_64 > /* Enter guest mode */ > "push %%rax \n\t" > - "mov %c[svm](%[vcpu]), %%rax \n\t" > - "mov %c[vmcb](%%rax), %%rax \n\t" > + "mov %c[vmcb](%[svm]), %%rax \n\t" > SVM_VMLOAD "\n\t" > SVM_VMRUN "\n\t" > SVM_VMSAVE "\n\t" > @@ -1626,8 +1624,7 @@ again: > #else > /* Enter guest mode */ > "push %%eax \n\t" > - "mov %c[svm](%[vcpu]), %%eax \n\t" > - "mov %c[vmcb](%%eax), %%eax \n\t" > + "mov %c[vmcb](%[svm]), %%eax \n\t" > SVM_VMLOAD "\n\t" > SVM_VMRUN "\n\t" > SVM_VMSAVE "\n\t" > @@ -1636,55 +1633,54 @@ again: > > /* Save guest registers, load host registers */ > #ifdef CONFIG_X86_64 > - "mov %%rbx, %c[rbx](%[vcpu]) \n\t" > - "mov %%rcx, %c[rcx](%[vcpu]) \n\t" > - "mov %%rdx, %c[rdx](%[vcpu]) \n\t" > - "mov %%rsi, %c[rsi](%[vcpu]) \n\t" > - "mov %%rdi, %c[rdi](%[vcpu]) \n\t" > - "mov %%rbp, %c[rbp](%[vcpu]) \n\t" > - "mov %%r8, %c[r8](%[vcpu]) \n\t" > - "mov %%r9, %c[r9](%[vcpu]) \n\t" > - "mov %%r10, %c[r10](%[vcpu]) \n\t" > - "mov %%r11, %c[r11](%[vcpu]) \n\t" > - "mov %%r12, %c[r12](%[vcpu]) \n\t" > - "mov %%r13, %c[r13](%[vcpu]) \n\t" > - "mov %%r14, %c[r14](%[vcpu]) \n\t" > - "mov %%r15, %c[r15](%[vcpu]) \n\t" > + "mov %%rbx, %c[rbx](%[svm]) \n\t" > + "mov %%rcx, %c[rcx](%[svm]) \n\t" > + "mov %%rdx, %c[rdx](%[svm]) \n\t" > + "mov %%rsi, %c[rsi](%[svm]) \n\t" > + "mov %%rdi, %c[rdi](%[svm]) \n\t" > + "mov %%rbp, %c[rbp](%[svm]) \n\t" > + "mov %%r8, %c[r8](%[svm]) \n\t" > + "mov %%r9, %c[r9](%[svm]) \n\t" > + "mov %%r10, %c[r10](%[svm]) \n\t" > + "mov %%r11, %c[r11](%[svm]) \n\t" > + "mov %%r12, %c[r12](%[svm]) \n\t" > + "mov %%r13, %c[r13](%[svm]) \n\t" > + "mov %%r14, %c[r14](%[svm]) \n\t" > + "mov %%r15, %c[r15](%[svm]) \n\t" > > "pop %%r15; pop %%r14; pop %%r13; pop %%r12;" 
> "pop %%r11; pop %%r10; pop %%r9; pop %%r8;" > "pop %%rbp; pop %%rdi; pop %%rsi;" > "pop %%rdx; pop %%rcx; pop %%rbx; \n\t" > #else > - "mov %%ebx, %c[rbx](%[vcpu]) \n\t" > - "mov %%ecx, %c[rcx](%[vcpu]) \n\t" > - "mov %%edx, %c[rdx](%[vcpu]) \n\t" > - "mov %%esi, %c[rsi](%[vcpu]) \n\t" > - "mov %%edi, %c[rdi](%[vcpu]) \n\t" > - "mov %%ebp, %c[rbp](%[vcpu]) \n\t" > + "mov %%ebx, %c[rbx](%[svm]) \n\t" > + "mov %%ecx, %c[rcx](%[svm]) \n\t" > + "mov %%edx, %c[rdx](%[svm]) \n\t" > + "mov %%esi, %c[rsi](%[svm]) \n\t" > + "mov %%edi, %c[rdi](%[svm]) \n\t" > + "mov %%ebp, %c[rbp](%[svm]) \n\t" > > "pop %%ebp; pop %%edi; pop %%esi;" > "pop %%edx; pop %%ecx; pop %%ebx; \n\t" > #endif > : > - : [vcpu]"a"(vcpu), > - [svm]"i"(offsetof(struct kvm_vcpu, _priv)), > + : [svm]"a"(svm), > [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), > - [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])), > - [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])), > - [rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])), > - [rsi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RSI])), > - [rdi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDI])), > - [rbp]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBP])) > + [rbx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBX])), > + [rcx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RCX])), > + [rdx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDX])), > + [rsi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RSI])), > + [rdi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDI])), > + [rbp]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBP])) > #ifdef CONFIG_X86_64 > - ,[r8 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R8 ])), > - [r9 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R9 ])), > - [r10]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R10])), > - [r11]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R11])), > - [r12]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R12])), > - [r13]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R13])), > - 
[r14]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R14])), > - [r15]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R15])) > + ,[r8 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R8])), > + [r9 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R9 ])), > + [r10]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R10])), > + [r11]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R11])), > + [r12]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R12])), > + [r13]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R13])), > + [r14]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R14])), > + [r15]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R15])) > #endif > : "cc", "memory" ); > > @@ -1865,7 +1861,6 @@ static struct kvm_arch_ops svm_arch_ops > > .run = svm_vcpu_run, > .skip_emulated_instruction = skip_emulated_instruction, > - .vcpu_setup = svm_vcpu_setup, > .patch_hypercall = svm_patch_hypercall, > }; > > diff -r 52331a1d5569 drivers/kvm/vmx.c > --- a/drivers/kvm/vmx.c Fri Jul 27 16:50:49 2007 +1000 > +++ b/drivers/kvm/vmx.c Fri Jul 27 17:09:03 2007 +1000 > @@ -39,7 +39,7 @@ struct vmcs { > }; > > struct vcpu_vmx { > - struct kvm_vcpu *vcpu; > + struct kvm_vcpu vcpu; > int launched; > struct kvm_msr_entry *guest_msrs; > struct kvm_msr_entry *host_msrs; > @@ -60,7 +60,7 @@ struct vcpu_vmx { > > static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) > { > - return (struct vcpu_vmx*)vcpu->_priv; > + return container_of(vcpu, struct vcpu_vmx, vcpu); > } > > static int init_rmode_tss(struct kvm *kvm); > @@ -2302,46 +2302,62 @@ static void vmx_free_vmcs(struct kvm_vcp > > static void vmx_free_vcpu(struct kvm_vcpu *vcpu) > { > + struct vcpu_vmx *vmx = to_vmx(vcpu); > + > vmx_free_vmcs(vcpu); > -} > - > -static int vmx_create_vcpu(struct kvm_vcpu *vcpu) > -{ > - struct vcpu_vmx *vmx; > - > - vmx = kzalloc(sizeof(*vmx), GFP_KERNEL); > + kfree(vmx->host_msrs); > + kfree(vmx->guest_msrs); > + kvm_vcpu_uninit(vcpu); > + kfree(vmx); > +} > + > +static struct kvm_vcpu 
*vmx_create_vcpu(struct kvm *kvm, unsigned int id) > +{ > + int err; > + struct vcpu_vmx *vmx = kzalloc(sizeof(*vmx), GFP_KERNEL); > + > if (!vmx) > - return -ENOMEM; > + return ERR_PTR(-ENOMEM); > + > + err = kvm_vcpu_init(&vmx->vcpu, kvm, id); > + if (err) > + goto free_vcpu; > > vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); > - if (!vmx->guest_msrs) > - goto out_free; > + if (!vmx->guest_msrs) { > + err = -ENOMEM; > + goto uninit_vcpu; > + } > > vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); > if (!vmx->host_msrs) > - goto out_free; > + goto free_guest_msrs; > > vmx->vmcs = alloc_vmcs(); > if (!vmx->vmcs) > - goto out_free; > + goto free_msrs; > > vmcs_clear(vmx->vmcs); > > - vmx->vcpu = vcpu; > - vcpu->_priv = vmx; > - > - return 0; > - > -out_free: > - if (vmx->host_msrs) > - kfree(vmx->host_msrs); > - > - if (vmx->guest_msrs) > - kfree(vmx->guest_msrs); > - > + vmx_vcpu_load(&vmx->vcpu); > + err = vmx_vcpu_setup(&vmx->vcpu); > + vmx_vcpu_put(&vmx->vcpu); > + if (err) > + goto free_vmcs; > + > + return &vmx->vcpu; > + > +free_vmcs: > + free_vmcs(vmx->vmcs); > +free_msrs: > + kfree(vmx->host_msrs); > +free_guest_msrs: > + kfree(vmx->guest_msrs); > +uninit_vcpu: > + kvm_vcpu_uninit(&vmx->vcpu); > +free_vcpu: > kfree(vmx); > - > - return -ENOMEM; > + return ERR_PTR(err); > } > > static struct kvm_arch_ops vmx_arch_ops = { > @@ -2389,7 +2405,6 @@ static struct kvm_arch_ops vmx_arch_ops > > .run = vmx_vcpu_run, > .skip_emulated_instruction = skip_emulated_instruction, > - .vcpu_setup = vmx_vcpu_setup, > .patch_hypercall = vmx_patch_hypercall, > }; > > > > > > ------------------------------------------------------------------------- > This SF.net email is sponsored by: Splunk Inc. > Still grepping through log files to find problems? Stop. > Now Search log events and configuration files using AJAX and a browser. 
> Download your FREE copy of Splunk now >> http://get.splunk.com/ > _______________________________________________ > kvm-devel mailing list > kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org > https://lists.sourceforge.net/lists/listinfo/kvm-devel ^ permalink raw reply [flat|nested] 12+ messages in thread
[parent not found: <fdaac4d50707270039v57024d36ma20f0e4895258c08-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [PATCH 2/2] Dynamically allocate vcpus [not found] ` <fdaac4d50707270039v57024d36ma20f0e4895258c08-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> @ 2007-07-27 8:17 ` Rusty Russell 0 siblings, 0 replies; 12+ messages in thread From: Rusty Russell @ 2007-07-27 8:17 UTC (permalink / raw) To: Jun Koi; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f On Fri, 2007-07-27 at 16:39 +0900, Jun Koi wrote: > Hi Rusty, > > So with this patch, we no longer have limitation on the number of > vcpus running in a VM? Well, the #define needs to be changed, but that's a trivial matter. If we want it really big, we should look at dynamic reallocation of the array, but that's pretty easy too. Cheers, Rusty. ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] Dynamically allocate vcpus [not found] ` <1185520617.12151.53.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org> 2007-07-27 7:39 ` Jun Koi @ 2007-07-29 8:03 ` Avi Kivity 1 sibling, 0 replies; 12+ messages in thread From: Avi Kivity @ 2007-07-29 8:03 UTC (permalink / raw) To: Rusty Russell; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f Rusty Russell wrote: > Dynamically allocate vcpus > > This patch converts the vcpus array in "struct kvm" to a pointer > array, and changes the "vcpu_create" and "vcpu_setup" hooks into one > "vcpu_create" call which does the allocation and initialization of the > vcpu (calling back into the kvm_vcpu_init core helper). > > It is untested on SMP or SVM, and there are more possible cleanups in > svm.c and vmx.c, but I was being cautious. > Applied; thanks. Both smp and svm work. But watch that whitespace. -- error compiling committee.c: too many arguments to function ^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 2/3] KVM: Clean up VMCLEAR/VMPTRLD code on VMX [not found] ` <20070726184241.9119.87037.stgit-sLgBBP33vUGnsjUZhwzVf9HuzzzSOjJt@public.gmane.org> 2007-07-26 18:45 ` [PATCH 1/3] KVM: Remove arch specific components from the general code Gregory Haskins @ 2007-07-26 18:45 ` Gregory Haskins 2007-07-26 18:45 ` [PATCH 3/3] KVM: Protect race-condition between VMCS and current_vmcs on VMX hardware Gregory Haskins 2 siblings, 0 replies; 12+ messages in thread From: Gregory Haskins @ 2007-07-26 18:45 UTC (permalink / raw) To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f Signed-off-by: Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org> --- drivers/kvm/vmx.c | 70 +++++++++++++++++++++++++++++++++++++++++++---------- 1 files changed, 57 insertions(+), 13 deletions(-) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 5f0a7fd..d6354ca 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -188,6 +188,20 @@ static struct kvm_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr) return NULL; } +static void vmcs_load(struct vmcs *vmcs) +{ + u64 phys_addr = __pa(vmcs); + u8 error; + + asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0" + : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) + : "cc"); + + if (error) + printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", + vmcs, phys_addr); +} + static void vmcs_clear(struct vmcs *vmcs) { u64 phys_addr = __pa(vmcs); @@ -206,10 +220,34 @@ static void __vcpu_clear(void *arg) struct kvm_vcpu *vcpu = arg; int cpu = raw_smp_processor_id(); - if (vcpu->cpu == cpu) + if (vcpu->cpu != -1) { + /* + * We should *never* try to __vcpu_clear a remote VMCS. 
This + * would have been addressed at a higher layer already + */ + BUG_ON(vcpu->cpu != cpu); + + /* + * Execute the VMCLEAR operation regardless of whether the + * VMCS is currently active on this CPU or not (it doesn't + * necessarily have to be) + */ vmcs_clear(vmx(vcpu)->vmcs); - if (per_cpu(current_vmcs, cpu) == vmx(vcpu)->vmcs) - per_cpu(current_vmcs, cpu) = NULL; + + /* + * And finally, if this VMCS *was* currently active on this + * CPU, mark the CPU as available again + */ + if (per_cpu(current_vmcs, cpu) == vmx(vcpu)->vmcs) + per_cpu(current_vmcs, cpu) = NULL; + } else + /* + * If vcpu->cpu thinks we are not installed anywhere, + * but this CPU thinks we are currently active, something is + * whacked. + */ + BUG_ON(per_cpu(current_vmcs, cpu) == vmx(vcpu)->vmcs); + rdtscll(vcpu->host_tsc); } @@ -220,6 +258,7 @@ static void vcpu_clear(struct kvm_vcpu *vcpu) else __vcpu_clear(vcpu); vmx(vcpu)->launched = 0; + vcpu->cpu = -1; } static unsigned long vmcs_readl(unsigned long field) @@ -423,7 +462,6 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) */ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) { - u64 phys_addr = __pa(vmx(vcpu)->vmcs); int cpu; u64 tsc_this, delta; @@ -432,16 +470,22 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) if (vcpu->cpu != cpu) vcpu_clear(vcpu); - if (per_cpu(current_vmcs, cpu) != vmx(vcpu)->vmcs) { - u8 error; - + /* + * By the time we get here, we know that either our VMCS is already + * loaded on the current CPU (from previous runs), or that its not + * loaded *anywhere* in the system at all (due to the vcpu_clear() + * operation above). Either way, we must check to make sure we are + * the currently loaded pointer, and correct it if we are not. + * + * Note: A race condition exists against current_vmcs between the + * following update, and any IPIs dispatched to clear a different + * VMCS. Currently, this race condition is believed to be benign, + * but tread carefully. 
+ */ + if (per_cpu(current_vmcs, cpu) != vmx(vcpu)->vmcs) { + /* Re-establish ourselves as the current VMCS */ + vmcs_load(vmx(vcpu)->vmcs); per_cpu(current_vmcs, cpu) = vmx(vcpu)->vmcs; - asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0" - : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) - : "cc"); - if (error) - printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", - vmx(vcpu)->vmcs, phys_addr); } if (vcpu->cpu != cpu) { ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 3/3] KVM: Protect race-condition between VMCS and current_vmcs on VMX hardware [not found] ` <20070726184241.9119.87037.stgit-sLgBBP33vUGnsjUZhwzVf9HuzzzSOjJt@public.gmane.org> 2007-07-26 18:45 ` [PATCH 1/3] KVM: Remove arch specific components from the general code Gregory Haskins 2007-07-26 18:45 ` [PATCH 2/3] KVM: Clean up VMCLEAR/VMPTRLD code on VMX Gregory Haskins @ 2007-07-26 18:45 ` Gregory Haskins [not found] ` <20070726184535.9119.4862.stgit-sLgBBP33vUGnsjUZhwzVf9HuzzzSOjJt@public.gmane.org> 2 siblings, 1 reply; 12+ messages in thread From: Gregory Haskins @ 2007-07-26 18:45 UTC (permalink / raw) To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f We need to provide locking around the current_vmcs/VMCS interactions to protect against race conditions. Signed-off-by: Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org> --- drivers/kvm/vmx.c | 25 +++++++++++++++++++------ 1 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index d6354ca..78ff917 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -219,6 +219,7 @@ static void __vcpu_clear(void *arg) { struct kvm_vcpu *vcpu = arg; int cpu = raw_smp_processor_id(); + unsigned long flags; if (vcpu->cpu != -1) { /* @@ -238,8 +239,10 @@ static void __vcpu_clear(void *arg) * And finally, if this VMCS *was* currently active on this * CPU, mark the CPU as available again */ + local_irq_save(flags); if (per_cpu(current_vmcs, cpu) == vmx(vcpu)->vmcs) per_cpu(current_vmcs, cpu) = NULL; + local_irq_restore(flags); } else /* * If vcpu->cpu thinks we are not installed anywhere, @@ -464,6 +467,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) { int cpu; u64 tsc_this, delta; + unsigned long flags; + int reload = 0; cpu = get_cpu(); @@ -477,16 +482,24 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) * operation above). Either way, we must check to make sure we are * the currently loaded pointer, and correct it if we are not. 
* - * Note: A race condition exists against current_vmcs between the - * following update, and any IPIs dispatched to clear a different - * VMCS. Currently, this race condition is believed to be benign, - * but tread carefully. + * Note: We disable interrupts to prevent a race condition in + * current_vmcs against IPIs from remote CPUs to clear their own VMCS. + * + * Also note that preemption is currently disabled, so there is no race + * between the current_vmcs and the VMPTRLD operation which happens + * shortly after the current_vmcs update external to the critical + * section. */ + local_irq_save(flags); if (per_cpu(current_vmcs, cpu) != vmx(vcpu)->vmcs) { - /* Re-establish ourselves as the current VMCS */ - vmcs_load(vmx(vcpu)->vmcs); per_cpu(current_vmcs, cpu) = vmx(vcpu)->vmcs; + reload = 1; } + local_irq_restore(flags); + + if (reload) + /* Re-establish ourselves as the current VMCS */ + vmcs_load(vmx(vcpu)->vmcs); if (vcpu->cpu != cpu) { struct descriptor_table dt; ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply related [flat|nested] 12+ messages in thread
[parent not found: <20070726184535.9119.4862.stgit-sLgBBP33vUGnsjUZhwzVf9HuzzzSOjJt@public.gmane.org>]
* Re: [PATCH 3/3] KVM: Protect race-condition between VMCS and current_vmcs on VMX hardware
  [not found] ` <20070726184535.9119.4862.stgit-sLgBBP33vUGnsjUZhwzVf9HuzzzSOjJt@public.gmane.org>
@ 2007-07-27  4:58 ` Avi Kivity
  0 siblings, 0 replies; 12+ messages in thread
From: Avi Kivity @ 2007-07-27 4:58 UTC (permalink / raw)
To: Gregory Haskins; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Gregory Haskins wrote:
> We need to provide locking around the current_vmcs/VMCS interactions to
> protect against race conditions.
>

We agreed the race was benign?  Do you no longer think so?

-- 
Do not meddle in the internals of kernels, for they are subtle and quick
to panic.
* Re: [PATCH 3/3] KVM: Protect race-condition between VMCS and current_vmcs on VMX hardware
@ 2007-07-27 11:29 Gregory Haskins
0 siblings, 0 replies; 12+ messages in thread
From: Gregory Haskins @ 2007-07-27 11:29 UTC (permalink / raw)
To: avi-atKUWr5tajBWk0Htik3J/w; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
On Fri, 2007-07-27 at 07:58 +0300, Avi Kivity wrote:
> Gregory Haskins wrote:
> > We need to provide locking around the current_vmcs/VMCS interactions to
> > protect against race conditions.
> >
> >
>
> We agreed the race was benign? Do you no longer think so?
No, I agree (see the "0/3" note). I was just including it in case we
wanted to lock it down for posterity. Since it would add what amounts to
unnecessary overhead, the comments provided in 2/3 are probably
sufficient.
-Greg
end of thread, other threads:[~2007-07-29  8:03 UTC | newest]
Thread overview: 12+ messages
-- links below jump to the message on this page --
2007-07-26 18:45 [PATCH 0/3] Arch cleanup v3 Gregory Haskins
[not found] ` <20070726184241.9119.87037.stgit-sLgBBP33vUGnsjUZhwzVf9HuzzzSOjJt@public.gmane.org>
2007-07-26 18:45 ` [PATCH 1/3] KVM: Remove arch specific components from the general code Gregory Haskins
[not found] ` <20070726184525.9119.73573.stgit-sLgBBP33vUGnsjUZhwzVf9HuzzzSOjJt@public.gmane.org>
2007-07-27 6:53 ` [PATCH 1/2] Rename svm() and vmx() to to_svm() and to_vmx() Rusty Russell
[not found] ` <1185519205.12151.47.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2007-07-27 6:55 ` [PATCH 2/2] Dynamically allocate vcpus Rusty Russell
[not found] ` <1185519330.12151.50.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2007-07-27 7:16 ` Rusty Russell
[not found] ` <1185520617.12151.53.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2007-07-27 7:39 ` Jun Koi
[not found] ` <fdaac4d50707270039v57024d36ma20f0e4895258c08-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-07-27 8:17 ` Rusty Russell
2007-07-29 8:03 ` Avi Kivity
2007-07-26 18:45 ` [PATCH 2/3] KVM: Clean up VMCLEAR/VMPTRLD code on VMX Gregory Haskins
2007-07-26 18:45 ` [PATCH 3/3] KVM: Protect race-condition between VMCS and current_vmcs on VMX hardware Gregory Haskins
[not found] ` <20070726184535.9119.4862.stgit-sLgBBP33vUGnsjUZhwzVf9HuzzzSOjJt@public.gmane.org>
2007-07-27 4:58 ` Avi Kivity
-- strict thread matches above, loose matches on Subject: below --
2007-07-27 11:29 Gregory Haskins