public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] dynamically create vcpus + vmx/svm structures
@ 2007-07-13  1:20 Paul Turner
       [not found] ` <Pine.LNX.4.64.0707121815040.23503-hxTPNdr267xSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
  0 siblings, 1 reply; 5+ messages in thread
From: Paul Turner @ 2007-07-13  1:20 UTC (permalink / raw)
  To: avi-atKUWr5tajBWk0Htik3J/w; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

[-- Attachment #1: Type: TEXT/PLAIN, Size: 28320 bytes --]

From: Paul Turner <pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

- vcpus now allocated on demand
- vmx/svm fields separated into arch specific structures on vcpus
- vmx/svm fields now only allocated on corresponding architectures

- Paul

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 0f7a4d9..c631192 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -16,6 +16,7 @@ #include <linux/mm.h>
  #include <asm/signal.h>

  #include "vmx.h"
+#include "kvm_svm.h"
  #include <linux/kvm.h>
  #include <linux/kvm_para.h>

@@ -326,16 +327,64 @@ struct kvm_io_device *kvm_io_bus_find_de
  void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
  			     struct kvm_io_device *dev);

+struct kvm_vmx_data {
+	int msr_offset_efer;
+
+        #ifdef CONFIG_X86_64
+	int msr_offset_kernel_gs_base;
+        #endif
+
+        struct vmx_host_state {
+		int loaded;
+		u16 fs_sel, gs_sel, ldt_sel;
+		int fs_gs_ldt_reload_needed;
+	} host_state;
+
+	struct vmx_msr_entry *guest_msrs;
+	struct vmx_msr_entry *host_msrs;
+
+	struct {
+		int active;
+		u8 save_iopl;
+		struct kvm_save_segment {
+			u16 selector;
+			unsigned long base;
+			u32 limit;
+			u32 ar;
+		} tr, es, ds, fs, gs;
+	} rmode;
+	int halt_request; /* real mode */
+ 
+	struct vmcs *vmcs;
+};
+
+struct kvm_svm_data {
+	struct vmcb *vmcb;
+	unsigned long vmcb_pa;
+	struct svm_cpu_data *svm_data;
+	uint64_t asid_generation;
+
+	unsigned long db_regs[NUM_DB_REGS];
+
+	u64 next_rip;
+
+	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
+	u64 host_gs_base;
+	unsigned long host_cr2;
+	unsigned long host_db_regs[NUM_DB_REGS];
+	unsigned long host_dr6;
+	unsigned long host_dr7;
+};
+
+
  struct kvm_vcpu {
  	struct kvm *kvm;
+	struct mutex *mutex; /* refers to corresponding vcpu_mutex on kvm */
  	int vcpu_id;
-	union {
-		struct vmcs *vmcs;
-		struct vcpu_svm *svm;
-	};
-	struct mutex mutex;
+
  	int   cpu;
  	int   launched;
+
  	u64 host_tsc;
  	struct kvm_run *run;
  	int interrupt_window_open;
@@ -361,12 +410,6 @@ #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE
  	u64 ia32_misc_enable_msr;
  	int nmsrs;
  	int save_nmsrs;
-	int msr_offset_efer;
-#ifdef CONFIG_X86_64
-	int msr_offset_kernel_gs_base;
-#endif
-	struct vmx_msr_entry *guest_msrs;
-	struct vmx_msr_entry *host_msrs;

  	struct kvm_mmu mmu;

@@ -385,11 +428,6 @@ #endif
  	char *guest_fx_image;
  	int fpu_active;
  	int guest_fpu_loaded;
-	struct vmx_host_state {
-		int loaded;
-		u16 fs_sel, gs_sel, ldt_sel;
-		int fs_gs_ldt_reload_needed;
-	} vmx_host_state;

  	int mmio_needed;
  	int mmio_read_completed;
@@ -406,22 +444,16 @@ #endif

  	struct kvm_stat stat;

-	struct {
-		int active;
-		u8 save_iopl;
-		struct kvm_save_segment {
-			u16 selector;
-			unsigned long base;
-			u32 limit;
-			u32 ar;
-		} tr, es, ds, fs, gs;
-	} rmode;
-	int halt_request; /* real mode on Intel only */
-
  	int cpuid_nent;
  	struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+
+	union {
+		struct kvm_vmx_data vmx[0];
+		struct kvm_svm_data svm[0];
+	};
  };

+
  struct kvm_mem_alias {
  	gfn_t base_gfn;
  	unsigned long npages;
@@ -448,8 +480,11 @@ struct kvm {
  	struct list_head active_mmu_pages;
  	int n_free_mmu_pages;
  	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
+
  	int nvcpus;
-	struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
+	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+	struct mutex vcpu_mutex[KVM_MAX_VCPUS];
+
  	int memory_config_version;
  	int busy;
  	unsigned long rmap_overflow;
@@ -472,7 +507,8 @@ struct kvm_arch_ops {
  	int (*hardware_setup)(void);               /* __init */
  	void (*hardware_unsetup)(void);            /* __exit */

-	int (*vcpu_create)(struct kvm_vcpu *vcpu);
+	int (*vcpu_size)(void);
+	int (*vcpu_init)(struct kvm_vcpu *vcpu);
  	void (*vcpu_free)(struct kvm_vcpu *vcpu);

  	void (*vcpu_load)(struct kvm_vcpu *vcpu);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 4d2ef9b..86345be 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -234,7 +234,7 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
   */
  static void vcpu_load(struct kvm_vcpu *vcpu)
  {
-	mutex_lock(&vcpu->mutex);
+	mutex_lock(vcpu->mutex);
  	kvm_arch_ops->vcpu_load(vcpu);
  }

@@ -244,11 +244,12 @@ static void vcpu_load(struct kvm_vcpu *v
   */
  static struct kvm_vcpu *vcpu_load_slot(struct kvm *kvm, int slot)
  {
-	struct kvm_vcpu *vcpu = &kvm->vcpus[slot];
+	struct kvm_vcpu *vcpu;

-	mutex_lock(&vcpu->mutex);
-	if (!vcpu->vmcs) {
-		mutex_unlock(&vcpu->mutex);
+	mutex_lock(&kvm->vcpu_mutex[slot]);
+	vcpu = kvm->vcpus[slot];
+	if (!vcpu) {
+		mutex_unlock(&kvm->vcpu_mutex[slot]);
  		return NULL;
  	}
  	kvm_arch_ops->vcpu_load(vcpu);
@@ -258,7 +259,7 @@ static struct kvm_vcpu *vcpu_load_slot(s
  static void vcpu_put(struct kvm_vcpu *vcpu)
  {
  	kvm_arch_ops->vcpu_put(vcpu);
-	mutex_unlock(&vcpu->mutex);
+	mutex_unlock(vcpu->mutex);
  }

  static void ack_flush(void *_completed)
@@ -279,7 +280,7 @@ void kvm_flush_remote_tlbs(struct kvm *k
  	cpus_clear(cpus);
  	needed = 0;
  	for (i = 0; i < kvm->nvcpus; ++i) {
-		vcpu = &kvm->vcpus[i];
+		vcpu = kvm->vcpus[i];
  		if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests))
  			continue;
  		cpu = vcpu->cpu;
@@ -318,14 +319,10 @@ static struct kvm *kvm_create_vm(void)
  	list_add(&kvm->vm_list, &vm_list);
  	spin_unlock(&kvm_lock);
  	kvm_io_bus_init(&kvm->mmio_bus);
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		struct kvm_vcpu *vcpu = &kvm->vcpus[i];

-		mutex_init(&vcpu->mutex);
-		vcpu->cpu = -1;
-		vcpu->kvm = kvm;
-		vcpu->mmu.root_hpa = INVALID_PAGE;
-	}
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) 
+		mutex_init(&kvm->vcpu_mutex[i]);
+
  	return kvm;
  }

@@ -379,7 +376,7 @@ static void free_pio_guest_pages(struct

  static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
  {
-	if (!vcpu->vmcs)
+	if (!vcpu)
  		return;

  	vcpu_load(vcpu);
@@ -389,7 +386,7 @@ static void kvm_unload_vcpu_mmu(struct k

  static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
  {
-	if (!vcpu->vmcs)
+	if (!vcpu)
  		return;

  	vcpu_load(vcpu);
@@ -401,6 +398,8 @@ static void kvm_free_vcpu(struct kvm_vcp
  	free_page((unsigned long)vcpu->pio_data);
  	vcpu->pio_data = NULL;
  	free_pio_guest_pages(vcpu);
+
+	kfree(vcpu);
  }

  static void kvm_free_vcpus(struct kvm *kvm)
@@ -411,9 +410,11 @@ static void kvm_free_vcpus(struct kvm *k
  	 * Unpin any mmu pages first.
  	 */
  	for (i = 0; i < KVM_MAX_VCPUS; ++i)
-		kvm_unload_vcpu_mmu(&kvm->vcpus[i]);
-	for (i = 0; i < KVM_MAX_VCPUS; ++i)
-		kvm_free_vcpu(&kvm->vcpus[i]);
+		kvm_unload_vcpu_mmu(kvm->vcpus[i]);
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		kvm_free_vcpu(kvm->vcpus[i]);
+		kvm->vcpus[i] = NULL;
+	}
  }

  static int kvm_dev_release(struct inode *inode, struct file *filp)
@@ -430,6 +431,7 @@ static void kvm_destroy_vm(struct kvm *k
  	kvm_io_bus_destroy(&kvm->mmio_bus);
  	kvm_free_vcpus(kvm);
  	kvm_free_physmem(kvm);
+
  	kfree(kvm);
  }

@@ -796,7 +798,7 @@ raced:

  	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
  		struct kvm_vcpu *vcpu;
-
+
  		vcpu = vcpu_load_slot(kvm, i);
  		if (!vcpu)
  			continue;
@@ -922,11 +924,11 @@ static int kvm_vm_ioctl_set_memory_alias

  	spin_unlock(&kvm->lock);

-	vcpu_load(&kvm->vcpus[0]);
+	vcpu_load(kvm->vcpus[0]);
  	spin_lock(&kvm->lock);
-	kvm_mmu_zap_all(&kvm->vcpus[0]);
+	kvm_mmu_zap_all(kvm->vcpus[0]);
  	spin_unlock(&kvm->lock);
-	vcpu_put(&kvm->vcpus[0]);
+	vcpu_put(kvm->vcpus[0]);

  	return 0;

@@ -2380,40 +2382,51 @@ static int kvm_vm_ioctl_create_vcpu(stru
  {
  	int r;
  	struct kvm_vcpu *vcpu;
-	struct page *page;
+	struct page *pio_page, *run_page;

  	r = -EINVAL;
  	if (!valid_vcpu(n))
  		goto out;

-	vcpu = &kvm->vcpus[n];
-	vcpu->vcpu_id = n;
+	mutex_lock(&kvm->vcpu_mutex[n]);
+	if (kvm->vcpus[n]) {
+		r = -EEXIST;
+		goto out_unlock;
+	}

-	mutex_lock(&vcpu->mutex);
+	vcpu = kzalloc(kvm_arch_ops->vcpu_size(), GFP_KERNEL);

-	if (vcpu->vmcs) {
-		mutex_unlock(&vcpu->mutex);
-		return -EEXIST;
+	if (!vcpu) {
+		r = -ENOMEM;
+		goto out_unlock;
  	}

-	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	vcpu->mutex = &kvm->vcpu_mutex[n];
+	vcpu->cpu = -1;
+	vcpu->kvm = kvm;
+	vcpu->mmu.root_hpa = INVALID_PAGE;
+
+	vcpu->vcpu_id = n;
+	kvm->vcpus[n] = vcpu;
+
+	run_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
  	r = -ENOMEM;
-	if (!page)
-		goto out_unlock;
-	vcpu->run = page_address(page);
+	if (!run_page)
+		goto out_deallocate;
+	vcpu->run = page_address(run_page);

-	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	pio_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
  	r = -ENOMEM;
-	if (!page)
+	if (!pio_page)
  		goto out_free_run;
-	vcpu->pio_data = page_address(page);
+	vcpu->pio_data = page_address(pio_page);

  	vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf,
  					   FX_IMAGE_ALIGN);
  	vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE;
  	vcpu->cr0 = 0x10;
-
-	r = kvm_arch_ops->vcpu_create(vcpu);
+ 
+	r = kvm_arch_ops->vcpu_init(vcpu);
  	if (r < 0)
  		goto out_free_vcpus;

@@ -2425,6 +2438,7 @@ static int kvm_vm_ioctl_create_vcpu(stru
  	r = kvm_mmu_setup(vcpu);
  	if (r >= 0)
  		r = kvm_arch_ops->vcpu_setup(vcpu);
+
  	vcpu_put(vcpu);

  	if (r < 0)
@@ -2438,20 +2452,25 @@ static int kvm_vm_ioctl_create_vcpu(stru
  	if (n >= kvm->nvcpus)
  		kvm->nvcpus = n + 1;
  	spin_unlock(&kvm_lock);
-
+
  	return r;

  out_free_vcpus:
  	kvm_free_vcpu(vcpu);
+	vcpu = NULL;
  out_free_run:
-	free_page((unsigned long)vcpu->run);
-	vcpu->run = NULL;
+	free_page((unsigned long)run_page);
+out_deallocate:
+	kfree(vcpu);
+	kvm->vcpus[n] = NULL;
  out_unlock:
-	mutex_unlock(&vcpu->mutex);
+	mutex_unlock(&kvm->vcpu_mutex[n]);
+
  out:
  	return r;
  }

+
  static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
  {
  	u64 efer;
@@ -2846,9 +2865,7 @@ static int kvm_dev_ioctl_create_vm(void)
  		kvm_destroy_vm(kvm);
  		return r;
  	}
-
  	kvm->filp = file;
-
  	return fd;
  }

@@ -2962,7 +2979,7 @@ static void decache_vcpus_on_cpu(int cpu
  	spin_lock(&kvm_lock);
  	list_for_each_entry(vm, &vm_list, vm_list)
  		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = &vm->vcpus[i];
+			vcpu = vm->vcpus[i];
  			/*
  			 * If the vcpu is locked, then it is running on some
  			 * other cpu and therefore it is not cached on the
@@ -2971,12 +2988,12 @@ static void decache_vcpus_on_cpu(int cpu
  			 * If it's not locked, check the last cpu it executed
  			 * on.
  			 */
-			if (mutex_trylock(&vcpu->mutex)) {
+			if (mutex_trylock(vcpu->mutex)) {
  				if (vcpu->cpu == cpu) {
  					kvm_arch_ops->vcpu_decache(vcpu);
  					vcpu->cpu = -1;
  				}
-				mutex_unlock(&vcpu->mutex);
+				mutex_unlock(vcpu->mutex);
  			}
  		}
  	spin_unlock(&kvm_lock);
@@ -3080,7 +3097,7 @@ static u64 stat_get(void *_offset)
  	spin_lock(&kvm_lock);
  	list_for_each_entry(kvm, &vm_list, vm_list)
  		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = &kvm->vcpus[i];
+			vcpu = kvm->vcpus[i];
  			total += *(u32 *)((void *)vcpu + offset);
  		}
  	spin_unlock(&kvm_lock);
diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h
index a869983..48ad325 100644
--- a/drivers/kvm/kvm_svm.h
+++ b/drivers/kvm/kvm_svm.h
@@ -7,7 +7,6 @@ #include <linux/list.h>
  #include <asm/msr.h>

  #include "svm.h"
-#include "kvm.h"

  static const u32 host_save_user_msrs[] = {
  #ifdef CONFIG_X86_64
@@ -20,23 +19,4 @@ #endif
  #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
  #define NUM_DB_REGS 4

-struct vcpu_svm {
-	struct vmcb *vmcb;
-	unsigned long vmcb_pa;
-	struct svm_cpu_data *svm_data;
-	uint64_t asid_generation;
-
-	unsigned long db_regs[NUM_DB_REGS];
-
-	u64 next_rip;
-
-	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
-	u64 host_gs_base;
-	unsigned long host_cr2;
-	unsigned long host_db_regs[NUM_DB_REGS];
-	unsigned long host_dr6;
-	unsigned long host_dr7;
-};
-
  #endif
-
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 52a11cc..40a1baf 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -14,7 +14,7 @@
   *
   */

-#include "kvm_svm.h"
+#include "kvm.h"
  #include "x86_emulate.h"

  #include <linux/module.h>
@@ -568,18 +568,20 @@ static void init_vmcb(struct vmcb *vmcb)
  	/* rdx = ?? */
  }

-static int svm_create_vcpu(struct kvm_vcpu *vcpu)
+static int svm_vcpu_size(void)
+{
+	return sizeof(struct kvm_vcpu) + sizeof(struct kvm_svm_data);
+}
+
+static int svm_init_vcpu(struct kvm_vcpu *vcpu)
  {
  	struct page *page;
  	int r;

  	r = -ENOMEM;
-	vcpu->svm = kzalloc(sizeof *vcpu->svm, GFP_KERNEL);
-	if (!vcpu->svm)
-		goto out1;
  	page = alloc_page(GFP_KERNEL);
  	if (!page)
-		goto out2;
+		goto out1;

  	vcpu->svm->vmcb = page_address(page);
  	clear_page(vcpu->svm->vmcb);
@@ -596,8 +598,6 @@ static int svm_create_vcpu(struct kvm_vc

  	return 0;

-out2:
-	kfree(vcpu->svm);
  out1:
  	return r;
  }
@@ -1610,7 +1610,7 @@ #endif
  		:
  		: [vcpu]"a"(vcpu),
  		  [svm]"i"(offsetof(struct kvm_vcpu, svm)),
-		  [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
+		  [vmcb]"i"(offsetof(struct kvm_svm_data, vmcb_pa)),
  		  [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])),
  		  [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])),
  		  [rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])),
@@ -1765,7 +1765,8 @@ static struct kvm_arch_ops svm_arch_ops
  	.hardware_enable = svm_hardware_enable,
  	.hardware_disable = svm_hardware_disable,

-	.vcpu_create = svm_create_vcpu,
+	.vcpu_size = svm_vcpu_size,
+	.vcpu_init = svm_init_vcpu,
  	.vcpu_free = svm_free_vcpu,

  	.vcpu_load = svm_vcpu_load,
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 7fa62c7..da14d2f 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -96,9 +96,9 @@ static inline u64 msr_efer_save_restore_

  static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu)
  {
-	int efer_offset = vcpu->msr_offset_efer;
-	return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) !=
-		msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]);
+	int efer_offset = vcpu->vmx->msr_offset_efer;
+	return msr_efer_save_restore_bits(vcpu->vmx->host_msrs[efer_offset]) !=
+		msr_efer_save_restore_bits(vcpu->vmx->guest_msrs[efer_offset]);
  }

  static inline int is_page_fault(u32 intr_info)
@@ -126,7 +126,7 @@ static int __find_msr_index(struct kvm_v
  	int i;

  	for (i = 0; i < vcpu->nmsrs; ++i)
-		if (vcpu->guest_msrs[i].index == msr)
+		if (vcpu->vmx->guest_msrs[i].index == msr)
  			return i;
  	return -1;
  }
@@ -137,7 +137,7 @@ static struct vmx_msr_entry *find_msr_en

  	i = __find_msr_index(vcpu, msr);
  	if (i >= 0)
-		return &vcpu->guest_msrs[i];
+		return &vcpu->vmx->guest_msrs[i];
  	return NULL;
  }

@@ -160,8 +160,8 @@ static void __vcpu_clear(void *arg)
  	int cpu = raw_smp_processor_id();

  	if (vcpu->cpu == cpu)
-		vmcs_clear(vcpu->vmcs);
-	if (per_cpu(current_vmcs, cpu) == vcpu->vmcs)
+		vmcs_clear(vcpu->vmx->vmcs);
+	if (per_cpu(current_vmcs, cpu) == vcpu->vmx->vmcs)
  		per_cpu(current_vmcs, cpu) = NULL;
  	rdtscll(vcpu->host_tsc);
  }
@@ -260,7 +260,7 @@ static void update_exception_bitmap(stru
  		eb |= 1u << NM_VECTOR;
  	if (vcpu->guest_debug.enabled)
  		eb |= 1u << 1;
-	if (vcpu->rmode.active)
+	if (vcpu->vmx->rmode.active)
  		eb = ~0;
  	vmcs_write32(EXCEPTION_BITMAP, eb);
  }
@@ -285,19 +285,19 @@ #endif
  static void load_transition_efer(struct kvm_vcpu *vcpu)
  {
  	u64 trans_efer;
-	int efer_offset = vcpu->msr_offset_efer;
+	int efer_offset = vcpu->vmx->msr_offset_efer;

-	trans_efer = vcpu->host_msrs[efer_offset].data;
+	trans_efer = vcpu->vmx->host_msrs[efer_offset].data;
  	trans_efer &= ~EFER_SAVE_RESTORE_BITS;
  	trans_efer |= msr_efer_save_restore_bits(
-				vcpu->guest_msrs[efer_offset]);
+				vcpu->vmx->guest_msrs[efer_offset]);
  	wrmsrl(MSR_EFER, trans_efer);
  	vcpu->stat.efer_reload++;
  }

  static void vmx_save_host_state(struct kvm_vcpu *vcpu)
  {
-	struct vmx_host_state *hs = &vcpu->vmx_host_state;
+	struct vmx_host_state *hs = &vcpu->vmx->host_state;

  	if (hs->loaded)
  		return;
@@ -334,17 +334,17 @@ #endif

  #ifdef CONFIG_X86_64
  	if (is_long_mode(vcpu)) {
-		save_msrs(vcpu->host_msrs + vcpu->msr_offset_kernel_gs_base, 1);
+		save_msrs(vcpu->vmx->host_msrs + vcpu->vmx->msr_offset_kernel_gs_base, 1);
  	}
  #endif
-	load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+	load_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
  	if (msr_efer_need_save_restore(vcpu))
  		load_transition_efer(vcpu);
  }

  static void vmx_load_host_state(struct kvm_vcpu *vcpu)
  {
-	struct vmx_host_state *hs = &vcpu->vmx_host_state;
+	struct vmx_host_state *hs = &vcpu->vmx->host_state;

  	if (!hs->loaded)
  		return;
@@ -366,10 +366,10 @@ #endif

  		reload_tss();
  	}
-	save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
-	load_msrs(vcpu->host_msrs, vcpu->save_nmsrs);
+	save_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
+	load_msrs(vcpu->vmx->host_msrs, vcpu->save_nmsrs);
  	if (msr_efer_need_save_restore(vcpu))
-		load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1);
+		load_msrs(vcpu->vmx->host_msrs + vcpu->vmx->msr_offset_efer, 1);
  }

  /*
@@ -378,7 +378,7 @@ #endif
   */
  static void vmx_vcpu_load(struct kvm_vcpu *vcpu)
  {
-	u64 phys_addr = __pa(vcpu->vmcs);
+	u64 phys_addr = __pa(vcpu->vmx->vmcs);
  	int cpu;
  	u64 tsc_this, delta;

@@ -387,16 +387,16 @@ static void vmx_vcpu_load(struct kvm_vcp
  	if (vcpu->cpu != cpu)
  		vcpu_clear(vcpu);

-	if (per_cpu(current_vmcs, cpu) != vcpu->vmcs) {
+	if (per_cpu(current_vmcs, cpu) != vcpu->vmx->vmcs) {
  		u8 error;

-		per_cpu(current_vmcs, cpu) = vcpu->vmcs;
+		per_cpu(current_vmcs, cpu) = vcpu->vmx->vmcs;
  		asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0"
  			      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
  			      : "cc");
  		if (error)
  			printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
-			       vcpu->vmcs, phys_addr);
+			       vcpu->vmx->vmcs, phys_addr);
  	}

  	if (vcpu->cpu != cpu) {
@@ -504,12 +504,12 @@ static void vmx_inject_gp(struct kvm_vcp
  void move_msr_up(struct kvm_vcpu *vcpu, int from, int to)
  {
  	struct vmx_msr_entry tmp;
-	tmp = vcpu->guest_msrs[to];
-	vcpu->guest_msrs[to] = vcpu->guest_msrs[from];
-	vcpu->guest_msrs[from] = tmp;
-	tmp = vcpu->host_msrs[to];
-	vcpu->host_msrs[to] = vcpu->host_msrs[from];
-	vcpu->host_msrs[from] = tmp;
+	tmp = vcpu->vmx->guest_msrs[to];
+	vcpu->vmx->guest_msrs[to] = vcpu->vmx->guest_msrs[from];
+	vcpu->vmx->guest_msrs[from] = tmp;
+	tmp = vcpu->vmx->host_msrs[to];
+	vcpu->vmx->host_msrs[to] = vcpu->vmx->host_msrs[from];
+	vcpu->vmx->host_msrs[from] = tmp;
  }

  /*
@@ -550,10 +550,10 @@ #endif
  	vcpu->save_nmsrs = save_nmsrs;

  #ifdef CONFIG_X86_64
-	vcpu->msr_offset_kernel_gs_base =
+	vcpu->vmx->msr_offset_kernel_gs_base =
  		__find_msr_index(vcpu, MSR_KERNEL_GS_BASE);
  #endif
-	vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
+	vcpu->vmx->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
  }

  /*
@@ -646,7 +646,7 @@ static int vmx_set_msr(struct kvm_vcpu *
  #ifdef CONFIG_X86_64
  	case MSR_EFER:
  		ret = kvm_set_msr_common(vcpu, msr_index, data);
-		if (vcpu->vmx_host_state.loaded)
+		if (vcpu->vmx->host_state.loaded)
  			load_transition_efer(vcpu);
  		break;
  	case MSR_FS_BASE:
@@ -672,8 +672,8 @@ #endif
  		msr = find_msr_entry(vcpu, msr_index);
  		if (msr) {
  			msr->data = data;
-			if (vcpu->vmx_host_state.loaded)
-				load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+			if (vcpu->vmx->host_state.loaded)
+				load_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
  			break;
  		}
  		ret = kvm_set_msr_common(vcpu, msr_index, data);
@@ -868,15 +868,15 @@ static void enter_pmode(struct kvm_vcpu
  {
  	unsigned long flags;

-	vcpu->rmode.active = 0;
+	vcpu->vmx->rmode.active = 0;

-	vmcs_writel(GUEST_TR_BASE, vcpu->rmode.tr.base);
-	vmcs_write32(GUEST_TR_LIMIT, vcpu->rmode.tr.limit);
-	vmcs_write32(GUEST_TR_AR_BYTES, vcpu->rmode.tr.ar);
+	vmcs_writel(GUEST_TR_BASE, vcpu->vmx->rmode.tr.base);
+	vmcs_write32(GUEST_TR_LIMIT, vcpu->vmx->rmode.tr.limit);
+	vmcs_write32(GUEST_TR_AR_BYTES, vcpu->vmx->rmode.tr.ar);

  	flags = vmcs_readl(GUEST_RFLAGS);
  	flags &= ~(IOPL_MASK | X86_EFLAGS_VM);
-	flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT);
+	flags |= (vcpu->vmx->rmode.save_iopl << IOPL_SHIFT);
  	vmcs_writel(GUEST_RFLAGS, flags);

  	vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~CR4_VME_MASK) |
@@ -884,10 +884,10 @@ static void enter_pmode(struct kvm_vcpu

  	update_exception_bitmap(vcpu);

-	fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->rmode.es);
-	fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->rmode.ds);
-	fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->rmode.gs);
-	fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->rmode.fs);
+	fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->vmx->rmode.es);
+	fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->vmx->rmode.ds);
+	fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->vmx->rmode.gs);
+	fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->vmx->rmode.fs);

  	vmcs_write16(GUEST_SS_SELECTOR, 0);
  	vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
@@ -920,19 +920,19 @@ static void enter_rmode(struct kvm_vcpu
  {
  	unsigned long flags;

-	vcpu->rmode.active = 1;
+	vcpu->vmx->rmode.active = 1;

-	vcpu->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
+	vcpu->vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
  	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));

-	vcpu->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
+	vcpu->vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
  	vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);

-	vcpu->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
+	vcpu->vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
  	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);

  	flags = vmcs_readl(GUEST_RFLAGS);
-	vcpu->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;
+	vcpu->vmx->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;

  	flags |= IOPL_MASK | X86_EFLAGS_VM;

@@ -950,11 +950,10 @@ static void enter_rmode(struct kvm_vcpu
  		vmcs_writel(GUEST_CS_BASE, 0xf0000);
  	vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);

-	fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es);
-	fix_rmode_seg(VCPU_SREG_DS, &vcpu->rmode.ds);
-	fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs);
-	fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs);
-
+	fix_rmode_seg(VCPU_SREG_ES, &vcpu->vmx->rmode.es);
+	fix_rmode_seg(VCPU_SREG_DS, &vcpu->vmx->rmode.ds);
+	fix_rmode_seg(VCPU_SREG_GS, &vcpu->vmx->rmode.gs);
+	fix_rmode_seg(VCPU_SREG_FS, &vcpu->vmx->rmode.fs);
  	init_rmode_tss(vcpu->kvm);
  }

@@ -1002,10 +1001,10 @@ static void vmx_set_cr0(struct kvm_vcpu
  {
  	vmx_fpu_deactivate(vcpu);

-	if (vcpu->rmode.active && (cr0 & CR0_PE_MASK))
+	if (vcpu->vmx->rmode.active && (cr0 & CR0_PE_MASK))
  		enter_pmode(vcpu);

-	if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK))
+	if (!vcpu->vmx->rmode.active && !(cr0 & CR0_PE_MASK))
  		enter_rmode(vcpu);

  #ifdef CONFIG_X86_64
@@ -1036,7 +1035,7 @@ static void vmx_set_cr3(struct kvm_vcpu
  static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
  {
  	vmcs_writel(CR4_READ_SHADOW, cr4);
-	vmcs_writel(GUEST_CR4, cr4 | (vcpu->rmode.active ?
+	vmcs_writel(GUEST_CR4, cr4 | (vcpu->vmx->rmode.active ?
  		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON));
  	vcpu->cr4 = cr4;
  }
@@ -1124,17 +1123,17 @@ static void vmx_set_segment(struct kvm_v
  	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
  	u32 ar;

-	if (vcpu->rmode.active && seg == VCPU_SREG_TR) {
-		vcpu->rmode.tr.selector = var->selector;
-		vcpu->rmode.tr.base = var->base;
-		vcpu->rmode.tr.limit = var->limit;
-		vcpu->rmode.tr.ar = vmx_segment_access_rights(var);
+	if (vcpu->vmx->rmode.active && seg == VCPU_SREG_TR) {
+		vcpu->vmx->rmode.tr.selector = var->selector;
+		vcpu->vmx->rmode.tr.base = var->base;
+		vcpu->vmx->rmode.tr.limit = var->limit;
+		vcpu->vmx->rmode.tr.ar = vmx_segment_access_rights(var);
  		return;
  	}
  	vmcs_writel(sf->base, var->base);
  	vmcs_write32(sf->limit, var->limit);
  	vmcs_write16(sf->selector, var->selector);
-	if (vcpu->rmode.active && var->s) {
+	if (vcpu->vmx->rmode.active && var->s) {
  		/*
  		 * Hack real-mode segments into vm86 compatibility.
  		 */
@@ -1253,11 +1252,11 @@ static int vmx_vcpu_setup(struct kvm_vcp
  	vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val();
  	vcpu->cr8 = 0;
  	vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+
  	if (vcpu->vcpu_id == 0)
  		vcpu->apic_base |= MSR_IA32_APICBASE_BSP;

  	fx_init(vcpu);
-
  	/*
  	 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
  	 * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4.  Sigh.
@@ -1384,13 +1383,12 @@ #endif
  		if (wrmsr_safe(index, data_low, data_high) < 0)
  			continue;
  		data = data_low | ((u64)data_high << 32);
-		vcpu->host_msrs[j].index = index;
-		vcpu->host_msrs[j].reserved = 0;
-		vcpu->host_msrs[j].data = data;
-		vcpu->guest_msrs[j] = vcpu->host_msrs[j];
+		vcpu->vmx->host_msrs[j].index = index;
+		vcpu->vmx->host_msrs[j].reserved = 0;
+		vcpu->vmx->host_msrs[j].data = data;
+		vcpu->vmx->guest_msrs[j] = vcpu->vmx->host_msrs[j];
  		++vcpu->nmsrs;
  	}
-
  	setup_msrs(vcpu);

  	vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS, VM_EXIT_CONTROLS,
@@ -1408,7 +1406,6 @@ #endif

  	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
  	vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
-
  	vcpu->cr0 = 0x60000010;
  	vmx_set_cr0(vcpu, vcpu->cr0); // enter rmode
  	vmx_set_cr4(vcpu, 0);
@@ -1479,7 +1476,7 @@ static void kvm_do_inject_irq(struct kvm
  	if (!vcpu->irq_pending[word_index])
  		clear_bit(word_index, &vcpu->irq_summary);

-	if (vcpu->rmode.active) {
+	if (vcpu->vmx->rmode.active) {
  		inject_rmode_irq(vcpu, irq);
  		return;
  	}
@@ -1538,7 +1535,7 @@ static void kvm_guest_debug_pre(struct k
  static int handle_rmode_exception(struct kvm_vcpu *vcpu,
  				  int vec, u32 err_code)
  {
-	if (!vcpu->rmode.active)
+	if (!vcpu->vmx->rmode.active)
  		return 0;

  	/*
@@ -1620,11 +1617,11 @@ static int handle_exception(struct kvm_v
  		}
  	}

-	if (vcpu->rmode.active &&
+	if (vcpu->vmx->rmode.active &&
  	    handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
  								error_code)) {
-		if (vcpu->halt_request) {
-			vcpu->halt_request = 0;
+		if (vcpu->vmx->halt_request) {
+			vcpu->vmx->halt_request = 0;
  			return kvm_emulate_halt(vcpu);
  		}
  		return 1;
@@ -2225,28 +2222,34 @@ static void vmx_inject_page_fault(struct

  static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
  {
-	if (vcpu->vmcs) {
+	if (vcpu->vmx->vmcs) {
  		on_each_cpu(__vcpu_clear, vcpu, 0, 1);
-		free_vmcs(vcpu->vmcs);
-		vcpu->vmcs = NULL;
+		free_vmcs(vcpu->vmx->vmcs);
+		vcpu->vmx->vmcs = NULL;
+
  	}
  }

+static int vmx_vcpu_size(void)
+{
+	return sizeof(struct kvm_vcpu) + sizeof(struct kvm_vmx_data);
+}
+
  static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
  {
  	vmx_free_vmcs(vcpu);
  }

-static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
+static int vmx_init_vcpu(struct kvm_vcpu *vcpu)
  {
  	struct vmcs *vmcs;

-	vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!vcpu->guest_msrs)
+	vcpu->vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!vcpu->vmx->guest_msrs)
  		return -ENOMEM;

-	vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!vcpu->host_msrs)
+	vcpu->vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!vcpu->vmx->host_msrs)
  		goto out_free_guest_msrs;

  	vmcs = alloc_vmcs();
@@ -2254,18 +2257,18 @@ static int vmx_create_vcpu(struct kvm_vc
  		goto out_free_msrs;

  	vmcs_clear(vmcs);
-	vcpu->vmcs = vmcs;
+	vcpu->vmx->vmcs = vmcs;
  	vcpu->launched = 0;

  	return 0;

  out_free_msrs:
-	kfree(vcpu->host_msrs);
-	vcpu->host_msrs = NULL;
+	kfree(vcpu->vmx->host_msrs);
+	vcpu->vmx->host_msrs = NULL;

  out_free_guest_msrs:
-	kfree(vcpu->guest_msrs);
-	vcpu->guest_msrs = NULL;
+	kfree(vcpu->vmx->guest_msrs);
+	vcpu->vmx->guest_msrs = NULL;

  	return -ENOMEM;
  }
@@ -2278,7 +2281,8 @@ static struct kvm_arch_ops vmx_arch_ops
  	.hardware_enable = hardware_enable,
  	.hardware_disable = hardware_disable,

-	.vcpu_create = vmx_create_vcpu,
+	.vcpu_size = vmx_vcpu_size,
+	.vcpu_init = vmx_init_vcpu,
  	.vcpu_free = vmx_free_vcpu,

  	.vcpu_load = vmx_vcpu_load,
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index f60012d..4e821ed 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -1155,7 +1155,7 @@ special_insn:
  		DPRINTF("Urk! I don't handle SCAS.\n");
  		goto cannot_emulate;
  	case 0xf4:              /* hlt */
-		ctxt->vcpu->halt_request = 1;
+		ctxt->vcpu->vmx->halt_request = 1;
  		goto done;
  	case 0xc3: /* ret */
  		dst.ptr = &_eip;

[-- Attachment #2: Type: TEXT/PLAIN, Size: 28840 bytes --]

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 0f7a4d9..c631192 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -16,6 +16,7 @@ #include <linux/mm.h>
 #include <asm/signal.h>
 
 #include "vmx.h"
+#include "kvm_svm.h"
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
 
@@ -326,16 +327,64 @@ struct kvm_io_device *kvm_io_bus_find_de
 void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
 			     struct kvm_io_device *dev);
 
+struct kvm_vmx_data {
+	int msr_offset_efer;
+
+        #ifdef CONFIG_X86_64
+	int msr_offset_kernel_gs_base;
+        #endif
+
+        struct vmx_host_state {
+		int loaded;
+		u16 fs_sel, gs_sel, ldt_sel;
+		int fs_gs_ldt_reload_needed;
+	} host_state;
+
+	struct vmx_msr_entry *guest_msrs;
+	struct vmx_msr_entry *host_msrs;
+
+	struct {
+		int active;
+		u8 save_iopl;
+		struct kvm_save_segment {
+			u16 selector;
+			unsigned long base;
+			u32 limit;
+			u32 ar;
+		} tr, es, ds, fs, gs;
+	} rmode;
+	int halt_request; /* real mode */
+	
+	struct vmcs *vmcs;
+};
+
+struct kvm_svm_data {
+	struct vmcb *vmcb;
+	unsigned long vmcb_pa;
+	struct svm_cpu_data *svm_data;
+	uint64_t asid_generation;
+
+	unsigned long db_regs[NUM_DB_REGS];
+
+	u64 next_rip;
+
+	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
+	u64 host_gs_base;
+	unsigned long host_cr2;
+	unsigned long host_db_regs[NUM_DB_REGS];
+	unsigned long host_dr6;
+	unsigned long host_dr7;
+};
+
+
 struct kvm_vcpu {
 	struct kvm *kvm;
+	struct mutex *mutex; /* refers to corresponding vcpu_mutex on kvm */
 	int vcpu_id;
-	union {
-		struct vmcs *vmcs;
-		struct vcpu_svm *svm;
-	};
-	struct mutex mutex;
+
 	int   cpu;
 	int   launched;
+
 	u64 host_tsc;
 	struct kvm_run *run;
 	int interrupt_window_open;
@@ -361,12 +410,6 @@ #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE
 	u64 ia32_misc_enable_msr;
 	int nmsrs;
 	int save_nmsrs;
-	int msr_offset_efer;
-#ifdef CONFIG_X86_64
-	int msr_offset_kernel_gs_base;
-#endif
-	struct vmx_msr_entry *guest_msrs;
-	struct vmx_msr_entry *host_msrs;
 
 	struct kvm_mmu mmu;
 
@@ -385,11 +428,6 @@ #endif
 	char *guest_fx_image;
 	int fpu_active;
 	int guest_fpu_loaded;
-	struct vmx_host_state {
-		int loaded;
-		u16 fs_sel, gs_sel, ldt_sel;
-		int fs_gs_ldt_reload_needed;
-	} vmx_host_state;
 
 	int mmio_needed;
 	int mmio_read_completed;
@@ -406,22 +444,16 @@ #endif
 
 	struct kvm_stat stat;
 
-	struct {
-		int active;
-		u8 save_iopl;
-		struct kvm_save_segment {
-			u16 selector;
-			unsigned long base;
-			u32 limit;
-			u32 ar;
-		} tr, es, ds, fs, gs;
-	} rmode;
-	int halt_request; /* real mode on Intel only */
-
 	int cpuid_nent;
 	struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+
+	union {
+		struct kvm_vmx_data vmx[0];
+		struct kvm_svm_data svm[0];
+	};
 };
 
+
 struct kvm_mem_alias {
 	gfn_t base_gfn;
 	unsigned long npages;
@@ -448,8 +480,11 @@ struct kvm {
 	struct list_head active_mmu_pages;
 	int n_free_mmu_pages;
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
+
 	int nvcpus;
-	struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
+	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+	struct mutex vcpu_mutex[KVM_MAX_VCPUS];
+	
 	int memory_config_version;
 	int busy;
 	unsigned long rmap_overflow;
@@ -472,7 +507,8 @@ struct kvm_arch_ops {
 	int (*hardware_setup)(void);               /* __init */
 	void (*hardware_unsetup)(void);            /* __exit */
 
-	int (*vcpu_create)(struct kvm_vcpu *vcpu);
+	int (*vcpu_size)(void);
+	int (*vcpu_init)(struct kvm_vcpu *vcpu);
 	void (*vcpu_free)(struct kvm_vcpu *vcpu);
 
 	void (*vcpu_load)(struct kvm_vcpu *vcpu);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 4d2ef9b..86345be 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -234,7 +234,7 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
  */
 static void vcpu_load(struct kvm_vcpu *vcpu)
 {
-	mutex_lock(&vcpu->mutex);
+	mutex_lock(vcpu->mutex);
 	kvm_arch_ops->vcpu_load(vcpu);
 }
 
@@ -244,11 +244,12 @@ static void vcpu_load(struct kvm_vcpu *v
  */
 static struct kvm_vcpu *vcpu_load_slot(struct kvm *kvm, int slot)
 {
-	struct kvm_vcpu *vcpu = &kvm->vcpus[slot];
+	struct kvm_vcpu *vcpu;
 
-	mutex_lock(&vcpu->mutex);
-	if (!vcpu->vmcs) {
-		mutex_unlock(&vcpu->mutex);
+	mutex_lock(&kvm->vcpu_mutex[slot]);
+	vcpu = kvm->vcpus[slot];
+	if (!vcpu) {
+		mutex_unlock(&kvm->vcpu_mutex[slot]);
 		return NULL;
 	}
 	kvm_arch_ops->vcpu_load(vcpu);
@@ -258,7 +259,7 @@ static struct kvm_vcpu *vcpu_load_slot(s
 static void vcpu_put(struct kvm_vcpu *vcpu)
 {
 	kvm_arch_ops->vcpu_put(vcpu);
-	mutex_unlock(&vcpu->mutex);
+	mutex_unlock(vcpu->mutex);
 }
 
 static void ack_flush(void *_completed)
@@ -279,7 +280,7 @@ void kvm_flush_remote_tlbs(struct kvm *k
 	cpus_clear(cpus);
 	needed = 0;
 	for (i = 0; i < kvm->nvcpus; ++i) {
-		vcpu = &kvm->vcpus[i];
+		vcpu = kvm->vcpus[i];
+		if (!vcpu)
+			continue;
 		if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests))
 			continue;
 		cpu = vcpu->cpu;
@@ -318,14 +319,10 @@ static struct kvm *kvm_create_vm(void)
 	list_add(&kvm->vm_list, &vm_list);
 	spin_unlock(&kvm_lock);
 	kvm_io_bus_init(&kvm->mmio_bus);
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		struct kvm_vcpu *vcpu = &kvm->vcpus[i];
 
-		mutex_init(&vcpu->mutex);
-		vcpu->cpu = -1;
-		vcpu->kvm = kvm;
-		vcpu->mmu.root_hpa = INVALID_PAGE;
-	}
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) 
+		mutex_init(&kvm->vcpu_mutex[i]);
+
 	return kvm;
 }
 
@@ -379,7 +376,7 @@ static void free_pio_guest_pages(struct 
 
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->vmcs)
+	if (!vcpu)
 		return;
 
 	vcpu_load(vcpu);
@@ -389,7 +386,7 @@ static void kvm_unload_vcpu_mmu(struct k
 
 static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->vmcs)
+	if (!vcpu)
 		return;
 
 	vcpu_load(vcpu);
@@ -401,6 +398,8 @@ static void kvm_free_vcpu(struct kvm_vcp
 	free_page((unsigned long)vcpu->pio_data);
 	vcpu->pio_data = NULL;
 	free_pio_guest_pages(vcpu);
+
+	kfree(vcpu);
 }
 
 static void kvm_free_vcpus(struct kvm *kvm)
@@ -411,9 +410,11 @@ static void kvm_free_vcpus(struct kvm *k
 	 * Unpin any mmu pages first.
 	 */
 	for (i = 0; i < KVM_MAX_VCPUS; ++i)
-		kvm_unload_vcpu_mmu(&kvm->vcpus[i]);
-	for (i = 0; i < KVM_MAX_VCPUS; ++i)
-		kvm_free_vcpu(&kvm->vcpus[i]);
+		kvm_unload_vcpu_mmu(kvm->vcpus[i]);
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		kvm_free_vcpu(kvm->vcpus[i]);
+		kvm->vcpus[i] = NULL;
+	}
 }
 
 static int kvm_dev_release(struct inode *inode, struct file *filp)
@@ -430,6 +431,7 @@ static void kvm_destroy_vm(struct kvm *k
 	kvm_io_bus_destroy(&kvm->mmio_bus);
 	kvm_free_vcpus(kvm);
 	kvm_free_physmem(kvm);
+
 	kfree(kvm);
 }
 
@@ -796,7 +798,7 @@ raced:
 
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		struct kvm_vcpu *vcpu;
-
+		
 		vcpu = vcpu_load_slot(kvm, i);
 		if (!vcpu)
 			continue;
@@ -922,11 +924,11 @@ static int kvm_vm_ioctl_set_memory_alias
 
 	spin_unlock(&kvm->lock);
 
-	vcpu_load(&kvm->vcpus[0]);
+	/* vcpus[0] may not have been created yet; once set it is never
+	 * cleared before VM teardown, so plain pointer checks suffice. */
+	if (kvm->vcpus[0])
+		vcpu_load(kvm->vcpus[0]);
 	spin_lock(&kvm->lock);
-	kvm_mmu_zap_all(&kvm->vcpus[0]);
+	if (kvm->vcpus[0])
+		kvm_mmu_zap_all(kvm->vcpus[0]);
 	spin_unlock(&kvm->lock);
-	vcpu_put(&kvm->vcpus[0]);
+	if (kvm->vcpus[0])
+		vcpu_put(kvm->vcpus[0]);
 
 	return 0;
 
@@ -2380,40 +2382,51 @@ static int kvm_vm_ioctl_create_vcpu(stru
 {
 	int r;
 	struct kvm_vcpu *vcpu;
-	struct page *page;
+	struct page *pio_page, *run_page;
 
 	r = -EINVAL;
 	if (!valid_vcpu(n))
 		goto out;
 
-	vcpu = &kvm->vcpus[n];
-	vcpu->vcpu_id = n;
+	mutex_lock(&kvm->vcpu_mutex[n]);
+	if (kvm->vcpus[n]) {
+		r = -EEXIST;
+		goto out_unlock;
+	}
 
-	mutex_lock(&vcpu->mutex);
+	vcpu = kzalloc(kvm_arch_ops->vcpu_size(), GFP_KERNEL);
 
-	if (vcpu->vmcs) {
-		mutex_unlock(&vcpu->mutex);
-		return -EEXIST;
+	if (!vcpu) {
+		r = -ENOMEM;
+		goto out_unlock;
 	}
 
-	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	vcpu->mutex = &kvm->vcpu_mutex[n];
+	vcpu->cpu = -1;
+	vcpu->kvm = kvm;
+	vcpu->mmu.root_hpa = INVALID_PAGE;
+
+	vcpu->vcpu_id = n;
+	kvm->vcpus[n] = vcpu;
+
+	run_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	r = -ENOMEM;
-	if (!page)
-		goto out_unlock;
-	vcpu->run = page_address(page);
+	if (!run_page)
+		goto out_deallocate;
+	vcpu->run = page_address(run_page);
 
-	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	pio_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	r = -ENOMEM;
-	if (!page)
+	if (!pio_page)
 		goto out_free_run;
-	vcpu->pio_data = page_address(page);
+	vcpu->pio_data = page_address(pio_page);
 
 	vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf,
 					   FX_IMAGE_ALIGN);
 	vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE;
 	vcpu->cr0 = 0x10;
-
-	r = kvm_arch_ops->vcpu_create(vcpu);
+	
+	r = kvm_arch_ops->vcpu_init(vcpu);
 	if (r < 0)
 		goto out_free_vcpus;
 
@@ -2425,6 +2438,7 @@ static int kvm_vm_ioctl_create_vcpu(stru
 	r = kvm_mmu_setup(vcpu);
 	if (r >= 0)
 		r = kvm_arch_ops->vcpu_setup(vcpu);
+
 	vcpu_put(vcpu);
 
 	if (r < 0)
@@ -2438,20 +2452,25 @@ static int kvm_vm_ioctl_create_vcpu(stru
 	if (n >= kvm->nvcpus)
 		kvm->nvcpus = n + 1;
 	spin_unlock(&kvm_lock);
-
+	
 	return r;
 
 out_free_vcpus:
 	kvm_free_vcpu(vcpu);
+	vcpu = NULL;
 out_free_run:
-	free_page((unsigned long)vcpu->run);
-	vcpu->run = NULL;
+	free_page((unsigned long)run_page);
+out_deallocate:
+	kfree(vcpu);
+	kvm->vcpus[n] = NULL;
 out_unlock:
-	mutex_unlock(&vcpu->mutex);
+	mutex_unlock(&kvm->vcpu_mutex[n]);
+	
 out:
 	return r;
 }
 
+
 static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
 {
 	u64 efer;
@@ -2846,9 +2865,7 @@ static int kvm_dev_ioctl_create_vm(void)
 		kvm_destroy_vm(kvm);
 		return r;
 	}
-
 	kvm->filp = file;
-
 	return fd;
 }
 
@@ -2962,7 +2979,7 @@ static void decache_vcpus_on_cpu(int cpu
 	spin_lock(&kvm_lock);
 	list_for_each_entry(vm, &vm_list, vm_list)
 		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = &vm->vcpus[i];
+			vcpu = vm->vcpus[i];
+			if (!vcpu)
+				continue;
 			/*
 			 * If the vcpu is locked, then it is running on some
 			 * other cpu and therefore it is not cached on the
@@ -2971,12 +2988,12 @@ static void decache_vcpus_on_cpu(int cpu
 			 * If it's not locked, check the last cpu it executed
 			 * on.
 			 */
-			if (mutex_trylock(&vcpu->mutex)) {
+			if (mutex_trylock(vcpu->mutex)) {
 				if (vcpu->cpu == cpu) {
 					kvm_arch_ops->vcpu_decache(vcpu);
 					vcpu->cpu = -1;
 				}
-				mutex_unlock(&vcpu->mutex);
+				mutex_unlock(vcpu->mutex);
 			}
 		}
 	spin_unlock(&kvm_lock);
@@ -3080,7 +3097,7 @@ static u64 stat_get(void *_offset)
 	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
 		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = &kvm->vcpus[i];
+			vcpu = kvm->vcpus[i];
+			if (!vcpu)
+				continue;
 			total += *(u32 *)((void *)vcpu + offset);
 		}
 	spin_unlock(&kvm_lock);
diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h
index a869983..48ad325 100644
--- a/drivers/kvm/kvm_svm.h
+++ b/drivers/kvm/kvm_svm.h
@@ -7,7 +7,6 @@ #include <linux/list.h>
 #include <asm/msr.h>
 
 #include "svm.h"
-#include "kvm.h"
 
 static const u32 host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64
@@ -20,23 +19,4 @@ #endif
 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
 #define NUM_DB_REGS 4
 
-struct vcpu_svm {
-	struct vmcb *vmcb;
-	unsigned long vmcb_pa;
-	struct svm_cpu_data *svm_data;
-	uint64_t asid_generation;
-
-	unsigned long db_regs[NUM_DB_REGS];
-
-	u64 next_rip;
-
-	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
-	u64 host_gs_base;
-	unsigned long host_cr2;
-	unsigned long host_db_regs[NUM_DB_REGS];
-	unsigned long host_dr6;
-	unsigned long host_dr7;
-};
-
 #endif
-
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 52a11cc..40a1baf 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -14,7 +14,7 @@
  *
  */
 
-#include "kvm_svm.h"
+#include "kvm.h"
 #include "x86_emulate.h"
 
 #include <linux/module.h>
@@ -568,18 +568,20 @@ static void init_vmcb(struct vmcb *vmcb)
 	/* rdx = ?? */
 }
 
-static int svm_create_vcpu(struct kvm_vcpu *vcpu)
+static int svm_vcpu_size(void)
+{
+	return sizeof(struct kvm_vcpu) + sizeof(struct kvm_svm_data);
+}
+
+static int svm_init_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct page *page;
 	int r;
 
 	r = -ENOMEM;
-	vcpu->svm = kzalloc(sizeof *vcpu->svm, GFP_KERNEL);
-	if (!vcpu->svm)
-		goto out1;
 	page = alloc_page(GFP_KERNEL);
 	if (!page)
-		goto out2;
+		goto out1;
 
 	vcpu->svm->vmcb = page_address(page);
 	clear_page(vcpu->svm->vmcb);
@@ -596,8 +598,6 @@ static int svm_create_vcpu(struct kvm_vc
 
 	return 0;
 
-out2:
-	kfree(vcpu->svm);
 out1:
 	return r;
 }
@@ -1610,7 +1610,7 @@ #endif
 		:
 		: [vcpu]"a"(vcpu),
 		  [svm]"i"(offsetof(struct kvm_vcpu, svm)),
-		  [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
+		  [vmcb]"i"(offsetof(struct kvm_svm_data, vmcb_pa)),
 		  [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])),
 		  [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])),
 		  [rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])),
@@ -1765,7 +1765,8 @@ static struct kvm_arch_ops svm_arch_ops 
 	.hardware_enable = svm_hardware_enable,
 	.hardware_disable = svm_hardware_disable,
 
-	.vcpu_create = svm_create_vcpu,
+	.vcpu_size = svm_vcpu_size,
+	.vcpu_init = svm_init_vcpu,
 	.vcpu_free = svm_free_vcpu,
 
 	.vcpu_load = svm_vcpu_load,
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 7fa62c7..da14d2f 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -96,9 +96,9 @@ static inline u64 msr_efer_save_restore_
 
 static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu)
 {
-	int efer_offset = vcpu->msr_offset_efer;
-	return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) !=
-		msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]);
+	int efer_offset = vcpu->vmx->msr_offset_efer;
+	return msr_efer_save_restore_bits(vcpu->vmx->host_msrs[efer_offset]) !=
+		msr_efer_save_restore_bits(vcpu->vmx->guest_msrs[efer_offset]);
 }
 
 static inline int is_page_fault(u32 intr_info)
@@ -126,7 +126,7 @@ static int __find_msr_index(struct kvm_v
 	int i;
 
 	for (i = 0; i < vcpu->nmsrs; ++i)
-		if (vcpu->guest_msrs[i].index == msr)
+		if (vcpu->vmx->guest_msrs[i].index == msr)
 			return i;
 	return -1;
 }
@@ -137,7 +137,7 @@ static struct vmx_msr_entry *find_msr_en
 
 	i = __find_msr_index(vcpu, msr);
 	if (i >= 0)
-		return &vcpu->guest_msrs[i];
+		return &vcpu->vmx->guest_msrs[i];
 	return NULL;
 }
 
@@ -160,8 +160,8 @@ static void __vcpu_clear(void *arg)
 	int cpu = raw_smp_processor_id();
 
 	if (vcpu->cpu == cpu)
-		vmcs_clear(vcpu->vmcs);
-	if (per_cpu(current_vmcs, cpu) == vcpu->vmcs)
+		vmcs_clear(vcpu->vmx->vmcs);
+	if (per_cpu(current_vmcs, cpu) == vcpu->vmx->vmcs)
 		per_cpu(current_vmcs, cpu) = NULL;
 	rdtscll(vcpu->host_tsc);
 }
@@ -260,7 +260,7 @@ static void update_exception_bitmap(stru
 		eb |= 1u << NM_VECTOR;
 	if (vcpu->guest_debug.enabled)
 		eb |= 1u << 1;
-	if (vcpu->rmode.active)
+	if (vcpu->vmx->rmode.active)
 		eb = ~0;
 	vmcs_write32(EXCEPTION_BITMAP, eb);
 }
@@ -285,19 +285,19 @@ #endif
 static void load_transition_efer(struct kvm_vcpu *vcpu)
 {
 	u64 trans_efer;
-	int efer_offset = vcpu->msr_offset_efer;
+	int efer_offset = vcpu->vmx->msr_offset_efer;
 
-	trans_efer = vcpu->host_msrs[efer_offset].data;
+	trans_efer = vcpu->vmx->host_msrs[efer_offset].data;
 	trans_efer &= ~EFER_SAVE_RESTORE_BITS;
 	trans_efer |= msr_efer_save_restore_bits(
-				vcpu->guest_msrs[efer_offset]);
+				vcpu->vmx->guest_msrs[efer_offset]);
 	wrmsrl(MSR_EFER, trans_efer);
 	vcpu->stat.efer_reload++;
 }
 
 static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 {
-	struct vmx_host_state *hs = &vcpu->vmx_host_state;
+	struct vmx_host_state *hs = &vcpu->vmx->host_state;
 
 	if (hs->loaded)
 		return;
@@ -334,17 +334,17 @@ #endif
 
 #ifdef CONFIG_X86_64
 	if (is_long_mode(vcpu)) {
-		save_msrs(vcpu->host_msrs + vcpu->msr_offset_kernel_gs_base, 1);
+		save_msrs(vcpu->vmx->host_msrs + vcpu->vmx->msr_offset_kernel_gs_base, 1);
 	}
 #endif
-	load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+	load_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
 	if (msr_efer_need_save_restore(vcpu))
 		load_transition_efer(vcpu);
 }
 
 static void vmx_load_host_state(struct kvm_vcpu *vcpu)
 {
-	struct vmx_host_state *hs = &vcpu->vmx_host_state;
+	struct vmx_host_state *hs = &vcpu->vmx->host_state;
 
 	if (!hs->loaded)
 		return;
@@ -366,10 +366,10 @@ #endif
 
 		reload_tss();
 	}
-	save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
-	load_msrs(vcpu->host_msrs, vcpu->save_nmsrs);
+	save_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
+	load_msrs(vcpu->vmx->host_msrs, vcpu->save_nmsrs);
 	if (msr_efer_need_save_restore(vcpu))
-		load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1);
+		load_msrs(vcpu->vmx->host_msrs + vcpu->vmx->msr_offset_efer, 1);
 }
 
 /*
@@ -378,7 +378,7 @@ #endif
  */
 static void vmx_vcpu_load(struct kvm_vcpu *vcpu)
 {
-	u64 phys_addr = __pa(vcpu->vmcs);
+	u64 phys_addr = __pa(vcpu->vmx->vmcs);
 	int cpu;
 	u64 tsc_this, delta;
 
@@ -387,16 +387,16 @@ static void vmx_vcpu_load(struct kvm_vcp
 	if (vcpu->cpu != cpu)
 		vcpu_clear(vcpu);
 
-	if (per_cpu(current_vmcs, cpu) != vcpu->vmcs) {
+	if (per_cpu(current_vmcs, cpu) != vcpu->vmx->vmcs) {
 		u8 error;
 
-		per_cpu(current_vmcs, cpu) = vcpu->vmcs;
+		per_cpu(current_vmcs, cpu) = vcpu->vmx->vmcs;
 		asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0"
 			      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
 			      : "cc");
 		if (error)
 			printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
-			       vcpu->vmcs, phys_addr);
+			       vcpu->vmx->vmcs, phys_addr);
 	}
 
 	if (vcpu->cpu != cpu) {
@@ -504,12 +504,12 @@ static void vmx_inject_gp(struct kvm_vcp
 void move_msr_up(struct kvm_vcpu *vcpu, int from, int to)
 {
 	struct vmx_msr_entry tmp;
-	tmp = vcpu->guest_msrs[to];
-	vcpu->guest_msrs[to] = vcpu->guest_msrs[from];
-	vcpu->guest_msrs[from] = tmp;
-	tmp = vcpu->host_msrs[to];
-	vcpu->host_msrs[to] = vcpu->host_msrs[from];
-	vcpu->host_msrs[from] = tmp;
+	tmp = vcpu->vmx->guest_msrs[to];
+	vcpu->vmx->guest_msrs[to] = vcpu->vmx->guest_msrs[from];
+	vcpu->vmx->guest_msrs[from] = tmp;
+	tmp = vcpu->vmx->host_msrs[to];
+	vcpu->vmx->host_msrs[to] = vcpu->vmx->host_msrs[from];
+	vcpu->vmx->host_msrs[from] = tmp;
 }
 
 /*
@@ -550,10 +550,10 @@ #endif
 	vcpu->save_nmsrs = save_nmsrs;
 
 #ifdef CONFIG_X86_64
-	vcpu->msr_offset_kernel_gs_base =
+	vcpu->vmx->msr_offset_kernel_gs_base =
 		__find_msr_index(vcpu, MSR_KERNEL_GS_BASE);
 #endif
-	vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
+	vcpu->vmx->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
 }
 
 /*
@@ -646,7 +646,7 @@ static int vmx_set_msr(struct kvm_vcpu *
 #ifdef CONFIG_X86_64
 	case MSR_EFER:
 		ret = kvm_set_msr_common(vcpu, msr_index, data);
-		if (vcpu->vmx_host_state.loaded)
+		if (vcpu->vmx->host_state.loaded)
 			load_transition_efer(vcpu);
 		break;
 	case MSR_FS_BASE:
@@ -672,8 +672,8 @@ #endif
 		msr = find_msr_entry(vcpu, msr_index);
 		if (msr) {
 			msr->data = data;
-			if (vcpu->vmx_host_state.loaded)
-				load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+			if (vcpu->vmx->host_state.loaded)
+				load_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
 			break;
 		}
 		ret = kvm_set_msr_common(vcpu, msr_index, data);
@@ -868,15 +868,15 @@ static void enter_pmode(struct kvm_vcpu 
 {
 	unsigned long flags;
 
-	vcpu->rmode.active = 0;
+	vcpu->vmx->rmode.active = 0;
 
-	vmcs_writel(GUEST_TR_BASE, vcpu->rmode.tr.base);
-	vmcs_write32(GUEST_TR_LIMIT, vcpu->rmode.tr.limit);
-	vmcs_write32(GUEST_TR_AR_BYTES, vcpu->rmode.tr.ar);
+	vmcs_writel(GUEST_TR_BASE, vcpu->vmx->rmode.tr.base);
+	vmcs_write32(GUEST_TR_LIMIT, vcpu->vmx->rmode.tr.limit);
+	vmcs_write32(GUEST_TR_AR_BYTES, vcpu->vmx->rmode.tr.ar);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
 	flags &= ~(IOPL_MASK | X86_EFLAGS_VM);
-	flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT);
+	flags |= (vcpu->vmx->rmode.save_iopl << IOPL_SHIFT);
 	vmcs_writel(GUEST_RFLAGS, flags);
 
 	vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~CR4_VME_MASK) |
@@ -884,10 +884,10 @@ static void enter_pmode(struct kvm_vcpu 
 
 	update_exception_bitmap(vcpu);
 
-	fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->rmode.es);
-	fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->rmode.ds);
-	fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->rmode.gs);
-	fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->rmode.fs);
+	fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->vmx->rmode.es);
+	fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->vmx->rmode.ds);
+	fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->vmx->rmode.gs);
+	fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->vmx->rmode.fs);
 
 	vmcs_write16(GUEST_SS_SELECTOR, 0);
 	vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
@@ -920,19 +920,19 @@ static void enter_rmode(struct kvm_vcpu 
 {
 	unsigned long flags;
 
-	vcpu->rmode.active = 1;
+	vcpu->vmx->rmode.active = 1;
 
-	vcpu->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
+	vcpu->vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
 	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
 
-	vcpu->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
+	vcpu->vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
 	vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
 
-	vcpu->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
+	vcpu->vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
 	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
-	vcpu->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;
+	vcpu->vmx->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;
 
 	flags |= IOPL_MASK | X86_EFLAGS_VM;
 
@@ -950,11 +950,10 @@ static void enter_rmode(struct kvm_vcpu 
 		vmcs_writel(GUEST_CS_BASE, 0xf0000);
 	vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
 
-	fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es);
-	fix_rmode_seg(VCPU_SREG_DS, &vcpu->rmode.ds);
-	fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs);
-	fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs);
-
+	fix_rmode_seg(VCPU_SREG_ES, &vcpu->vmx->rmode.es);
+	fix_rmode_seg(VCPU_SREG_DS, &vcpu->vmx->rmode.ds);
+	fix_rmode_seg(VCPU_SREG_GS, &vcpu->vmx->rmode.gs);
+	fix_rmode_seg(VCPU_SREG_FS, &vcpu->vmx->rmode.fs);
 	init_rmode_tss(vcpu->kvm);
 }
 
@@ -1002,10 +1001,10 @@ static void vmx_set_cr0(struct kvm_vcpu 
 {
 	vmx_fpu_deactivate(vcpu);
 
-	if (vcpu->rmode.active && (cr0 & CR0_PE_MASK))
+	if (vcpu->vmx->rmode.active && (cr0 & CR0_PE_MASK))
 		enter_pmode(vcpu);
 
-	if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK))
+	if (!vcpu->vmx->rmode.active && !(cr0 & CR0_PE_MASK))
 		enter_rmode(vcpu);
 
 #ifdef CONFIG_X86_64
@@ -1036,7 +1035,7 @@ static void vmx_set_cr3(struct kvm_vcpu 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
 	vmcs_writel(CR4_READ_SHADOW, cr4);
-	vmcs_writel(GUEST_CR4, cr4 | (vcpu->rmode.active ?
+	vmcs_writel(GUEST_CR4, cr4 | (vcpu->vmx->rmode.active ?
 		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON));
 	vcpu->cr4 = cr4;
 }
@@ -1124,17 +1123,17 @@ static void vmx_set_segment(struct kvm_v
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	u32 ar;
 
-	if (vcpu->rmode.active && seg == VCPU_SREG_TR) {
-		vcpu->rmode.tr.selector = var->selector;
-		vcpu->rmode.tr.base = var->base;
-		vcpu->rmode.tr.limit = var->limit;
-		vcpu->rmode.tr.ar = vmx_segment_access_rights(var);
+	if (vcpu->vmx->rmode.active && seg == VCPU_SREG_TR) {
+		vcpu->vmx->rmode.tr.selector = var->selector;
+		vcpu->vmx->rmode.tr.base = var->base;
+		vcpu->vmx->rmode.tr.limit = var->limit;
+		vcpu->vmx->rmode.tr.ar = vmx_segment_access_rights(var);
 		return;
 	}
 	vmcs_writel(sf->base, var->base);
 	vmcs_write32(sf->limit, var->limit);
 	vmcs_write16(sf->selector, var->selector);
-	if (vcpu->rmode.active && var->s) {
+	if (vcpu->vmx->rmode.active && var->s) {
 		/*
 		 * Hack real-mode segments into vm86 compatibility.
 		 */
@@ -1253,11 +1252,11 @@ static int vmx_vcpu_setup(struct kvm_vcp
 	vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val();
 	vcpu->cr8 = 0;
 	vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+
 	if (vcpu->vcpu_id == 0)
 		vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
 
 	fx_init(vcpu);
-
 	/*
 	 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
 	 * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4.  Sigh.
@@ -1384,13 +1383,12 @@ #endif
 		if (wrmsr_safe(index, data_low, data_high) < 0)
 			continue;
 		data = data_low | ((u64)data_high << 32);
-		vcpu->host_msrs[j].index = index;
-		vcpu->host_msrs[j].reserved = 0;
-		vcpu->host_msrs[j].data = data;
-		vcpu->guest_msrs[j] = vcpu->host_msrs[j];
+		vcpu->vmx->host_msrs[j].index = index;
+		vcpu->vmx->host_msrs[j].reserved = 0;
+		vcpu->vmx->host_msrs[j].data = data;
+		vcpu->vmx->guest_msrs[j] = vcpu->vmx->host_msrs[j];
 		++vcpu->nmsrs;
 	}
-
 	setup_msrs(vcpu);
 
 	vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS, VM_EXIT_CONTROLS,
@@ -1408,7 +1406,6 @@ #endif
 
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
 	vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
-
 	vcpu->cr0 = 0x60000010;
 	vmx_set_cr0(vcpu, vcpu->cr0); // enter rmode
 	vmx_set_cr4(vcpu, 0);
@@ -1479,7 +1476,7 @@ static void kvm_do_inject_irq(struct kvm
 	if (!vcpu->irq_pending[word_index])
 		clear_bit(word_index, &vcpu->irq_summary);
 
-	if (vcpu->rmode.active) {
+	if (vcpu->vmx->rmode.active) {
 		inject_rmode_irq(vcpu, irq);
 		return;
 	}
@@ -1538,7 +1535,7 @@ static void kvm_guest_debug_pre(struct k
 static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 				  int vec, u32 err_code)
 {
-	if (!vcpu->rmode.active)
+	if (!vcpu->vmx->rmode.active)
 		return 0;
 
 	/*
@@ -1620,11 +1617,11 @@ static int handle_exception(struct kvm_v
 		}
 	}
 
-	if (vcpu->rmode.active &&
+	if (vcpu->vmx->rmode.active &&
 	    handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
 								error_code)) {
-		if (vcpu->halt_request) {
-			vcpu->halt_request = 0;
+		if (vcpu->vmx->halt_request) {
+			vcpu->vmx->halt_request = 0;
 			return kvm_emulate_halt(vcpu);
 		}
 		return 1;
@@ -2225,28 +2222,34 @@ static void vmx_inject_page_fault(struct
 
 static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->vmcs) {
+	if (vcpu->vmx->vmcs) {
 		on_each_cpu(__vcpu_clear, vcpu, 0, 1);
-		free_vmcs(vcpu->vmcs);
-		vcpu->vmcs = NULL;
+		free_vmcs(vcpu->vmx->vmcs);
+		vcpu->vmx->vmcs = NULL;
+
 	}
 }
 
+static int vmx_vcpu_size(void)
+{
+	return sizeof(struct kvm_vcpu) + sizeof(struct kvm_vmx_data);
+}
+
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	vmx_free_vmcs(vcpu);
 }
 
-static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
+static int vmx_init_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vmcs *vmcs;
 
-	vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!vcpu->guest_msrs)
+	vcpu->vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!vcpu->vmx->guest_msrs)
 		return -ENOMEM;
 
-	vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!vcpu->host_msrs)
+	vcpu->vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!vcpu->vmx->host_msrs)
 		goto out_free_guest_msrs;
 
 	vmcs = alloc_vmcs();
@@ -2254,18 +2257,18 @@ static int vmx_create_vcpu(struct kvm_vc
 		goto out_free_msrs;
 
 	vmcs_clear(vmcs);
-	vcpu->vmcs = vmcs;
+	vcpu->vmx->vmcs = vmcs;
 	vcpu->launched = 0;
 
 	return 0;
 
 out_free_msrs:
-	kfree(vcpu->host_msrs);
-	vcpu->host_msrs = NULL;
+	kfree(vcpu->vmx->host_msrs);
+	vcpu->vmx->host_msrs = NULL;
 
 out_free_guest_msrs:
-	kfree(vcpu->guest_msrs);
-	vcpu->guest_msrs = NULL;
+	kfree(vcpu->vmx->guest_msrs);
+	vcpu->vmx->guest_msrs = NULL;
 
 	return -ENOMEM;
 }
@@ -2278,7 +2281,8 @@ static struct kvm_arch_ops vmx_arch_ops 
 	.hardware_enable = hardware_enable,
 	.hardware_disable = hardware_disable,
 
-	.vcpu_create = vmx_create_vcpu,
+	.vcpu_size = vmx_vcpu_size,
+	.vcpu_init = vmx_init_vcpu,
 	.vcpu_free = vmx_free_vcpu,
 
 	.vcpu_load = vmx_vcpu_load,
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index f60012d..4e821ed 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -1155,7 +1155,7 @@ special_insn:
 		DPRINTF("Urk! I don't handle SCAS.\n");
 		goto cannot_emulate;
 	case 0xf4:              /* hlt */
-		ctxt->vcpu->halt_request = 1;
+		ctxt->vcpu->vmx->halt_request = 1;
 		goto done;
 	case 0xc3: /* ret */
 		dst.ptr = &_eip;

[-- Attachment #3: Type: text/plain, Size: 286 bytes --]

-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

[-- Attachment #4: Type: text/plain, Size: 186 bytes --]

_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] dynamically create vcpus + vmx/svm structures
       [not found] ` <Pine.LNX.4.64.0707121815040.23503-hxTPNdr267xSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
@ 2007-07-13  7:59   ` Avi Kivity
       [not found]     ` <ed628a920707131534x7fe57ca3sfa09d9d79412d9c6@mail.gmail.com>
  0 siblings, 1 reply; 5+ messages in thread
From: Avi Kivity @ 2007-07-13  7:59 UTC (permalink / raw)
  To: Paul Turner; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Paul Turner wrote:
> From: Paul Turner <pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
>
> - vcpus now allocated on demand
> - vmx/svm fields separated into arch specific structures on vcpus
> - vmx/svm fields now only allocated on corresponding architectures
>
> - Paul
>
> diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
> index 0f7a4d9..c631192 100644
> --- a/drivers/kvm/kvm.h
> +++ b/drivers/kvm/kvm.h
> @@ -16,6 +16,7 @@ #include <linux/mm.h>
>  #include <asm/signal.h>
>
>  #include "vmx.h"
> +#include "kvm_svm.h"

This can probably be avoided, see below.

>  #include <linux/kvm.h>
>  #include <linux/kvm_para.h>
>
> @@ -326,16 +327,64 @@ struct kvm_io_device *kvm_io_bus_find_de
>  void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
>                   struct kvm_io_device *dev);
>
> +struct kvm_vmx_data {
> +    int msr_offset_efer;
> +
> +        #ifdef CONFIG_X86_64
> +    int msr_offset_kernel_gs_base;
> +        #endif
> +
> +        struct vmx_host_state {
> +        int loaded;
> +        u16 fs_sel, gs_sel, ldt_sel;
> +        int fs_gs_ldt_reload_needed;
> +    } host_state;
> +
> +    struct vmx_msr_entry *guest_msrs;
> +    struct vmx_msr_entry *host_msrs;
> +
> +    struct {
> +        int active;
> +        u8 save_iopl;
> +        struct kvm_save_segment {
> +            u16 selector;
> +            unsigned long base;
> +            u32 limit;
> +            u32 ar;
> +        } tr, es, ds, fs, gs;
> +    } rmode;
> +    int halt_request; /* real mode */
> +
> +    struct vmcs *vmcs;
> +};
> +

If this is moved to vmx.c, we can avoid including vmx.h and have no arch
dependent code here (given that we don't even need the size).

> +struct kvm_svm_data {
> +    struct vmcb *vmcb;
> +    unsigned long vmcb_pa;
> +    struct svm_cpu_data *svm_data;
> +    uint64_t asid_generation;
> +
> +    unsigned long db_regs[NUM_DB_REGS];
> +
> +    u64 next_rip;
> +
> +    u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
> +    u64 host_gs_base;
> +    unsigned long host_cr2;
> +    unsigned long host_db_regs[NUM_DB_REGS];
> +    unsigned long host_dr6;
> +    unsigned long host_dr7;
> +};

This can remain in kvm_svm.h.


> +
> +
>  struct kvm_vcpu {
>      struct kvm *kvm;
> +    struct mutex *mutex; /* refers to corresponding vcpu_mutex on kvm */

Please keep this as a real structure, not a pointer.  Existence testing
of the vcpu is now simply if (kvm->vcpus[slot]).

> +

No gratuitous empty lines please.

>  struct kvm_mem_alias {
>      gfn_t base_gfn;
>      unsigned long npages;
> @@ -448,8 +480,11 @@ struct kvm {
>      struct list_head active_mmu_pages;
>      int n_free_mmu_pages;
>      struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
> +
>      int nvcpus;
> -    struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
> +    struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
> +    struct mutex vcpu_mutex[KVM_MAX_VCPUS];
> +
>      int memory_config_version;
>      int busy;
>      unsigned long rmap_overflow;
> @@ -472,7 +507,8 @@ struct kvm_arch_ops {
>      int (*hardware_setup)(void);               /* __init */
>      void (*hardware_unsetup)(void);            /* __exit */
>
> -    int (*vcpu_create)(struct kvm_vcpu *vcpu);
> +    int (*vcpu_size)(void);
> +    int (*vcpu_init)(struct kvm_vcpu *vcpu);

I would prefer combining these two into 'struct kvm_vcpu
*vcpu_create()', but it's also okay as is.

>
>  static int kvm_dev_release(struct inode *inode, struct file *filp)
> @@ -430,6 +431,7 @@ static void kvm_destroy_vm(struct kvm *k
>      kvm_io_bus_destroy(&kvm->mmio_bus);
>      kvm_free_vcpus(kvm);
>      kvm_free_physmem(kvm);
> +

empty line.
>      kfree(kvm);
>  }
>
> @@ -796,7 +798,7 @@ raced:
>
>      for (i = 0; i < KVM_MAX_VCPUS; ++i) {
>          struct kvm_vcpu *vcpu;
> -
> +

random noise?

>
> @@ -2380,40 +2382,51 @@ static int kvm_vm_ioctl_create_vcpu(stru
>  {
>      int r;
>      struct kvm_vcpu *vcpu;
> -    struct page *page;
> +    struct page *pio_page, *run_page;
>
>      r = -EINVAL;
>      if (!valid_vcpu(n))
>          goto out;
>
> -    vcpu = &kvm->vcpus[n];
> -    vcpu->vcpu_id = n;
> +    mutex_lock(&kvm->vcpu_mutex[n]);
> +    if (kvm->vcpus[n]) {
> +        r = -EEXIST;
> +        goto out_unlock;
> +    }
>
> -    mutex_lock(&vcpu->mutex);
> +    vcpu = kzalloc(kvm_arch_ops->vcpu_size(), GFP_KERNEL);
>
> -    if (vcpu->vmcs) {
> -        mutex_unlock(&vcpu->mutex);
> -        return -EEXIST;
> +    if (!vcpu) {
> +        r = -ENOMEM;
> +        goto out_unlock;
>      }
>
> -    page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +    vcpu->mutex = &kvm->vcpu_mutex[n];
> +    vcpu->cpu = -1;
> +    vcpu->kvm = kvm;
> +    vcpu->mmu.root_hpa = INVALID_PAGE;
> +
> +    vcpu->vcpu_id = n;
> +    kvm->vcpus[n] = vcpu;
> +
> +    run_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>      r = -ENOMEM;
> -    if (!page)
> -        goto out_unlock;
> -    vcpu->run = page_address(page);
> +    if (!run_page)
> +        goto out_deallocate;
> +    vcpu->run = page_address(run_page);
>

This cleanup is good, but makes the patch larger.  Please defer it.

> diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
> index f60012d..4e821ed 100644
> --- a/drivers/kvm/x86_emulate.c
> +++ b/drivers/kvm/x86_emulate.c
> @@ -1155,7 +1155,7 @@ special_insn:
>          DPRINTF("Urk! I don't handle SCAS.\n");
>          goto cannot_emulate;
>      case 0xf4:              /* hlt */
> -        ctxt->vcpu->halt_request = 1;
> +        ctxt->vcpu->vmx->halt_request = 1;
>          goto done;
>      case 0xc3: /* ret */
>          dst.ptr = &_eip;

This is common code, and will stomp on svm data if executed on amd.  I
don't think that amd will ever need to emulate hlt, nevertheless let's
make ->halt_request a member of struct kvm_vcpu.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] dynamically create vcpus + vmx/svm structures
       [not found]       ` <ed628a920707131534x7fe57ca3sfa09d9d79412d9c6-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2007-07-14  6:14         ` Avi Kivity
       [not found]           ` <469869CF.8030106-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 5+ messages in thread
From: Avi Kivity @ 2007-07-14  6:14 UTC (permalink / raw)
  To: Paul Turner; +Cc: KVM

[added back cc]

Paul Turner wrote:
> On 7/13/07, Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org> wrote:
>> Paul Turner wrote:
>> > From: Paul Turner <pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
>> >
>> > - vcpus now allocated on demand
>> > - vmx/svm fields separated into arch specific structures on vcpus
>> > - vmx/svm fields now only allocated on corresponding architectures
>> >
>> > - Paul
>> >
>> > diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
>> > index 0f7a4d9..c631192 100644
>> > --- a/drivers/kvm/kvm.h
>> > +++ b/drivers/kvm/kvm.h
>> > @@ -16,6 +16,7 @@ #include <linux/mm.h>
>> >  #include <asm/signal.h>
>> >
>> >  #include "vmx.h"
>> > +#include "kvm_svm.h"
>>
>> This can probably be avoided, see below.
>>
>> >  #include <linux/kvm.h>
>> >  #include <linux/kvm_para.h>
>> >
>> > @@ -326,16 +327,64 @@ struct kvm_io_device *kvm_io_bus_find_de
>> >  void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
>> >                   struct kvm_io_device *dev);
>> >
>> > +struct kvm_vmx_data {
>> > +    int msr_offset_efer;
>> > +
>> > +        #ifdef CONFIG_X86_64
>> > +    int msr_offset_kernel_gs_base;
>> > +        #endif
>> > +
>> > +        struct vmx_host_state {
>> > +        int loaded;
>> > +        u16 fs_sel, gs_sel, ldt_sel;
>> > +        int fs_gs_ldt_reload_needed;
>> > +    } host_state;
>> > +
>> > +    struct vmx_msr_entry *guest_msrs;
>> > +    struct vmx_msr_entry *host_msrs;
>> > +
>> > +    struct {
>> > +        int active;
>> > +        u8 save_iopl;
>> > +        struct kvm_save_segment {
>> > +            u16 selector;
>> > +            unsigned long base;
>> > +            u32 limit;
>> > +            u32 ar;
>> > +        } tr, es, ds, fs, gs;
>> > +    } rmode;
>> > +    int halt_request; /* real mode */
>> > + +    struct vmcs *vmcs;
>> > +};
>> > +
>>
>> If this is moved to vmx.c, we can avoid including vmx.h and have no arch
>> dependent code here (given that we don't even need the size).
>>
>
> I originally did this however gcc refuses to compile with the
> incomplete types, although after further investigation it turns out
> it's a bug in gcc with an incomplete implementation of zero sized
> arrays under a union, so I can fix this now.  See notes below..
>

Looks like you forgot the notes below :)

Anyway the only fix I can see is to have a long[0] member at the end,
and have vmx.c define a function vmx(vcpu) which returns the vmx
specific data.  Accesses would look like

  vmx(vcpu)->cr0 = 42;

which is odd, but I've seen worse.  But if you have a better solution,
let's hear it.

>> > +struct kvm_svm_data {
>> > +    struct vmcb *vmcb;
>> > +    unsigned long vmcb_pa;
>> > +    struct svm_cpu_data *svm_data;
>> > +    uint64_t asid_generation;
>> > +
>> > +    unsigned long db_regs[NUM_DB_REGS];
>> > +
>> > +    u64 next_rip;
>> > +
>> > +    u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
>> > +    u64 host_gs_base;
>> > +    unsigned long host_cr2;
>> > +    unsigned long host_db_regs[NUM_DB_REGS];
>> > +    unsigned long host_dr6;
>> > +    unsigned long host_dr7;
>> > +};
>>
>> This can remain in kvm_svm.h.
>
> I was going to move both structures out in a small follow-up patch; I
> didn't in this one because of the compile issue above + not wanting to
> make this patch any larger than it already is. :)  I can merge it
> into this one if you prefer.

Don't understand.  If it remains in kvm_svm.h, the patch gets smaller,
not larger.

>
>>
>>
>> > +
>> > +
>> >  struct kvm_vcpu {
>> >      struct kvm *kvm;
>> > +    struct mutex *mutex; /* refers to corresponding vcpu_mutex on
>> kvm */
>>
>> Please keep this as a real structure, not a pointer.  Existence testing
>> of the vcpu is now simply if (kvm->vcpus[slot]).
>
> Some of the existing code makes the assumption that locking the cpu
> locks the slot as well; also if we don't have an associated lock/mutex
> then we'd have to take a global lock on slot updates/checks.  Finally
> you'd still have a race in between checking it's a valid vcpu and
> trying to acquire its mutex.

The only place the race matters is in vcpu creation.  There, we can do
something like

    vcpu = kvm_arch_ops->vcpu_create(...);
    spin_lock(kvm);
    if (kvm->vcpus[slot]) {
        r = -EEXIST;
        vcpu_free(vcpu);
    } else
        kvm->vcpus[slot] = vcpu;
    spin_unlock(kvm);

In the other places, if the user has a thread creating a vcpu and
another thread performing an operation on it, it's perfectly legitimate
to return -ENOENT.

>> > -    page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>> > +    vcpu->mutex = &kvm->vcpu_mutex[n];
>> > +    vcpu->cpu = -1;
>> > +    vcpu->kvm = kvm;
>> > +    vcpu->mmu.root_hpa = INVALID_PAGE;
>> > +
>> > +    vcpu->vcpu_id = n;
>> > +    kvm->vcpus[n] = vcpu;
>> > +
>> > +    run_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>> >      r = -ENOMEM;
>> > -    if (!page)
>> > -        goto out_unlock;
>> > -    vcpu->run = page_address(page);
>> > +    if (!run_page)
>> > +        goto out_deallocate;
>> > +    vcpu->run = page_address(run_page);
>> >
>>
>> This cleanup is good, but makes the patch larger.  Please defer it.
>>
>
> this needs to be done on vcpu creation (it was part of vm init
> before), if you're concerned about patch size I can break up the
> structure separation and dynamic allocation fairly easily since they
> are different commits in my repository (I just didn't originally want
> to rebase them both :)

Yes, you're right -- it is necessary.  It can live in the main patch.

>
> please advise on splits/etc. as above and I'll resubmit
>

One patch is okay.  We should aim for kvm_main.c not knowing (including)
anything about vmx or svm.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] dynamically create vcpus + vmx/svm structures
       [not found]           ` <469869CF.8030106-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-07-18 22:36             ` Paul Turner
       [not found]               ` <Pine.LNX.4.64.0707181528430.32400-hxTPNdr267xSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
  0 siblings, 1 reply; 5+ messages in thread
From: Paul Turner @ 2007-07-18 22:36 UTC (permalink / raw)
  To: Avi Kivity; +Cc: KVM

[-- Attachment #1: Type: TEXT/PLAIN, Size: 24433 bytes --]

Here's an updated version that instead takes kvm_lock and leaves the svm 
stuff in the relevant header file for now.  Unfortunately we still need 
the includes since gcc is braindead and can't compile untyped [0]-size 
arrays properly; the only two ways I can see to fix this are either 
embedding vcpu in an arch-specific struct, or using a long[0] member 
and a vmx/svm macro as you mentioned before.

I'll prepare a follow-up patch that migrates the arch cruft out of kvm.h 
(the long[0] approach) now; of course that changes all the vmx/svm 
references again, but at least they're now consolidated.

- Paul

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 7bdffe6..9fff8b7 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -16,6 +16,7 @@ #include <linux/mm.h>
  #include <asm/signal.h>

  #include "vmx.h"
+#include "kvm_svm.h"
  #include <linux/kvm.h>
  #include <linux/kvm_para.h>

@@ -308,13 +309,39 @@ struct kvm_io_device *kvm_io_bus_find_de
  void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
  			     struct kvm_io_device *dev);

+struct kvm_vmx_data {
+	int msr_offset_efer;
+
+        #ifdef CONFIG_X86_64
+	int msr_offset_kernel_gs_base;
+        #endif
+
+        struct vmx_host_state {
+		int loaded;
+		u16 fs_sel, gs_sel, ldt_sel;
+		int fs_gs_ldt_reload_needed;
+	} host_state;
+
+	struct vmx_msr_entry *guest_msrs;
+	struct vmx_msr_entry *host_msrs;
+
+	struct {
+		int active;
+		u8 save_iopl;
+		struct kvm_save_segment {
+			u16 selector;
+			unsigned long base;
+			u32 limit;
+			u32 ar;
+		} tr, es, ds, fs, gs;
+	} rmode;
+
+	struct vmcs *vmcs;
+};
+
  struct kvm_vcpu {
  	struct kvm *kvm;
  	int vcpu_id;
-	union {
-		struct vmcs *vmcs;
-		struct vcpu_svm *svm;
-	};
  	struct mutex mutex;
  	int   cpu;
  	int   launched;
@@ -342,12 +369,6 @@ struct kvm_vcpu {
  	u64 ia32_misc_enable_msr;
  	int nmsrs;
  	int save_nmsrs;
-	int msr_offset_efer;
-#ifdef CONFIG_X86_64
-	int msr_offset_kernel_gs_base;
-#endif
-	struct vmx_msr_entry *guest_msrs;
-	struct vmx_msr_entry *host_msrs;

  	struct kvm_mmu mmu;

@@ -366,11 +387,6 @@ #endif
  	char *guest_fx_image;
  	int fpu_active;
  	int guest_fpu_loaded;
-	struct vmx_host_state {
-		int loaded;
-		u16 fs_sel, gs_sel, ldt_sel;
-		int fs_gs_ldt_reload_needed;
-	} vmx_host_state;

  	int mmio_needed;
  	int mmio_read_completed;
@@ -387,20 +403,13 @@ #endif

  	struct kvm_stat stat;

-	struct {
-		int active;
-		u8 save_iopl;
-		struct kvm_save_segment {
-			u16 selector;
-			unsigned long base;
-			u32 limit;
-			u32 ar;
-		} tr, es, ds, fs, gs;
-	} rmode;
-	int halt_request; /* real mode on Intel only */
-
  	int cpuid_nent;
  	struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+
+	int halt_request; /* real mode emulation */
+
+	struct kvm_vmx_data vmx[0];
+	struct kvm_svm_data svm[0];
  };

  struct kvm_mem_alias {
@@ -430,7 +439,7 @@ struct kvm {
  	int n_free_mmu_pages;
  	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
  	int nvcpus;
-	struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
+	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
  	int memory_config_version;
  	int busy;
  	unsigned long rmap_overflow;
@@ -453,7 +462,8 @@ struct kvm_arch_ops {
  	int (*hardware_setup)(void);               /* __init */
  	void (*hardware_unsetup)(void);            /* __exit */

-	int (*vcpu_create)(struct kvm_vcpu *vcpu);
+	int (*vcpu_size)(void);
+	int (*vcpu_init)(struct kvm_vcpu *vcpu);
  	void (*vcpu_free)(struct kvm_vcpu *vcpu);

  	void (*vcpu_load)(struct kvm_vcpu *vcpu);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 67654c3..6831024 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -267,7 +267,7 @@ void kvm_flush_remote_tlbs(struct kvm *k
  	cpus_clear(cpus);
  	needed = 0;
  	for (i = 0; i < kvm->nvcpus; ++i) {
-		vcpu = &kvm->vcpus[i];
+		vcpu = kvm->vcpus[i];
  		if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests))
  			continue;
  		cpu = vcpu->cpu;
@@ -294,7 +294,6 @@ void kvm_flush_remote_tlbs(struct kvm *k
  static struct kvm *kvm_create_vm(void)
  {
  	struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
-	int i;

  	if (!kvm)
  		return ERR_PTR(-ENOMEM);
@@ -306,14 +305,7 @@ static struct kvm *kvm_create_vm(void)
  	list_add(&kvm->vm_list, &vm_list);
  	spin_unlock(&kvm_lock);
  	kvm_io_bus_init(&kvm->mmio_bus);
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		struct kvm_vcpu *vcpu = &kvm->vcpus[i];

-		mutex_init(&vcpu->mutex);
-		vcpu->cpu = -1;
-		vcpu->kvm = kvm;
-		vcpu->mmu.root_hpa = INVALID_PAGE;
-	}
  	return kvm;
  }

@@ -367,7 +359,7 @@ static void free_pio_guest_pages(struct

  static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
  {
-	if (!vcpu->vmcs)
+	if (!vcpu)
  		return;

  	vcpu_load(vcpu);
@@ -377,7 +369,7 @@ static void kvm_unload_vcpu_mmu(struct k

  static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
  {
-	if (!vcpu->vmcs)
+	if (!vcpu)
  		return;

  	vcpu_load(vcpu);
@@ -389,6 +381,7 @@ static void kvm_free_vcpu(struct kvm_vcp
  	free_page((unsigned long)vcpu->pio_data);
  	vcpu->pio_data = NULL;
  	free_pio_guest_pages(vcpu);
+	kfree(vcpu);
  }

  static void kvm_free_vcpus(struct kvm *kvm)
@@ -399,9 +392,11 @@ static void kvm_free_vcpus(struct kvm *k
  	 * Unpin any mmu pages first.
  	 */
  	for (i = 0; i < KVM_MAX_VCPUS; ++i)
-		kvm_unload_vcpu_mmu(&kvm->vcpus[i]);
-	for (i = 0; i < KVM_MAX_VCPUS; ++i)
-		kvm_free_vcpu(&kvm->vcpus[i]);
+		kvm_unload_vcpu_mmu(kvm->vcpus[i]);
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		kvm_free_vcpu(kvm->vcpus[i]);
+		kvm->vcpus[i] = NULL;
+	}
  }

  static int kvm_dev_release(struct inode *inode, struct file *filp)
@@ -2371,40 +2366,54 @@ static int kvm_vm_ioctl_create_vcpu(stru
  {
  	int r;
  	struct kvm_vcpu *vcpu;
-	struct page *page;
+	struct page *pio_page, *run_page;

  	r = -EINVAL;
  	if (!valid_vcpu(n))
  		goto out;

-	vcpu = &kvm->vcpus[n];
-	vcpu->vcpu_id = n;
+	vcpu = kzalloc(kvm_arch_ops->vcpu_size(), GFP_KERNEL);

+	if (!vcpu) {
+		r = -ENOMEM;
+		goto out;
+	}
+
+	mutex_init(&vcpu->mutex);
  	mutex_lock(&vcpu->mutex);
+	vcpu->cpu = -1;
+	vcpu->kvm = kvm;
+	vcpu->mmu.root_hpa = INVALID_PAGE;

-	if (vcpu->vmcs) {
-		mutex_unlock(&vcpu->mutex);
-		return -EEXIST;
-	}
+	vcpu->vcpu_id = n;
+ 
+	spin_lock(&kvm->lock);
+	if (kvm->vcpus[n]) {
+		spin_unlock(&kvm->lock);
+		r = -EEXIST;
+		goto out_free;
+	} 
+	kvm->vcpus[n] = vcpu;
+	spin_unlock(&kvm->lock);

-	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	run_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
  	r = -ENOMEM;
-	if (!page)
-		goto out_unlock;
-	vcpu->run = page_address(page);
+	if (!run_page)
+		goto out_unassign;
+	vcpu->run = page_address(run_page);

-	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	pio_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
  	r = -ENOMEM;
-	if (!page)
+	if (!pio_page)
  		goto out_free_run;
-	vcpu->pio_data = page_address(page);
+	vcpu->pio_data = page_address(pio_page);

  	vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf,
  					   FX_IMAGE_ALIGN);
  	vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE;
  	vcpu->cr0 = 0x10;

-	r = kvm_arch_ops->vcpu_create(vcpu);
+	r = kvm_arch_ops->vcpu_init(vcpu);
  	if (r < 0)
  		goto out_free_vcpus;

@@ -2434,15 +2443,20 @@ static int kvm_vm_ioctl_create_vcpu(stru

  out_free_vcpus:
  	kvm_free_vcpu(vcpu);
+	vcpu = NULL;
  out_free_run:
-	free_page((unsigned long)vcpu->run);
-	vcpu->run = NULL;
-out_unlock:
-	mutex_unlock(&vcpu->mutex);
+	free_page((unsigned long)run_page);
+out_unassign:
+	spin_lock(&kvm->lock);
+	kvm->vcpus[n] = NULL;
+	spin_unlock(&kvm->lock);
+out_free:
+	kfree(vcpu);
  out:
  	return r;
  }

+
  static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
  {
  	u64 efer;
@@ -2934,7 +2948,7 @@ static void decache_vcpus_on_cpu(int cpu
  	spin_lock(&kvm_lock);
  	list_for_each_entry(vm, &vm_list, vm_list)
  		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = &vm->vcpus[i];
+			vcpu = vm->vcpus[i];
  			/*
  			 * If the vcpu is locked, then it is running on some
  			 * other cpu and therefore it is not cached on the
@@ -3071,7 +3085,7 @@ static u64 stat_get(void *_offset)
  	spin_lock(&kvm_lock);
  	list_for_each_entry(kvm, &vm_list, vm_list)
  		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = &kvm->vcpus[i];
+			vcpu = kvm->vcpus[i];
  			total += *(u32 *)((void *)vcpu + offset);
  		}
  	spin_unlock(&kvm_lock);
diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h
index a869983..156359a 100644
--- a/drivers/kvm/kvm_svm.h
+++ b/drivers/kvm/kvm_svm.h
@@ -7,7 +7,6 @@ #include <linux/list.h>
  #include <asm/msr.h>

  #include "svm.h"
-#include "kvm.h"

  static const u32 host_save_user_msrs[] = {
  #ifdef CONFIG_X86_64
@@ -20,7 +19,7 @@ #endif
  #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
  #define NUM_DB_REGS 4

-struct vcpu_svm {
+struct kvm_svm_data {
  	struct vmcb *vmcb;
  	unsigned long vmcb_pa;
  	struct svm_cpu_data *svm_data;
@@ -39,4 +38,3 @@ struct vcpu_svm {
  };

  #endif
-
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 850a1b1..16a3b6e 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -14,7 +14,7 @@
   *
   */

-#include "kvm_svm.h"
+#include "kvm.h"
  #include "x86_emulate.h"

  #include <linux/module.h>
@@ -567,18 +567,20 @@ static void init_vmcb(struct vmcb *vmcb)
  	/* rdx = ?? */
  }

-static int svm_create_vcpu(struct kvm_vcpu *vcpu)
+static int svm_vcpu_size(void)
+{
+	return sizeof(struct kvm) + sizeof(struct kvm_svm_data);
+}
+
+static int svm_init_vcpu(struct kvm_vcpu *vcpu)
  {
  	struct page *page;
  	int r;

  	r = -ENOMEM;
-	vcpu->svm = kzalloc(sizeof *vcpu->svm, GFP_KERNEL);
-	if (!vcpu->svm)
-		goto out1;
  	page = alloc_page(GFP_KERNEL);
  	if (!page)
-		goto out2;
+		goto out1;

  	vcpu->svm->vmcb = page_address(page);
  	clear_page(vcpu->svm->vmcb);
@@ -595,8 +597,6 @@ static int svm_create_vcpu(struct kvm_vc

  	return 0;

-out2:
-	kfree(vcpu->svm);
  out1:
  	return r;
  }
@@ -1608,7 +1608,7 @@ #endif
  		:
  		: [vcpu]"a"(vcpu),
  		  [svm]"i"(offsetof(struct kvm_vcpu, svm)),
-		  [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
+		  [vmcb]"i"(offsetof(struct kvm_svm_data, vmcb_pa)),
  		  [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])),
  		  [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])),
  		  [rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])),
@@ -1763,7 +1763,8 @@ static struct kvm_arch_ops svm_arch_ops
  	.hardware_enable = svm_hardware_enable,
  	.hardware_disable = svm_hardware_disable,

-	.vcpu_create = svm_create_vcpu,
+	.vcpu_size = svm_vcpu_size,
+	.vcpu_init = svm_init_vcpu,
  	.vcpu_free = svm_free_vcpu,

  	.vcpu_load = svm_vcpu_load,
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 2c4f01b..49587a1 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -96,9 +96,9 @@ static inline u64 msr_efer_save_restore_

  static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu)
  {
-	int efer_offset = vcpu->msr_offset_efer;
-	return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) !=
-		msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]);
+	int efer_offset = vcpu->vmx->msr_offset_efer;
+	return msr_efer_save_restore_bits(vcpu->vmx->host_msrs[efer_offset]) !=
+		msr_efer_save_restore_bits(vcpu->vmx->guest_msrs[efer_offset]);
  }

  static inline int is_page_fault(u32 intr_info)
@@ -126,7 +126,7 @@ static int __find_msr_index(struct kvm_v
  	int i;

  	for (i = 0; i < vcpu->nmsrs; ++i)
-		if (vcpu->guest_msrs[i].index == msr)
+		if (vcpu->vmx->guest_msrs[i].index == msr)
  			return i;
  	return -1;
  }
@@ -137,7 +137,7 @@ static struct vmx_msr_entry *find_msr_en

  	i = __find_msr_index(vcpu, msr);
  	if (i >= 0)
-		return &vcpu->guest_msrs[i];
+		return &vcpu->vmx->guest_msrs[i];
  	return NULL;
  }

@@ -160,8 +160,8 @@ static void __vcpu_clear(void *arg)
  	int cpu = raw_smp_processor_id();

  	if (vcpu->cpu == cpu)
-		vmcs_clear(vcpu->vmcs);
-	if (per_cpu(current_vmcs, cpu) == vcpu->vmcs)
+		vmcs_clear(vcpu->vmx->vmcs);
+	if (per_cpu(current_vmcs, cpu) == vcpu->vmx->vmcs)
  		per_cpu(current_vmcs, cpu) = NULL;
  	rdtscll(vcpu->host_tsc);
  }
@@ -260,7 +260,7 @@ static void update_exception_bitmap(stru
  		eb |= 1u << NM_VECTOR;
  	if (vcpu->guest_debug.enabled)
  		eb |= 1u << 1;
-	if (vcpu->rmode.active)
+	if (vcpu->vmx->rmode.active)
  		eb = ~0;
  	vmcs_write32(EXCEPTION_BITMAP, eb);
  }
@@ -285,19 +285,19 @@ #endif
  static void load_transition_efer(struct kvm_vcpu *vcpu)
  {
  	u64 trans_efer;
-	int efer_offset = vcpu->msr_offset_efer;
+	int efer_offset = vcpu->vmx->msr_offset_efer;

-	trans_efer = vcpu->host_msrs[efer_offset].data;
+	trans_efer = vcpu->vmx->host_msrs[efer_offset].data;
  	trans_efer &= ~EFER_SAVE_RESTORE_BITS;
  	trans_efer |= msr_efer_save_restore_bits(
-				vcpu->guest_msrs[efer_offset]);
+				vcpu->vmx->guest_msrs[efer_offset]);
  	wrmsrl(MSR_EFER, trans_efer);
  	vcpu->stat.efer_reload++;
  }

  static void vmx_save_host_state(struct kvm_vcpu *vcpu)
  {
-	struct vmx_host_state *hs = &vcpu->vmx_host_state;
+	struct vmx_host_state *hs = &vcpu->vmx->host_state;

  	if (hs->loaded)
  		return;
@@ -334,17 +334,17 @@ #endif

  #ifdef CONFIG_X86_64
  	if (is_long_mode(vcpu)) {
-		save_msrs(vcpu->host_msrs + vcpu->msr_offset_kernel_gs_base, 1);
+		save_msrs(vcpu->vmx->host_msrs + vcpu->vmx->msr_offset_kernel_gs_base, 1);
  	}
  #endif
-	load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+	load_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
  	if (msr_efer_need_save_restore(vcpu))
  		load_transition_efer(vcpu);
  }

  static void vmx_load_host_state(struct kvm_vcpu *vcpu)
  {
-	struct vmx_host_state *hs = &vcpu->vmx_host_state;
+	struct vmx_host_state *hs = &vcpu->vmx->host_state;

  	if (!hs->loaded)
  		return;
@@ -366,10 +366,10 @@ #endif

  		reload_tss();
  	}
-	save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
-	load_msrs(vcpu->host_msrs, vcpu->save_nmsrs);
+	save_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
+	load_msrs(vcpu->vmx->host_msrs, vcpu->save_nmsrs);
  	if (msr_efer_need_save_restore(vcpu))
-		load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1);
+		load_msrs(vcpu->vmx->host_msrs + vcpu->vmx->msr_offset_efer, 1);
  }

  /*
@@ -378,7 +378,7 @@ #endif
   */
  static void vmx_vcpu_load(struct kvm_vcpu *vcpu)
  {
-	u64 phys_addr = __pa(vcpu->vmcs);
+	u64 phys_addr = __pa(vcpu->vmx->vmcs);
  	int cpu;
  	u64 tsc_this, delta;

@@ -387,16 +387,16 @@ static void vmx_vcpu_load(struct kvm_vcp
  	if (vcpu->cpu != cpu)
  		vcpu_clear(vcpu);

-	if (per_cpu(current_vmcs, cpu) != vcpu->vmcs) {
+	if (per_cpu(current_vmcs, cpu) != vcpu->vmx->vmcs) {
  		u8 error;

-		per_cpu(current_vmcs, cpu) = vcpu->vmcs;
+		per_cpu(current_vmcs, cpu) = vcpu->vmx->vmcs;
  		asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0"
  			      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
  			      : "cc");
  		if (error)
  			printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
-			       vcpu->vmcs, phys_addr);
+			       vcpu->vmx->vmcs, phys_addr);
  	}

  	if (vcpu->cpu != cpu) {
@@ -504,12 +504,12 @@ static void vmx_inject_gp(struct kvm_vcp
  void move_msr_up(struct kvm_vcpu *vcpu, int from, int to)
  {
  	struct vmx_msr_entry tmp;
-	tmp = vcpu->guest_msrs[to];
-	vcpu->guest_msrs[to] = vcpu->guest_msrs[from];
-	vcpu->guest_msrs[from] = tmp;
-	tmp = vcpu->host_msrs[to];
-	vcpu->host_msrs[to] = vcpu->host_msrs[from];
-	vcpu->host_msrs[from] = tmp;
+	tmp = vcpu->vmx->guest_msrs[to];
+	vcpu->vmx->guest_msrs[to] = vcpu->vmx->guest_msrs[from];
+	vcpu->vmx->guest_msrs[from] = tmp;
+	tmp = vcpu->vmx->host_msrs[to];
+	vcpu->vmx->host_msrs[to] = vcpu->vmx->host_msrs[from];
+	vcpu->vmx->host_msrs[from] = tmp;
  }

  /*
@@ -550,10 +550,10 @@ #endif
  	vcpu->save_nmsrs = save_nmsrs;

  #ifdef CONFIG_X86_64
-	vcpu->msr_offset_kernel_gs_base =
+	vcpu->vmx->msr_offset_kernel_gs_base =
  		__find_msr_index(vcpu, MSR_KERNEL_GS_BASE);
  #endif
-	vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
+	vcpu->vmx->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
  }

  /*
@@ -646,7 +646,7 @@ static int vmx_set_msr(struct kvm_vcpu *
  #ifdef CONFIG_X86_64
  	case MSR_EFER:
  		ret = kvm_set_msr_common(vcpu, msr_index, data);
-		if (vcpu->vmx_host_state.loaded)
+		if (vcpu->vmx->host_state.loaded)
  			load_transition_efer(vcpu);
  		break;
  	case MSR_FS_BASE:
@@ -672,8 +672,8 @@ #endif
  		msr = find_msr_entry(vcpu, msr_index);
  		if (msr) {
  			msr->data = data;
-			if (vcpu->vmx_host_state.loaded)
-				load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+			if (vcpu->vmx->host_state.loaded)
+				load_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
  			break;
  		}
  		ret = kvm_set_msr_common(vcpu, msr_index, data);
@@ -868,15 +868,15 @@ static void enter_pmode(struct kvm_vcpu
  {
  	unsigned long flags;

-	vcpu->rmode.active = 0;
+	vcpu->vmx->rmode.active = 0;

-	vmcs_writel(GUEST_TR_BASE, vcpu->rmode.tr.base);
-	vmcs_write32(GUEST_TR_LIMIT, vcpu->rmode.tr.limit);
-	vmcs_write32(GUEST_TR_AR_BYTES, vcpu->rmode.tr.ar);
+	vmcs_writel(GUEST_TR_BASE, vcpu->vmx->rmode.tr.base);
+	vmcs_write32(GUEST_TR_LIMIT, vcpu->vmx->rmode.tr.limit);
+	vmcs_write32(GUEST_TR_AR_BYTES, vcpu->vmx->rmode.tr.ar);

  	flags = vmcs_readl(GUEST_RFLAGS);
  	flags &= ~(IOPL_MASK | X86_EFLAGS_VM);
-	flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT);
+	flags |= (vcpu->vmx->rmode.save_iopl << IOPL_SHIFT);
  	vmcs_writel(GUEST_RFLAGS, flags);

  	vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
@@ -884,10 +884,10 @@ static void enter_pmode(struct kvm_vcpu

  	update_exception_bitmap(vcpu);

-	fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->rmode.es);
-	fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->rmode.ds);
-	fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->rmode.gs);
-	fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->rmode.fs);
+	fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->vmx->rmode.es);
+	fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->vmx->rmode.ds);
+	fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->vmx->rmode.gs);
+	fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->vmx->rmode.fs);

  	vmcs_write16(GUEST_SS_SELECTOR, 0);
  	vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
@@ -920,19 +920,19 @@ static void enter_rmode(struct kvm_vcpu
  {
  	unsigned long flags;

-	vcpu->rmode.active = 1;
+	vcpu->vmx->rmode.active = 1;

-	vcpu->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
+	vcpu->vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
  	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));

-	vcpu->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
+	vcpu->vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
  	vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);

-	vcpu->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
+	vcpu->vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
  	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);

  	flags = vmcs_readl(GUEST_RFLAGS);
-	vcpu->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;
+	vcpu->vmx->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;

  	flags |= IOPL_MASK | X86_EFLAGS_VM;

@@ -950,11 +950,10 @@ static void enter_rmode(struct kvm_vcpu
  		vmcs_writel(GUEST_CS_BASE, 0xf0000);
  	vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);

-	fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es);
-	fix_rmode_seg(VCPU_SREG_DS, &vcpu->rmode.ds);
-	fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs);
-	fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs);
-
+	fix_rmode_seg(VCPU_SREG_ES, &vcpu->vmx->rmode.es);
+	fix_rmode_seg(VCPU_SREG_DS, &vcpu->vmx->rmode.ds);
+	fix_rmode_seg(VCPU_SREG_GS, &vcpu->vmx->rmode.gs);
+	fix_rmode_seg(VCPU_SREG_FS, &vcpu->vmx->rmode.fs);
  	init_rmode_tss(vcpu->kvm);
  }

@@ -1002,10 +1001,10 @@ static void vmx_set_cr0(struct kvm_vcpu
  {
  	vmx_fpu_deactivate(vcpu);

-	if (vcpu->rmode.active && (cr0 & X86_CR0_PE))
+	if (vcpu->vmx->rmode.active && (cr0 & X86_CR0_PE))
  		enter_pmode(vcpu);

-	if (!vcpu->rmode.active && !(cr0 & X86_CR0_PE))
+	if (!vcpu->vmx->rmode.active && !(cr0 & X86_CR0_PE))
  		enter_rmode(vcpu);

  #ifdef CONFIG_X86_64
@@ -1036,7 +1035,7 @@ static void vmx_set_cr3(struct kvm_vcpu
  static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
  {
  	vmcs_writel(CR4_READ_SHADOW, cr4);
-	vmcs_writel(GUEST_CR4, cr4 | (vcpu->rmode.active ?
+	vmcs_writel(GUEST_CR4, cr4 | (vcpu->vmx->rmode.active ?
  		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON));
  	vcpu->cr4 = cr4;
  }
@@ -1124,17 +1123,17 @@ static void vmx_set_segment(struct kvm_v
  	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
  	u32 ar;

-	if (vcpu->rmode.active && seg == VCPU_SREG_TR) {
-		vcpu->rmode.tr.selector = var->selector;
-		vcpu->rmode.tr.base = var->base;
-		vcpu->rmode.tr.limit = var->limit;
-		vcpu->rmode.tr.ar = vmx_segment_access_rights(var);
+	if (vcpu->vmx->rmode.active && seg == VCPU_SREG_TR) {
+		vcpu->vmx->rmode.tr.selector = var->selector;
+		vcpu->vmx->rmode.tr.base = var->base;
+		vcpu->vmx->rmode.tr.limit = var->limit;
+		vcpu->vmx->rmode.tr.ar = vmx_segment_access_rights(var);
  		return;
  	}
  	vmcs_writel(sf->base, var->base);
  	vmcs_write32(sf->limit, var->limit);
  	vmcs_write16(sf->selector, var->selector);
-	if (vcpu->rmode.active && var->s) {
+	if (vcpu->vmx->rmode.active && var->s) {
  		/*
  		 * Hack real-mode segments into vm86 compatibility.
  		 */
@@ -1253,6 +1252,7 @@ static int vmx_vcpu_setup(struct kvm_vcp
  	vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val();
  	vcpu->cr8 = 0;
  	vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+
  	if (vcpu->vcpu_id == 0)
  		vcpu->apic_base |= MSR_IA32_APICBASE_BSP;

@@ -1384,10 +1384,10 @@ #endif
  		if (wrmsr_safe(index, data_low, data_high) < 0)
  			continue;
  		data = data_low | ((u64)data_high << 32);
-		vcpu->host_msrs[j].index = index;
-		vcpu->host_msrs[j].reserved = 0;
-		vcpu->host_msrs[j].data = data;
-		vcpu->guest_msrs[j] = vcpu->host_msrs[j];
+		vcpu->vmx->host_msrs[j].index = index;
+		vcpu->vmx->host_msrs[j].reserved = 0;
+		vcpu->vmx->host_msrs[j].data = data;
+		vcpu->vmx->guest_msrs[j] = vcpu->vmx->host_msrs[j];
  		++vcpu->nmsrs;
  	}

@@ -1479,7 +1479,7 @@ static void kvm_do_inject_irq(struct kvm
  	if (!vcpu->irq_pending[word_index])
  		clear_bit(word_index, &vcpu->irq_summary);

-	if (vcpu->rmode.active) {
+	if (vcpu->vmx->rmode.active) {
  		inject_rmode_irq(vcpu, irq);
  		return;
  	}
@@ -1538,7 +1538,7 @@ static void kvm_guest_debug_pre(struct k
  static int handle_rmode_exception(struct kvm_vcpu *vcpu,
  				  int vec, u32 err_code)
  {
-	if (!vcpu->rmode.active)
+	if (!vcpu->vmx->rmode.active)
  		return 0;

  	/*
@@ -1619,7 +1619,7 @@ static int handle_exception(struct kvm_v
  		}
  	}

-	if (vcpu->rmode.active &&
+	if (vcpu->vmx->rmode.active &&
  	    handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
  								error_code)) {
  		if (vcpu->halt_request) {
@@ -2224,28 +2224,34 @@ static void vmx_inject_page_fault(struct

  static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
  {
-	if (vcpu->vmcs) {
+	if (vcpu->vmx->vmcs) {
  		on_each_cpu(__vcpu_clear, vcpu, 0, 1);
-		free_vmcs(vcpu->vmcs);
-		vcpu->vmcs = NULL;
+		free_vmcs(vcpu->vmx->vmcs);
+		vcpu->vmx->vmcs = NULL;
+
  	}
  }

+static int vmx_vcpu_size(void)
+{
+	return sizeof(struct kvm_vcpu) + sizeof(struct kvm_vmx_data);
+}
+
  static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
  {
  	vmx_free_vmcs(vcpu);
  }

-static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
+static int vmx_init_vcpu(struct kvm_vcpu *vcpu)
  {
  	struct vmcs *vmcs;

-	vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!vcpu->guest_msrs)
+	vcpu->vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!vcpu->vmx->guest_msrs)
  		return -ENOMEM;

-	vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!vcpu->host_msrs)
+	vcpu->vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!vcpu->vmx->host_msrs)
  		goto out_free_guest_msrs;

  	vmcs = alloc_vmcs();
@@ -2253,18 +2259,18 @@ static int vmx_create_vcpu(struct kvm_vc
  		goto out_free_msrs;

  	vmcs_clear(vmcs);
-	vcpu->vmcs = vmcs;
+	vcpu->vmx->vmcs = vmcs;
  	vcpu->launched = 0;

  	return 0;

  out_free_msrs:
-	kfree(vcpu->host_msrs);
-	vcpu->host_msrs = NULL;
+	kfree(vcpu->vmx->host_msrs);
+	vcpu->vmx->host_msrs = NULL;

  out_free_guest_msrs:
-	kfree(vcpu->guest_msrs);
-	vcpu->guest_msrs = NULL;
+	kfree(vcpu->vmx->guest_msrs);
+	vcpu->vmx->guest_msrs = NULL;

  	return -ENOMEM;
  }
@@ -2277,7 +2283,8 @@ static struct kvm_arch_ops vmx_arch_ops
  	.hardware_enable = hardware_enable,
  	.hardware_disable = hardware_disable,

-	.vcpu_create = vmx_create_vcpu,
+	.vcpu_size = vmx_vcpu_size,
+	.vcpu_init = vmx_init_vcpu,
  	.vcpu_free = vmx_free_vcpu,

  	.vcpu_load = vmx_vcpu_load,

[-- Attachment #2: Type: TEXT/PLAIN, Size: 24460 bytes --]

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 7bdffe6..9fff8b7 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -16,6 +16,7 @@ #include <linux/mm.h>
 #include <asm/signal.h>
 
 #include "vmx.h"
+#include "kvm_svm.h"
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
 
@@ -308,13 +309,39 @@ struct kvm_io_device *kvm_io_bus_find_de
 void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
 			     struct kvm_io_device *dev);
 
+struct kvm_vmx_data {
+	int msr_offset_efer;
+
+        #ifdef CONFIG_X86_64
+	int msr_offset_kernel_gs_base;
+        #endif
+
+        struct vmx_host_state {
+		int loaded;
+		u16 fs_sel, gs_sel, ldt_sel;
+		int fs_gs_ldt_reload_needed;
+	} host_state;
+
+	struct vmx_msr_entry *guest_msrs;
+	struct vmx_msr_entry *host_msrs;
+
+	struct {
+		int active;
+		u8 save_iopl;
+		struct kvm_save_segment {
+			u16 selector;
+			unsigned long base;
+			u32 limit;
+			u32 ar;
+		} tr, es, ds, fs, gs;
+	} rmode;
+
+	struct vmcs *vmcs;
+};
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 	int vcpu_id;
-	union {
-		struct vmcs *vmcs;
-		struct vcpu_svm *svm;
-	};
 	struct mutex mutex;
 	int   cpu;
 	int   launched;
@@ -342,12 +369,6 @@ struct kvm_vcpu {
 	u64 ia32_misc_enable_msr;
 	int nmsrs;
 	int save_nmsrs;
-	int msr_offset_efer;
-#ifdef CONFIG_X86_64
-	int msr_offset_kernel_gs_base;
-#endif
-	struct vmx_msr_entry *guest_msrs;
-	struct vmx_msr_entry *host_msrs;
 
 	struct kvm_mmu mmu;
 
@@ -366,11 +387,6 @@ #endif
 	char *guest_fx_image;
 	int fpu_active;
 	int guest_fpu_loaded;
-	struct vmx_host_state {
-		int loaded;
-		u16 fs_sel, gs_sel, ldt_sel;
-		int fs_gs_ldt_reload_needed;
-	} vmx_host_state;
 
 	int mmio_needed;
 	int mmio_read_completed;
@@ -387,20 +403,13 @@ #endif
 
 	struct kvm_stat stat;
 
-	struct {
-		int active;
-		u8 save_iopl;
-		struct kvm_save_segment {
-			u16 selector;
-			unsigned long base;
-			u32 limit;
-			u32 ar;
-		} tr, es, ds, fs, gs;
-	} rmode;
-	int halt_request; /* real mode on Intel only */
-
 	int cpuid_nent;
 	struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+
+	int halt_request; /* real mode emulation */
+
+	struct kvm_vmx_data vmx[0];
+	struct kvm_svm_data svm[0];
 };
 
 struct kvm_mem_alias {
@@ -430,7 +439,7 @@ struct kvm {
 	int n_free_mmu_pages;
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	int nvcpus;
-	struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
+	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 	int memory_config_version;
 	int busy;
 	unsigned long rmap_overflow;
@@ -453,7 +462,8 @@ struct kvm_arch_ops {
 	int (*hardware_setup)(void);               /* __init */
 	void (*hardware_unsetup)(void);            /* __exit */
 
-	int (*vcpu_create)(struct kvm_vcpu *vcpu);
+	int (*vcpu_size)(void);
+	int (*vcpu_init)(struct kvm_vcpu *vcpu);
 	void (*vcpu_free)(struct kvm_vcpu *vcpu);
 
 	void (*vcpu_load)(struct kvm_vcpu *vcpu);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 67654c3..6831024 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -267,7 +267,7 @@ void kvm_flush_remote_tlbs(struct kvm *k
 	cpus_clear(cpus);
 	needed = 0;
 	for (i = 0; i < kvm->nvcpus; ++i) {
-		vcpu = &kvm->vcpus[i];
+		vcpu = kvm->vcpus[i];
 		if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests))
 			continue;
 		cpu = vcpu->cpu;
@@ -294,7 +294,6 @@ void kvm_flush_remote_tlbs(struct kvm *k
 static struct kvm *kvm_create_vm(void)
 {
 	struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
-	int i;
 
 	if (!kvm)
 		return ERR_PTR(-ENOMEM);
@@ -306,14 +305,7 @@ static struct kvm *kvm_create_vm(void)
 	list_add(&kvm->vm_list, &vm_list);
 	spin_unlock(&kvm_lock);
 	kvm_io_bus_init(&kvm->mmio_bus);
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		struct kvm_vcpu *vcpu = &kvm->vcpus[i];
 
-		mutex_init(&vcpu->mutex);
-		vcpu->cpu = -1;
-		vcpu->kvm = kvm;
-		vcpu->mmu.root_hpa = INVALID_PAGE;
-	}
 	return kvm;
 }
 
@@ -367,7 +359,7 @@ static void free_pio_guest_pages(struct 
 
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->vmcs)
+	if (!vcpu)
 		return;
 
 	vcpu_load(vcpu);
@@ -377,7 +369,7 @@ static void kvm_unload_vcpu_mmu(struct k
 
 static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->vmcs)
+	if (!vcpu)
 		return;
 
 	vcpu_load(vcpu);
@@ -389,6 +381,7 @@ static void kvm_free_vcpu(struct kvm_vcp
 	free_page((unsigned long)vcpu->pio_data);
 	vcpu->pio_data = NULL;
 	free_pio_guest_pages(vcpu);
+	kfree(vcpu);
 }
 
 static void kvm_free_vcpus(struct kvm *kvm)
@@ -399,9 +392,11 @@ static void kvm_free_vcpus(struct kvm *k
 	 * Unpin any mmu pages first.
 	 */
 	for (i = 0; i < KVM_MAX_VCPUS; ++i)
-		kvm_unload_vcpu_mmu(&kvm->vcpus[i]);
-	for (i = 0; i < KVM_MAX_VCPUS; ++i)
-		kvm_free_vcpu(&kvm->vcpus[i]);
+		kvm_unload_vcpu_mmu(kvm->vcpus[i]);
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		kvm_free_vcpu(kvm->vcpus[i]);
+		kvm->vcpus[i] = NULL;
+	}
 }
 
 static int kvm_dev_release(struct inode *inode, struct file *filp)
@@ -2371,40 +2366,54 @@ static int kvm_vm_ioctl_create_vcpu(stru
 {
 	int r;
 	struct kvm_vcpu *vcpu;
-	struct page *page;
+	struct page *pio_page, *run_page;
 
 	r = -EINVAL;
 	if (!valid_vcpu(n))
 		goto out;
 
-	vcpu = &kvm->vcpus[n];
-	vcpu->vcpu_id = n;
+	vcpu = kzalloc(kvm_arch_ops->vcpu_size(), GFP_KERNEL);
 
+	if (!vcpu) {
+		r = -ENOMEM;
+		goto out;
+	}
+
+	mutex_init(&vcpu->mutex);
 	mutex_lock(&vcpu->mutex);
+	vcpu->cpu = -1;
+	vcpu->kvm = kvm;
+	vcpu->mmu.root_hpa = INVALID_PAGE;
 
-	if (vcpu->vmcs) {
-		mutex_unlock(&vcpu->mutex);
-		return -EEXIST;
-	}
+	vcpu->vcpu_id = n;
+	
+	spin_lock(&kvm->lock);
+	if (kvm->vcpus[n]) {
+		spin_unlock(&kvm->lock);
+		r = -EEXIST;
+		goto out_free;
+	}	
+	kvm->vcpus[n] = vcpu;
+	spin_unlock(&kvm->lock);
 
-	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	run_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	r = -ENOMEM;
-	if (!page)
-		goto out_unlock;
-	vcpu->run = page_address(page);
+	if (!run_page)
+		goto out_unassign;
+	vcpu->run = page_address(run_page);
 
-	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	pio_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	r = -ENOMEM;
-	if (!page)
+	if (!pio_page)
 		goto out_free_run;
-	vcpu->pio_data = page_address(page);
+	vcpu->pio_data = page_address(pio_page);
 
 	vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf,
 					   FX_IMAGE_ALIGN);
 	vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE;
 	vcpu->cr0 = 0x10;
 
-	r = kvm_arch_ops->vcpu_create(vcpu);
+	r = kvm_arch_ops->vcpu_init(vcpu);
 	if (r < 0)
 		goto out_free_vcpus;
 
@@ -2434,15 +2443,20 @@ static int kvm_vm_ioctl_create_vcpu(stru
 
 out_free_vcpus:
 	kvm_free_vcpu(vcpu);
+	vcpu = NULL;
 out_free_run:
-	free_page((unsigned long)vcpu->run);
-	vcpu->run = NULL;
-out_unlock:
-	mutex_unlock(&vcpu->mutex);
+	free_page((unsigned long)run_page);
+out_unassign:
+	spin_lock(&kvm->lock);
+	kvm->vcpus[n] = NULL;
+	spin_unlock(&kvm->lock);
+out_free:
+	kfree(vcpu);
 out:
 	return r;
 }
 
+
 static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
 {
 	u64 efer;
@@ -2934,7 +2948,7 @@ static void decache_vcpus_on_cpu(int cpu
 	spin_lock(&kvm_lock);
 	list_for_each_entry(vm, &vm_list, vm_list)
 		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = &vm->vcpus[i];
+			vcpu = vm->vcpus[i];
 			/*
 			 * If the vcpu is locked, then it is running on some
 			 * other cpu and therefore it is not cached on the
@@ -3071,7 +3085,7 @@ static u64 stat_get(void *_offset)
 	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
 		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = &kvm->vcpus[i];
+			vcpu = kvm->vcpus[i];
 			total += *(u32 *)((void *)vcpu + offset);
 		}
 	spin_unlock(&kvm_lock);
diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h
index a869983..156359a 100644
--- a/drivers/kvm/kvm_svm.h
+++ b/drivers/kvm/kvm_svm.h
@@ -7,7 +7,6 @@ #include <linux/list.h>
 #include <asm/msr.h>
 
 #include "svm.h"
-#include "kvm.h"
 
 static const u32 host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64
@@ -20,7 +19,7 @@ #endif
 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
 #define NUM_DB_REGS 4
 
-struct vcpu_svm {
+struct kvm_svm_data {
 	struct vmcb *vmcb;
 	unsigned long vmcb_pa;
 	struct svm_cpu_data *svm_data;
@@ -39,4 +38,3 @@ struct vcpu_svm {
 };
 
 #endif
-
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 850a1b1..16a3b6e 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -14,7 +14,7 @@
  *
  */
 
-#include "kvm_svm.h"
+#include "kvm.h"
 #include "x86_emulate.h"
 
 #include <linux/module.h>
@@ -567,18 +567,20 @@ static void init_vmcb(struct vmcb *vmcb)
 	/* rdx = ?? */
 }
 
-static int svm_create_vcpu(struct kvm_vcpu *vcpu)
+static int svm_vcpu_size(void)
+{
+	return sizeof(struct kvm_vcpu) + sizeof(struct kvm_svm_data);
+}
+
+static int svm_init_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct page *page;
 	int r;
 
 	r = -ENOMEM;
-	vcpu->svm = kzalloc(sizeof *vcpu->svm, GFP_KERNEL);
-	if (!vcpu->svm)
-		goto out1;
 	page = alloc_page(GFP_KERNEL);
 	if (!page)
-		goto out2;
+		goto out1;
 
 	vcpu->svm->vmcb = page_address(page);
 	clear_page(vcpu->svm->vmcb);
@@ -595,8 +597,6 @@ static int svm_create_vcpu(struct kvm_vc
 
 	return 0;
 
-out2:
-	kfree(vcpu->svm);
 out1:
 	return r;
 }
@@ -1608,7 +1608,7 @@ #endif
 		:
 		: [vcpu]"a"(vcpu),
 		  [svm]"i"(offsetof(struct kvm_vcpu, svm)),
-		  [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
+		  [vmcb]"i"(offsetof(struct kvm_svm_data, vmcb_pa)),
 		  [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])),
 		  [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])),
 		  [rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])),
@@ -1763,7 +1763,8 @@ static struct kvm_arch_ops svm_arch_ops 
 	.hardware_enable = svm_hardware_enable,
 	.hardware_disable = svm_hardware_disable,
 
-	.vcpu_create = svm_create_vcpu,
+	.vcpu_size = svm_vcpu_size,
+	.vcpu_init = svm_init_vcpu,
 	.vcpu_free = svm_free_vcpu,
 
 	.vcpu_load = svm_vcpu_load,
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 2c4f01b..49587a1 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -96,9 +96,9 @@ static inline u64 msr_efer_save_restore_
 
 static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu)
 {
-	int efer_offset = vcpu->msr_offset_efer;
-	return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) !=
-		msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]);
+	int efer_offset = vcpu->vmx->msr_offset_efer;
+	return msr_efer_save_restore_bits(vcpu->vmx->host_msrs[efer_offset]) !=
+		msr_efer_save_restore_bits(vcpu->vmx->guest_msrs[efer_offset]);
 }
 
 static inline int is_page_fault(u32 intr_info)
@@ -126,7 +126,7 @@ static int __find_msr_index(struct kvm_v
 	int i;
 
 	for (i = 0; i < vcpu->nmsrs; ++i)
-		if (vcpu->guest_msrs[i].index == msr)
+		if (vcpu->vmx->guest_msrs[i].index == msr)
 			return i;
 	return -1;
 }
@@ -137,7 +137,7 @@ static struct vmx_msr_entry *find_msr_en
 
 	i = __find_msr_index(vcpu, msr);
 	if (i >= 0)
-		return &vcpu->guest_msrs[i];
+		return &vcpu->vmx->guest_msrs[i];
 	return NULL;
 }
 
@@ -160,8 +160,8 @@ static void __vcpu_clear(void *arg)
 	int cpu = raw_smp_processor_id();
 
 	if (vcpu->cpu == cpu)
-		vmcs_clear(vcpu->vmcs);
-	if (per_cpu(current_vmcs, cpu) == vcpu->vmcs)
+		vmcs_clear(vcpu->vmx->vmcs);
+	if (per_cpu(current_vmcs, cpu) == vcpu->vmx->vmcs)
 		per_cpu(current_vmcs, cpu) = NULL;
 	rdtscll(vcpu->host_tsc);
 }
@@ -260,7 +260,7 @@ static void update_exception_bitmap(stru
 		eb |= 1u << NM_VECTOR;
 	if (vcpu->guest_debug.enabled)
 		eb |= 1u << 1;
-	if (vcpu->rmode.active)
+	if (vcpu->vmx->rmode.active)
 		eb = ~0;
 	vmcs_write32(EXCEPTION_BITMAP, eb);
 }
@@ -285,19 +285,19 @@ #endif
 static void load_transition_efer(struct kvm_vcpu *vcpu)
 {
 	u64 trans_efer;
-	int efer_offset = vcpu->msr_offset_efer;
+	int efer_offset = vcpu->vmx->msr_offset_efer;
 
-	trans_efer = vcpu->host_msrs[efer_offset].data;
+	trans_efer = vcpu->vmx->host_msrs[efer_offset].data;
 	trans_efer &= ~EFER_SAVE_RESTORE_BITS;
 	trans_efer |= msr_efer_save_restore_bits(
-				vcpu->guest_msrs[efer_offset]);
+				vcpu->vmx->guest_msrs[efer_offset]);
 	wrmsrl(MSR_EFER, trans_efer);
 	vcpu->stat.efer_reload++;
 }
 
 static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 {
-	struct vmx_host_state *hs = &vcpu->vmx_host_state;
+	struct vmx_host_state *hs = &vcpu->vmx->host_state;
 
 	if (hs->loaded)
 		return;
@@ -334,17 +334,17 @@ #endif
 
 #ifdef CONFIG_X86_64
 	if (is_long_mode(vcpu)) {
-		save_msrs(vcpu->host_msrs + vcpu->msr_offset_kernel_gs_base, 1);
+		save_msrs(vcpu->vmx->host_msrs + vcpu->vmx->msr_offset_kernel_gs_base, 1);
 	}
 #endif
-	load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+	load_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
 	if (msr_efer_need_save_restore(vcpu))
 		load_transition_efer(vcpu);
 }
 
 static void vmx_load_host_state(struct kvm_vcpu *vcpu)
 {
-	struct vmx_host_state *hs = &vcpu->vmx_host_state;
+	struct vmx_host_state *hs = &vcpu->vmx->host_state;
 
 	if (!hs->loaded)
 		return;
@@ -366,10 +366,10 @@ #endif
 
 		reload_tss();
 	}
-	save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
-	load_msrs(vcpu->host_msrs, vcpu->save_nmsrs);
+	save_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
+	load_msrs(vcpu->vmx->host_msrs, vcpu->save_nmsrs);
 	if (msr_efer_need_save_restore(vcpu))
-		load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1);
+		load_msrs(vcpu->vmx->host_msrs + vcpu->vmx->msr_offset_efer, 1);
 }
 
 /*
@@ -378,7 +378,7 @@ #endif
  */
 static void vmx_vcpu_load(struct kvm_vcpu *vcpu)
 {
-	u64 phys_addr = __pa(vcpu->vmcs);
+	u64 phys_addr = __pa(vcpu->vmx->vmcs);
 	int cpu;
 	u64 tsc_this, delta;
 
@@ -387,16 +387,16 @@ static void vmx_vcpu_load(struct kvm_vcp
 	if (vcpu->cpu != cpu)
 		vcpu_clear(vcpu);
 
-	if (per_cpu(current_vmcs, cpu) != vcpu->vmcs) {
+	if (per_cpu(current_vmcs, cpu) != vcpu->vmx->vmcs) {
 		u8 error;
 
-		per_cpu(current_vmcs, cpu) = vcpu->vmcs;
+		per_cpu(current_vmcs, cpu) = vcpu->vmx->vmcs;
 		asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0"
 			      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
 			      : "cc");
 		if (error)
 			printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
-			       vcpu->vmcs, phys_addr);
+			       vcpu->vmx->vmcs, phys_addr);
 	}
 
 	if (vcpu->cpu != cpu) {
@@ -504,12 +504,12 @@ static void vmx_inject_gp(struct kvm_vcp
 void move_msr_up(struct kvm_vcpu *vcpu, int from, int to)
 {
 	struct vmx_msr_entry tmp;
-	tmp = vcpu->guest_msrs[to];
-	vcpu->guest_msrs[to] = vcpu->guest_msrs[from];
-	vcpu->guest_msrs[from] = tmp;
-	tmp = vcpu->host_msrs[to];
-	vcpu->host_msrs[to] = vcpu->host_msrs[from];
-	vcpu->host_msrs[from] = tmp;
+	tmp = vcpu->vmx->guest_msrs[to];
+	vcpu->vmx->guest_msrs[to] = vcpu->vmx->guest_msrs[from];
+	vcpu->vmx->guest_msrs[from] = tmp;
+	tmp = vcpu->vmx->host_msrs[to];
+	vcpu->vmx->host_msrs[to] = vcpu->vmx->host_msrs[from];
+	vcpu->vmx->host_msrs[from] = tmp;
 }
 
 /*
@@ -550,10 +550,10 @@ #endif
 	vcpu->save_nmsrs = save_nmsrs;
 
 #ifdef CONFIG_X86_64
-	vcpu->msr_offset_kernel_gs_base =
+	vcpu->vmx->msr_offset_kernel_gs_base =
 		__find_msr_index(vcpu, MSR_KERNEL_GS_BASE);
 #endif
-	vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
+	vcpu->vmx->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
 }
 
 /*
@@ -646,7 +646,7 @@ static int vmx_set_msr(struct kvm_vcpu *
 #ifdef CONFIG_X86_64
 	case MSR_EFER:
 		ret = kvm_set_msr_common(vcpu, msr_index, data);
-		if (vcpu->vmx_host_state.loaded)
+		if (vcpu->vmx->host_state.loaded)
 			load_transition_efer(vcpu);
 		break;
 	case MSR_FS_BASE:
@@ -672,8 +672,8 @@ #endif
 		msr = find_msr_entry(vcpu, msr_index);
 		if (msr) {
 			msr->data = data;
-			if (vcpu->vmx_host_state.loaded)
-				load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+			if (vcpu->vmx->host_state.loaded)
+				load_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
 			break;
 		}
 		ret = kvm_set_msr_common(vcpu, msr_index, data);
@@ -868,15 +868,15 @@ static void enter_pmode(struct kvm_vcpu 
 {
 	unsigned long flags;
 
-	vcpu->rmode.active = 0;
+	vcpu->vmx->rmode.active = 0;
 
-	vmcs_writel(GUEST_TR_BASE, vcpu->rmode.tr.base);
-	vmcs_write32(GUEST_TR_LIMIT, vcpu->rmode.tr.limit);
-	vmcs_write32(GUEST_TR_AR_BYTES, vcpu->rmode.tr.ar);
+	vmcs_writel(GUEST_TR_BASE, vcpu->vmx->rmode.tr.base);
+	vmcs_write32(GUEST_TR_LIMIT, vcpu->vmx->rmode.tr.limit);
+	vmcs_write32(GUEST_TR_AR_BYTES, vcpu->vmx->rmode.tr.ar);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
 	flags &= ~(IOPL_MASK | X86_EFLAGS_VM);
-	flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT);
+	flags |= (vcpu->vmx->rmode.save_iopl << IOPL_SHIFT);
 	vmcs_writel(GUEST_RFLAGS, flags);
 
 	vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
@@ -884,10 +884,10 @@ static void enter_pmode(struct kvm_vcpu 
 
 	update_exception_bitmap(vcpu);
 
-	fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->rmode.es);
-	fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->rmode.ds);
-	fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->rmode.gs);
-	fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->rmode.fs);
+	fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->vmx->rmode.es);
+	fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->vmx->rmode.ds);
+	fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->vmx->rmode.gs);
+	fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->vmx->rmode.fs);
 
 	vmcs_write16(GUEST_SS_SELECTOR, 0);
 	vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
@@ -920,19 +920,19 @@ static void enter_rmode(struct kvm_vcpu 
 {
 	unsigned long flags;
 
-	vcpu->rmode.active = 1;
+	vcpu->vmx->rmode.active = 1;
 
-	vcpu->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
+	vcpu->vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
 	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
 
-	vcpu->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
+	vcpu->vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
 	vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
 
-	vcpu->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
+	vcpu->vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
 	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
-	vcpu->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;
+	vcpu->vmx->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;
 
 	flags |= IOPL_MASK | X86_EFLAGS_VM;
 
@@ -950,11 +950,10 @@ static void enter_rmode(struct kvm_vcpu 
 		vmcs_writel(GUEST_CS_BASE, 0xf0000);
 	vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
 
-	fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es);
-	fix_rmode_seg(VCPU_SREG_DS, &vcpu->rmode.ds);
-	fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs);
-	fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs);
-
+	fix_rmode_seg(VCPU_SREG_ES, &vcpu->vmx->rmode.es);
+	fix_rmode_seg(VCPU_SREG_DS, &vcpu->vmx->rmode.ds);
+	fix_rmode_seg(VCPU_SREG_GS, &vcpu->vmx->rmode.gs);
+	fix_rmode_seg(VCPU_SREG_FS, &vcpu->vmx->rmode.fs);
 	init_rmode_tss(vcpu->kvm);
 }
 
@@ -1002,10 +1001,10 @@ static void vmx_set_cr0(struct kvm_vcpu 
 {
 	vmx_fpu_deactivate(vcpu);
 
-	if (vcpu->rmode.active && (cr0 & X86_CR0_PE))
+	if (vcpu->vmx->rmode.active && (cr0 & X86_CR0_PE))
 		enter_pmode(vcpu);
 
-	if (!vcpu->rmode.active && !(cr0 & X86_CR0_PE))
+	if (!vcpu->vmx->rmode.active && !(cr0 & X86_CR0_PE))
 		enter_rmode(vcpu);
 
 #ifdef CONFIG_X86_64
@@ -1036,7 +1035,7 @@ static void vmx_set_cr3(struct kvm_vcpu 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
 	vmcs_writel(CR4_READ_SHADOW, cr4);
-	vmcs_writel(GUEST_CR4, cr4 | (vcpu->rmode.active ?
+	vmcs_writel(GUEST_CR4, cr4 | (vcpu->vmx->rmode.active ?
 		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON));
 	vcpu->cr4 = cr4;
 }
@@ -1124,17 +1123,17 @@ static void vmx_set_segment(struct kvm_v
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	u32 ar;
 
-	if (vcpu->rmode.active && seg == VCPU_SREG_TR) {
-		vcpu->rmode.tr.selector = var->selector;
-		vcpu->rmode.tr.base = var->base;
-		vcpu->rmode.tr.limit = var->limit;
-		vcpu->rmode.tr.ar = vmx_segment_access_rights(var);
+	if (vcpu->vmx->rmode.active && seg == VCPU_SREG_TR) {
+		vcpu->vmx->rmode.tr.selector = var->selector;
+		vcpu->vmx->rmode.tr.base = var->base;
+		vcpu->vmx->rmode.tr.limit = var->limit;
+		vcpu->vmx->rmode.tr.ar = vmx_segment_access_rights(var);
 		return;
 	}
 	vmcs_writel(sf->base, var->base);
 	vmcs_write32(sf->limit, var->limit);
 	vmcs_write16(sf->selector, var->selector);
-	if (vcpu->rmode.active && var->s) {
+	if (vcpu->vmx->rmode.active && var->s) {
 		/*
 		 * Hack real-mode segments into vm86 compatibility.
 		 */
@@ -1253,6 +1252,7 @@ static int vmx_vcpu_setup(struct kvm_vcp
 	vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val();
 	vcpu->cr8 = 0;
 	vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+
 	if (vcpu->vcpu_id == 0)
 		vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
 
@@ -1384,10 +1384,10 @@ #endif
 		if (wrmsr_safe(index, data_low, data_high) < 0)
 			continue;
 		data = data_low | ((u64)data_high << 32);
-		vcpu->host_msrs[j].index = index;
-		vcpu->host_msrs[j].reserved = 0;
-		vcpu->host_msrs[j].data = data;
-		vcpu->guest_msrs[j] = vcpu->host_msrs[j];
+		vcpu->vmx->host_msrs[j].index = index;
+		vcpu->vmx->host_msrs[j].reserved = 0;
+		vcpu->vmx->host_msrs[j].data = data;
+		vcpu->vmx->guest_msrs[j] = vcpu->vmx->host_msrs[j];
 		++vcpu->nmsrs;
 	}
 
@@ -1479,7 +1479,7 @@ static void kvm_do_inject_irq(struct kvm
 	if (!vcpu->irq_pending[word_index])
 		clear_bit(word_index, &vcpu->irq_summary);
 
-	if (vcpu->rmode.active) {
+	if (vcpu->vmx->rmode.active) {
 		inject_rmode_irq(vcpu, irq);
 		return;
 	}
@@ -1538,7 +1538,7 @@ static void kvm_guest_debug_pre(struct k
 static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 				  int vec, u32 err_code)
 {
-	if (!vcpu->rmode.active)
+	if (!vcpu->vmx->rmode.active)
 		return 0;
 
 	/*
@@ -1619,7 +1619,7 @@ static int handle_exception(struct kvm_v
 		}
 	}
 
-	if (vcpu->rmode.active &&
+	if (vcpu->vmx->rmode.active &&
 	    handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
 								error_code)) {
 		if (vcpu->halt_request) {
@@ -2224,28 +2224,34 @@ static void vmx_inject_page_fault(struct
 
 static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->vmcs) {
+	if (vcpu->vmx->vmcs) {
 		on_each_cpu(__vcpu_clear, vcpu, 0, 1);
-		free_vmcs(vcpu->vmcs);
-		vcpu->vmcs = NULL;
+		free_vmcs(vcpu->vmx->vmcs);
+		vcpu->vmx->vmcs = NULL;
+
 	}
 }
 
+static int vmx_vcpu_size(void)
+{
+	return sizeof(struct kvm_vcpu) + sizeof(struct kvm_vmx_data);
+}
+
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	vmx_free_vmcs(vcpu);
 }
 
-static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
+static int vmx_init_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vmcs *vmcs;
 
-	vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!vcpu->guest_msrs)
+	vcpu->vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!vcpu->vmx->guest_msrs)
 		return -ENOMEM;
 
-	vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!vcpu->host_msrs)
+	vcpu->vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!vcpu->vmx->host_msrs)
 		goto out_free_guest_msrs;
 
 	vmcs = alloc_vmcs();
@@ -2253,18 +2259,18 @@ static int vmx_create_vcpu(struct kvm_vc
 		goto out_free_msrs;
 
 	vmcs_clear(vmcs);
-	vcpu->vmcs = vmcs;
+	vcpu->vmx->vmcs = vmcs;
 	vcpu->launched = 0;
 
 	return 0;
 
 out_free_msrs:
-	kfree(vcpu->host_msrs);
-	vcpu->host_msrs = NULL;
+	kfree(vcpu->vmx->host_msrs);
+	vcpu->vmx->host_msrs = NULL;
 
 out_free_guest_msrs:
-	kfree(vcpu->guest_msrs);
-	vcpu->guest_msrs = NULL;
+	kfree(vcpu->vmx->guest_msrs);
+	vcpu->vmx->guest_msrs = NULL;
 
 	return -ENOMEM;
 }
@@ -2277,7 +2283,8 @@ static struct kvm_arch_ops vmx_arch_ops 
 	.hardware_enable = hardware_enable,
 	.hardware_disable = hardware_disable,
 
-	.vcpu_create = vmx_create_vcpu,
+	.vcpu_size = vmx_vcpu_size,
+	.vcpu_init = vmx_init_vcpu,
 	.vcpu_free = vmx_free_vcpu,
 
 	.vcpu_load = vmx_vcpu_load,

[-- Attachment #3: Type: text/plain, Size: 286 bytes --]

-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

[-- Attachment #4: Type: text/plain, Size: 186 bytes --]

_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] dynamically create vcpus + vmx/svm structures
       [not found]               ` <Pine.LNX.4.64.0707181528430.32400-hxTPNdr267xSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
@ 2007-07-19  9:19                 ` Avi Kivity
  0 siblings, 0 replies; 5+ messages in thread
From: Avi Kivity @ 2007-07-19  9:19 UTC (permalink / raw)
  To: Paul Turner; +Cc: KVM

Paul Turner wrote:
> here's an updated version that instead takes kvm_lock and leaves the 
> svm stuff in the relevant header file for now.  unfortunately we still 
> need the includes since gcc is braindead and can't compile untyped 
> [0]-size arrays properly, the only two ways i can see to fix this is 
> either embedding vcpu in an arch specific struct and or using a 
> long[0] member and a vmx/svm macro as you mentioned before..
>

Since this turned out to be a little more involved than anticipated, can 
you push out a patch that just converts the vcpu array to a pointer 
array?  That will allow careful review of the locking changes, and is 
independent of splitting that arch independent stuff off.

-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2007-07-19  9:19 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-07-13  1:20 [PATCH] dynamically create vcpus + vmx/svm structures Paul Turner
     [not found] ` <Pine.LNX.4.64.0707121815040.23503-hxTPNdr267xSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
2007-07-13  7:59   ` Avi Kivity
     [not found]     ` <ed628a920707131534x7fe57ca3sfa09d9d79412d9c6@mail.gmail.com>
     [not found]       ` <ed628a920707131534x7fe57ca3sfa09d9d79412d9c6-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-07-14  6:14         ` Avi Kivity
     [not found]           ` <469869CF.8030106-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-07-18 22:36             ` Paul Turner
     [not found]               ` <Pine.LNX.4.64.0707181528430.32400-hxTPNdr267xSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
2007-07-19  9:19                 ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox