* [PATCH] dynamically create vcpus + vmx/svm structures
@ 2007-07-13 1:20 Paul Turner
[not found] ` <Pine.LNX.4.64.0707121815040.23503-hxTPNdr267xSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: Paul Turner @ 2007-07-13 1:20 UTC (permalink / raw)
To: avi-atKUWr5tajBWk0Htik3J/w; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
[-- Attachment #1: Type: TEXT/PLAIN, Size: 28320 bytes --]
From: Paul Turner <pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
- vcpus now allocated on demand
- vmx/svm fields separated into arch specific structures on vcpus
- vmx/svm fields now only allocated on corresponding architectures
- Paul
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 0f7a4d9..c631192 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -16,6 +16,7 @@ #include <linux/mm.h>
#include <asm/signal.h>
#include "vmx.h"
+#include "kvm_svm.h"
#include <linux/kvm.h>
#include <linux/kvm_para.h>
@@ -326,16 +327,64 @@ struct kvm_io_device *kvm_io_bus_find_de
void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
struct kvm_io_device *dev);
+struct kvm_vmx_data {
+ int msr_offset_efer;
+
+ #ifdef CONFIG_X86_64
+ int msr_offset_kernel_gs_base;
+ #endif
+
+ struct vmx_host_state {
+ int loaded;
+ u16 fs_sel, gs_sel, ldt_sel;
+ int fs_gs_ldt_reload_needed;
+ } host_state;
+
+ struct vmx_msr_entry *guest_msrs;
+ struct vmx_msr_entry *host_msrs;
+
+ struct {
+ int active;
+ u8 save_iopl;
+ struct kvm_save_segment {
+ u16 selector;
+ unsigned long base;
+ u32 limit;
+ u32 ar;
+ } tr, es, ds, fs, gs;
+ } rmode;
+ int halt_request; /* real mode */
+
+ struct vmcs *vmcs;
+};
+
+struct kvm_svm_data {
+ struct vmcb *vmcb;
+ unsigned long vmcb_pa;
+ struct svm_cpu_data *svm_data;
+ uint64_t asid_generation;
+
+ unsigned long db_regs[NUM_DB_REGS];
+
+ u64 next_rip;
+
+ u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
+ u64 host_gs_base;
+ unsigned long host_cr2;
+ unsigned long host_db_regs[NUM_DB_REGS];
+ unsigned long host_dr6;
+ unsigned long host_dr7;
+};
+
+
struct kvm_vcpu {
struct kvm *kvm;
+ struct mutex *mutex; /* refers to corresponding vcpu_mutex on kvm */
int vcpu_id;
- union {
- struct vmcs *vmcs;
- struct vcpu_svm *svm;
- };
- struct mutex mutex;
+
int cpu;
int launched;
+
u64 host_tsc;
struct kvm_run *run;
int interrupt_window_open;
@@ -361,12 +410,6 @@ #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE
u64 ia32_misc_enable_msr;
int nmsrs;
int save_nmsrs;
- int msr_offset_efer;
-#ifdef CONFIG_X86_64
- int msr_offset_kernel_gs_base;
-#endif
- struct vmx_msr_entry *guest_msrs;
- struct vmx_msr_entry *host_msrs;
struct kvm_mmu mmu;
@@ -385,11 +428,6 @@ #endif
char *guest_fx_image;
int fpu_active;
int guest_fpu_loaded;
- struct vmx_host_state {
- int loaded;
- u16 fs_sel, gs_sel, ldt_sel;
- int fs_gs_ldt_reload_needed;
- } vmx_host_state;
int mmio_needed;
int mmio_read_completed;
@@ -406,22 +444,16 @@ #endif
struct kvm_stat stat;
- struct {
- int active;
- u8 save_iopl;
- struct kvm_save_segment {
- u16 selector;
- unsigned long base;
- u32 limit;
- u32 ar;
- } tr, es, ds, fs, gs;
- } rmode;
- int halt_request; /* real mode on Intel only */
-
int cpuid_nent;
struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+
+ union {
+ struct kvm_vmx_data vmx[0];
+ struct kvm_svm_data svm[0];
+ };
};
+
struct kvm_mem_alias {
gfn_t base_gfn;
unsigned long npages;
@@ -448,8 +480,11 @@ struct kvm {
struct list_head active_mmu_pages;
int n_free_mmu_pages;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
+
int nvcpus;
- struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
+ struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+ struct mutex vcpu_mutex[KVM_MAX_VCPUS];
+
int memory_config_version;
int busy;
unsigned long rmap_overflow;
@@ -472,7 +507,8 @@ struct kvm_arch_ops {
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
- int (*vcpu_create)(struct kvm_vcpu *vcpu);
+ int (*vcpu_size)(void);
+ int (*vcpu_init)(struct kvm_vcpu *vcpu);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
void (*vcpu_load)(struct kvm_vcpu *vcpu);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 4d2ef9b..86345be 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -234,7 +234,7 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
*/
static void vcpu_load(struct kvm_vcpu *vcpu)
{
- mutex_lock(&vcpu->mutex);
+ mutex_lock(vcpu->mutex);
kvm_arch_ops->vcpu_load(vcpu);
}
@@ -244,11 +244,12 @@ static void vcpu_load(struct kvm_vcpu *v
*/
static struct kvm_vcpu *vcpu_load_slot(struct kvm *kvm, int slot)
{
- struct kvm_vcpu *vcpu = &kvm->vcpus[slot];
+ struct kvm_vcpu *vcpu;
- mutex_lock(&vcpu->mutex);
- if (!vcpu->vmcs) {
- mutex_unlock(&vcpu->mutex);
+ mutex_lock(&kvm->vcpu_mutex[slot]);
+ vcpu = kvm->vcpus[slot];
+ if (!vcpu) {
+ mutex_unlock(&kvm->vcpu_mutex[slot]);
return NULL;
}
kvm_arch_ops->vcpu_load(vcpu);
@@ -258,7 +259,7 @@ static struct kvm_vcpu *vcpu_load_slot(s
static void vcpu_put(struct kvm_vcpu *vcpu)
{
kvm_arch_ops->vcpu_put(vcpu);
- mutex_unlock(&vcpu->mutex);
+ mutex_unlock(vcpu->mutex);
}
static void ack_flush(void *_completed)
@@ -279,7 +280,7 @@ void kvm_flush_remote_tlbs(struct kvm *k
cpus_clear(cpus);
needed = 0;
for (i = 0; i < kvm->nvcpus; ++i) {
- vcpu = &kvm->vcpus[i];
+ vcpu = kvm->vcpus[i];
if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests))
continue;
cpu = vcpu->cpu;
@@ -318,14 +319,10 @@ static struct kvm *kvm_create_vm(void)
list_add(&kvm->vm_list, &vm_list);
spin_unlock(&kvm_lock);
kvm_io_bus_init(&kvm->mmio_bus);
- for (i = 0; i < KVM_MAX_VCPUS; ++i) {
- struct kvm_vcpu *vcpu = &kvm->vcpus[i];
- mutex_init(&vcpu->mutex);
- vcpu->cpu = -1;
- vcpu->kvm = kvm;
- vcpu->mmu.root_hpa = INVALID_PAGE;
- }
+ for (i = 0; i < KVM_MAX_VCPUS; ++i)
+ mutex_init(&kvm->vcpu_mutex[i]);
+
return kvm;
}
@@ -379,7 +376,7 @@ static void free_pio_guest_pages(struct
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
- if (!vcpu->vmcs)
+ if (!vcpu)
return;
vcpu_load(vcpu);
@@ -389,7 +386,7 @@ static void kvm_unload_vcpu_mmu(struct k
static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
{
- if (!vcpu->vmcs)
+ if (!vcpu)
return;
vcpu_load(vcpu);
@@ -401,6 +398,8 @@ static void kvm_free_vcpu(struct kvm_vcp
free_page((unsigned long)vcpu->pio_data);
vcpu->pio_data = NULL;
free_pio_guest_pages(vcpu);
+
+ kfree(vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
@@ -411,9 +410,11 @@ static void kvm_free_vcpus(struct kvm *k
* Unpin any mmu pages first.
*/
for (i = 0; i < KVM_MAX_VCPUS; ++i)
- kvm_unload_vcpu_mmu(&kvm->vcpus[i]);
- for (i = 0; i < KVM_MAX_VCPUS; ++i)
- kvm_free_vcpu(&kvm->vcpus[i]);
+ kvm_unload_vcpu_mmu(kvm->vcpus[i]);
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ kvm_free_vcpu(kvm->vcpus[i]);
+ kvm->vcpus[i] = NULL;
+ }
}
static int kvm_dev_release(struct inode *inode, struct file *filp)
@@ -430,6 +431,7 @@ static void kvm_destroy_vm(struct kvm *k
kvm_io_bus_destroy(&kvm->mmio_bus);
kvm_free_vcpus(kvm);
kvm_free_physmem(kvm);
+
kfree(kvm);
}
@@ -796,7 +798,7 @@ raced:
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
struct kvm_vcpu *vcpu;
-
+
vcpu = vcpu_load_slot(kvm, i);
if (!vcpu)
continue;
@@ -922,11 +924,11 @@ static int kvm_vm_ioctl_set_memory_alias
spin_unlock(&kvm->lock);
- vcpu_load(&kvm->vcpus[0]);
+ vcpu_load(kvm->vcpus[0]);
spin_lock(&kvm->lock);
- kvm_mmu_zap_all(&kvm->vcpus[0]);
+ kvm_mmu_zap_all(kvm->vcpus[0]);
spin_unlock(&kvm->lock);
- vcpu_put(&kvm->vcpus[0]);
+ vcpu_put(kvm->vcpus[0]);
return 0;
@@ -2380,40 +2382,51 @@ static int kvm_vm_ioctl_create_vcpu(stru
{
int r;
struct kvm_vcpu *vcpu;
- struct page *page;
+ struct page *pio_page, *run_page;
r = -EINVAL;
if (!valid_vcpu(n))
goto out;
- vcpu = &kvm->vcpus[n];
- vcpu->vcpu_id = n;
+ mutex_lock(&kvm->vcpu_mutex[n]);
+ if (kvm->vcpus[n]) {
+ r = -EEXIST;
+ goto out_unlock;
+ }
- mutex_lock(&vcpu->mutex);
+ vcpu = kzalloc(kvm_arch_ops->vcpu_size(), GFP_KERNEL);
- if (vcpu->vmcs) {
- mutex_unlock(&vcpu->mutex);
- return -EEXIST;
+ if (!vcpu) {
+ r = -ENOMEM;
+ goto out_unlock;
}
- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ vcpu->mutex = &kvm->vcpu_mutex[n];
+ vcpu->cpu = -1;
+ vcpu->kvm = kvm;
+ vcpu->mmu.root_hpa = INVALID_PAGE;
+
+ vcpu->vcpu_id = n;
+ kvm->vcpus[n] = vcpu;
+
+ run_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
r = -ENOMEM;
- if (!page)
- goto out_unlock;
- vcpu->run = page_address(page);
+ if (!run_page)
+ goto out_deallocate;
+ vcpu->run = page_address(run_page);
- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ pio_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
r = -ENOMEM;
- if (!page)
+ if (!pio_page)
goto out_free_run;
- vcpu->pio_data = page_address(page);
+ vcpu->pio_data = page_address(pio_page);
vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf,
FX_IMAGE_ALIGN);
vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE;
vcpu->cr0 = 0x10;
-
- r = kvm_arch_ops->vcpu_create(vcpu);
+
+ r = kvm_arch_ops->vcpu_init(vcpu);
if (r < 0)
goto out_free_vcpus;
@@ -2425,6 +2438,7 @@ static int kvm_vm_ioctl_create_vcpu(stru
r = kvm_mmu_setup(vcpu);
if (r >= 0)
r = kvm_arch_ops->vcpu_setup(vcpu);
+
vcpu_put(vcpu);
if (r < 0)
@@ -2438,20 +2452,25 @@ static int kvm_vm_ioctl_create_vcpu(stru
if (n >= kvm->nvcpus)
kvm->nvcpus = n + 1;
spin_unlock(&kvm_lock);
-
+
return r;
out_free_vcpus:
kvm_free_vcpu(vcpu);
+ vcpu = NULL;
out_free_run:
- free_page((unsigned long)vcpu->run);
- vcpu->run = NULL;
+ free_page((unsigned long)run_page);
+out_deallocate:
+ kfree(vcpu);
+ kvm->vcpus[n] = NULL;
out_unlock:
- mutex_unlock(&vcpu->mutex);
+ mutex_unlock(&kvm->vcpu_mutex[n]);
+
out:
return r;
}
+
static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
{
u64 efer;
@@ -2846,9 +2865,7 @@ static int kvm_dev_ioctl_create_vm(void)
kvm_destroy_vm(kvm);
return r;
}
-
kvm->filp = file;
-
return fd;
}
@@ -2962,7 +2979,7 @@ static void decache_vcpus_on_cpu(int cpu
spin_lock(&kvm_lock);
list_for_each_entry(vm, &vm_list, vm_list)
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
- vcpu = &vm->vcpus[i];
+ vcpu = vm->vcpus[i];
/*
* If the vcpu is locked, then it is running on some
* other cpu and therefore it is not cached on the
@@ -2971,12 +2988,12 @@ static void decache_vcpus_on_cpu(int cpu
* If it's not locked, check the last cpu it executed
* on.
*/
- if (mutex_trylock(&vcpu->mutex)) {
+ if (mutex_trylock(vcpu->mutex)) {
if (vcpu->cpu == cpu) {
kvm_arch_ops->vcpu_decache(vcpu);
vcpu->cpu = -1;
}
- mutex_unlock(&vcpu->mutex);
+ mutex_unlock(vcpu->mutex);
}
}
spin_unlock(&kvm_lock);
@@ -3080,7 +3097,7 @@ static u64 stat_get(void *_offset)
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
- vcpu = &kvm->vcpus[i];
+ vcpu = kvm->vcpus[i];
total += *(u32 *)((void *)vcpu + offset);
}
spin_unlock(&kvm_lock);
diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h
index a869983..48ad325 100644
--- a/drivers/kvm/kvm_svm.h
+++ b/drivers/kvm/kvm_svm.h
@@ -7,7 +7,6 @@ #include <linux/list.h>
#include <asm/msr.h>
#include "svm.h"
-#include "kvm.h"
static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
@@ -20,23 +19,4 @@ #endif
#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
#define NUM_DB_REGS 4
-struct vcpu_svm {
- struct vmcb *vmcb;
- unsigned long vmcb_pa;
- struct svm_cpu_data *svm_data;
- uint64_t asid_generation;
-
- unsigned long db_regs[NUM_DB_REGS];
-
- u64 next_rip;
-
- u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
- u64 host_gs_base;
- unsigned long host_cr2;
- unsigned long host_db_regs[NUM_DB_REGS];
- unsigned long host_dr6;
- unsigned long host_dr7;
-};
-
#endif
-
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 52a11cc..40a1baf 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -14,7 +14,7 @@
*
*/
-#include "kvm_svm.h"
+#include "kvm.h"
#include "x86_emulate.h"
#include <linux/module.h>
@@ -568,18 +568,20 @@ static void init_vmcb(struct vmcb *vmcb)
/* rdx = ?? */
}
-static int svm_create_vcpu(struct kvm_vcpu *vcpu)
+static int svm_vcpu_size(void)
+{
+ return sizeof(struct kvm_vcpu) + sizeof(struct kvm_svm_data);
+}
+
+static int svm_init_vcpu(struct kvm_vcpu *vcpu)
{
struct page *page;
int r;
r = -ENOMEM;
- vcpu->svm = kzalloc(sizeof *vcpu->svm, GFP_KERNEL);
- if (!vcpu->svm)
- goto out1;
page = alloc_page(GFP_KERNEL);
if (!page)
- goto out2;
+ goto out1;
vcpu->svm->vmcb = page_address(page);
clear_page(vcpu->svm->vmcb);
@@ -596,8 +598,6 @@ static int svm_create_vcpu(struct kvm_vc
return 0;
-out2:
- kfree(vcpu->svm);
out1:
return r;
}
@@ -1610,7 +1610,7 @@ #endif
:
: [vcpu]"a"(vcpu),
[svm]"i"(offsetof(struct kvm_vcpu, svm)),
- [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
+ [vmcb]"i"(offsetof(struct kvm_svm_data, vmcb_pa)),
[rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])),
[rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])),
[rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])),
@@ -1765,7 +1765,8 @@ static struct kvm_arch_ops svm_arch_ops
.hardware_enable = svm_hardware_enable,
.hardware_disable = svm_hardware_disable,
- .vcpu_create = svm_create_vcpu,
+ .vcpu_size = svm_vcpu_size,
+ .vcpu_init = svm_init_vcpu,
.vcpu_free = svm_free_vcpu,
.vcpu_load = svm_vcpu_load,
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 7fa62c7..da14d2f 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -96,9 +96,9 @@ static inline u64 msr_efer_save_restore_
static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu)
{
- int efer_offset = vcpu->msr_offset_efer;
- return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) !=
- msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]);
+ int efer_offset = vcpu->vmx->msr_offset_efer;
+ return msr_efer_save_restore_bits(vcpu->vmx->host_msrs[efer_offset]) !=
+ msr_efer_save_restore_bits(vcpu->vmx->guest_msrs[efer_offset]);
}
static inline int is_page_fault(u32 intr_info)
@@ -126,7 +126,7 @@ static int __find_msr_index(struct kvm_v
int i;
for (i = 0; i < vcpu->nmsrs; ++i)
- if (vcpu->guest_msrs[i].index == msr)
+ if (vcpu->vmx->guest_msrs[i].index == msr)
return i;
return -1;
}
@@ -137,7 +137,7 @@ static struct vmx_msr_entry *find_msr_en
i = __find_msr_index(vcpu, msr);
if (i >= 0)
- return &vcpu->guest_msrs[i];
+ return &vcpu->vmx->guest_msrs[i];
return NULL;
}
@@ -160,8 +160,8 @@ static void __vcpu_clear(void *arg)
int cpu = raw_smp_processor_id();
if (vcpu->cpu == cpu)
- vmcs_clear(vcpu->vmcs);
- if (per_cpu(current_vmcs, cpu) == vcpu->vmcs)
+ vmcs_clear(vcpu->vmx->vmcs);
+ if (per_cpu(current_vmcs, cpu) == vcpu->vmx->vmcs)
per_cpu(current_vmcs, cpu) = NULL;
rdtscll(vcpu->host_tsc);
}
@@ -260,7 +260,7 @@ static void update_exception_bitmap(stru
eb |= 1u << NM_VECTOR;
if (vcpu->guest_debug.enabled)
eb |= 1u << 1;
- if (vcpu->rmode.active)
+ if (vcpu->vmx->rmode.active)
eb = ~0;
vmcs_write32(EXCEPTION_BITMAP, eb);
}
@@ -285,19 +285,19 @@ #endif
static void load_transition_efer(struct kvm_vcpu *vcpu)
{
u64 trans_efer;
- int efer_offset = vcpu->msr_offset_efer;
+ int efer_offset = vcpu->vmx->msr_offset_efer;
- trans_efer = vcpu->host_msrs[efer_offset].data;
+ trans_efer = vcpu->vmx->host_msrs[efer_offset].data;
trans_efer &= ~EFER_SAVE_RESTORE_BITS;
trans_efer |= msr_efer_save_restore_bits(
- vcpu->guest_msrs[efer_offset]);
+ vcpu->vmx->guest_msrs[efer_offset]);
wrmsrl(MSR_EFER, trans_efer);
vcpu->stat.efer_reload++;
}
static void vmx_save_host_state(struct kvm_vcpu *vcpu)
{
- struct vmx_host_state *hs = &vcpu->vmx_host_state;
+ struct vmx_host_state *hs = &vcpu->vmx->host_state;
if (hs->loaded)
return;
@@ -334,17 +334,17 @@ #endif
#ifdef CONFIG_X86_64
if (is_long_mode(vcpu)) {
- save_msrs(vcpu->host_msrs + vcpu->msr_offset_kernel_gs_base, 1);
+ save_msrs(vcpu->vmx->host_msrs + vcpu->vmx->msr_offset_kernel_gs_base, 1);
}
#endif
- load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+ load_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
if (msr_efer_need_save_restore(vcpu))
load_transition_efer(vcpu);
}
static void vmx_load_host_state(struct kvm_vcpu *vcpu)
{
- struct vmx_host_state *hs = &vcpu->vmx_host_state;
+ struct vmx_host_state *hs = &vcpu->vmx->host_state;
if (!hs->loaded)
return;
@@ -366,10 +366,10 @@ #endif
reload_tss();
}
- save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
- load_msrs(vcpu->host_msrs, vcpu->save_nmsrs);
+ save_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
+ load_msrs(vcpu->vmx->host_msrs, vcpu->save_nmsrs);
if (msr_efer_need_save_restore(vcpu))
- load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1);
+ load_msrs(vcpu->vmx->host_msrs + vcpu->vmx->msr_offset_efer, 1);
}
/*
@@ -378,7 +378,7 @@ #endif
*/
static void vmx_vcpu_load(struct kvm_vcpu *vcpu)
{
- u64 phys_addr = __pa(vcpu->vmcs);
+ u64 phys_addr = __pa(vcpu->vmx->vmcs);
int cpu;
u64 tsc_this, delta;
@@ -387,16 +387,16 @@ static void vmx_vcpu_load(struct kvm_vcp
if (vcpu->cpu != cpu)
vcpu_clear(vcpu);
- if (per_cpu(current_vmcs, cpu) != vcpu->vmcs) {
+ if (per_cpu(current_vmcs, cpu) != vcpu->vmx->vmcs) {
u8 error;
- per_cpu(current_vmcs, cpu) = vcpu->vmcs;
+ per_cpu(current_vmcs, cpu) = vcpu->vmx->vmcs;
asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0"
: "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
: "cc");
if (error)
printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
- vcpu->vmcs, phys_addr);
+ vcpu->vmx->vmcs, phys_addr);
}
if (vcpu->cpu != cpu) {
@@ -504,12 +504,12 @@ static void vmx_inject_gp(struct kvm_vcp
void move_msr_up(struct kvm_vcpu *vcpu, int from, int to)
{
struct vmx_msr_entry tmp;
- tmp = vcpu->guest_msrs[to];
- vcpu->guest_msrs[to] = vcpu->guest_msrs[from];
- vcpu->guest_msrs[from] = tmp;
- tmp = vcpu->host_msrs[to];
- vcpu->host_msrs[to] = vcpu->host_msrs[from];
- vcpu->host_msrs[from] = tmp;
+ tmp = vcpu->vmx->guest_msrs[to];
+ vcpu->vmx->guest_msrs[to] = vcpu->vmx->guest_msrs[from];
+ vcpu->vmx->guest_msrs[from] = tmp;
+ tmp = vcpu->vmx->host_msrs[to];
+ vcpu->vmx->host_msrs[to] = vcpu->vmx->host_msrs[from];
+ vcpu->vmx->host_msrs[from] = tmp;
}
/*
@@ -550,10 +550,10 @@ #endif
vcpu->save_nmsrs = save_nmsrs;
#ifdef CONFIG_X86_64
- vcpu->msr_offset_kernel_gs_base =
+ vcpu->vmx->msr_offset_kernel_gs_base =
__find_msr_index(vcpu, MSR_KERNEL_GS_BASE);
#endif
- vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
+ vcpu->vmx->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
}
/*
@@ -646,7 +646,7 @@ static int vmx_set_msr(struct kvm_vcpu *
#ifdef CONFIG_X86_64
case MSR_EFER:
ret = kvm_set_msr_common(vcpu, msr_index, data);
- if (vcpu->vmx_host_state.loaded)
+ if (vcpu->vmx->host_state.loaded)
load_transition_efer(vcpu);
break;
case MSR_FS_BASE:
@@ -672,8 +672,8 @@ #endif
msr = find_msr_entry(vcpu, msr_index);
if (msr) {
msr->data = data;
- if (vcpu->vmx_host_state.loaded)
- load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+ if (vcpu->vmx->host_state.loaded)
+ load_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
break;
}
ret = kvm_set_msr_common(vcpu, msr_index, data);
@@ -868,15 +868,15 @@ static void enter_pmode(struct kvm_vcpu
{
unsigned long flags;
- vcpu->rmode.active = 0;
+ vcpu->vmx->rmode.active = 0;
- vmcs_writel(GUEST_TR_BASE, vcpu->rmode.tr.base);
- vmcs_write32(GUEST_TR_LIMIT, vcpu->rmode.tr.limit);
- vmcs_write32(GUEST_TR_AR_BYTES, vcpu->rmode.tr.ar);
+ vmcs_writel(GUEST_TR_BASE, vcpu->vmx->rmode.tr.base);
+ vmcs_write32(GUEST_TR_LIMIT, vcpu->vmx->rmode.tr.limit);
+ vmcs_write32(GUEST_TR_AR_BYTES, vcpu->vmx->rmode.tr.ar);
flags = vmcs_readl(GUEST_RFLAGS);
flags &= ~(IOPL_MASK | X86_EFLAGS_VM);
- flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT);
+ flags |= (vcpu->vmx->rmode.save_iopl << IOPL_SHIFT);
vmcs_writel(GUEST_RFLAGS, flags);
vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~CR4_VME_MASK) |
@@ -884,10 +884,10 @@ static void enter_pmode(struct kvm_vcpu
update_exception_bitmap(vcpu);
- fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->rmode.es);
- fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->rmode.ds);
- fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->rmode.gs);
- fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->rmode.fs);
+ fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->vmx->rmode.es);
+ fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->vmx->rmode.ds);
+ fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->vmx->rmode.gs);
+ fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->vmx->rmode.fs);
vmcs_write16(GUEST_SS_SELECTOR, 0);
vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
@@ -920,19 +920,19 @@ static void enter_rmode(struct kvm_vcpu
{
unsigned long flags;
- vcpu->rmode.active = 1;
+ vcpu->vmx->rmode.active = 1;
- vcpu->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
+ vcpu->vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
- vcpu->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
+ vcpu->vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
- vcpu->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
+ vcpu->vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
flags = vmcs_readl(GUEST_RFLAGS);
- vcpu->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;
+ vcpu->vmx->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;
flags |= IOPL_MASK | X86_EFLAGS_VM;
@@ -950,11 +950,10 @@ static void enter_rmode(struct kvm_vcpu
vmcs_writel(GUEST_CS_BASE, 0xf0000);
vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
- fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es);
- fix_rmode_seg(VCPU_SREG_DS, &vcpu->rmode.ds);
- fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs);
- fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs);
-
+ fix_rmode_seg(VCPU_SREG_ES, &vcpu->vmx->rmode.es);
+ fix_rmode_seg(VCPU_SREG_DS, &vcpu->vmx->rmode.ds);
+ fix_rmode_seg(VCPU_SREG_GS, &vcpu->vmx->rmode.gs);
+ fix_rmode_seg(VCPU_SREG_FS, &vcpu->vmx->rmode.fs);
init_rmode_tss(vcpu->kvm);
}
@@ -1002,10 +1001,10 @@ static void vmx_set_cr0(struct kvm_vcpu
{
vmx_fpu_deactivate(vcpu);
- if (vcpu->rmode.active && (cr0 & CR0_PE_MASK))
+ if (vcpu->vmx->rmode.active && (cr0 & CR0_PE_MASK))
enter_pmode(vcpu);
- if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK))
+ if (!vcpu->vmx->rmode.active && !(cr0 & CR0_PE_MASK))
enter_rmode(vcpu);
#ifdef CONFIG_X86_64
@@ -1036,7 +1035,7 @@ static void vmx_set_cr3(struct kvm_vcpu
static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
vmcs_writel(CR4_READ_SHADOW, cr4);
- vmcs_writel(GUEST_CR4, cr4 | (vcpu->rmode.active ?
+ vmcs_writel(GUEST_CR4, cr4 | (vcpu->vmx->rmode.active ?
KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON));
vcpu->cr4 = cr4;
}
@@ -1124,17 +1123,17 @@ static void vmx_set_segment(struct kvm_v
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
u32 ar;
- if (vcpu->rmode.active && seg == VCPU_SREG_TR) {
- vcpu->rmode.tr.selector = var->selector;
- vcpu->rmode.tr.base = var->base;
- vcpu->rmode.tr.limit = var->limit;
- vcpu->rmode.tr.ar = vmx_segment_access_rights(var);
+ if (vcpu->vmx->rmode.active && seg == VCPU_SREG_TR) {
+ vcpu->vmx->rmode.tr.selector = var->selector;
+ vcpu->vmx->rmode.tr.base = var->base;
+ vcpu->vmx->rmode.tr.limit = var->limit;
+ vcpu->vmx->rmode.tr.ar = vmx_segment_access_rights(var);
return;
}
vmcs_writel(sf->base, var->base);
vmcs_write32(sf->limit, var->limit);
vmcs_write16(sf->selector, var->selector);
- if (vcpu->rmode.active && var->s) {
+ if (vcpu->vmx->rmode.active && var->s) {
/*
* Hack real-mode segments into vm86 compatibility.
*/
@@ -1253,11 +1252,11 @@ static int vmx_vcpu_setup(struct kvm_vcp
vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val();
vcpu->cr8 = 0;
vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+
if (vcpu->vcpu_id == 0)
vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
fx_init(vcpu);
-
/*
* GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
* insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh.
@@ -1384,13 +1383,12 @@ #endif
if (wrmsr_safe(index, data_low, data_high) < 0)
continue;
data = data_low | ((u64)data_high << 32);
- vcpu->host_msrs[j].index = index;
- vcpu->host_msrs[j].reserved = 0;
- vcpu->host_msrs[j].data = data;
- vcpu->guest_msrs[j] = vcpu->host_msrs[j];
+ vcpu->vmx->host_msrs[j].index = index;
+ vcpu->vmx->host_msrs[j].reserved = 0;
+ vcpu->vmx->host_msrs[j].data = data;
+ vcpu->vmx->guest_msrs[j] = vcpu->vmx->host_msrs[j];
++vcpu->nmsrs;
}
-
setup_msrs(vcpu);
vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS, VM_EXIT_CONTROLS,
@@ -1408,7 +1406,6 @@ #endif
vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
-
vcpu->cr0 = 0x60000010;
vmx_set_cr0(vcpu, vcpu->cr0); // enter rmode
vmx_set_cr4(vcpu, 0);
@@ -1479,7 +1476,7 @@ static void kvm_do_inject_irq(struct kvm
if (!vcpu->irq_pending[word_index])
clear_bit(word_index, &vcpu->irq_summary);
- if (vcpu->rmode.active) {
+ if (vcpu->vmx->rmode.active) {
inject_rmode_irq(vcpu, irq);
return;
}
@@ -1538,7 +1535,7 @@ static void kvm_guest_debug_pre(struct k
static int handle_rmode_exception(struct kvm_vcpu *vcpu,
int vec, u32 err_code)
{
- if (!vcpu->rmode.active)
+ if (!vcpu->vmx->rmode.active)
return 0;
/*
@@ -1620,11 +1617,11 @@ static int handle_exception(struct kvm_v
}
}
- if (vcpu->rmode.active &&
+ if (vcpu->vmx->rmode.active &&
handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
error_code)) {
- if (vcpu->halt_request) {
- vcpu->halt_request = 0;
+ if (vcpu->vmx->halt_request) {
+ vcpu->vmx->halt_request = 0;
return kvm_emulate_halt(vcpu);
}
return 1;
@@ -2225,28 +2222,34 @@ static void vmx_inject_page_fault(struct
static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
{
- if (vcpu->vmcs) {
+ if (vcpu->vmx->vmcs) {
on_each_cpu(__vcpu_clear, vcpu, 0, 1);
- free_vmcs(vcpu->vmcs);
- vcpu->vmcs = NULL;
+ free_vmcs(vcpu->vmx->vmcs);
+ vcpu->vmx->vmcs = NULL;
+
}
}
+static int vmx_vcpu_size(void)
+{
+ return sizeof(struct kvm_vcpu) + sizeof(struct kvm_vmx_data);
+}
+
static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
vmx_free_vmcs(vcpu);
}
-static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
+static int vmx_init_vcpu(struct kvm_vcpu *vcpu)
{
struct vmcs *vmcs;
- vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!vcpu->guest_msrs)
+ vcpu->vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!vcpu->vmx->guest_msrs)
return -ENOMEM;
- vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!vcpu->host_msrs)
+ vcpu->vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!vcpu->vmx->host_msrs)
goto out_free_guest_msrs;
vmcs = alloc_vmcs();
@@ -2254,18 +2257,18 @@ static int vmx_create_vcpu(struct kvm_vc
goto out_free_msrs;
vmcs_clear(vmcs);
- vcpu->vmcs = vmcs;
+ vcpu->vmx->vmcs = vmcs;
vcpu->launched = 0;
return 0;
out_free_msrs:
- kfree(vcpu->host_msrs);
- vcpu->host_msrs = NULL;
+ kfree(vcpu->vmx->host_msrs);
+ vcpu->vmx->host_msrs = NULL;
out_free_guest_msrs:
- kfree(vcpu->guest_msrs);
- vcpu->guest_msrs = NULL;
+ kfree(vcpu->vmx->guest_msrs);
+ vcpu->vmx->guest_msrs = NULL;
return -ENOMEM;
}
@@ -2278,7 +2281,8 @@ static struct kvm_arch_ops vmx_arch_ops
.hardware_enable = hardware_enable,
.hardware_disable = hardware_disable,
- .vcpu_create = vmx_create_vcpu,
+ .vcpu_size = vmx_vcpu_size,
+ .vcpu_init = vmx_init_vcpu,
.vcpu_free = vmx_free_vcpu,
.vcpu_load = vmx_vcpu_load,
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index f60012d..4e821ed 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -1155,7 +1155,7 @@ special_insn:
DPRINTF("Urk! I don't handle SCAS.\n");
goto cannot_emulate;
case 0xf4: /* hlt */
- ctxt->vcpu->halt_request = 1;
+ ctxt->vcpu->vmx->halt_request = 1;
goto done;
case 0xc3: /* ret */
dst.ptr = &_eip;
[-- Attachment #2: Type: TEXT/PLAIN, Size: 28840 bytes --]
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 0f7a4d9..c631192 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -16,6 +16,7 @@ #include <linux/mm.h>
#include <asm/signal.h>
#include "vmx.h"
+#include "kvm_svm.h"
#include <linux/kvm.h>
#include <linux/kvm_para.h>
@@ -326,16 +327,64 @@ struct kvm_io_device *kvm_io_bus_find_de
void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
struct kvm_io_device *dev);
+struct kvm_vmx_data {
+ int msr_offset_efer;
+
+ #ifdef CONFIG_X86_64
+ int msr_offset_kernel_gs_base;
+ #endif
+
+ struct vmx_host_state {
+ int loaded;
+ u16 fs_sel, gs_sel, ldt_sel;
+ int fs_gs_ldt_reload_needed;
+ } host_state;
+
+ struct vmx_msr_entry *guest_msrs;
+ struct vmx_msr_entry *host_msrs;
+
+ struct {
+ int active;
+ u8 save_iopl;
+ struct kvm_save_segment {
+ u16 selector;
+ unsigned long base;
+ u32 limit;
+ u32 ar;
+ } tr, es, ds, fs, gs;
+ } rmode;
+ int halt_request; /* real mode */
+
+ struct vmcs *vmcs;
+};
+
+struct kvm_svm_data {
+ struct vmcb *vmcb;
+ unsigned long vmcb_pa;
+ struct svm_cpu_data *svm_data;
+ uint64_t asid_generation;
+
+ unsigned long db_regs[NUM_DB_REGS];
+
+ u64 next_rip;
+
+ u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
+ u64 host_gs_base;
+ unsigned long host_cr2;
+ unsigned long host_db_regs[NUM_DB_REGS];
+ unsigned long host_dr6;
+ unsigned long host_dr7;
+};
+
+
struct kvm_vcpu {
struct kvm *kvm;
+ struct mutex *mutex; /* refers to corresponding vcpu_mutex on kvm */
int vcpu_id;
- union {
- struct vmcs *vmcs;
- struct vcpu_svm *svm;
- };
- struct mutex mutex;
+
int cpu;
int launched;
+
u64 host_tsc;
struct kvm_run *run;
int interrupt_window_open;
@@ -361,12 +410,6 @@ #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE
u64 ia32_misc_enable_msr;
int nmsrs;
int save_nmsrs;
- int msr_offset_efer;
-#ifdef CONFIG_X86_64
- int msr_offset_kernel_gs_base;
-#endif
- struct vmx_msr_entry *guest_msrs;
- struct vmx_msr_entry *host_msrs;
struct kvm_mmu mmu;
@@ -385,11 +428,6 @@ #endif
char *guest_fx_image;
int fpu_active;
int guest_fpu_loaded;
- struct vmx_host_state {
- int loaded;
- u16 fs_sel, gs_sel, ldt_sel;
- int fs_gs_ldt_reload_needed;
- } vmx_host_state;
int mmio_needed;
int mmio_read_completed;
@@ -406,22 +444,16 @@ #endif
struct kvm_stat stat;
- struct {
- int active;
- u8 save_iopl;
- struct kvm_save_segment {
- u16 selector;
- unsigned long base;
- u32 limit;
- u32 ar;
- } tr, es, ds, fs, gs;
- } rmode;
- int halt_request; /* real mode on Intel only */
-
int cpuid_nent;
struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+
+ union {
+ struct kvm_vmx_data vmx[0];
+ struct kvm_svm_data svm[0];
+ };
};
+
struct kvm_mem_alias {
gfn_t base_gfn;
unsigned long npages;
@@ -448,8 +480,11 @@ struct kvm {
struct list_head active_mmu_pages;
int n_free_mmu_pages;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
+
int nvcpus;
- struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
+ struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+ struct mutex vcpu_mutex[KVM_MAX_VCPUS];
+
int memory_config_version;
int busy;
unsigned long rmap_overflow;
@@ -472,7 +507,8 @@ struct kvm_arch_ops {
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
- int (*vcpu_create)(struct kvm_vcpu *vcpu);
+ int (*vcpu_size)(void);
+ int (*vcpu_init)(struct kvm_vcpu *vcpu);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
void (*vcpu_load)(struct kvm_vcpu *vcpu);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 4d2ef9b..86345be 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -234,7 +234,7 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
*/
static void vcpu_load(struct kvm_vcpu *vcpu)
{
- mutex_lock(&vcpu->mutex);
+ mutex_lock(vcpu->mutex);
kvm_arch_ops->vcpu_load(vcpu);
}
@@ -244,11 +244,12 @@ static void vcpu_load(struct kvm_vcpu *v
*/
static struct kvm_vcpu *vcpu_load_slot(struct kvm *kvm, int slot)
{
- struct kvm_vcpu *vcpu = &kvm->vcpus[slot];
+ struct kvm_vcpu *vcpu;
- mutex_lock(&vcpu->mutex);
- if (!vcpu->vmcs) {
- mutex_unlock(&vcpu->mutex);
+ mutex_lock(&kvm->vcpu_mutex[slot]);
+ vcpu = kvm->vcpus[slot];
+ if (!vcpu) {
+ mutex_unlock(&kvm->vcpu_mutex[slot]);
return NULL;
}
kvm_arch_ops->vcpu_load(vcpu);
@@ -258,7 +259,7 @@ static struct kvm_vcpu *vcpu_load_slot(s
static void vcpu_put(struct kvm_vcpu *vcpu)
{
kvm_arch_ops->vcpu_put(vcpu);
- mutex_unlock(&vcpu->mutex);
+ mutex_unlock(vcpu->mutex);
}
static void ack_flush(void *_completed)
@@ -279,7 +280,7 @@ void kvm_flush_remote_tlbs(struct kvm *k
cpus_clear(cpus);
needed = 0;
for (i = 0; i < kvm->nvcpus; ++i) {
- vcpu = &kvm->vcpus[i];
+ vcpu = kvm->vcpus[i];
if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests))
continue;
cpu = vcpu->cpu;
@@ -318,14 +319,10 @@ static struct kvm *kvm_create_vm(void)
list_add(&kvm->vm_list, &vm_list);
spin_unlock(&kvm_lock);
kvm_io_bus_init(&kvm->mmio_bus);
- for (i = 0; i < KVM_MAX_VCPUS; ++i) {
- struct kvm_vcpu *vcpu = &kvm->vcpus[i];
- mutex_init(&vcpu->mutex);
- vcpu->cpu = -1;
- vcpu->kvm = kvm;
- vcpu->mmu.root_hpa = INVALID_PAGE;
- }
+ for (i = 0; i < KVM_MAX_VCPUS; ++i)
+ mutex_init(&kvm->vcpu_mutex[i]);
+
return kvm;
}
@@ -379,7 +376,7 @@ static void free_pio_guest_pages(struct
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
- if (!vcpu->vmcs)
+ if (!vcpu)
return;
vcpu_load(vcpu);
@@ -389,7 +386,7 @@ static void kvm_unload_vcpu_mmu(struct k
static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
{
- if (!vcpu->vmcs)
+ if (!vcpu)
return;
vcpu_load(vcpu);
@@ -401,6 +398,8 @@ static void kvm_free_vcpu(struct kvm_vcp
free_page((unsigned long)vcpu->pio_data);
vcpu->pio_data = NULL;
free_pio_guest_pages(vcpu);
+
+ kfree(vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
@@ -411,9 +410,11 @@ static void kvm_free_vcpus(struct kvm *k
* Unpin any mmu pages first.
*/
for (i = 0; i < KVM_MAX_VCPUS; ++i)
- kvm_unload_vcpu_mmu(&kvm->vcpus[i]);
- for (i = 0; i < KVM_MAX_VCPUS; ++i)
- kvm_free_vcpu(&kvm->vcpus[i]);
+ kvm_unload_vcpu_mmu(kvm->vcpus[i]);
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ kvm_free_vcpu(kvm->vcpus[i]);
+ kvm->vcpus[i] = NULL;
+ }
}
static int kvm_dev_release(struct inode *inode, struct file *filp)
@@ -430,6 +431,7 @@ static void kvm_destroy_vm(struct kvm *k
kvm_io_bus_destroy(&kvm->mmio_bus);
kvm_free_vcpus(kvm);
kvm_free_physmem(kvm);
+
kfree(kvm);
}
@@ -796,7 +798,7 @@ raced:
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
struct kvm_vcpu *vcpu;
-
+
vcpu = vcpu_load_slot(kvm, i);
if (!vcpu)
continue;
@@ -922,11 +924,11 @@ static int kvm_vm_ioctl_set_memory_alias
spin_unlock(&kvm->lock);
- vcpu_load(&kvm->vcpus[0]);
+ vcpu_load(kvm->vcpus[0]);
spin_lock(&kvm->lock);
- kvm_mmu_zap_all(&kvm->vcpus[0]);
+ kvm_mmu_zap_all(kvm->vcpus[0]);
spin_unlock(&kvm->lock);
- vcpu_put(&kvm->vcpus[0]);
+ vcpu_put(kvm->vcpus[0]);
return 0;
@@ -2380,40 +2382,51 @@ static int kvm_vm_ioctl_create_vcpu(stru
{
int r;
struct kvm_vcpu *vcpu;
- struct page *page;
+ struct page *pio_page, *run_page;
r = -EINVAL;
if (!valid_vcpu(n))
goto out;
- vcpu = &kvm->vcpus[n];
- vcpu->vcpu_id = n;
+ mutex_lock(&kvm->vcpu_mutex[n]);
+ if (kvm->vcpus[n]) {
+ r = -EEXIST;
+ goto out_unlock;
+ }
- mutex_lock(&vcpu->mutex);
+ vcpu = kzalloc(kvm_arch_ops->vcpu_size(), GFP_KERNEL);
- if (vcpu->vmcs) {
- mutex_unlock(&vcpu->mutex);
- return -EEXIST;
+ if (!vcpu) {
+ r = -ENOMEM;
+ goto out_unlock;
}
- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ vcpu->mutex = &kvm->vcpu_mutex[n];
+ vcpu->cpu = -1;
+ vcpu->kvm = kvm;
+ vcpu->mmu.root_hpa = INVALID_PAGE;
+
+ vcpu->vcpu_id = n;
+ kvm->vcpus[n] = vcpu;
+
+ run_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
r = -ENOMEM;
- if (!page)
- goto out_unlock;
- vcpu->run = page_address(page);
+ if (!run_page)
+ goto out_deallocate;
+ vcpu->run = page_address(run_page);
- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ pio_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
r = -ENOMEM;
- if (!page)
+ if (!pio_page)
goto out_free_run;
- vcpu->pio_data = page_address(page);
+ vcpu->pio_data = page_address(pio_page);
vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf,
FX_IMAGE_ALIGN);
vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE;
vcpu->cr0 = 0x10;
-
- r = kvm_arch_ops->vcpu_create(vcpu);
+
+ r = kvm_arch_ops->vcpu_init(vcpu);
if (r < 0)
goto out_free_vcpus;
@@ -2425,6 +2438,7 @@ static int kvm_vm_ioctl_create_vcpu(stru
r = kvm_mmu_setup(vcpu);
if (r >= 0)
r = kvm_arch_ops->vcpu_setup(vcpu);
+
vcpu_put(vcpu);
if (r < 0)
@@ -2438,20 +2452,25 @@ static int kvm_vm_ioctl_create_vcpu(stru
if (n >= kvm->nvcpus)
kvm->nvcpus = n + 1;
spin_unlock(&kvm_lock);
-
+
return r;
out_free_vcpus:
kvm_free_vcpu(vcpu);
+ vcpu = NULL;
out_free_run:
- free_page((unsigned long)vcpu->run);
- vcpu->run = NULL;
+ free_page((unsigned long)run_page);
+out_deallocate:
+ kfree(vcpu);
+ kvm->vcpus[n] = NULL;
out_unlock:
- mutex_unlock(&vcpu->mutex);
+ mutex_unlock(&kvm->vcpu_mutex[n]);
+
out:
return r;
}
+
static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
{
u64 efer;
@@ -2846,9 +2865,7 @@ static int kvm_dev_ioctl_create_vm(void)
kvm_destroy_vm(kvm);
return r;
}
-
kvm->filp = file;
-
return fd;
}
@@ -2962,7 +2979,7 @@ static void decache_vcpus_on_cpu(int cpu
spin_lock(&kvm_lock);
list_for_each_entry(vm, &vm_list, vm_list)
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
- vcpu = &vm->vcpus[i];
+ vcpu = vm->vcpus[i];
/*
* If the vcpu is locked, then it is running on some
* other cpu and therefore it is not cached on the
@@ -2971,12 +2988,12 @@ static void decache_vcpus_on_cpu(int cpu
* If it's not locked, check the last cpu it executed
* on.
*/
- if (mutex_trylock(&vcpu->mutex)) {
+ if (mutex_trylock(vcpu->mutex)) {
if (vcpu->cpu == cpu) {
kvm_arch_ops->vcpu_decache(vcpu);
vcpu->cpu = -1;
}
- mutex_unlock(&vcpu->mutex);
+ mutex_unlock(vcpu->mutex);
}
}
spin_unlock(&kvm_lock);
@@ -3080,7 +3097,7 @@ static u64 stat_get(void *_offset)
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
- vcpu = &kvm->vcpus[i];
+ vcpu = kvm->vcpus[i];
total += *(u32 *)((void *)vcpu + offset);
}
spin_unlock(&kvm_lock);
diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h
index a869983..48ad325 100644
--- a/drivers/kvm/kvm_svm.h
+++ b/drivers/kvm/kvm_svm.h
@@ -7,7 +7,6 @@ #include <linux/list.h>
#include <asm/msr.h>
#include "svm.h"
-#include "kvm.h"
static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
@@ -20,23 +19,4 @@ #endif
#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
#define NUM_DB_REGS 4
-struct vcpu_svm {
- struct vmcb *vmcb;
- unsigned long vmcb_pa;
- struct svm_cpu_data *svm_data;
- uint64_t asid_generation;
-
- unsigned long db_regs[NUM_DB_REGS];
-
- u64 next_rip;
-
- u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
- u64 host_gs_base;
- unsigned long host_cr2;
- unsigned long host_db_regs[NUM_DB_REGS];
- unsigned long host_dr6;
- unsigned long host_dr7;
-};
-
#endif
-
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 52a11cc..40a1baf 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -14,7 +14,7 @@
*
*/
-#include "kvm_svm.h"
+#include "kvm.h"
#include "x86_emulate.h"
#include <linux/module.h>
@@ -568,18 +568,20 @@ static void init_vmcb(struct vmcb *vmcb)
/* rdx = ?? */
}
-static int svm_create_vcpu(struct kvm_vcpu *vcpu)
+static int svm_vcpu_size(void)
+{
+ return sizeof(struct kvm_vcpu) + sizeof(struct kvm_svm_data);
+}
+
+static int svm_init_vcpu(struct kvm_vcpu *vcpu)
{
struct page *page;
int r;
r = -ENOMEM;
- vcpu->svm = kzalloc(sizeof *vcpu->svm, GFP_KERNEL);
- if (!vcpu->svm)
- goto out1;
page = alloc_page(GFP_KERNEL);
if (!page)
- goto out2;
+ goto out1;
vcpu->svm->vmcb = page_address(page);
clear_page(vcpu->svm->vmcb);
@@ -596,8 +598,6 @@ static int svm_create_vcpu(struct kvm_vc
return 0;
-out2:
- kfree(vcpu->svm);
out1:
return r;
}
@@ -1610,7 +1610,7 @@ #endif
:
: [vcpu]"a"(vcpu),
[svm]"i"(offsetof(struct kvm_vcpu, svm)),
- [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
+ [vmcb]"i"(offsetof(struct kvm_svm_data, vmcb_pa)),
[rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])),
[rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])),
[rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])),
@@ -1765,7 +1765,8 @@ static struct kvm_arch_ops svm_arch_ops
.hardware_enable = svm_hardware_enable,
.hardware_disable = svm_hardware_disable,
- .vcpu_create = svm_create_vcpu,
+ .vcpu_size = svm_vcpu_size,
+ .vcpu_init = svm_init_vcpu,
.vcpu_free = svm_free_vcpu,
.vcpu_load = svm_vcpu_load,
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 7fa62c7..da14d2f 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -96,9 +96,9 @@ static inline u64 msr_efer_save_restore_
static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu)
{
- int efer_offset = vcpu->msr_offset_efer;
- return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) !=
- msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]);
+ int efer_offset = vcpu->vmx->msr_offset_efer;
+ return msr_efer_save_restore_bits(vcpu->vmx->host_msrs[efer_offset]) !=
+ msr_efer_save_restore_bits(vcpu->vmx->guest_msrs[efer_offset]);
}
static inline int is_page_fault(u32 intr_info)
@@ -126,7 +126,7 @@ static int __find_msr_index(struct kvm_v
int i;
for (i = 0; i < vcpu->nmsrs; ++i)
- if (vcpu->guest_msrs[i].index == msr)
+ if (vcpu->vmx->guest_msrs[i].index == msr)
return i;
return -1;
}
@@ -137,7 +137,7 @@ static struct vmx_msr_entry *find_msr_en
i = __find_msr_index(vcpu, msr);
if (i >= 0)
- return &vcpu->guest_msrs[i];
+ return &vcpu->vmx->guest_msrs[i];
return NULL;
}
@@ -160,8 +160,8 @@ static void __vcpu_clear(void *arg)
int cpu = raw_smp_processor_id();
if (vcpu->cpu == cpu)
- vmcs_clear(vcpu->vmcs);
- if (per_cpu(current_vmcs, cpu) == vcpu->vmcs)
+ vmcs_clear(vcpu->vmx->vmcs);
+ if (per_cpu(current_vmcs, cpu) == vcpu->vmx->vmcs)
per_cpu(current_vmcs, cpu) = NULL;
rdtscll(vcpu->host_tsc);
}
@@ -260,7 +260,7 @@ static void update_exception_bitmap(stru
eb |= 1u << NM_VECTOR;
if (vcpu->guest_debug.enabled)
eb |= 1u << 1;
- if (vcpu->rmode.active)
+ if (vcpu->vmx->rmode.active)
eb = ~0;
vmcs_write32(EXCEPTION_BITMAP, eb);
}
@@ -285,19 +285,19 @@ #endif
static void load_transition_efer(struct kvm_vcpu *vcpu)
{
u64 trans_efer;
- int efer_offset = vcpu->msr_offset_efer;
+ int efer_offset = vcpu->vmx->msr_offset_efer;
- trans_efer = vcpu->host_msrs[efer_offset].data;
+ trans_efer = vcpu->vmx->host_msrs[efer_offset].data;
trans_efer &= ~EFER_SAVE_RESTORE_BITS;
trans_efer |= msr_efer_save_restore_bits(
- vcpu->guest_msrs[efer_offset]);
+ vcpu->vmx->guest_msrs[efer_offset]);
wrmsrl(MSR_EFER, trans_efer);
vcpu->stat.efer_reload++;
}
static void vmx_save_host_state(struct kvm_vcpu *vcpu)
{
- struct vmx_host_state *hs = &vcpu->vmx_host_state;
+ struct vmx_host_state *hs = &vcpu->vmx->host_state;
if (hs->loaded)
return;
@@ -334,17 +334,17 @@ #endif
#ifdef CONFIG_X86_64
if (is_long_mode(vcpu)) {
- save_msrs(vcpu->host_msrs + vcpu->msr_offset_kernel_gs_base, 1);
+ save_msrs(vcpu->vmx->host_msrs + vcpu->vmx->msr_offset_kernel_gs_base, 1);
}
#endif
- load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+ load_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
if (msr_efer_need_save_restore(vcpu))
load_transition_efer(vcpu);
}
static void vmx_load_host_state(struct kvm_vcpu *vcpu)
{
- struct vmx_host_state *hs = &vcpu->vmx_host_state;
+ struct vmx_host_state *hs = &vcpu->vmx->host_state;
if (!hs->loaded)
return;
@@ -366,10 +366,10 @@ #endif
reload_tss();
}
- save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
- load_msrs(vcpu->host_msrs, vcpu->save_nmsrs);
+ save_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
+ load_msrs(vcpu->vmx->host_msrs, vcpu->save_nmsrs);
if (msr_efer_need_save_restore(vcpu))
- load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1);
+ load_msrs(vcpu->vmx->host_msrs + vcpu->vmx->msr_offset_efer, 1);
}
/*
@@ -378,7 +378,7 @@ #endif
*/
static void vmx_vcpu_load(struct kvm_vcpu *vcpu)
{
- u64 phys_addr = __pa(vcpu->vmcs);
+ u64 phys_addr = __pa(vcpu->vmx->vmcs);
int cpu;
u64 tsc_this, delta;
@@ -387,16 +387,16 @@ static void vmx_vcpu_load(struct kvm_vcp
if (vcpu->cpu != cpu)
vcpu_clear(vcpu);
- if (per_cpu(current_vmcs, cpu) != vcpu->vmcs) {
+ if (per_cpu(current_vmcs, cpu) != vcpu->vmx->vmcs) {
u8 error;
- per_cpu(current_vmcs, cpu) = vcpu->vmcs;
+ per_cpu(current_vmcs, cpu) = vcpu->vmx->vmcs;
asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0"
: "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
: "cc");
if (error)
printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
- vcpu->vmcs, phys_addr);
+ vcpu->vmx->vmcs, phys_addr);
}
if (vcpu->cpu != cpu) {
@@ -504,12 +504,12 @@ static void vmx_inject_gp(struct kvm_vcp
void move_msr_up(struct kvm_vcpu *vcpu, int from, int to)
{
struct vmx_msr_entry tmp;
- tmp = vcpu->guest_msrs[to];
- vcpu->guest_msrs[to] = vcpu->guest_msrs[from];
- vcpu->guest_msrs[from] = tmp;
- tmp = vcpu->host_msrs[to];
- vcpu->host_msrs[to] = vcpu->host_msrs[from];
- vcpu->host_msrs[from] = tmp;
+ tmp = vcpu->vmx->guest_msrs[to];
+ vcpu->vmx->guest_msrs[to] = vcpu->vmx->guest_msrs[from];
+ vcpu->vmx->guest_msrs[from] = tmp;
+ tmp = vcpu->vmx->host_msrs[to];
+ vcpu->vmx->host_msrs[to] = vcpu->vmx->host_msrs[from];
+ vcpu->vmx->host_msrs[from] = tmp;
}
/*
@@ -550,10 +550,10 @@ #endif
vcpu->save_nmsrs = save_nmsrs;
#ifdef CONFIG_X86_64
- vcpu->msr_offset_kernel_gs_base =
+ vcpu->vmx->msr_offset_kernel_gs_base =
__find_msr_index(vcpu, MSR_KERNEL_GS_BASE);
#endif
- vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
+ vcpu->vmx->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER);
}
/*
@@ -646,7 +646,7 @@ static int vmx_set_msr(struct kvm_vcpu *
#ifdef CONFIG_X86_64
case MSR_EFER:
ret = kvm_set_msr_common(vcpu, msr_index, data);
- if (vcpu->vmx_host_state.loaded)
+ if (vcpu->vmx->host_state.loaded)
load_transition_efer(vcpu);
break;
case MSR_FS_BASE:
@@ -672,8 +672,8 @@ #endif
msr = find_msr_entry(vcpu, msr_index);
if (msr) {
msr->data = data;
- if (vcpu->vmx_host_state.loaded)
- load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs);
+ if (vcpu->vmx->host_state.loaded)
+ load_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs);
break;
}
ret = kvm_set_msr_common(vcpu, msr_index, data);
@@ -868,15 +868,15 @@ static void enter_pmode(struct kvm_vcpu
{
unsigned long flags;
- vcpu->rmode.active = 0;
+ vcpu->vmx->rmode.active = 0;
- vmcs_writel(GUEST_TR_BASE, vcpu->rmode.tr.base);
- vmcs_write32(GUEST_TR_LIMIT, vcpu->rmode.tr.limit);
- vmcs_write32(GUEST_TR_AR_BYTES, vcpu->rmode.tr.ar);
+ vmcs_writel(GUEST_TR_BASE, vcpu->vmx->rmode.tr.base);
+ vmcs_write32(GUEST_TR_LIMIT, vcpu->vmx->rmode.tr.limit);
+ vmcs_write32(GUEST_TR_AR_BYTES, vcpu->vmx->rmode.tr.ar);
flags = vmcs_readl(GUEST_RFLAGS);
flags &= ~(IOPL_MASK | X86_EFLAGS_VM);
- flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT);
+ flags |= (vcpu->vmx->rmode.save_iopl << IOPL_SHIFT);
vmcs_writel(GUEST_RFLAGS, flags);
vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~CR4_VME_MASK) |
@@ -884,10 +884,10 @@ static void enter_pmode(struct kvm_vcpu
update_exception_bitmap(vcpu);
- fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->rmode.es);
- fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->rmode.ds);
- fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->rmode.gs);
- fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->rmode.fs);
+ fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->vmx->rmode.es);
+ fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->vmx->rmode.ds);
+ fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->vmx->rmode.gs);
+ fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->vmx->rmode.fs);
vmcs_write16(GUEST_SS_SELECTOR, 0);
vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
@@ -920,19 +920,19 @@ static void enter_rmode(struct kvm_vcpu
{
unsigned long flags;
- vcpu->rmode.active = 1;
+ vcpu->vmx->rmode.active = 1;
- vcpu->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
+ vcpu->vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
- vcpu->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
+ vcpu->vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
- vcpu->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
+ vcpu->vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
flags = vmcs_readl(GUEST_RFLAGS);
- vcpu->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;
+ vcpu->vmx->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT;
flags |= IOPL_MASK | X86_EFLAGS_VM;
@@ -950,11 +950,10 @@ static void enter_rmode(struct kvm_vcpu
vmcs_writel(GUEST_CS_BASE, 0xf0000);
vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
- fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es);
- fix_rmode_seg(VCPU_SREG_DS, &vcpu->rmode.ds);
- fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs);
- fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs);
-
+ fix_rmode_seg(VCPU_SREG_ES, &vcpu->vmx->rmode.es);
+ fix_rmode_seg(VCPU_SREG_DS, &vcpu->vmx->rmode.ds);
+ fix_rmode_seg(VCPU_SREG_GS, &vcpu->vmx->rmode.gs);
+ fix_rmode_seg(VCPU_SREG_FS, &vcpu->vmx->rmode.fs);
init_rmode_tss(vcpu->kvm);
}
@@ -1002,10 +1001,10 @@ static void vmx_set_cr0(struct kvm_vcpu
{
vmx_fpu_deactivate(vcpu);
- if (vcpu->rmode.active && (cr0 & CR0_PE_MASK))
+ if (vcpu->vmx->rmode.active && (cr0 & CR0_PE_MASK))
enter_pmode(vcpu);
- if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK))
+ if (!vcpu->vmx->rmode.active && !(cr0 & CR0_PE_MASK))
enter_rmode(vcpu);
#ifdef CONFIG_X86_64
@@ -1036,7 +1035,7 @@ static void vmx_set_cr3(struct kvm_vcpu
static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
vmcs_writel(CR4_READ_SHADOW, cr4);
- vmcs_writel(GUEST_CR4, cr4 | (vcpu->rmode.active ?
+ vmcs_writel(GUEST_CR4, cr4 | (vcpu->vmx->rmode.active ?
KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON));
vcpu->cr4 = cr4;
}
@@ -1124,17 +1123,17 @@ static void vmx_set_segment(struct kvm_v
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
u32 ar;
- if (vcpu->rmode.active && seg == VCPU_SREG_TR) {
- vcpu->rmode.tr.selector = var->selector;
- vcpu->rmode.tr.base = var->base;
- vcpu->rmode.tr.limit = var->limit;
- vcpu->rmode.tr.ar = vmx_segment_access_rights(var);
+ if (vcpu->vmx->rmode.active && seg == VCPU_SREG_TR) {
+ vcpu->vmx->rmode.tr.selector = var->selector;
+ vcpu->vmx->rmode.tr.base = var->base;
+ vcpu->vmx->rmode.tr.limit = var->limit;
+ vcpu->vmx->rmode.tr.ar = vmx_segment_access_rights(var);
return;
}
vmcs_writel(sf->base, var->base);
vmcs_write32(sf->limit, var->limit);
vmcs_write16(sf->selector, var->selector);
- if (vcpu->rmode.active && var->s) {
+ if (vcpu->vmx->rmode.active && var->s) {
/*
* Hack real-mode segments into vm86 compatibility.
*/
@@ -1253,11 +1252,11 @@ static int vmx_vcpu_setup(struct kvm_vcp
vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val();
vcpu->cr8 = 0;
vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+
if (vcpu->vcpu_id == 0)
vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
fx_init(vcpu);
-
/*
* GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
* insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh.
@@ -1384,13 +1383,12 @@ #endif
if (wrmsr_safe(index, data_low, data_high) < 0)
continue;
data = data_low | ((u64)data_high << 32);
- vcpu->host_msrs[j].index = index;
- vcpu->host_msrs[j].reserved = 0;
- vcpu->host_msrs[j].data = data;
- vcpu->guest_msrs[j] = vcpu->host_msrs[j];
+ vcpu->vmx->host_msrs[j].index = index;
+ vcpu->vmx->host_msrs[j].reserved = 0;
+ vcpu->vmx->host_msrs[j].data = data;
+ vcpu->vmx->guest_msrs[j] = vcpu->vmx->host_msrs[j];
++vcpu->nmsrs;
}
-
setup_msrs(vcpu);
vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS, VM_EXIT_CONTROLS,
@@ -1408,7 +1406,6 @@ #endif
vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
-
vcpu->cr0 = 0x60000010;
vmx_set_cr0(vcpu, vcpu->cr0); // enter rmode
vmx_set_cr4(vcpu, 0);
@@ -1479,7 +1476,7 @@ static void kvm_do_inject_irq(struct kvm
if (!vcpu->irq_pending[word_index])
clear_bit(word_index, &vcpu->irq_summary);
- if (vcpu->rmode.active) {
+ if (vcpu->vmx->rmode.active) {
inject_rmode_irq(vcpu, irq);
return;
}
@@ -1538,7 +1535,7 @@ static void kvm_guest_debug_pre(struct k
static int handle_rmode_exception(struct kvm_vcpu *vcpu,
int vec, u32 err_code)
{
- if (!vcpu->rmode.active)
+ if (!vcpu->vmx->rmode.active)
return 0;
/*
@@ -1620,11 +1617,11 @@ static int handle_exception(struct kvm_v
}
}
- if (vcpu->rmode.active &&
+ if (vcpu->vmx->rmode.active &&
handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
error_code)) {
- if (vcpu->halt_request) {
- vcpu->halt_request = 0;
+ if (vcpu->vmx->halt_request) {
+ vcpu->vmx->halt_request = 0;
return kvm_emulate_halt(vcpu);
}
return 1;
@@ -2225,28 +2222,34 @@ static void vmx_inject_page_fault(struct
static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
{
- if (vcpu->vmcs) {
+ if (vcpu->vmx->vmcs) {
on_each_cpu(__vcpu_clear, vcpu, 0, 1);
- free_vmcs(vcpu->vmcs);
- vcpu->vmcs = NULL;
+ free_vmcs(vcpu->vmx->vmcs);
+ vcpu->vmx->vmcs = NULL;
+
}
}
+static int vmx_vcpu_size(void)
+{
+ return sizeof(struct kvm_vcpu) + sizeof(struct kvm_vmx_data);
+}
+
static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
vmx_free_vmcs(vcpu);
}
-static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
+static int vmx_init_vcpu(struct kvm_vcpu *vcpu)
{
struct vmcs *vmcs;
- vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!vcpu->guest_msrs)
+ vcpu->vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!vcpu->vmx->guest_msrs)
return -ENOMEM;
- vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!vcpu->host_msrs)
+ vcpu->vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!vcpu->vmx->host_msrs)
goto out_free_guest_msrs;
vmcs = alloc_vmcs();
@@ -2254,18 +2257,18 @@ static int vmx_create_vcpu(struct kvm_vc
goto out_free_msrs;
vmcs_clear(vmcs);
- vcpu->vmcs = vmcs;
+ vcpu->vmx->vmcs = vmcs;
vcpu->launched = 0;
return 0;
out_free_msrs:
- kfree(vcpu->host_msrs);
- vcpu->host_msrs = NULL;
+ kfree(vcpu->vmx->host_msrs);
+ vcpu->vmx->host_msrs = NULL;
out_free_guest_msrs:
- kfree(vcpu->guest_msrs);
- vcpu->guest_msrs = NULL;
+ kfree(vcpu->vmx->guest_msrs);
+ vcpu->vmx->guest_msrs = NULL;
return -ENOMEM;
}
@@ -2278,7 +2281,8 @@ static struct kvm_arch_ops vmx_arch_ops
.hardware_enable = hardware_enable,
.hardware_disable = hardware_disable,
- .vcpu_create = vmx_create_vcpu,
+ .vcpu_size = vmx_vcpu_size,
+ .vcpu_init = vmx_init_vcpu,
.vcpu_free = vmx_free_vcpu,
.vcpu_load = vmx_vcpu_load,
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index f60012d..4e821ed 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -1155,7 +1155,7 @@ special_insn:
DPRINTF("Urk! I don't handle SCAS.\n");
goto cannot_emulate;
case 0xf4: /* hlt */
- ctxt->vcpu->halt_request = 1;
+ ctxt->vcpu->vmx->halt_request = 1;
goto done;
case 0xc3: /* ret */
dst.ptr = &_eip;
[-- Attachment #3: Type: text/plain, Size: 286 bytes --]
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
[-- Attachment #4: Type: text/plain, Size: 186 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply related [flat|nested] 5+ messages in thread[parent not found: <Pine.LNX.4.64.0707121815040.23503-hxTPNdr267xSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>]
* Re: [PATCH] dynamically create vcpus + vmx/svm structures [not found] ` <Pine.LNX.4.64.0707121815040.23503-hxTPNdr267xSzHKm+aFRNNkmqwFzkYv6@public.gmane.org> @ 2007-07-13 7:59 ` Avi Kivity [not found] ` <ed628a920707131534x7fe57ca3sfa09d9d79412d9c6@mail.gmail.com> 0 siblings, 1 reply; 5+ messages in thread From: Avi Kivity @ 2007-07-13 7:59 UTC (permalink / raw) To: Paul Turner; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f Paul Turner wrote: > From: Paul Turner <pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> > > - vcpus now allocated on demand > - vmx/svm fields separated into arch specific structures on vcpus > - vmx/svm fields now only allocated on corresponding architectures > > - Paul > > diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h > index 0f7a4d9..c631192 100644 > --- a/drivers/kvm/kvm.h > +++ b/drivers/kvm/kvm.h > @@ -16,6 +16,7 @@ #include <linux/mm.h> > #include <asm/signal.h> > > #include "vmx.h" > +#include "kvm_svm.h" This can probably be avoided, see below. > #include <linux/kvm.h> > #include <linux/kvm_para.h> > > @@ -326,16 +327,64 @@ struct kvm_io_device *kvm_io_bus_find_de > void kvm_io_bus_register_dev(struct kvm_io_bus *bus, > struct kvm_io_device *dev); > > +struct kvm_vmx_data { > + int msr_offset_efer; > + > + #ifdef CONFIG_X86_64 > + int msr_offset_kernel_gs_base; > + #endif > + > + struct vmx_host_state { > + int loaded; > + u16 fs_sel, gs_sel, ldt_sel; > + int fs_gs_ldt_reload_needed; > + } host_state; > + > + struct vmx_msr_entry *guest_msrs; > + struct vmx_msr_entry *host_msrs; > + > + struct { > + int active; > + u8 save_iopl; > + struct kvm_save_segment { > + u16 selector; > + unsigned long base; > + u32 limit; > + u32 ar; > + } tr, es, ds, fs, gs; > + } rmode; > + int halt_request; /* real mode */ > + + struct vmcs *vmcs; > +}; > + If this is moved to vmx.c, we can avoid including vmx.h and have no arch dependent code here (given that we don't even need the size). 
> +struct kvm_svm_data { > + struct vmcb *vmcb; > + unsigned long vmcb_pa; > + struct svm_cpu_data *svm_data; > + uint64_t asid_generation; > + > + unsigned long db_regs[NUM_DB_REGS]; > + > + u64 next_rip; > + > + u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; > + u64 host_gs_base; > + unsigned long host_cr2; > + unsigned long host_db_regs[NUM_DB_REGS]; > + unsigned long host_dr6; > + unsigned long host_dr7; > +}; This can remain in kvm_svm.h. > + > + > struct kvm_vcpu { > struct kvm *kvm; > + struct mutex *mutex; /* refers to corresponding vcpu_mutex on kvm */ Please keep this as a real structure, not a pointer. Existence testing of the vcpu is now simply if (kvm->vcpus[slot]). > + No gratuitous empty lines please. > struct kvm_mem_alias { > gfn_t base_gfn; > unsigned long npages; > @@ -448,8 +480,11 @@ struct kvm { > struct list_head active_mmu_pages; > int n_free_mmu_pages; > struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; > + > int nvcpus; > - struct kvm_vcpu vcpus[KVM_MAX_VCPUS]; > + struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; > + struct mutex vcpu_mutex[KVM_MAX_VCPUS]; > + > int memory_config_version; > int busy; > unsigned long rmap_overflow; > @@ -472,7 +507,8 @@ struct kvm_arch_ops { > int (*hardware_setup)(void); /* __init */ > void (*hardware_unsetup)(void); /* __exit */ > > - int (*vcpu_create)(struct kvm_vcpu *vcpu); > + int (*vcpu_size)(void); > + int (*vcpu_init)(struct kvm_vcpu *vcpu); I would prefer combining these two into 'struct kvm_vcpu *vcpu_create()', but it's also okay as is. > > static int kvm_dev_release(struct inode *inode, struct file *filp) > @@ -430,6 +431,7 @@ static void kvm_destroy_vm(struct kvm *k > kvm_io_bus_destroy(&kvm->mmio_bus); > kvm_free_vcpus(kvm); > kvm_free_physmem(kvm); > + empty line. > kfree(kvm); > } > > @@ -796,7 +798,7 @@ raced: > > for (i = 0; i < KVM_MAX_VCPUS; ++i) { > struct kvm_vcpu *vcpu; > - > + random noise? 
> > @@ -2380,40 +2382,51 @@ static int kvm_vm_ioctl_create_vcpu(stru > { > int r; > struct kvm_vcpu *vcpu; > - struct page *page; > + struct page *pio_page, *run_page; > > r = -EINVAL; > if (!valid_vcpu(n)) > goto out; > > - vcpu = &kvm->vcpus[n]; > - vcpu->vcpu_id = n; > + mutex_lock(&kvm->vcpu_mutex[n]); > + if (kvm->vcpus[n]) { > + r = -EEXIST; > + goto out_unlock; > + } > > - mutex_lock(&vcpu->mutex); > + vcpu = kzalloc(kvm_arch_ops->vcpu_size(), GFP_KERNEL); > > - if (vcpu->vmcs) { > - mutex_unlock(&vcpu->mutex); > - return -EEXIST; > + if (!vcpu) { > + r = -ENOMEM; > + goto out_unlock; > } > > - page = alloc_page(GFP_KERNEL | __GFP_ZERO); > + vcpu->mutex = &kvm->vcpu_mutex[n]; > + vcpu->cpu = -1; > + vcpu->kvm = kvm; > + vcpu->mmu.root_hpa = INVALID_PAGE; > + > + vcpu->vcpu_id = n; > + kvm->vcpus[n] = vcpu; > + > + run_page = alloc_page(GFP_KERNEL | __GFP_ZERO); > r = -ENOMEM; > - if (!page) > - goto out_unlock; > - vcpu->run = page_address(page); > + if (!run_page) > + goto out_deallocate; > + vcpu->run = page_address(run_page); > This cleanup is good, but makes the patch larger. Please defer it. > diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c > index f60012d..4e821ed 100644 > --- a/drivers/kvm/x86_emulate.c > +++ b/drivers/kvm/x86_emulate.c > @@ -1155,7 +1155,7 @@ special_insn: > DPRINTF("Urk! I don't handle SCAS.\n"); > goto cannot_emulate; > case 0xf4: /* hlt */ > - ctxt->vcpu->halt_request = 1; > + ctxt->vcpu->vmx->halt_request = 1; > goto done; > case 0xc3: /* ret */ > dst.ptr = &_eip; This is common code, and will stomp on svm data if executed on amd. I don't think that amd will ever need to emulate hlt, nevertheless let's make ->halt_request a member of struct kvm_vcpu. -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. 
------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ^ permalink raw reply [flat|nested] 5+ messages in thread
[parent not found: <ed628a920707131534x7fe57ca3sfa09d9d79412d9c6@mail.gmail.com>]
[parent not found: <ed628a920707131534x7fe57ca3sfa09d9d79412d9c6-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [PATCH] dynamically create vcpus + vmx/svm structures [not found] ` <ed628a920707131534x7fe57ca3sfa09d9d79412d9c6-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> @ 2007-07-14 6:14 ` Avi Kivity [not found] ` <469869CF.8030106-atKUWr5tajBWk0Htik3J/w@public.gmane.org> 0 siblings, 1 reply; 5+ messages in thread From: Avi Kivity @ 2007-07-14 6:14 UTC (permalink / raw) To: Paul Turner; +Cc: KVM [added back cc] Paul Turner wrote: > On 7/13/07, Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org> wrote: >> Paul Turner wrote: >> > From: Paul Turner <pjt-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> >> > >> > - vcpus now allocated on demand >> > - vmx/svm fields separated into arch specific structures on vcpus >> > - vmx/svm fields now only allocated on corresponding architectures >> > >> > - Paul >> > >> > diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h >> > index 0f7a4d9..c631192 100644 >> > --- a/drivers/kvm/kvm.h >> > +++ b/drivers/kvm/kvm.h >> > @@ -16,6 +16,7 @@ #include <linux/mm.h> >> > #include <asm/signal.h> >> > >> > #include "vmx.h" >> > +#include "kvm_svm.h" >> >> This can probably be avoided, see below. 
>> >> > #include <linux/kvm.h> >> > #include <linux/kvm_para.h> >> > >> > @@ -326,16 +327,64 @@ struct kvm_io_device *kvm_io_bus_find_de >> > void kvm_io_bus_register_dev(struct kvm_io_bus *bus, >> > struct kvm_io_device *dev); >> > >> > +struct kvm_vmx_data { >> > + int msr_offset_efer; >> > + >> > + #ifdef CONFIG_X86_64 >> > + int msr_offset_kernel_gs_base; >> > + #endif >> > + >> > + struct vmx_host_state { >> > + int loaded; >> > + u16 fs_sel, gs_sel, ldt_sel; >> > + int fs_gs_ldt_reload_needed; >> > + } host_state; >> > + >> > + struct vmx_msr_entry *guest_msrs; >> > + struct vmx_msr_entry *host_msrs; >> > + >> > + struct { >> > + int active; >> > + u8 save_iopl; >> > + struct kvm_save_segment { >> > + u16 selector; >> > + unsigned long base; >> > + u32 limit; >> > + u32 ar; >> > + } tr, es, ds, fs, gs; >> > + } rmode; >> > + int halt_request; /* real mode */ >> > + + struct vmcs *vmcs; >> > +}; >> > + >> >> If this is moved to vmx.c, we can avoid including vmx.h and have no arch >> dependent code here (given that we don't even need the size). >> > > I originally did this however gcc refuses to compile with the > incomplete types, although after further investigation it turns out > it's a bug in gcc with an incomplete implementation of zero sized > arrays under a union, so I can fix this now. See notes below.. > Looks like you forgot the notes below :) Anyway the only fix I can see is to have a long[0] member at the end, and have vmx.c define a function vmx(vcpu) which returns the vmx specific data. Accesses would look like vmx(vcpu)->cr0 = 42; which is odd, but I've seen worse. But if you have a better solution, let's hear it. 
>> > +struct kvm_svm_data { >> > + struct vmcb *vmcb; >> > + unsigned long vmcb_pa; >> > + struct svm_cpu_data *svm_data; >> > + uint64_t asid_generation; >> > + >> > + unsigned long db_regs[NUM_DB_REGS]; >> > + >> > + u64 next_rip; >> > + >> > + u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; >> > + u64 host_gs_base; >> > + unsigned long host_cr2; >> > + unsigned long host_db_regs[NUM_DB_REGS]; >> > + unsigned long host_dr6; >> > + unsigned long host_dr7; >> > +}; >> >> This can remain in kvm_svm.h. > > I was going to move both structures out in a small follow up patch, I > didn't in this one because of the compile issue above + not wanting to > make this patch any larger than it already is.. :) I can merge it > into this one if you prefer.. Don't understand. If it remains in kvm_svm.h, the patch gets smaller, not larger. > >> >> >> > + >> > + >> > struct kvm_vcpu { >> > struct kvm *kvm; >> > + struct mutex *mutex; /* refers to corresponding vcpu_mutex on >> kvm */ >> >> Please keep this as a real structure, not a pointer. Existence testing >> of the vcpu is now simply if (kvm->vcpus[slot]). > > Some of the existing code makes the assumption that locking the cpu > locks the slot as well; also if we don't have an associated lock/mutex > then we'd have to take a global lock on slot updates/checks. Finally > you'd still have a race in between checking it's a valid vcpu and > trying to acquire it's mutex.. The only place the race matters is in vcpu creation. There, we can do something like vcpu = kvm_arch_ops->vcpu_create(...); spin_lock(kvm); if (kvm->vcpus[slot]) { r = -EEXIST; vcpu_free(vcpu); } else kvm->vcpus[slot] = vcpu; spin_unlock(kvm); In the other places, if the user has a thread creating a vcpu and another thread performing an operation on it, it's perfectly legitimate to return -ENOENT. 
>> > - page = alloc_page(GFP_KERNEL | __GFP_ZERO); >> > + vcpu->mutex = &kvm->vcpu_mutex[n]; >> > + vcpu->cpu = -1; >> > + vcpu->kvm = kvm; >> > + vcpu->mmu.root_hpa = INVALID_PAGE; >> > + >> > + vcpu->vcpu_id = n; >> > + kvm->vcpus[n] = vcpu; >> > + >> > + run_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >> > r = -ENOMEM; >> > - if (!page) >> > - goto out_unlock; >> > - vcpu->run = page_address(page); >> > + if (!run_page) >> > + goto out_deallocate; >> > + vcpu->run = page_address(run_page); >> > >> >> This cleanup is good, but makes the patch larger. Please defer it. >> > > this needs to be done on vcpu creation (it was part of vm init > before), if you're concerned about patch size I can break up the > structure separation and dynamic allocation fairly easily since they > are different commits in my repository (I just didn't originally want > to rebase them both :) Yes, you're right -- it is necessary. It can live in the main patch. > > please advise on splits/etc as above and ill resubmit > One patch is okay. We should aim for kvm_main.c not knowing (including) anything about vmx or svm. -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ^ permalink raw reply [flat|nested] 5+ messages in thread
[parent not found: <469869CF.8030106-atKUWr5tajBWk0Htik3J/w@public.gmane.org>]
* Re: [PATCH] dynamically create vcpus + vmx/svm structures [not found] ` <469869CF.8030106-atKUWr5tajBWk0Htik3J/w@public.gmane.org> @ 2007-07-18 22:36 ` Paul Turner [not found] ` <Pine.LNX.4.64.0707181528430.32400-hxTPNdr267xSzHKm+aFRNNkmqwFzkYv6@public.gmane.org> 0 siblings, 1 reply; 5+ messages in thread From: Paul Turner @ 2007-07-18 22:36 UTC (permalink / raw) To: Avi Kivity; +Cc: KVM [-- Attachment #1: Type: TEXT/PLAIN, Size: 24433 bytes --] here's an updated version that instead takes kvm_lock and leaves the svm stuff in the relevant header file for now. unfortunately we still need the includes since gcc is braindead and can't compile untyped [0]-size arrays properly, the only two ways i can see to fix this is either embedding vcpu in an arch specific struct and or using a long[0] member and a vmx/svm macro as you mentioned before.. ill prepare a follow up patch that migrates the arch cruft out of kvm.h (the long[0] approach) now, of course that changes all the vmx/svm references again but at least they're now consolidated - Paul diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 7bdffe6..9fff8b7 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -16,6 +16,7 @@ #include <linux/mm.h> #include <asm/signal.h> #include "vmx.h" +#include "kvm_svm.h" #include <linux/kvm.h> #include <linux/kvm_para.h> @@ -308,13 +309,39 @@ struct kvm_io_device *kvm_io_bus_find_de void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev); +struct kvm_vmx_data { + int msr_offset_efer; + + #ifdef CONFIG_X86_64 + int msr_offset_kernel_gs_base; + #endif + + struct vmx_host_state { + int loaded; + u16 fs_sel, gs_sel, ldt_sel; + int fs_gs_ldt_reload_needed; + } host_state; + + struct vmx_msr_entry *guest_msrs; + struct vmx_msr_entry *host_msrs; + + struct { + int active; + u8 save_iopl; + struct kvm_save_segment { + u16 selector; + unsigned long base; + u32 limit; + u32 ar; + } tr, es, ds, fs, gs; + } rmode; + + struct vmcs *vmcs; +}; + struct kvm_vcpu { 
struct kvm *kvm; int vcpu_id; - union { - struct vmcs *vmcs; - struct vcpu_svm *svm; - }; struct mutex mutex; int cpu; int launched; @@ -342,12 +369,6 @@ struct kvm_vcpu { u64 ia32_misc_enable_msr; int nmsrs; int save_nmsrs; - int msr_offset_efer; -#ifdef CONFIG_X86_64 - int msr_offset_kernel_gs_base; -#endif - struct vmx_msr_entry *guest_msrs; - struct vmx_msr_entry *host_msrs; struct kvm_mmu mmu; @@ -366,11 +387,6 @@ #endif char *guest_fx_image; int fpu_active; int guest_fpu_loaded; - struct vmx_host_state { - int loaded; - u16 fs_sel, gs_sel, ldt_sel; - int fs_gs_ldt_reload_needed; - } vmx_host_state; int mmio_needed; int mmio_read_completed; @@ -387,20 +403,13 @@ #endif struct kvm_stat stat; - struct { - int active; - u8 save_iopl; - struct kvm_save_segment { - u16 selector; - unsigned long base; - u32 limit; - u32 ar; - } tr, es, ds, fs, gs; - } rmode; - int halt_request; /* real mode on Intel only */ - int cpuid_nent; struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES]; + + int halt_request; /* real mode emulation */ + + struct kvm_vmx_data vmx[0]; + struct kvm_svm_data svm[0]; }; struct kvm_mem_alias { @@ -430,7 +439,7 @@ struct kvm { int n_free_mmu_pages; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; int nvcpus; - struct kvm_vcpu vcpus[KVM_MAX_VCPUS]; + struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; int memory_config_version; int busy; unsigned long rmap_overflow; @@ -453,7 +462,8 @@ struct kvm_arch_ops { int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ - int (*vcpu_create)(struct kvm_vcpu *vcpu); + int (*vcpu_size)(void); + int (*vcpu_init)(struct kvm_vcpu *vcpu); void (*vcpu_free)(struct kvm_vcpu *vcpu); void (*vcpu_load)(struct kvm_vcpu *vcpu); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 67654c3..6831024 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -267,7 +267,7 @@ void kvm_flush_remote_tlbs(struct kvm *k cpus_clear(cpus); needed = 0; for (i = 0; i < kvm->nvcpus; 
++i) { - vcpu = &kvm->vcpus[i]; + vcpu = kvm->vcpus[i]; if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests)) continue; cpu = vcpu->cpu; @@ -294,7 +294,6 @@ void kvm_flush_remote_tlbs(struct kvm *k static struct kvm *kvm_create_vm(void) { struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); - int i; if (!kvm) return ERR_PTR(-ENOMEM); @@ -306,14 +305,7 @@ static struct kvm *kvm_create_vm(void) list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); kvm_io_bus_init(&kvm->mmio_bus); - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - struct kvm_vcpu *vcpu = &kvm->vcpus[i]; - mutex_init(&vcpu->mutex); - vcpu->cpu = -1; - vcpu->kvm = kvm; - vcpu->mmu.root_hpa = INVALID_PAGE; - } return kvm; } @@ -367,7 +359,7 @@ static void free_pio_guest_pages(struct static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { - if (!vcpu->vmcs) + if (!vcpu) return; vcpu_load(vcpu); @@ -377,7 +369,7 @@ static void kvm_unload_vcpu_mmu(struct k static void kvm_free_vcpu(struct kvm_vcpu *vcpu) { - if (!vcpu->vmcs) + if (!vcpu) return; vcpu_load(vcpu); @@ -389,6 +381,7 @@ static void kvm_free_vcpu(struct kvm_vcp free_page((unsigned long)vcpu->pio_data); vcpu->pio_data = NULL; free_pio_guest_pages(vcpu); + kfree(vcpu); } static void kvm_free_vcpus(struct kvm *kvm) @@ -399,9 +392,11 @@ static void kvm_free_vcpus(struct kvm *k * Unpin any mmu pages first. 
*/ for (i = 0; i < KVM_MAX_VCPUS; ++i) - kvm_unload_vcpu_mmu(&kvm->vcpus[i]); - for (i = 0; i < KVM_MAX_VCPUS; ++i) - kvm_free_vcpu(&kvm->vcpus[i]); + kvm_unload_vcpu_mmu(kvm->vcpus[i]); + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + kvm_free_vcpu(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } } static int kvm_dev_release(struct inode *inode, struct file *filp) @@ -2371,40 +2366,54 @@ static int kvm_vm_ioctl_create_vcpu(stru { int r; struct kvm_vcpu *vcpu; - struct page *page; + struct page *pio_page, *run_page; r = -EINVAL; if (!valid_vcpu(n)) goto out; - vcpu = &kvm->vcpus[n]; - vcpu->vcpu_id = n; + vcpu = kzalloc(kvm_arch_ops->vcpu_size(), GFP_KERNEL); + if (!vcpu) { + r = -ENOMEM; + goto out; + } + + mutex_init(&vcpu->mutex); mutex_lock(&vcpu->mutex); + vcpu->cpu = -1; + vcpu->kvm = kvm; + vcpu->mmu.root_hpa = INVALID_PAGE; - if (vcpu->vmcs) { - mutex_unlock(&vcpu->mutex); - return -EEXIST; - } + vcpu->vcpu_id = n; + + spin_lock(&kvm->lock); + if (kvm->vcpus[n]) { + spin_unlock(&kvm->lock); + r = -EEXIST; + goto out_free; + } + kvm->vcpus[n] = vcpu; + spin_unlock(&kvm->lock); - page = alloc_page(GFP_KERNEL | __GFP_ZERO); + run_page = alloc_page(GFP_KERNEL | __GFP_ZERO); r = -ENOMEM; - if (!page) - goto out_unlock; - vcpu->run = page_address(page); + if (!run_page) + goto out_unassign; + vcpu->run = page_address(run_page); - page = alloc_page(GFP_KERNEL | __GFP_ZERO); + pio_page = alloc_page(GFP_KERNEL | __GFP_ZERO); r = -ENOMEM; - if (!page) + if (!pio_page) goto out_free_run; - vcpu->pio_data = page_address(page); + vcpu->pio_data = page_address(pio_page); vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, FX_IMAGE_ALIGN); vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; vcpu->cr0 = 0x10; - r = kvm_arch_ops->vcpu_create(vcpu); + r = kvm_arch_ops->vcpu_init(vcpu); if (r < 0) goto out_free_vcpus; @@ -2434,15 +2443,20 @@ static int kvm_vm_ioctl_create_vcpu(stru out_free_vcpus: kvm_free_vcpu(vcpu); + vcpu = NULL; out_free_run: - free_page((unsigned 
long)vcpu->run); - vcpu->run = NULL; -out_unlock: - mutex_unlock(&vcpu->mutex); + free_page((unsigned long)run_page); +out_unassign: + spin_lock(&kvm->lock); + kvm->vcpus[n] = NULL; + spin_unlock(&kvm->lock); +out_free: + kfree(vcpu); out: return r; } + static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) { u64 efer; @@ -2934,7 +2948,7 @@ static void decache_vcpus_on_cpu(int cpu spin_lock(&kvm_lock); list_for_each_entry(vm, &vm_list, vm_list) for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = &vm->vcpus[i]; + vcpu = vm->vcpus[i]; /* * If the vcpu is locked, then it is running on some * other cpu and therefore it is not cached on the @@ -3071,7 +3085,7 @@ static u64 stat_get(void *_offset) spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = &kvm->vcpus[i]; + vcpu = kvm->vcpus[i]; total += *(u32 *)((void *)vcpu + offset); } spin_unlock(&kvm_lock); diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h index a869983..156359a 100644 --- a/drivers/kvm/kvm_svm.h +++ b/drivers/kvm/kvm_svm.h @@ -7,7 +7,6 @@ #include <linux/list.h> #include <asm/msr.h> #include "svm.h" -#include "kvm.h" static const u32 host_save_user_msrs[] = { #ifdef CONFIG_X86_64 @@ -20,7 +19,7 @@ #endif #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) #define NUM_DB_REGS 4 -struct vcpu_svm { +struct kvm_svm_data { struct vmcb *vmcb; unsigned long vmcb_pa; struct svm_cpu_data *svm_data; @@ -39,4 +38,3 @@ struct vcpu_svm { }; #endif - diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 850a1b1..16a3b6e 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -14,7 +14,7 @@ * */ -#include "kvm_svm.h" +#include "kvm.h" #include "x86_emulate.h" #include <linux/module.h> @@ -567,18 +567,20 @@ static void init_vmcb(struct vmcb *vmcb) /* rdx = ?? 
*/ } -static int svm_create_vcpu(struct kvm_vcpu *vcpu) +static int svm_vcpu_size(void) +{ + return sizeof(struct kvm_vcpu) + sizeof(struct kvm_svm_data); +} + +static int svm_init_vcpu(struct kvm_vcpu *vcpu) { struct page *page; int r; r = -ENOMEM; - vcpu->svm = kzalloc(sizeof *vcpu->svm, GFP_KERNEL); - if (!vcpu->svm) - goto out1; page = alloc_page(GFP_KERNEL); if (!page) - goto out2; + goto out1; vcpu->svm->vmcb = page_address(page); clear_page(vcpu->svm->vmcb); @@ -595,8 +597,6 @@ static int svm_create_vcpu(struct kvm_vc return 0; -out2: - kfree(vcpu->svm); out1: return r; } @@ -1608,7 +1608,7 @@ #endif : : [vcpu]"a"(vcpu), [svm]"i"(offsetof(struct kvm_vcpu, svm)), - [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), + [vmcb]"i"(offsetof(struct kvm_svm_data, vmcb_pa)), [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])), [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])), [rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])), @@ -1763,7 +1763,8 @@ static struct kvm_arch_ops svm_arch_ops .hardware_enable = svm_hardware_enable, .hardware_disable = svm_hardware_disable, - .vcpu_create = svm_create_vcpu, + .vcpu_size = svm_vcpu_size, + .vcpu_init = svm_init_vcpu, .vcpu_free = svm_free_vcpu, .vcpu_load = svm_vcpu_load, diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 2c4f01b..49587a1 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -96,9 +96,9 @@ static inline u64 msr_efer_save_restore_ static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu) { - int efer_offset = vcpu->msr_offset_efer; - return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) != - msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]); + int efer_offset = vcpu->vmx->msr_offset_efer; + return msr_efer_save_restore_bits(vcpu->vmx->host_msrs[efer_offset]) != + msr_efer_save_restore_bits(vcpu->vmx->guest_msrs[efer_offset]); } static inline int is_page_fault(u32 intr_info) @@ -126,7 +126,7 @@ static int __find_msr_index(struct kvm_v int i; for (i = 0; i < 
vcpu->nmsrs; ++i) - if (vcpu->guest_msrs[i].index == msr) + if (vcpu->vmx->guest_msrs[i].index == msr) return i; return -1; } @@ -137,7 +137,7 @@ static struct vmx_msr_entry *find_msr_en i = __find_msr_index(vcpu, msr); if (i >= 0) - return &vcpu->guest_msrs[i]; + return &vcpu->vmx->guest_msrs[i]; return NULL; } @@ -160,8 +160,8 @@ static void __vcpu_clear(void *arg) int cpu = raw_smp_processor_id(); if (vcpu->cpu == cpu) - vmcs_clear(vcpu->vmcs); - if (per_cpu(current_vmcs, cpu) == vcpu->vmcs) + vmcs_clear(vcpu->vmx->vmcs); + if (per_cpu(current_vmcs, cpu) == vcpu->vmx->vmcs) per_cpu(current_vmcs, cpu) = NULL; rdtscll(vcpu->host_tsc); } @@ -260,7 +260,7 @@ static void update_exception_bitmap(stru eb |= 1u << NM_VECTOR; if (vcpu->guest_debug.enabled) eb |= 1u << 1; - if (vcpu->rmode.active) + if (vcpu->vmx->rmode.active) eb = ~0; vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -285,19 +285,19 @@ #endif static void load_transition_efer(struct kvm_vcpu *vcpu) { u64 trans_efer; - int efer_offset = vcpu->msr_offset_efer; + int efer_offset = vcpu->vmx->msr_offset_efer; - trans_efer = vcpu->host_msrs[efer_offset].data; + trans_efer = vcpu->vmx->host_msrs[efer_offset].data; trans_efer &= ~EFER_SAVE_RESTORE_BITS; trans_efer |= msr_efer_save_restore_bits( - vcpu->guest_msrs[efer_offset]); + vcpu->vmx->guest_msrs[efer_offset]); wrmsrl(MSR_EFER, trans_efer); vcpu->stat.efer_reload++; } static void vmx_save_host_state(struct kvm_vcpu *vcpu) { - struct vmx_host_state *hs = &vcpu->vmx_host_state; + struct vmx_host_state *hs = &vcpu->vmx->host_state; if (hs->loaded) return; @@ -334,17 +334,17 @@ #endif #ifdef CONFIG_X86_64 if (is_long_mode(vcpu)) { - save_msrs(vcpu->host_msrs + vcpu->msr_offset_kernel_gs_base, 1); + save_msrs(vcpu->vmx->host_msrs + vcpu->vmx->msr_offset_kernel_gs_base, 1); } #endif - load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + load_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs); if (msr_efer_need_save_restore(vcpu)) load_transition_efer(vcpu); } static void 
vmx_load_host_state(struct kvm_vcpu *vcpu) { - struct vmx_host_state *hs = &vcpu->vmx_host_state; + struct vmx_host_state *hs = &vcpu->vmx->host_state; if (!hs->loaded) return; @@ -366,10 +366,10 @@ #endif reload_tss(); } - save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); - load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); + save_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs); + load_msrs(vcpu->vmx->host_msrs, vcpu->save_nmsrs); if (msr_efer_need_save_restore(vcpu)) - load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1); + load_msrs(vcpu->vmx->host_msrs + vcpu->vmx->msr_offset_efer, 1); } /* @@ -378,7 +378,7 @@ #endif */ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) { - u64 phys_addr = __pa(vcpu->vmcs); + u64 phys_addr = __pa(vcpu->vmx->vmcs); int cpu; u64 tsc_this, delta; @@ -387,16 +387,16 @@ static void vmx_vcpu_load(struct kvm_vcp if (vcpu->cpu != cpu) vcpu_clear(vcpu); - if (per_cpu(current_vmcs, cpu) != vcpu->vmcs) { + if (per_cpu(current_vmcs, cpu) != vcpu->vmx->vmcs) { u8 error; - per_cpu(current_vmcs, cpu) = vcpu->vmcs; + per_cpu(current_vmcs, cpu) = vcpu->vmx->vmcs; asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0" : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) : "cc"); if (error) printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", - vcpu->vmcs, phys_addr); + vcpu->vmx->vmcs, phys_addr); } if (vcpu->cpu != cpu) { @@ -504,12 +504,12 @@ static void vmx_inject_gp(struct kvm_vcp void move_msr_up(struct kvm_vcpu *vcpu, int from, int to) { struct vmx_msr_entry tmp; - tmp = vcpu->guest_msrs[to]; - vcpu->guest_msrs[to] = vcpu->guest_msrs[from]; - vcpu->guest_msrs[from] = tmp; - tmp = vcpu->host_msrs[to]; - vcpu->host_msrs[to] = vcpu->host_msrs[from]; - vcpu->host_msrs[from] = tmp; + tmp = vcpu->vmx->guest_msrs[to]; + vcpu->vmx->guest_msrs[to] = vcpu->vmx->guest_msrs[from]; + vcpu->vmx->guest_msrs[from] = tmp; + tmp = vcpu->vmx->host_msrs[to]; + vcpu->vmx->host_msrs[to] = vcpu->vmx->host_msrs[from]; + vcpu->vmx->host_msrs[from] = tmp; } /* @@ -550,10 +550,10 @@ #endif 
vcpu->save_nmsrs = save_nmsrs; #ifdef CONFIG_X86_64 - vcpu->msr_offset_kernel_gs_base = + vcpu->vmx->msr_offset_kernel_gs_base = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); #endif - vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); + vcpu->vmx->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); } /* @@ -646,7 +646,7 @@ static int vmx_set_msr(struct kvm_vcpu * #ifdef CONFIG_X86_64 case MSR_EFER: ret = kvm_set_msr_common(vcpu, msr_index, data); - if (vcpu->vmx_host_state.loaded) + if (vcpu->vmx->host_state.loaded) load_transition_efer(vcpu); break; case MSR_FS_BASE: @@ -672,8 +672,8 @@ #endif msr = find_msr_entry(vcpu, msr_index); if (msr) { msr->data = data; - if (vcpu->vmx_host_state.loaded) - load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + if (vcpu->vmx->host_state.loaded) + load_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs); break; } ret = kvm_set_msr_common(vcpu, msr_index, data); @@ -868,15 +868,15 @@ static void enter_pmode(struct kvm_vcpu { unsigned long flags; - vcpu->rmode.active = 0; + vcpu->vmx->rmode.active = 0; - vmcs_writel(GUEST_TR_BASE, vcpu->rmode.tr.base); - vmcs_write32(GUEST_TR_LIMIT, vcpu->rmode.tr.limit); - vmcs_write32(GUEST_TR_AR_BYTES, vcpu->rmode.tr.ar); + vmcs_writel(GUEST_TR_BASE, vcpu->vmx->rmode.tr.base); + vmcs_write32(GUEST_TR_LIMIT, vcpu->vmx->rmode.tr.limit); + vmcs_write32(GUEST_TR_AR_BYTES, vcpu->vmx->rmode.tr.ar); flags = vmcs_readl(GUEST_RFLAGS); flags &= ~(IOPL_MASK | X86_EFLAGS_VM); - flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT); + flags |= (vcpu->vmx->rmode.save_iopl << IOPL_SHIFT); vmcs_writel(GUEST_RFLAGS, flags); vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | @@ -884,10 +884,10 @@ static void enter_pmode(struct kvm_vcpu update_exception_bitmap(vcpu); - fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->rmode.es); - fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->rmode.ds); - fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->rmode.gs); - fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->rmode.fs); + fix_pmode_dataseg(VCPU_SREG_ES, 
&vcpu->vmx->rmode.es); + fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->vmx->rmode.ds); + fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->vmx->rmode.gs); + fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->vmx->rmode.fs); vmcs_write16(GUEST_SS_SELECTOR, 0); vmcs_write32(GUEST_SS_AR_BYTES, 0x93); @@ -920,19 +920,19 @@ static void enter_rmode(struct kvm_vcpu { unsigned long flags; - vcpu->rmode.active = 1; + vcpu->vmx->rmode.active = 1; - vcpu->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); + vcpu->vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); - vcpu->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT); + vcpu->vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT); vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); - vcpu->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES); + vcpu->vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES); vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); flags = vmcs_readl(GUEST_RFLAGS); - vcpu->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT; + vcpu->vmx->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT; flags |= IOPL_MASK | X86_EFLAGS_VM; @@ -950,11 +950,10 @@ static void enter_rmode(struct kvm_vcpu vmcs_writel(GUEST_CS_BASE, 0xf0000); vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4); - fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es); - fix_rmode_seg(VCPU_SREG_DS, &vcpu->rmode.ds); - fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs); - fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs); - + fix_rmode_seg(VCPU_SREG_ES, &vcpu->vmx->rmode.es); + fix_rmode_seg(VCPU_SREG_DS, &vcpu->vmx->rmode.ds); + fix_rmode_seg(VCPU_SREG_GS, &vcpu->vmx->rmode.gs); + fix_rmode_seg(VCPU_SREG_FS, &vcpu->vmx->rmode.fs); init_rmode_tss(vcpu->kvm); } @@ -1002,10 +1001,10 @@ static void vmx_set_cr0(struct kvm_vcpu { vmx_fpu_deactivate(vcpu); - if (vcpu->rmode.active && (cr0 & X86_CR0_PE)) + if (vcpu->vmx->rmode.active && (cr0 & X86_CR0_PE)) enter_pmode(vcpu); - if (!vcpu->rmode.active && !(cr0 & X86_CR0_PE)) + if (!vcpu->vmx->rmode.active && !(cr0 & 
X86_CR0_PE)) enter_rmode(vcpu); #ifdef CONFIG_X86_64 @@ -1036,7 +1035,7 @@ static void vmx_set_cr3(struct kvm_vcpu static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { vmcs_writel(CR4_READ_SHADOW, cr4); - vmcs_writel(GUEST_CR4, cr4 | (vcpu->rmode.active ? + vmcs_writel(GUEST_CR4, cr4 | (vcpu->vmx->rmode.active ? KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON)); vcpu->cr4 = cr4; } @@ -1124,17 +1123,17 @@ static void vmx_set_segment(struct kvm_v struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; u32 ar; - if (vcpu->rmode.active && seg == VCPU_SREG_TR) { - vcpu->rmode.tr.selector = var->selector; - vcpu->rmode.tr.base = var->base; - vcpu->rmode.tr.limit = var->limit; - vcpu->rmode.tr.ar = vmx_segment_access_rights(var); + if (vcpu->vmx->rmode.active && seg == VCPU_SREG_TR) { + vcpu->vmx->rmode.tr.selector = var->selector; + vcpu->vmx->rmode.tr.base = var->base; + vcpu->vmx->rmode.tr.limit = var->limit; + vcpu->vmx->rmode.tr.ar = vmx_segment_access_rights(var); return; } vmcs_writel(sf->base, var->base); vmcs_write32(sf->limit, var->limit); vmcs_write16(sf->selector, var->selector); - if (vcpu->rmode.active && var->s) { + if (vcpu->vmx->rmode.active && var->s) { /* * Hack real-mode segments into vm86 compatibility. 
*/ @@ -1253,6 +1252,7 @@ static int vmx_vcpu_setup(struct kvm_vcp vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val(); vcpu->cr8 = 0; vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; + if (vcpu->vcpu_id == 0) vcpu->apic_base |= MSR_IA32_APICBASE_BSP; @@ -1384,10 +1384,10 @@ #endif if (wrmsr_safe(index, data_low, data_high) < 0) continue; data = data_low | ((u64)data_high << 32); - vcpu->host_msrs[j].index = index; - vcpu->host_msrs[j].reserved = 0; - vcpu->host_msrs[j].data = data; - vcpu->guest_msrs[j] = vcpu->host_msrs[j]; + vcpu->vmx->host_msrs[j].index = index; + vcpu->vmx->host_msrs[j].reserved = 0; + vcpu->vmx->host_msrs[j].data = data; + vcpu->vmx->guest_msrs[j] = vcpu->vmx->host_msrs[j]; ++vcpu->nmsrs; } @@ -1479,7 +1479,7 @@ static void kvm_do_inject_irq(struct kvm if (!vcpu->irq_pending[word_index]) clear_bit(word_index, &vcpu->irq_summary); - if (vcpu->rmode.active) { + if (vcpu->vmx->rmode.active) { inject_rmode_irq(vcpu, irq); return; } @@ -1538,7 +1538,7 @@ static void kvm_guest_debug_pre(struct k static int handle_rmode_exception(struct kvm_vcpu *vcpu, int vec, u32 err_code) { - if (!vcpu->rmode.active) + if (!vcpu->vmx->rmode.active) return 0; /* @@ -1619,7 +1619,7 @@ static int handle_exception(struct kvm_v } } - if (vcpu->rmode.active && + if (vcpu->vmx->rmode.active && handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, error_code)) { if (vcpu->halt_request) { @@ -2224,28 +2224,34 @@ static void vmx_inject_page_fault(struct static void vmx_free_vmcs(struct kvm_vcpu *vcpu) { - if (vcpu->vmcs) { + if (vcpu->vmx->vmcs) { on_each_cpu(__vcpu_clear, vcpu, 0, 1); - free_vmcs(vcpu->vmcs); - vcpu->vmcs = NULL; + free_vmcs(vcpu->vmx->vmcs); + vcpu->vmx->vmcs = NULL; + } } +static int vmx_vcpu_size(void) +{ + return sizeof(struct kvm_vcpu) + sizeof(struct kvm_vmx_data); +} + static void vmx_free_vcpu(struct kvm_vcpu *vcpu) { vmx_free_vmcs(vcpu); } -static int vmx_create_vcpu(struct kvm_vcpu *vcpu) +static int vmx_init_vcpu(struct kvm_vcpu 
*vcpu) { struct vmcs *vmcs; - vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!vcpu->guest_msrs) + vcpu->vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!vcpu->vmx->guest_msrs) return -ENOMEM; - vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!vcpu->host_msrs) + vcpu->vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!vcpu->vmx->host_msrs) goto out_free_guest_msrs; vmcs = alloc_vmcs(); @@ -2253,18 +2259,18 @@ static int vmx_create_vcpu(struct kvm_vc goto out_free_msrs; vmcs_clear(vmcs); - vcpu->vmcs = vmcs; + vcpu->vmx->vmcs = vmcs; vcpu->launched = 0; return 0; out_free_msrs: - kfree(vcpu->host_msrs); - vcpu->host_msrs = NULL; + kfree(vcpu->vmx->host_msrs); + vcpu->vmx->host_msrs = NULL; out_free_guest_msrs: - kfree(vcpu->guest_msrs); - vcpu->guest_msrs = NULL; + kfree(vcpu->vmx->guest_msrs); + vcpu->vmx->guest_msrs = NULL; return -ENOMEM; } @@ -2277,7 +2283,8 @@ static struct kvm_arch_ops vmx_arch_ops .hardware_enable = hardware_enable, .hardware_disable = hardware_disable, - .vcpu_create = vmx_create_vcpu, + .vcpu_size = vmx_vcpu_size, + .vcpu_init = vmx_init_vcpu, .vcpu_free = vmx_free_vcpu, .vcpu_load = vmx_vcpu_load, [-- Attachment #2: Type: TEXT/PLAIN, Size: 24460 bytes --] diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 7bdffe6..9fff8b7 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -16,6 +16,7 @@ #include <linux/mm.h> #include <asm/signal.h> #include "vmx.h" +#include "kvm_svm.h" #include <linux/kvm.h> #include <linux/kvm_para.h> @@ -308,13 +309,39 @@ struct kvm_io_device *kvm_io_bus_find_de void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev); +struct kvm_vmx_data { + int msr_offset_efer; + + #ifdef CONFIG_X86_64 + int msr_offset_kernel_gs_base; + #endif + + struct vmx_host_state { + int loaded; + u16 fs_sel, gs_sel, ldt_sel; + int fs_gs_ldt_reload_needed; + } host_state; + + struct vmx_msr_entry *guest_msrs; + struct vmx_msr_entry *host_msrs; + + struct { + int active; + u8 
save_iopl; + struct kvm_save_segment { + u16 selector; + unsigned long base; + u32 limit; + u32 ar; + } tr, es, ds, fs, gs; + } rmode; + + struct vmcs *vmcs; +}; + struct kvm_vcpu { struct kvm *kvm; int vcpu_id; - union { - struct vmcs *vmcs; - struct vcpu_svm *svm; - }; struct mutex mutex; int cpu; int launched; @@ -342,12 +369,6 @@ struct kvm_vcpu { u64 ia32_misc_enable_msr; int nmsrs; int save_nmsrs; - int msr_offset_efer; -#ifdef CONFIG_X86_64 - int msr_offset_kernel_gs_base; -#endif - struct vmx_msr_entry *guest_msrs; - struct vmx_msr_entry *host_msrs; struct kvm_mmu mmu; @@ -366,11 +387,6 @@ #endif char *guest_fx_image; int fpu_active; int guest_fpu_loaded; - struct vmx_host_state { - int loaded; - u16 fs_sel, gs_sel, ldt_sel; - int fs_gs_ldt_reload_needed; - } vmx_host_state; int mmio_needed; int mmio_read_completed; @@ -387,20 +403,13 @@ #endif struct kvm_stat stat; - struct { - int active; - u8 save_iopl; - struct kvm_save_segment { - u16 selector; - unsigned long base; - u32 limit; - u32 ar; - } tr, es, ds, fs, gs; - } rmode; - int halt_request; /* real mode on Intel only */ - int cpuid_nent; struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES]; + + int halt_request; /* real mode emulation */ + + struct kvm_vmx_data vmx[0]; + struct kvm_svm_data svm[0]; }; struct kvm_mem_alias { @@ -430,7 +439,7 @@ struct kvm { int n_free_mmu_pages; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; int nvcpus; - struct kvm_vcpu vcpus[KVM_MAX_VCPUS]; + struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; int memory_config_version; int busy; unsigned long rmap_overflow; @@ -453,7 +462,8 @@ struct kvm_arch_ops { int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ - int (*vcpu_create)(struct kvm_vcpu *vcpu); + int (*vcpu_size)(void); + int (*vcpu_init)(struct kvm_vcpu *vcpu); void (*vcpu_free)(struct kvm_vcpu *vcpu); void (*vcpu_load)(struct kvm_vcpu *vcpu); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 67654c3..6831024 
100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -267,7 +267,7 @@ void kvm_flush_remote_tlbs(struct kvm *k cpus_clear(cpus); needed = 0; for (i = 0; i < kvm->nvcpus; ++i) { - vcpu = &kvm->vcpus[i]; + vcpu = kvm->vcpus[i]; if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests)) continue; cpu = vcpu->cpu; @@ -294,7 +294,6 @@ void kvm_flush_remote_tlbs(struct kvm *k static struct kvm *kvm_create_vm(void) { struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); - int i; if (!kvm) return ERR_PTR(-ENOMEM); @@ -306,14 +305,7 @@ static struct kvm *kvm_create_vm(void) list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); kvm_io_bus_init(&kvm->mmio_bus); - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - struct kvm_vcpu *vcpu = &kvm->vcpus[i]; - mutex_init(&vcpu->mutex); - vcpu->cpu = -1; - vcpu->kvm = kvm; - vcpu->mmu.root_hpa = INVALID_PAGE; - } return kvm; } @@ -367,7 +359,7 @@ static void free_pio_guest_pages(struct static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { - if (!vcpu->vmcs) + if (!vcpu) return; vcpu_load(vcpu); @@ -377,7 +369,7 @@ static void kvm_unload_vcpu_mmu(struct k static void kvm_free_vcpu(struct kvm_vcpu *vcpu) { - if (!vcpu->vmcs) + if (!vcpu) return; vcpu_load(vcpu); @@ -389,6 +381,7 @@ static void kvm_free_vcpu(struct kvm_vcp free_page((unsigned long)vcpu->pio_data); vcpu->pio_data = NULL; free_pio_guest_pages(vcpu); + kfree(vcpu); } static void kvm_free_vcpus(struct kvm *kvm) @@ -399,9 +392,11 @@ static void kvm_free_vcpus(struct kvm *k * Unpin any mmu pages first. 
*/ for (i = 0; i < KVM_MAX_VCPUS; ++i) - kvm_unload_vcpu_mmu(&kvm->vcpus[i]); - for (i = 0; i < KVM_MAX_VCPUS; ++i) - kvm_free_vcpu(&kvm->vcpus[i]); + kvm_unload_vcpu_mmu(kvm->vcpus[i]); + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + kvm_free_vcpu(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } } static int kvm_dev_release(struct inode *inode, struct file *filp) @@ -2371,40 +2366,54 @@ static int kvm_vm_ioctl_create_vcpu(stru { int r; struct kvm_vcpu *vcpu; - struct page *page; + struct page *pio_page, *run_page; r = -EINVAL; if (!valid_vcpu(n)) goto out; - vcpu = &kvm->vcpus[n]; - vcpu->vcpu_id = n; + vcpu = kzalloc(kvm_arch_ops->vcpu_size(), GFP_KERNEL); + if (!vcpu) { + r = -ENOMEM; + goto out; + } + + mutex_init(&vcpu->mutex); mutex_lock(&vcpu->mutex); + vcpu->cpu = -1; + vcpu->kvm = kvm; + vcpu->mmu.root_hpa = INVALID_PAGE; - if (vcpu->vmcs) { - mutex_unlock(&vcpu->mutex); - return -EEXIST; - } + vcpu->vcpu_id = n; + + spin_lock(&kvm->lock); + if (kvm->vcpus[n]) { + spin_unlock(&kvm->lock); + r = -EEXIST; + goto out_free; + } + kvm->vcpus[n] = vcpu; + spin_unlock(&kvm->lock); - page = alloc_page(GFP_KERNEL | __GFP_ZERO); + run_page = alloc_page(GFP_KERNEL | __GFP_ZERO); r = -ENOMEM; - if (!page) - goto out_unlock; - vcpu->run = page_address(page); + if (!run_page) + goto out_unassign; + vcpu->run = page_address(run_page); - page = alloc_page(GFP_KERNEL | __GFP_ZERO); + pio_page = alloc_page(GFP_KERNEL | __GFP_ZERO); r = -ENOMEM; - if (!page) + if (!pio_page) goto out_free_run; - vcpu->pio_data = page_address(page); + vcpu->pio_data = page_address(pio_page); vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, FX_IMAGE_ALIGN); vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; vcpu->cr0 = 0x10; - r = kvm_arch_ops->vcpu_create(vcpu); + r = kvm_arch_ops->vcpu_init(vcpu); if (r < 0) goto out_free_vcpus; @@ -2434,15 +2443,20 @@ static int kvm_vm_ioctl_create_vcpu(stru out_free_vcpus: kvm_free_vcpu(vcpu); + vcpu = NULL; out_free_run: - free_page((unsigned 
long)vcpu->run); - vcpu->run = NULL; -out_unlock: - mutex_unlock(&vcpu->mutex); + free_page((unsigned long)run_page); +out_unassign: + spin_lock(&kvm->lock); + kvm->vcpus[n] = NULL; + spin_unlock(&kvm->lock); +out_free: + kfree(vcpu); out: return r; } + static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) { u64 efer; @@ -2934,7 +2948,7 @@ static void decache_vcpus_on_cpu(int cpu spin_lock(&kvm_lock); list_for_each_entry(vm, &vm_list, vm_list) for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = &vm->vcpus[i]; + vcpu = vm->vcpus[i]; /* * If the vcpu is locked, then it is running on some * other cpu and therefore it is not cached on the @@ -3071,7 +3085,7 @@ static u64 stat_get(void *_offset) spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = &kvm->vcpus[i]; + vcpu = kvm->vcpus[i]; total += *(u32 *)((void *)vcpu + offset); } spin_unlock(&kvm_lock); diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h index a869983..156359a 100644 --- a/drivers/kvm/kvm_svm.h +++ b/drivers/kvm/kvm_svm.h @@ -7,7 +7,6 @@ #include <linux/list.h> #include <asm/msr.h> #include "svm.h" -#include "kvm.h" static const u32 host_save_user_msrs[] = { #ifdef CONFIG_X86_64 @@ -20,7 +19,7 @@ #endif #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) #define NUM_DB_REGS 4 -struct vcpu_svm { +struct kvm_svm_data { struct vmcb *vmcb; unsigned long vmcb_pa; struct svm_cpu_data *svm_data; @@ -39,4 +38,3 @@ struct vcpu_svm { }; #endif - diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 850a1b1..16a3b6e 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -14,7 +14,7 @@ * */ -#include "kvm_svm.h" +#include "kvm.h" #include "x86_emulate.h" #include <linux/module.h> @@ -567,18 +567,20 @@ static void init_vmcb(struct vmcb *vmcb) /* rdx = ?? 
*/ } -static int svm_create_vcpu(struct kvm_vcpu *vcpu) +static int svm_vcpu_size(void) +{ + return sizeof(struct kvm_vcpu) + sizeof(struct kvm_svm_data); +} + +static int svm_init_vcpu(struct kvm_vcpu *vcpu) { struct page *page; int r; r = -ENOMEM; - vcpu->svm = kzalloc(sizeof *vcpu->svm, GFP_KERNEL); - if (!vcpu->svm) - goto out1; page = alloc_page(GFP_KERNEL); if (!page) - goto out2; + goto out1; vcpu->svm->vmcb = page_address(page); clear_page(vcpu->svm->vmcb); @@ -595,8 +597,6 @@ static int svm_create_vcpu(struct kvm_vc return 0; -out2: - kfree(vcpu->svm); out1: return r; } @@ -1608,7 +1608,7 @@ #endif : : [vcpu]"a"(vcpu), [svm]"i"(offsetof(struct kvm_vcpu, svm)), - [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), + [vmcb]"i"(offsetof(struct kvm_svm_data, vmcb_pa)), [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])), [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])), [rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])), @@ -1763,7 +1763,8 @@ static struct kvm_arch_ops svm_arch_ops .hardware_enable = svm_hardware_enable, .hardware_disable = svm_hardware_disable, - .vcpu_create = svm_create_vcpu, + .vcpu_size = svm_vcpu_size, + .vcpu_init = svm_init_vcpu, .vcpu_free = svm_free_vcpu, .vcpu_load = svm_vcpu_load, diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 2c4f01b..49587a1 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -96,9 +96,9 @@ static inline u64 msr_efer_save_restore_ static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu) { - int efer_offset = vcpu->msr_offset_efer; - return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) != - msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]); + int efer_offset = vcpu->vmx->msr_offset_efer; + return msr_efer_save_restore_bits(vcpu->vmx->host_msrs[efer_offset]) != + msr_efer_save_restore_bits(vcpu->vmx->guest_msrs[efer_offset]); } static inline int is_page_fault(u32 intr_info) @@ -126,7 +126,7 @@ static int __find_msr_index(struct kvm_v int i; for (i = 0; i < 
vcpu->nmsrs; ++i) - if (vcpu->guest_msrs[i].index == msr) + if (vcpu->vmx->guest_msrs[i].index == msr) return i; return -1; } @@ -137,7 +137,7 @@ static struct vmx_msr_entry *find_msr_en i = __find_msr_index(vcpu, msr); if (i >= 0) - return &vcpu->guest_msrs[i]; + return &vcpu->vmx->guest_msrs[i]; return NULL; } @@ -160,8 +160,8 @@ static void __vcpu_clear(void *arg) int cpu = raw_smp_processor_id(); if (vcpu->cpu == cpu) - vmcs_clear(vcpu->vmcs); - if (per_cpu(current_vmcs, cpu) == vcpu->vmcs) + vmcs_clear(vcpu->vmx->vmcs); + if (per_cpu(current_vmcs, cpu) == vcpu->vmx->vmcs) per_cpu(current_vmcs, cpu) = NULL; rdtscll(vcpu->host_tsc); } @@ -260,7 +260,7 @@ static void update_exception_bitmap(stru eb |= 1u << NM_VECTOR; if (vcpu->guest_debug.enabled) eb |= 1u << 1; - if (vcpu->rmode.active) + if (vcpu->vmx->rmode.active) eb = ~0; vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -285,19 +285,19 @@ #endif static void load_transition_efer(struct kvm_vcpu *vcpu) { u64 trans_efer; - int efer_offset = vcpu->msr_offset_efer; + int efer_offset = vcpu->vmx->msr_offset_efer; - trans_efer = vcpu->host_msrs[efer_offset].data; + trans_efer = vcpu->vmx->host_msrs[efer_offset].data; trans_efer &= ~EFER_SAVE_RESTORE_BITS; trans_efer |= msr_efer_save_restore_bits( - vcpu->guest_msrs[efer_offset]); + vcpu->vmx->guest_msrs[efer_offset]); wrmsrl(MSR_EFER, trans_efer); vcpu->stat.efer_reload++; } static void vmx_save_host_state(struct kvm_vcpu *vcpu) { - struct vmx_host_state *hs = &vcpu->vmx_host_state; + struct vmx_host_state *hs = &vcpu->vmx->host_state; if (hs->loaded) return; @@ -334,17 +334,17 @@ #endif #ifdef CONFIG_X86_64 if (is_long_mode(vcpu)) { - save_msrs(vcpu->host_msrs + vcpu->msr_offset_kernel_gs_base, 1); + save_msrs(vcpu->vmx->host_msrs + vcpu->vmx->msr_offset_kernel_gs_base, 1); } #endif - load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + load_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs); if (msr_efer_need_save_restore(vcpu)) load_transition_efer(vcpu); } static void 
vmx_load_host_state(struct kvm_vcpu *vcpu) { - struct vmx_host_state *hs = &vcpu->vmx_host_state; + struct vmx_host_state *hs = &vcpu->vmx->host_state; if (!hs->loaded) return; @@ -366,10 +366,10 @@ #endif reload_tss(); } - save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); - load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); + save_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs); + load_msrs(vcpu->vmx->host_msrs, vcpu->save_nmsrs); if (msr_efer_need_save_restore(vcpu)) - load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1); + load_msrs(vcpu->vmx->host_msrs + vcpu->vmx->msr_offset_efer, 1); } /* @@ -378,7 +378,7 @@ #endif */ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) { - u64 phys_addr = __pa(vcpu->vmcs); + u64 phys_addr = __pa(vcpu->vmx->vmcs); int cpu; u64 tsc_this, delta; @@ -387,16 +387,16 @@ static void vmx_vcpu_load(struct kvm_vcp if (vcpu->cpu != cpu) vcpu_clear(vcpu); - if (per_cpu(current_vmcs, cpu) != vcpu->vmcs) { + if (per_cpu(current_vmcs, cpu) != vcpu->vmx->vmcs) { u8 error; - per_cpu(current_vmcs, cpu) = vcpu->vmcs; + per_cpu(current_vmcs, cpu) = vcpu->vmx->vmcs; asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0" : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) : "cc"); if (error) printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", - vcpu->vmcs, phys_addr); + vcpu->vmx->vmcs, phys_addr); } if (vcpu->cpu != cpu) { @@ -504,12 +504,12 @@ static void vmx_inject_gp(struct kvm_vcp void move_msr_up(struct kvm_vcpu *vcpu, int from, int to) { struct vmx_msr_entry tmp; - tmp = vcpu->guest_msrs[to]; - vcpu->guest_msrs[to] = vcpu->guest_msrs[from]; - vcpu->guest_msrs[from] = tmp; - tmp = vcpu->host_msrs[to]; - vcpu->host_msrs[to] = vcpu->host_msrs[from]; - vcpu->host_msrs[from] = tmp; + tmp = vcpu->vmx->guest_msrs[to]; + vcpu->vmx->guest_msrs[to] = vcpu->vmx->guest_msrs[from]; + vcpu->vmx->guest_msrs[from] = tmp; + tmp = vcpu->vmx->host_msrs[to]; + vcpu->vmx->host_msrs[to] = vcpu->vmx->host_msrs[from]; + vcpu->vmx->host_msrs[from] = tmp; } /* @@ -550,10 +550,10 @@ #endif 
vcpu->save_nmsrs = save_nmsrs; #ifdef CONFIG_X86_64 - vcpu->msr_offset_kernel_gs_base = + vcpu->vmx->msr_offset_kernel_gs_base = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); #endif - vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); + vcpu->vmx->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); } /* @@ -646,7 +646,7 @@ static int vmx_set_msr(struct kvm_vcpu * #ifdef CONFIG_X86_64 case MSR_EFER: ret = kvm_set_msr_common(vcpu, msr_index, data); - if (vcpu->vmx_host_state.loaded) + if (vcpu->vmx->host_state.loaded) load_transition_efer(vcpu); break; case MSR_FS_BASE: @@ -672,8 +672,8 @@ #endif msr = find_msr_entry(vcpu, msr_index); if (msr) { msr->data = data; - if (vcpu->vmx_host_state.loaded) - load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + if (vcpu->vmx->host_state.loaded) + load_msrs(vcpu->vmx->guest_msrs, vcpu->save_nmsrs); break; } ret = kvm_set_msr_common(vcpu, msr_index, data); @@ -868,15 +868,15 @@ static void enter_pmode(struct kvm_vcpu { unsigned long flags; - vcpu->rmode.active = 0; + vcpu->vmx->rmode.active = 0; - vmcs_writel(GUEST_TR_BASE, vcpu->rmode.tr.base); - vmcs_write32(GUEST_TR_LIMIT, vcpu->rmode.tr.limit); - vmcs_write32(GUEST_TR_AR_BYTES, vcpu->rmode.tr.ar); + vmcs_writel(GUEST_TR_BASE, vcpu->vmx->rmode.tr.base); + vmcs_write32(GUEST_TR_LIMIT, vcpu->vmx->rmode.tr.limit); + vmcs_write32(GUEST_TR_AR_BYTES, vcpu->vmx->rmode.tr.ar); flags = vmcs_readl(GUEST_RFLAGS); flags &= ~(IOPL_MASK | X86_EFLAGS_VM); - flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT); + flags |= (vcpu->vmx->rmode.save_iopl << IOPL_SHIFT); vmcs_writel(GUEST_RFLAGS, flags); vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | @@ -884,10 +884,10 @@ static void enter_pmode(struct kvm_vcpu update_exception_bitmap(vcpu); - fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->rmode.es); - fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->rmode.ds); - fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->rmode.gs); - fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->rmode.fs); + fix_pmode_dataseg(VCPU_SREG_ES, 
&vcpu->vmx->rmode.es); + fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->vmx->rmode.ds); + fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->vmx->rmode.gs); + fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->vmx->rmode.fs); vmcs_write16(GUEST_SS_SELECTOR, 0); vmcs_write32(GUEST_SS_AR_BYTES, 0x93); @@ -920,19 +920,19 @@ static void enter_rmode(struct kvm_vcpu { unsigned long flags; - vcpu->rmode.active = 1; + vcpu->vmx->rmode.active = 1; - vcpu->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); + vcpu->vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); - vcpu->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT); + vcpu->vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT); vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); - vcpu->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES); + vcpu->vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES); vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); flags = vmcs_readl(GUEST_RFLAGS); - vcpu->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT; + vcpu->vmx->rmode.save_iopl = (flags & IOPL_MASK) >> IOPL_SHIFT; flags |= IOPL_MASK | X86_EFLAGS_VM; @@ -950,11 +950,10 @@ static void enter_rmode(struct kvm_vcpu vmcs_writel(GUEST_CS_BASE, 0xf0000); vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4); - fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es); - fix_rmode_seg(VCPU_SREG_DS, &vcpu->rmode.ds); - fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs); - fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs); - + fix_rmode_seg(VCPU_SREG_ES, &vcpu->vmx->rmode.es); + fix_rmode_seg(VCPU_SREG_DS, &vcpu->vmx->rmode.ds); + fix_rmode_seg(VCPU_SREG_GS, &vcpu->vmx->rmode.gs); + fix_rmode_seg(VCPU_SREG_FS, &vcpu->vmx->rmode.fs); init_rmode_tss(vcpu->kvm); } @@ -1002,10 +1001,10 @@ static void vmx_set_cr0(struct kvm_vcpu { vmx_fpu_deactivate(vcpu); - if (vcpu->rmode.active && (cr0 & X86_CR0_PE)) + if (vcpu->vmx->rmode.active && (cr0 & X86_CR0_PE)) enter_pmode(vcpu); - if (!vcpu->rmode.active && !(cr0 & X86_CR0_PE)) + if (!vcpu->vmx->rmode.active && !(cr0 & 
X86_CR0_PE)) enter_rmode(vcpu); #ifdef CONFIG_X86_64 @@ -1036,7 +1035,7 @@ static void vmx_set_cr3(struct kvm_vcpu static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { vmcs_writel(CR4_READ_SHADOW, cr4); - vmcs_writel(GUEST_CR4, cr4 | (vcpu->rmode.active ? + vmcs_writel(GUEST_CR4, cr4 | (vcpu->vmx->rmode.active ? KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON)); vcpu->cr4 = cr4; } @@ -1124,17 +1123,17 @@ static void vmx_set_segment(struct kvm_v struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; u32 ar; - if (vcpu->rmode.active && seg == VCPU_SREG_TR) { - vcpu->rmode.tr.selector = var->selector; - vcpu->rmode.tr.base = var->base; - vcpu->rmode.tr.limit = var->limit; - vcpu->rmode.tr.ar = vmx_segment_access_rights(var); + if (vcpu->vmx->rmode.active && seg == VCPU_SREG_TR) { + vcpu->vmx->rmode.tr.selector = var->selector; + vcpu->vmx->rmode.tr.base = var->base; + vcpu->vmx->rmode.tr.limit = var->limit; + vcpu->vmx->rmode.tr.ar = vmx_segment_access_rights(var); return; } vmcs_writel(sf->base, var->base); vmcs_write32(sf->limit, var->limit); vmcs_write16(sf->selector, var->selector); - if (vcpu->rmode.active && var->s) { + if (vcpu->vmx->rmode.active && var->s) { /* * Hack real-mode segments into vm86 compatibility. 
*/ @@ -1253,6 +1252,7 @@ static int vmx_vcpu_setup(struct kvm_vcp vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val(); vcpu->cr8 = 0; vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; + if (vcpu->vcpu_id == 0) vcpu->apic_base |= MSR_IA32_APICBASE_BSP; @@ -1384,10 +1384,10 @@ #endif if (wrmsr_safe(index, data_low, data_high) < 0) continue; data = data_low | ((u64)data_high << 32); - vcpu->host_msrs[j].index = index; - vcpu->host_msrs[j].reserved = 0; - vcpu->host_msrs[j].data = data; - vcpu->guest_msrs[j] = vcpu->host_msrs[j]; + vcpu->vmx->host_msrs[j].index = index; + vcpu->vmx->host_msrs[j].reserved = 0; + vcpu->vmx->host_msrs[j].data = data; + vcpu->vmx->guest_msrs[j] = vcpu->vmx->host_msrs[j]; ++vcpu->nmsrs; } @@ -1479,7 +1479,7 @@ static void kvm_do_inject_irq(struct kvm if (!vcpu->irq_pending[word_index]) clear_bit(word_index, &vcpu->irq_summary); - if (vcpu->rmode.active) { + if (vcpu->vmx->rmode.active) { inject_rmode_irq(vcpu, irq); return; } @@ -1538,7 +1538,7 @@ static void kvm_guest_debug_pre(struct k static int handle_rmode_exception(struct kvm_vcpu *vcpu, int vec, u32 err_code) { - if (!vcpu->rmode.active) + if (!vcpu->vmx->rmode.active) return 0; /* @@ -1619,7 +1619,7 @@ static int handle_exception(struct kvm_v } } - if (vcpu->rmode.active && + if (vcpu->vmx->rmode.active && handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, error_code)) { if (vcpu->halt_request) { @@ -2224,28 +2224,34 @@ static void vmx_inject_page_fault(struct static void vmx_free_vmcs(struct kvm_vcpu *vcpu) { - if (vcpu->vmcs) { + if (vcpu->vmx->vmcs) { on_each_cpu(__vcpu_clear, vcpu, 0, 1); - free_vmcs(vcpu->vmcs); - vcpu->vmcs = NULL; + free_vmcs(vcpu->vmx->vmcs); + vcpu->vmx->vmcs = NULL; + } } +static int vmx_vcpu_size(void) +{ + return sizeof(struct kvm_vcpu) + sizeof(struct kvm_vmx_data); +} + static void vmx_free_vcpu(struct kvm_vcpu *vcpu) { vmx_free_vmcs(vcpu); } -static int vmx_create_vcpu(struct kvm_vcpu *vcpu) +static int vmx_init_vcpu(struct kvm_vcpu 
*vcpu) { struct vmcs *vmcs; - vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!vcpu->guest_msrs) + vcpu->vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!vcpu->vmx->guest_msrs) return -ENOMEM; - vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!vcpu->host_msrs) + vcpu->vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!vcpu->vmx->host_msrs) goto out_free_guest_msrs; vmcs = alloc_vmcs(); @@ -2253,18 +2259,18 @@ static int vmx_create_vcpu(struct kvm_vc goto out_free_msrs; vmcs_clear(vmcs); - vcpu->vmcs = vmcs; + vcpu->vmx->vmcs = vmcs; vcpu->launched = 0; return 0; out_free_msrs: - kfree(vcpu->host_msrs); - vcpu->host_msrs = NULL; + kfree(vcpu->vmx->host_msrs); + vcpu->vmx->host_msrs = NULL; out_free_guest_msrs: - kfree(vcpu->guest_msrs); - vcpu->guest_msrs = NULL; + kfree(vcpu->vmx->guest_msrs); + vcpu->vmx->guest_msrs = NULL; return -ENOMEM; } @@ -2277,7 +2283,8 @@ static struct kvm_arch_ops vmx_arch_ops .hardware_enable = hardware_enable, .hardware_disable = hardware_disable, - .vcpu_create = vmx_create_vcpu, + .vcpu_size = vmx_vcpu_size, + .vcpu_init = vmx_init_vcpu, .vcpu_free = vmx_free_vcpu, .vcpu_load = vmx_vcpu_load, [-- Attachment #3: Type: text/plain, Size: 286 bytes --] ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ [-- Attachment #4: Type: text/plain, Size: 186 bytes --] _______________________________________________ kvm-devel mailing list kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org https://lists.sourceforge.net/lists/listinfo/kvm-devel ^ permalink raw reply related [flat|nested] 5+ messages in thread
[parent not found: <Pine.LNX.4.64.0707181528430.32400-hxTPNdr267xSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>]
* Re: [PATCH] dynamically create vcpus + vmx/svm structures [not found] ` <Pine.LNX.4.64.0707181528430.32400-hxTPNdr267xSzHKm+aFRNNkmqwFzkYv6@public.gmane.org> @ 2007-07-19 9:19 ` Avi Kivity 0 siblings, 0 replies; 5+ messages in thread From: Avi Kivity @ 2007-07-19 9:19 UTC (permalink / raw) To: Paul Turner; +Cc: KVM Paul Turner wrote: > here's an updated version that instead takes kvm_lock and leaves the > svm stuff in the relevant header file for now. unfortunately we still > need the includes since gcc is braindead and can't compile untyped > [0]-size arrays properly, the only two ways i can see to fix this is > either embedding vcpu in an arch specific struct and or using a > long[0] member and a vmx/svm macro as you mentioned before.. > Since this turned out to be a little more involved than anticipated, can you push out a patch that just converts the vcpu array to a pointer array? That will allow careful review of the locking changes, and is independent of splitting that arch independent stuff off. -- error compiling committee.c: too many arguments to function ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2007-07-19 9:19 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-07-13 1:20 [PATCH] dynamically create vcpus + vmx/svm structures Paul Turner
[not found] ` <Pine.LNX.4.64.0707121815040.23503-hxTPNdr267xSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
2007-07-13 7:59 ` Avi Kivity
[not found] ` <ed628a920707131534x7fe57ca3sfa09d9d79412d9c6@mail.gmail.com>
[not found] ` <ed628a920707131534x7fe57ca3sfa09d9d79412d9c6-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-07-14 6:14 ` Avi Kivity
[not found] ` <469869CF.8030106-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-07-18 22:36 ` Paul Turner
[not found] ` <Pine.LNX.4.64.0707181528430.32400-hxTPNdr267xSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
2007-07-19 9:19 ` Avi Kivity
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox