[PATCH 0/5] [uq/master] MSR refactorings, KVMState elimination, kvmclock device

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 0/5] [uq/master] MSR refactorings, KVMState elimination, kvmclock device
@ 2010-12-31 12:01 Jan Kiszka
  2010-12-31 12:01 ` [PATCH 1/5] kvm: x86: Refactor msr_star/hsave_pa setup and checks Jan Kiszka
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Jan Kiszka @ 2010-12-31 12:01 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, qemu-devel

More refactorings of upstream KVM to allow further unifications with
qemu-kvm. See patches for details.

Jan Kiszka (5):
  kvm: x86: Refactor msr_star/hsave_pa setup and checks
  kvm: x86: Reset paravirtual MSRs
  kvm: x86: Drop MCE MSRs write back restrictions
  kvm: Eliminate KVMState arguments
  kvm: x86: Introduce kvmclock device to save/restore its state

 cpu-defs.h            |    2 -
 kvm-all.c             |  233 +++++++++++++++++++++----------------------------
 kvm-stub.c            |    2 +-
 kvm.h                 |   15 +--
 target-i386/cpuid.c   |    9 +-
 target-i386/kvm.c     |  205 ++++++++++++++++++++++++++++---------------
 target-i386/kvm_x86.h |    3 +
 target-ppc/kvm.c      |   12 ++--
 target-s390x/kvm.c    |    8 +--
 9 files changed, 258 insertions(+), 231 deletions(-)


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/5] kvm: x86: Refactor msr_star/hsave_pa setup and checks
  2010-12-31 12:01 [PATCH 0/5] [uq/master] MSR refactorings, KVMState elimination, kvmclock device Jan Kiszka
@ 2010-12-31 12:01 ` Jan Kiszka
  2010-12-31 12:01 ` [PATCH 2/5] kvm: x86: Reset paravirtual MSRs Jan Kiszka
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Jan Kiszka @ 2010-12-31 12:01 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, qemu-devel, Jan Kiszka

From: Jan Kiszka <jan.kiszka@siemens.com>

Simplify kvm_has_msr_star/hsave_pa to booleans and push their one-time
initialization into kvm_arch_init. Also handle potential errors of that
setup procedure.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 target-i386/kvm.c |   47 +++++++++++++++++++----------------------------
 1 files changed, 19 insertions(+), 28 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 30aa51c..000f3b0 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -54,6 +54,8 @@
 #define BUS_MCEERR_AO 5
 #endif
 
+static bool has_msr_star;
+static bool has_msr_hsave_pa;
 static int lm_capable_kernel;
 
 #ifdef KVM_CAP_EXT_CPUID
@@ -460,13 +462,10 @@ void kvm_arch_reset_vcpu(CPUState *env)
     }
 }
 
-int has_msr_star;
-int has_msr_hsave_pa;
-
-static void kvm_supported_msrs(CPUState *env)
+static int kvm_get_supported_msrs(KVMState *s)
 {
     static int kvm_supported_msrs;
-    int ret;
+    int ret = 0;
 
     /* first time */
     if (kvm_supported_msrs == 0) {
@@ -477,9 +476,9 @@ static void kvm_supported_msrs(CPUState *env)
         /* Obtain MSR list from KVM.  These are the MSRs that we must
          * save/restore */
         msr_list.nmsrs = 0;
-        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list);
+        ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, &msr_list);
         if (ret < 0 && ret != -E2BIG) {
-            return;
+            return ret;
         }
         /* Old kernel modules had a bug and could write beyond the provided
            memory. Allocate at least a safe amount of 1K. */
@@ -488,17 +487,17 @@ static void kvm_supported_msrs(CPUState *env)
                                               sizeof(msr_list.indices[0])));
 
         kvm_msr_list->nmsrs = msr_list.nmsrs;
-        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
+        ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
         if (ret >= 0) {
             int i;
 
             for (i = 0; i < kvm_msr_list->nmsrs; i++) {
                 if (kvm_msr_list->indices[i] == MSR_STAR) {
-                    has_msr_star = 1;
+                    has_msr_star = true;
                     continue;
                 }
                 if (kvm_msr_list->indices[i] == MSR_VM_HSAVE_PA) {
-                    has_msr_hsave_pa = 1;
+                    has_msr_hsave_pa = true;
                     continue;
                 }
             }
@@ -507,19 +506,7 @@ static void kvm_supported_msrs(CPUState *env)
         free(kvm_msr_list);
     }
 
-    return;
-}
-
-static int kvm_has_msr_hsave_pa(CPUState *env)
-{
-    kvm_supported_msrs(env);
-    return has_msr_hsave_pa;
-}
-
-static int kvm_has_msr_star(CPUState *env)
-{
-    kvm_supported_msrs(env);
-    return has_msr_star;
+    return ret;
 }
 
 static int kvm_init_identity_map_page(KVMState *s)
@@ -544,9 +531,13 @@ static int kvm_init_identity_map_page(KVMState *s)
 int kvm_arch_init(KVMState *s, int smp_cpus)
 {
     int ret;
-
     struct utsname utsname;
 
+    ret = kvm_get_supported_msrs(s);
+    if (ret < 0) {
+        return ret;
+    }
+
     uname(&utsname);
     lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0;
 
@@ -831,10 +822,10 @@ static int kvm_put_msrs(CPUState *env, int level)
     kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
     kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
     kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
-    if (kvm_has_msr_star(env)) {
+    if (has_msr_star) {
         kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
     }
-    if (kvm_has_msr_hsave_pa(env)) {
+    if (has_msr_hsave_pa) {
         kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave);
     }
 #ifdef TARGET_X86_64
@@ -1077,10 +1068,10 @@ static int kvm_get_msrs(CPUState *env)
     msrs[n++].index = MSR_IA32_SYSENTER_CS;
     msrs[n++].index = MSR_IA32_SYSENTER_ESP;
     msrs[n++].index = MSR_IA32_SYSENTER_EIP;
-    if (kvm_has_msr_star(env)) {
+    if (has_msr_star) {
         msrs[n++].index = MSR_STAR;
     }
-    if (kvm_has_msr_hsave_pa(env)) {
+    if (has_msr_hsave_pa) {
         msrs[n++].index = MSR_VM_HSAVE_PA;
     }
     msrs[n++].index = MSR_IA32_TSC;
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/5] kvm: x86: Reset paravirtual MSRs
  2010-12-31 12:01 [PATCH 0/5] [uq/master] MSR refactorings, KVMState elimination, kvmclock device Jan Kiszka
  2010-12-31 12:01 ` [PATCH 1/5] kvm: x86: Refactor msr_star/hsave_pa setup and checks Jan Kiszka
@ 2010-12-31 12:01 ` Jan Kiszka
  2010-12-31 12:01 ` [PATCH 3/5] kvm: x86: Drop MCE MSRs write back restrictions Jan Kiszka
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Jan Kiszka @ 2010-12-31 12:01 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, qemu-devel, Jan Kiszka, Glauber Costa

From: Jan Kiszka <jan.kiszka@siemens.com>

Make sure to clear MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, and
MSR_KVM_ASYNC_PF_EN so that a freshly booted guest cannot be disturbed
by old values.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
CC: Glauber Costa <glommer@redhat.com>
---
 target-i386/kvm.c |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 000f3b0..d05a2b9 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -454,6 +454,9 @@ void kvm_arch_reset_vcpu(CPUState *env)
     env->interrupt_injected = -1;
     env->nmi_injected = 0;
     env->nmi_pending = 0;
+    env->system_time_msr = 0;
+    env->wall_clock_msr = 0;
+    env->async_pf_en_msr = 0;
     if (kvm_irqchip_in_kernel()) {
         env->mp_state = cpu_is_bsp(env) ? KVM_MP_STATE_RUNNABLE :
                                           KVM_MP_STATE_UNINITIALIZED;
@@ -846,6 +849,13 @@ static int kvm_put_msrs(CPUState *env, int level)
         if (smp_cpus == 1 || env->tsc != 0) {
             kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
         }
+    }
+    /*
+     * The following paravirtual MSRs have side effects on the guest or are
+     * too heavy for normal writeback. Limit them to reset or full state
+     * updates.
+     */
+    if (level >= KVM_PUT_RESET_STATE) {
         kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME,
                           env->system_time_msr);
         kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/5] kvm: x86: Drop MCE MSRs write back restrictions
  2010-12-31 12:01 [PATCH 0/5] [uq/master] MSR refactorings, KVMState elimination, kvmclock device Jan Kiszka
  2010-12-31 12:01 ` [PATCH 1/5] kvm: x86: Refactor msr_star/hsave_pa setup and checks Jan Kiszka
  2010-12-31 12:01 ` [PATCH 2/5] kvm: x86: Reset paravirtual MSRs Jan Kiszka
@ 2010-12-31 12:01 ` Jan Kiszka
  2010-12-31 12:01 ` [PATCH 4/5] kvm: Eliminate KVMState arguments Jan Kiszka
  2010-12-31 12:01 ` [PATCH 5/5] kvm: x86: Introduce kvmclock device to save/restore its state Jan Kiszka
  4 siblings, 0 replies; 6+ messages in thread
From: Jan Kiszka @ 2010-12-31 12:01 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, qemu-devel, Jan Kiszka, Huang Ying

From: Jan Kiszka <jan.kiszka@siemens.com>

There is no need to restrict writing back MCE MSRs to reset or full
state updates as setting their values has no side effects.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
CC: Huang Ying <ying.huang@intel.com>
---
 target-i386/kvm.c |   12 ++++--------
 1 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index d05a2b9..96c350e 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -867,14 +867,10 @@ static int kvm_put_msrs(CPUState *env, int level)
     if (env->mcg_cap) {
         int i;
 
-        if (level == KVM_PUT_RESET_STATE) {
-            kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
-        } else if (level == KVM_PUT_FULL_STATE) {
-            kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
-            kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl);
-            for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
-                kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]);
-            }
+        kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
+        kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl);
+        for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
+            kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]);
         }
     }
 #endif
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 4/5] kvm: Eliminate KVMState arguments
  2010-12-31 12:01 [PATCH 0/5] [uq/master] MSR refactorings, KVMState elimination, kvmclock device Jan Kiszka
                   ` (2 preceding siblings ...)
  2010-12-31 12:01 ` [PATCH 3/5] kvm: x86: Drop MCE MSRs write back restrictions Jan Kiszka
@ 2010-12-31 12:01 ` Jan Kiszka
  2010-12-31 12:01 ` [PATCH 5/5] kvm: x86: Introduce kvmclock device to save/restore its state Jan Kiszka
  4 siblings, 0 replies; 6+ messages in thread
From: Jan Kiszka @ 2010-12-31 12:01 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, qemu-devel, Jan Kiszka

From: Jan Kiszka <jan.kiszka@siemens.com>

QEMU supports only one VM, so there is only one kvm_state per process,
and we gain nothing passing a reference to it around. Eliminate any need
to refer to it outside of kvm-all.c.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 cpu-defs.h            |    2 -
 kvm-all.c             |  233 +++++++++++++++++++++----------------------------
 kvm-stub.c            |    2 +-
 kvm.h                 |   15 +--
 target-i386/cpuid.c   |    9 +-
 target-i386/kvm.c     |   73 ++++++++--------
 target-i386/kvm_x86.h |    3 +
 target-ppc/kvm.c      |   12 ++--
 target-s390x/kvm.c    |    8 +--
 9 files changed, 159 insertions(+), 198 deletions(-)

diff --git a/cpu-defs.h b/cpu-defs.h
index eaed43e..ada6629 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -131,7 +131,6 @@ typedef struct icount_decr_u16 {
 #endif
 
 struct kvm_run;
-struct KVMState;
 struct qemu_work_item;
 
 typedef struct CPUBreakpoint {
@@ -208,7 +207,6 @@ typedef struct CPUWatchpoint {
     struct QemuCond *halt_cond;                                         \
     struct qemu_work_item *queued_work_first, *queued_work_last;        \
     const char *cpu_model_str;                                          \
-    struct KVMState *kvm_state;                                         \
     struct kvm_run *kvm_run;                                            \
     int kvm_fd;                                                         \
     int kvm_vcpu_dirty;
diff --git a/kvm-all.c b/kvm-all.c
index 237c762..bd8f88c 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -52,8 +52,7 @@ typedef struct KVMSlot
 
 typedef struct kvm_dirty_log KVMDirtyLog;
 
-struct KVMState
-{
+static struct KVMState {
     KVMSlot slots[32];
     int fd;
     int vmfd;
@@ -72,21 +71,19 @@ struct KVMState
     int irqchip_in_kernel;
     int pit_in_kernel;
     int xsave, xcrs;
-};
-
-static KVMState *kvm_state;
+} kvm_state;
 
-static KVMSlot *kvm_alloc_slot(KVMState *s)
+static KVMSlot *kvm_alloc_slot(void)
 {
     int i;
 
-    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
+    for (i = 0; i < ARRAY_SIZE(kvm_state.slots); i++) {
         /* KVM private memory slots */
         if (i >= 8 && i < 12) {
             continue;
         }
-        if (s->slots[i].memory_size == 0) {
-            return &s->slots[i];
+        if (kvm_state.slots[i].memory_size == 0) {
+            return &kvm_state.slots[i];
         }
     }
 
@@ -94,14 +91,13 @@ static KVMSlot *kvm_alloc_slot(KVMState *s)
     abort();
 }
 
-static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
-                                         target_phys_addr_t start_addr,
+static KVMSlot *kvm_lookup_matching_slot(target_phys_addr_t start_addr,
                                          target_phys_addr_t end_addr)
 {
     int i;
 
-    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
-        KVMSlot *mem = &s->slots[i];
+    for (i = 0; i < ARRAY_SIZE(kvm_state.slots); i++) {
+        KVMSlot *mem = &kvm_state.slots[i];
 
         if (start_addr == mem->start_addr &&
             end_addr == mem->start_addr + mem->memory_size) {
@@ -115,15 +111,14 @@ static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
 /*
  * Find overlapping slot with lowest start address
  */
-static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
-                                            target_phys_addr_t start_addr,
+static KVMSlot *kvm_lookup_overlapping_slot(target_phys_addr_t start_addr,
                                             target_phys_addr_t end_addr)
 {
     KVMSlot *found = NULL;
     int i;
 
-    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
-        KVMSlot *mem = &s->slots[i];
+    for (i = 0; i < ARRAY_SIZE(kvm_state.slots); i++) {
+        KVMSlot *mem = &kvm_state.slots[i];
 
         if (mem->memory_size == 0 ||
             (found && found->start_addr < mem->start_addr)) {
@@ -139,13 +134,13 @@ static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
     return found;
 }
 
-int kvm_physical_memory_addr_from_ram(KVMState *s, ram_addr_t ram_addr,
+int kvm_physical_memory_addr_from_ram(ram_addr_t ram_addr,
                                       target_phys_addr_t *phys_addr)
 {
     int i;
 
-    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
-        KVMSlot *mem = &s->slots[i];
+    for (i = 0; i < ARRAY_SIZE(kvm_state.slots); i++) {
+        KVMSlot *mem = &kvm_state.slots[i];
 
         if (ram_addr >= mem->phys_offset &&
             ram_addr < mem->phys_offset + mem->memory_size) {
@@ -157,7 +152,7 @@ int kvm_physical_memory_addr_from_ram(KVMState *s, ram_addr_t ram_addr,
     return 0;
 }
 
-static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
+static int kvm_set_user_memory_region(KVMSlot *slot)
 {
     struct kvm_userspace_memory_region mem;
 
@@ -166,10 +161,10 @@ static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
     mem.memory_size = slot->memory_size;
     mem.userspace_addr = (unsigned long)qemu_safe_ram_ptr(slot->phys_offset);
     mem.flags = slot->flags;
-    if (s->migration_log) {
+    if (kvm_state.migration_log) {
         mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
     }
-    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+    return kvm_vm_ioctl(KVM_SET_USER_MEMORY_REGION, &mem);
 }
 
 static void kvm_reset_vcpu(void *opaque)
@@ -181,33 +176,31 @@ static void kvm_reset_vcpu(void *opaque)
 
 int kvm_irqchip_in_kernel(void)
 {
-    return kvm_state->irqchip_in_kernel;
+    return kvm_state.irqchip_in_kernel;
 }
 
 int kvm_pit_in_kernel(void)
 {
-    return kvm_state->pit_in_kernel;
+    return kvm_state.pit_in_kernel;
 }
 
 
 int kvm_init_vcpu(CPUState *env)
 {
-    KVMState *s = kvm_state;
     long mmap_size;
     int ret;
 
     DPRINTF("kvm_init_vcpu\n");
 
-    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
+    ret = kvm_vm_ioctl(KVM_CREATE_VCPU, env->cpu_index);
     if (ret < 0) {
         DPRINTF("kvm_create_vcpu failed\n");
         goto err;
     }
 
     env->kvm_fd = ret;
-    env->kvm_state = s;
 
-    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
+    mmap_size = kvm_ioctl(KVM_GET_VCPU_MMAP_SIZE, 0);
     if (mmap_size < 0) {
         DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
         goto err;
@@ -222,9 +215,9 @@ int kvm_init_vcpu(CPUState *env)
     }
 
 #ifdef KVM_CAP_COALESCED_MMIO
-    if (s->coalesced_mmio && !s->coalesced_mmio_ring) {
-        s->coalesced_mmio_ring =
-            (void *)env->kvm_run + s->coalesced_mmio * PAGE_SIZE;
+    if (kvm_state.coalesced_mmio && !kvm_state.coalesced_mmio_ring) {
+        kvm_state.coalesced_mmio_ring =
+            (void *)env->kvm_run + kvm_state.coalesced_mmio * PAGE_SIZE;
     }
 #endif
 
@@ -243,8 +236,7 @@ err:
 static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                       ram_addr_t size, int flags, int mask)
 {
-    KVMState *s = kvm_state;
-    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
+    KVMSlot *mem = kvm_lookup_matching_slot(phys_addr, phys_addr + size);
     int old_flags;
 
     if (mem == NULL)  {
@@ -260,14 +252,14 @@ static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
     mem->flags = flags;
 
     /* If nothing changed effectively, no need to issue ioctl */
-    if (s->migration_log) {
+    if (kvm_state.migration_log) {
         flags |= KVM_MEM_LOG_DIRTY_PAGES;
     }
     if (flags == old_flags) {
             return 0;
     }
 
-    return kvm_set_user_memory_region(s, mem);
+    return kvm_set_user_memory_region(mem);
 }
 
 int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
@@ -284,14 +276,13 @@ int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
 
 static int kvm_set_migration_log(int enable)
 {
-    KVMState *s = kvm_state;
     KVMSlot *mem;
     int i, err;
 
-    s->migration_log = enable;
+    kvm_state.migration_log = enable;
 
-    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
-        mem = &s->slots[i];
+    for (i = 0; i < ARRAY_SIZE(kvm_state.slots); i++) {
+        mem = &kvm_state.slots[i];
 
         if (!mem->memory_size) {
             continue;
@@ -299,7 +290,7 @@ static int kvm_set_migration_log(int enable)
         if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
             continue;
         }
-        err = kvm_set_user_memory_region(s, mem);
+        err = kvm_set_user_memory_region(mem);
         if (err) {
             return err;
         }
@@ -353,7 +344,6 @@ static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
 static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
 					  target_phys_addr_t end_addr)
 {
-    KVMState *s = kvm_state;
     unsigned long size, allocated_size = 0;
     KVMDirtyLog d;
     KVMSlot *mem;
@@ -361,7 +351,7 @@ static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
 
     d.dirty_bitmap = NULL;
     while (start_addr < end_addr) {
-        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
+        mem = kvm_lookup_overlapping_slot(start_addr, end_addr);
         if (mem == NULL) {
             break;
         }
@@ -377,7 +367,7 @@ static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
 
         d.slot = mem->slot;
 
-        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
+        if (kvm_vm_ioctl(KVM_GET_DIRTY_LOG, &d) == -1) {
             DPRINTF("ioctl failed %d\n", errno);
             ret = -1;
             break;
@@ -395,16 +385,15 @@ static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
 int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
 {
     int ret = -ENOSYS;
-#ifdef KVM_CAP_COALESCED_MMIO
-    KVMState *s = kvm_state;
 
-    if (s->coalesced_mmio) {
+#ifdef KVM_CAP_COALESCED_MMIO
+    if (kvm_state.coalesced_mmio) {
         struct kvm_coalesced_mmio_zone zone;
 
         zone.addr = start;
         zone.size = size;
 
-        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
+        ret = kvm_vm_ioctl(KVM_REGISTER_COALESCED_MMIO, &zone);
     }
 #endif
 
@@ -414,27 +403,26 @@ int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
 int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
 {
     int ret = -ENOSYS;
-#ifdef KVM_CAP_COALESCED_MMIO
-    KVMState *s = kvm_state;
 
-    if (s->coalesced_mmio) {
+#ifdef KVM_CAP_COALESCED_MMIO
+    if (kvm_state.coalesced_mmio) {
         struct kvm_coalesced_mmio_zone zone;
 
         zone.addr = start;
         zone.size = size;
 
-        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
+        ret = kvm_vm_ioctl(KVM_UNREGISTER_COALESCED_MMIO, &zone);
     }
 #endif
 
     return ret;
 }
 
-int kvm_check_extension(KVMState *s, unsigned int extension)
+int kvm_check_extension(unsigned int extension)
 {
     int ret;
 
-    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
+    ret = kvm_ioctl(KVM_CHECK_EXTENSION, extension);
     if (ret < 0) {
         ret = 0;
     }
@@ -445,7 +433,6 @@ int kvm_check_extension(KVMState *s, unsigned int extension)
 static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
                              ram_addr_t phys_offset)
 {
-    KVMState *s = kvm_state;
     ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
     KVMSlot *mem, old;
     int err;
@@ -459,7 +446,7 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
     phys_offset &= ~IO_MEM_ROM;
 
     while (1) {
-        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
+        mem = kvm_lookup_overlapping_slot(start_addr, start_addr + size);
         if (!mem) {
             break;
         }
@@ -476,7 +463,7 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
 
         /* unregister the overlapping slot */
         mem->memory_size = 0;
-        err = kvm_set_user_memory_region(s, mem);
+        err = kvm_set_user_memory_region(mem);
         if (err) {
             fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                     __func__, strerror(-err));
@@ -491,16 +478,16 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
          * address as the first existing one. If not or if some overlapping
          * slot comes around later, we will fail (not seen in practice so far)
          * - and actually require a recent KVM version. */
-        if (s->broken_set_mem_region &&
+        if (kvm_state.broken_set_mem_region &&
             old.start_addr == start_addr && old.memory_size < size &&
             flags < IO_MEM_UNASSIGNED) {
-            mem = kvm_alloc_slot(s);
+            mem = kvm_alloc_slot();
             mem->memory_size = old.memory_size;
             mem->start_addr = old.start_addr;
             mem->phys_offset = old.phys_offset;
             mem->flags = 0;
 
-            err = kvm_set_user_memory_region(s, mem);
+            err = kvm_set_user_memory_region(mem);
             if (err) {
                 fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                         strerror(-err));
@@ -515,13 +502,13 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
 
         /* register prefix slot */
         if (old.start_addr < start_addr) {
-            mem = kvm_alloc_slot(s);
+            mem = kvm_alloc_slot();
             mem->memory_size = start_addr - old.start_addr;
             mem->start_addr = old.start_addr;
             mem->phys_offset = old.phys_offset;
             mem->flags = 0;
 
-            err = kvm_set_user_memory_region(s, mem);
+            err = kvm_set_user_memory_region(mem);
             if (err) {
                 fprintf(stderr, "%s: error registering prefix slot: %s\n",
                         __func__, strerror(-err));
@@ -533,14 +520,14 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
         if (old.start_addr + old.memory_size > start_addr + size) {
             ram_addr_t size_delta;
 
-            mem = kvm_alloc_slot(s);
+            mem = kvm_alloc_slot();
             mem->start_addr = start_addr + size;
             size_delta = mem->start_addr - old.start_addr;
             mem->memory_size = old.memory_size - size_delta;
             mem->phys_offset = old.phys_offset + size_delta;
             mem->flags = 0;
 
-            err = kvm_set_user_memory_region(s, mem);
+            err = kvm_set_user_memory_region(mem);
             if (err) {
                 fprintf(stderr, "%s: error registering suffix slot: %s\n",
                         __func__, strerror(-err));
@@ -557,13 +544,13 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
     if (flags >= IO_MEM_UNASSIGNED) {
         return;
     }
-    mem = kvm_alloc_slot(s);
+    mem = kvm_alloc_slot();
     mem->memory_size = size;
     mem->start_addr = start_addr;
     mem->phys_offset = phys_offset;
     mem->flags = 0;
 
-    err = kvm_set_user_memory_region(s, mem);
+    err = kvm_set_user_memory_region(mem);
     if (err) {
         fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                 strerror(-err));
@@ -602,27 +589,24 @@ int kvm_init(int smp_cpus)
     static const char upgrade_note[] =
         "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
         "(see http://sourceforge.net/projects/kvm).\n";
-    KVMState *s;
     int ret;
     int i;
 
-    s = qemu_mallocz(sizeof(KVMState));
-
 #ifdef KVM_CAP_SET_GUEST_DEBUG
-    QTAILQ_INIT(&s->kvm_sw_breakpoints);
+    QTAILQ_INIT(&kvm_state.kvm_sw_breakpoints);
 #endif
-    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
-        s->slots[i].slot = i;
+    for (i = 0; i < ARRAY_SIZE(kvm_state.slots); i++) {
+        kvm_state.slots[i].slot = i;
     }
-    s->vmfd = -1;
-    s->fd = qemu_open("/dev/kvm", O_RDWR);
-    if (s->fd == -1) {
+    kvm_state.vmfd = -1;
+    kvm_state.fd = qemu_open("/dev/kvm", O_RDWR);
+    if (kvm_state.fd == -1) {
         fprintf(stderr, "Could not access KVM kernel module: %m\n");
         ret = -errno;
         goto err;
     }
 
-    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
+    ret = kvm_ioctl(KVM_GET_API_VERSION, 0);
     if (ret < KVM_API_VERSION) {
         if (ret > 0) {
             ret = -EINVAL;
@@ -637,8 +621,8 @@ int kvm_init(int smp_cpus)
         goto err;
     }
 
-    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
-    if (s->vmfd < 0) {
+    kvm_state.vmfd = kvm_ioctl(KVM_CREATE_VM, 0);
+    if (kvm_state.vmfd < 0) {
 #ifdef TARGET_S390X
         fprintf(stderr, "Please add the 'switch_amode' kernel parameter to "
                         "your host kernel command line\n");
@@ -651,7 +635,7 @@ int kvm_init(int smp_cpus)
      * just use a user allocated buffer so we can use regular pages
      * unmodified.  Make sure we have a sufficiently modern version of KVM.
      */
-    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
+    if (!kvm_check_extension(KVM_CAP_USER_MEMORY)) {
         ret = -EINVAL;
         fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s",
                 upgrade_note);
@@ -661,7 +645,7 @@ int kvm_init(int smp_cpus)
     /* There was a nasty bug in < kvm-80 that prevents memory slots from being
      * destroyed properly.  Since we rely on this capability, refuse to work
      * with any kernel without this capability. */
-    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
+    if (!kvm_check_extension(KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
         ret = -EINVAL;
 
         fprintf(stderr,
@@ -670,66 +654,56 @@ int kvm_init(int smp_cpus)
         goto err;
     }
 
-    s->coalesced_mmio = 0;
 #ifdef KVM_CAP_COALESCED_MMIO
-    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
-    s->coalesced_mmio_ring = NULL;
+    kvm_state.coalesced_mmio = kvm_check_extension(KVM_CAP_COALESCED_MMIO);
+    kvm_state.coalesced_mmio_ring = NULL;
 #endif
 
-    s->broken_set_mem_region = 1;
+    kvm_state.broken_set_mem_region = 1;
 #ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
-    ret = kvm_check_extension(s, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
+    ret = kvm_check_extension(KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
     if (ret > 0) {
-        s->broken_set_mem_region = 0;
+        kvm_state.broken_set_mem_region = 0;
     }
 #endif
 
-    s->vcpu_events = 0;
 #ifdef KVM_CAP_VCPU_EVENTS
-    s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
+    kvm_state.vcpu_events = kvm_check_extension(KVM_CAP_VCPU_EVENTS);
 #endif
 
-    s->robust_singlestep = 0;
 #ifdef KVM_CAP_X86_ROBUST_SINGLESTEP
-    s->robust_singlestep =
-        kvm_check_extension(s, KVM_CAP_X86_ROBUST_SINGLESTEP);
+    kvm_state.robust_singlestep =
+        kvm_check_extension(KVM_CAP_X86_ROBUST_SINGLESTEP);
 #endif
 
-    s->debugregs = 0;
 #ifdef KVM_CAP_DEBUGREGS
-    s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
+    kvm_state.debugregs = kvm_check_extension(KVM_CAP_DEBUGREGS);
 #endif
 
-    s->xsave = 0;
 #ifdef KVM_CAP_XSAVE
-    s->xsave = kvm_check_extension(s, KVM_CAP_XSAVE);
+    kvm_state.xsave = kvm_check_extension(KVM_CAP_XSAVE);
 #endif
 
-    s->xcrs = 0;
 #ifdef KVM_CAP_XCRS
-    s->xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
+    kvm_state.xcrs = kvm_check_extension(KVM_CAP_XCRS);
 #endif
 
-    ret = kvm_arch_init(s, smp_cpus);
+    ret = kvm_arch_init(smp_cpus);
     if (ret < 0) {
         goto err;
     }
 
-    kvm_state = s;
     cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);
 
     return 0;
 
 err:
-    if (s) {
-        if (s->vmfd != -1) {
-            close(s->vmfd);
-        }
-        if (s->fd != -1) {
-            close(s->fd);
-        }
+    if (kvm_state.vmfd != -1) {
+        close(kvm_state.vmfd);
+    }
+    if (kvm_state.fd != -1) {
+        close(kvm_state.fd);
     }
-    qemu_free(s);
 
     return ret;
 }
@@ -777,7 +751,7 @@ static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
 static int kvm_handle_internal_error(CPUState *env, struct kvm_run *run)
 {
     fprintf(stderr, "KVM internal error.");
-    if (kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA)) {
+    if (kvm_check_extension(KVM_CAP_INTERNAL_ERROR_DATA)) {
         int i;
 
         fprintf(stderr, " Suberror: %d\n", run->internal.suberror);
@@ -805,9 +779,8 @@ static int kvm_handle_internal_error(CPUState *env, struct kvm_run *run)
 void kvm_flush_coalesced_mmio_buffer(void)
 {
 #ifdef KVM_CAP_COALESCED_MMIO
-    KVMState *s = kvm_state;
-    if (s->coalesced_mmio_ring) {
-        struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
+    if (kvm_state.coalesced_mmio_ring) {
+        struct kvm_coalesced_mmio_ring *ring = kvm_state.coalesced_mmio_ring;
         while (ring->first != ring->last) {
             struct kvm_coalesced_mmio *ent;
 
@@ -962,7 +935,7 @@ void kvm_cpu_exec(CPUState *env)
     }
 }
 
-int kvm_ioctl(KVMState *s, int type, ...)
+int kvm_ioctl(int type, ...)
 {
     int ret;
     void *arg;
@@ -972,14 +945,14 @@ int kvm_ioctl(KVMState *s, int type, ...)
     arg = va_arg(ap, void *);
     va_end(ap);
 
-    ret = ioctl(s->fd, type, arg);
+    ret = ioctl(kvm_state.fd, type, arg);
     if (ret == -1) {
         ret = -errno;
     }
     return ret;
 }
 
-int kvm_vm_ioctl(KVMState *s, int type, ...)
+int kvm_vm_ioctl(int type, ...)
 {
     int ret;
     void *arg;
@@ -989,7 +962,7 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
     arg = va_arg(ap, void *);
     va_end(ap);
 
-    ret = ioctl(s->vmfd, type, arg);
+    ret = ioctl(kvm_state.vmfd, type, arg);
     if (ret == -1) {
         ret = -errno;
     }
@@ -1016,9 +989,7 @@ int kvm_vcpu_ioctl(CPUState *env, int type, ...)
 int kvm_has_sync_mmu(void)
 {
 #ifdef KVM_CAP_SYNC_MMU
-    KVMState *s = kvm_state;
-
-    return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
+    return kvm_check_extension(KVM_CAP_SYNC_MMU);
 #else
     return 0;
 #endif
@@ -1026,27 +997,27 @@ int kvm_has_sync_mmu(void)
 
 int kvm_has_vcpu_events(void)
 {
-    return kvm_state->vcpu_events;
+    return kvm_state.vcpu_events;
 }
 
 int kvm_has_robust_singlestep(void)
 {
-    return kvm_state->robust_singlestep;
+    return kvm_state.robust_singlestep;
 }
 
 int kvm_has_debugregs(void)
 {
-    return kvm_state->debugregs;
+    return kvm_state.debugregs;
 }
 
 int kvm_has_xsave(void)
 {
-    return kvm_state->xsave;
+    return kvm_state.xsave;
 }
 
 int kvm_has_xcrs(void)
 {
-    return kvm_state->xcrs;
+    return kvm_state.xcrs;
 }
 
 void kvm_setup_guest_memory(void *start, size_t size)
@@ -1069,7 +1040,7 @@ struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
 {
     struct kvm_sw_breakpoint *bp;
 
-    QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
+    QTAILQ_FOREACH(bp, &kvm_state.kvm_sw_breakpoints, entry) {
         if (bp->pc == pc) {
             return bp;
         }
@@ -1079,7 +1050,7 @@ struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
 
 int kvm_sw_breakpoints_active(CPUState *env)
 {
-    return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
+    return !QTAILQ_EMPTY(&kvm_state.kvm_sw_breakpoints);
 }
 
 struct kvm_set_guest_debug_data {
@@ -1139,8 +1110,7 @@ int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
             return err;
         }
 
-        QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
-                          bp, entry);
+        QTAILQ_INSERT_HEAD(&kvm_state.kvm_sw_breakpoints, bp, entry);
     } else {
         err = kvm_arch_insert_hw_breakpoint(addr, len, type);
         if (err) {
@@ -1180,7 +1150,7 @@ int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
             return err;
         }
 
-        QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
+        QTAILQ_REMOVE(&kvm_state.kvm_sw_breakpoints, bp, entry);
         qemu_free(bp);
     } else {
         err = kvm_arch_remove_hw_breakpoint(addr, len, type);
@@ -1201,10 +1171,9 @@ int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
 void kvm_remove_all_breakpoints(CPUState *current_env)
 {
     struct kvm_sw_breakpoint *bp, *next;
-    KVMState *s = current_env->kvm_state;
     CPUState *env;
 
-    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
+    QTAILQ_FOREACH_SAFE(bp, &kvm_state.kvm_sw_breakpoints, entry, next) {
         if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
             /* Try harder to find a CPU that currently sees the breakpoint. */
             for (env = first_cpu; env != NULL; env = env->next_cpu) {
@@ -1284,7 +1253,7 @@ int kvm_set_ioeventfd_mmio_long(int fd, uint32_t addr, uint32_t val, bool assign
         iofd.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
     }
 
-    ret = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &iofd);
+    ret = kvm_vm_ioctl(KVM_IOEVENTFD, &iofd);
 
     if (ret < 0) {
         return -errno;
@@ -1313,7 +1282,7 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
     if (!assign) {
         kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
     }
-    r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
+    r = kvm_vm_ioctl(KVM_IOEVENTFD, &kick);
     if (r < 0) {
         return r;
     }
diff --git a/kvm-stub.c b/kvm-stub.c
index 352c6a6..3a058ad 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -53,7 +53,7 @@ int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
     return -ENOSYS;
 }
 
-int kvm_check_extension(KVMState *s, unsigned int extension)
+int kvm_check_extension(unsigned int extension)
 {
     return 0;
 }
diff --git a/kvm.h b/kvm.h
index 51ad56f..26ca8c1 100644
--- a/kvm.h
+++ b/kvm.h
@@ -74,12 +74,9 @@ int kvm_irqchip_in_kernel(void);
 
 /* internal API */
 
-struct KVMState;
-typedef struct KVMState KVMState;
+int kvm_ioctl(int type, ...);
 
-int kvm_ioctl(KVMState *s, int type, ...);
-
-int kvm_vm_ioctl(KVMState *s, int type, ...);
+int kvm_vm_ioctl(int type, ...);
 
 int kvm_vcpu_ioctl(CPUState *env, int type, ...);
 
@@ -104,7 +101,7 @@ int kvm_arch_get_registers(CPUState *env);
 
 int kvm_arch_put_registers(CPUState *env, int level);
 
-int kvm_arch_init(KVMState *s, int smp_cpus);
+int kvm_arch_init(int smp_cpus);
 
 int kvm_arch_init_vcpu(CPUState *env);
 
@@ -146,10 +143,8 @@ void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg);
 
 bool kvm_arch_stop_on_emulation_error(CPUState *env);
 
-int kvm_check_extension(KVMState *s, unsigned int extension);
+int kvm_check_extension(unsigned int extension);
 
-uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
-                                      uint32_t index, int reg);
 void kvm_cpu_synchronize_state(CPUState *env);
 void kvm_cpu_synchronize_post_reset(CPUState *env);
 void kvm_cpu_synchronize_post_init(CPUState *env);
@@ -179,7 +174,7 @@ static inline void cpu_synchronize_post_init(CPUState *env)
 
 
 #if !defined(CONFIG_USER_ONLY)
-int kvm_physical_memory_addr_from_ram(KVMState *s, ram_addr_t ram_addr,
+int kvm_physical_memory_addr_from_ram(ram_addr_t ram_addr,
                                       target_phys_addr_t *phys_addr);
 #endif
 
diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
index 5382a28..17ab619 100644
--- a/target-i386/cpuid.c
+++ b/target-i386/cpuid.c
@@ -23,6 +23,7 @@
 
 #include "cpu.h"
 #include "kvm.h"
+#include "kvm_x86.h"
 
 #include "qemu-option.h"
 #include "qemu-config.h"
@@ -1138,10 +1139,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
             break;
         }
         if (kvm_enabled()) {
-            *eax = kvm_arch_get_supported_cpuid(env, 0xd, count, R_EAX);
-            *ebx = kvm_arch_get_supported_cpuid(env, 0xd, count, R_EBX);
-            *ecx = kvm_arch_get_supported_cpuid(env, 0xd, count, R_ECX);
-            *edx = kvm_arch_get_supported_cpuid(env, 0xd, count, R_EDX);
+            *eax = kvm_x86_get_supported_cpuid(0xd, count, R_EAX);
+            *ebx = kvm_x86_get_supported_cpuid(0xd, count, R_EBX);
+            *ecx = kvm_x86_get_supported_cpuid(0xd, count, R_ECX);
+            *edx = kvm_x86_get_supported_cpuid(0xd, count, R_EDX);
         } else {
             *eax = 0;
             *ebx = 0;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 96c350e..a4ccc7f 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -60,7 +60,7 @@ static int lm_capable_kernel;
 
 #ifdef KVM_CAP_EXT_CPUID
 
-static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
+static struct kvm_cpuid2 *try_get_cpuid(int max)
 {
     struct kvm_cpuid2 *cpuid;
     int r, size;
@@ -68,7 +68,7 @@ static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
     size = sizeof(*cpuid) + max * sizeof(*cpuid->entries);
     cpuid = (struct kvm_cpuid2 *)qemu_mallocz(size);
     cpuid->nent = max;
-    r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid);
+    r = kvm_ioctl(KVM_GET_SUPPORTED_CPUID, cpuid);
     if (r == 0 && cpuid->nent >= max) {
         r = -E2BIG;
     }
@@ -85,20 +85,20 @@ static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
     return cpuid;
 }
 
-uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
-                                      uint32_t index, int reg)
+uint32_t kvm_x86_get_supported_cpuid(uint32_t function, uint32_t index,
+                                     int reg)
 {
     struct kvm_cpuid2 *cpuid;
     int i, max;
     uint32_t ret = 0;
     uint32_t cpuid_1_edx;
 
-    if (!kvm_check_extension(env->kvm_state, KVM_CAP_EXT_CPUID)) {
+    if (!kvm_check_extension(KVM_CAP_EXT_CPUID)) {
         return -1U;
     }
 
     max = 1;
-    while ((cpuid = try_get_cpuid(env->kvm_state, max)) == NULL) {
+    while ((cpuid = try_get_cpuid(max)) == NULL) {
         max *= 2;
     }
 
@@ -126,7 +126,7 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
                     /* On Intel, kvm returns cpuid according to the Intel spec,
                      * so add missing bits according to the AMD spec:
                      */
-                    cpuid_1_edx = kvm_arch_get_supported_cpuid(env, 1, 0, R_EDX);
+                    cpuid_1_edx = kvm_x86_get_supported_cpuid(1, 0, R_EDX);
                     ret |= cpuid_1_edx & 0x183f7ff;
                     break;
                 }
@@ -142,8 +142,8 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
 
 #else
 
-uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
-                                      uint32_t index, int reg)
+uint32_t kvm_x86_get_supported_cpuid(uint32_t function, uint32_t index,
+                                     int reg)
 {
     return -1U;
 }
@@ -175,7 +175,7 @@ static int get_para_features(CPUState *env)
     int i, features = 0;
 
     for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) {
-        if (kvm_check_extension(env->kvm_state, para_features[i].cap)) {
+        if (kvm_check_extension(para_features[i].cap)) {
             features |= (1 << para_features[i].feature);
         }
     }
@@ -184,15 +184,14 @@ static int get_para_features(CPUState *env)
 #endif
 
 #ifdef KVM_CAP_MCE
-static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap,
-                                     int *max_banks)
+static int kvm_get_mce_cap_supported(uint64_t *mce_cap, int *max_banks)
 {
     int r;
 
-    r = kvm_check_extension(s, KVM_CAP_MCE);
+    r = kvm_check_extension(KVM_CAP_MCE);
     if (r > 0) {
         *max_banks = r;
-        return kvm_ioctl(s, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap);
+        return kvm_ioctl(KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap);
     }
     return -ENOSYS;
 }
@@ -325,18 +324,18 @@ int kvm_arch_init_vcpu(CPUState *env)
 
     env->mp_state = KVM_MP_STATE_RUNNABLE;
 
-    env->cpuid_features &= kvm_arch_get_supported_cpuid(env, 1, 0, R_EDX);
+    env->cpuid_features &= kvm_x86_get_supported_cpuid(1, 0, R_EDX);
 
     i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR;
-    env->cpuid_ext_features &= kvm_arch_get_supported_cpuid(env, 1, 0, R_ECX);
+    env->cpuid_ext_features &= kvm_x86_get_supported_cpuid(1, 0, R_ECX);
     env->cpuid_ext_features |= i;
 
-    env->cpuid_ext2_features &= kvm_arch_get_supported_cpuid(env, 0x80000001,
-                                                             0, R_EDX);
-    env->cpuid_ext3_features &= kvm_arch_get_supported_cpuid(env, 0x80000001,
-                                                             0, R_ECX);
-    env->cpuid_svm_features  &= kvm_arch_get_supported_cpuid(env, 0x8000000A,
-                                                             0, R_EDX);
+    env->cpuid_ext2_features &= kvm_x86_get_supported_cpuid(0x80000001,
+                                                            0, R_EDX);
+    env->cpuid_ext3_features &= kvm_x86_get_supported_cpuid(0x80000001,
+                                                            0, R_ECX);
+    env->cpuid_svm_features  &= kvm_x86_get_supported_cpuid(0x8000000A,
+                                                            0, R_EDX);
 
 
     cpuid_i = 0;
@@ -425,11 +424,11 @@ int kvm_arch_init_vcpu(CPUState *env)
 #ifdef KVM_CAP_MCE
     if (((env->cpuid_version >> 8)&0xF) >= 6
         && (env->cpuid_features&(CPUID_MCE|CPUID_MCA)) == (CPUID_MCE|CPUID_MCA)
-        && kvm_check_extension(env->kvm_state, KVM_CAP_MCE) > 0) {
+        && kvm_check_extension(KVM_CAP_MCE) > 0) {
         uint64_t mcg_cap;
         int banks;
 
-        if (kvm_get_mce_cap_supported(env->kvm_state, &mcg_cap, &banks)) {
+        if (kvm_get_mce_cap_supported(&mcg_cap, &banks)) {
             perror("kvm_get_mce_cap_supported FAILED");
         } else {
             if (banks > MCE_BANKS_DEF)
@@ -465,7 +464,7 @@ void kvm_arch_reset_vcpu(CPUState *env)
     }
 }
 
-static int kvm_get_supported_msrs(KVMState *s)
+static int kvm_get_supported_msrs(void)
 {
     static int kvm_supported_msrs;
     int ret = 0;
@@ -479,7 +478,7 @@ static int kvm_get_supported_msrs(KVMState *s)
         /* Obtain MSR list from KVM.  These are the MSRs that we must
          * save/restore */
         msr_list.nmsrs = 0;
-        ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, &msr_list);
+        ret = kvm_ioctl(KVM_GET_MSR_INDEX_LIST, &msr_list);
         if (ret < 0 && ret != -E2BIG) {
             return ret;
         }
@@ -490,7 +489,7 @@ static int kvm_get_supported_msrs(KVMState *s)
                                               sizeof(msr_list.indices[0])));
 
         kvm_msr_list->nmsrs = msr_list.nmsrs;
-        ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
+        ret = kvm_ioctl(KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
         if (ret >= 0) {
             int i;
 
@@ -512,17 +511,17 @@ static int kvm_get_supported_msrs(KVMState *s)
     return ret;
 }
 
-static int kvm_init_identity_map_page(KVMState *s)
+static int kvm_init_identity_map_page(void)
 {
 #ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR
     int ret;
     uint64_t addr = 0xfffbc000;
 
-    if (!kvm_check_extension(s, KVM_CAP_SET_IDENTITY_MAP_ADDR)) {
+    if (!kvm_check_extension(KVM_CAP_SET_IDENTITY_MAP_ADDR)) {
         return 0;
     }
 
-    ret = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &addr);
+    ret = kvm_vm_ioctl(KVM_SET_IDENTITY_MAP_ADDR, &addr);
     if (ret < 0) {
         fprintf(stderr, "kvm_set_identity_map_addr: %s\n", strerror(ret));
         return ret;
@@ -531,12 +530,12 @@ static int kvm_init_identity_map_page(KVMState *s)
     return 0;
 }
 
-int kvm_arch_init(KVMState *s, int smp_cpus)
+int kvm_arch_init(int smp_cpus)
 {
     int ret;
     struct utsname utsname;
 
-    ret = kvm_get_supported_msrs(s);
+    ret = kvm_get_supported_msrs();
     if (ret < 0) {
         return ret;
     }
@@ -550,7 +549,7 @@ int kvm_arch_init(KVMState *s, int smp_cpus)
      * versions of KVM just assumed that it would be at the end of physical
      * memory but that doesn't work with more than 4GB of memory.  We simply
      * refuse to work with those older versions of KVM. */
-    ret = kvm_check_extension(s, KVM_CAP_SET_TSS_ADDR);
+    ret = kvm_check_extension(KVM_CAP_SET_TSS_ADDR);
     if (ret <= 0) {
         fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n");
         return ret;
@@ -567,12 +566,12 @@ int kvm_arch_init(KVMState *s, int smp_cpus)
         perror("e820_add_entry() table is full");
         exit(1);
     }
-    ret = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, 0xfffbd000);
+    ret = kvm_vm_ioctl(KVM_SET_TSS_ADDR, 0xfffbd000);
     if (ret < 0) {
         return ret;
     }
 
-    return kvm_init_identity_map_page(s);
+    return kvm_init_identity_map_page();
 }
 
 static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
@@ -1867,7 +1866,7 @@ int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
             || code == BUS_MCEERR_AO)) {
         vaddr = (void *)addr;
         if (qemu_ram_addr_from_host(vaddr, &ram_addr) ||
-            !kvm_physical_memory_addr_from_ram(env->kvm_state, ram_addr, &paddr)) {
+            !kvm_physical_memory_addr_from_ram(ram_addr, &paddr)) {
             fprintf(stderr, "Hardware memory error for memory used by "
                     "QEMU itself instead of guest system!\n");
             /* Hope we are lucky for AO MCE */
@@ -1916,7 +1915,7 @@ int kvm_on_sigbus(int code, void *addr)
         /* Hope we are lucky for AO MCE */
         vaddr = addr;
         if (qemu_ram_addr_from_host(vaddr, &ram_addr) ||
-            !kvm_physical_memory_addr_from_ram(first_cpu->kvm_state, ram_addr, &paddr)) {
+            !kvm_physical_memory_addr_from_ram(ram_addr, &paddr)) {
             fprintf(stderr, "Hardware memory error for memory used by "
                     "QEMU itself instead of guest system!: %p\n", addr);
             return 0;
diff --git a/target-i386/kvm_x86.h b/target-i386/kvm_x86.h
index 9d7b584..304d0cb 100644
--- a/target-i386/kvm_x86.h
+++ b/target-i386/kvm_x86.h
@@ -22,4 +22,7 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
                         uint64_t mcg_status, uint64_t addr, uint64_t misc,
                         int flag);
 
+uint32_t kvm_x86_get_supported_cpuid(uint32_t function, uint32_t index,
+                                     int reg);
+
 #endif
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 849b404..56d30cc 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -56,13 +56,13 @@ static void kvm_kick_env(void *env)
     qemu_cpu_kick(env);
 }
 
-int kvm_arch_init(KVMState *s, int smp_cpus)
+int kvm_arch_init(int smp_cpus)
 {
 #ifdef KVM_CAP_PPC_UNSET_IRQ
-    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
+    cap_interrupt_unset = kvm_check_extension(KVM_CAP_PPC_UNSET_IRQ);
 #endif
 #ifdef KVM_CAP_PPC_IRQ_LEVEL
-    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
+    cap_interrupt_level = kvm_check_extension(KVM_CAP_PPC_IRQ_LEVEL);
 #endif
 
     if (!cap_interrupt_level) {
@@ -164,7 +164,7 @@ int kvm_arch_get_registers(CPUState *env)
         env->gpr[i] = regs.gpr[i];
 
 #ifdef KVM_CAP_PPC_SEGSTATE
-    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_SEGSTATE)) {
+    if (kvm_check_extension(KVM_CAP_PPC_SEGSTATE)) {
         env->sdr1 = sregs.u.s.sdr1;
 
         /* Sync SLB */
@@ -371,8 +371,8 @@ int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len)
 #ifdef KVM_CAP_PPC_GET_PVINFO
     struct kvm_ppc_pvinfo pvinfo;
 
-    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
-        !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
+    if (kvm_check_extension(KVM_CAP_PPC_GET_PVINFO) &&
+        !kvm_vm_ioctl(KVM_PPC_GET_PVINFO, &pvinfo)) {
         memcpy(buf, pvinfo.hcall, buf_len);
 
         return 0;
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index adf4a9e..927a37e 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -70,7 +70,7 @@
 #define SCLP_CMDW_READ_SCP_INFO         0x00020001
 #define SCLP_CMDW_READ_SCP_INFO_FORCED  0x00120001
 
-int kvm_arch_init(KVMState *s, int smp_cpus)
+int kvm_arch_init(int smp_cpus)
 {
     return 0;
 }
@@ -186,10 +186,6 @@ static void kvm_s390_interrupt_internal(CPUState *env, int type, uint32_t parm,
     struct kvm_s390_interrupt kvmint;
     int r;
 
-    if (!env->kvm_state) {
-        return;
-    }
-
     env->halted = 0;
     env->exception_index = -1;
 
@@ -198,7 +194,7 @@ static void kvm_s390_interrupt_internal(CPUState *env, int type, uint32_t parm,
     kvmint.parm64 = parm64;
 
     if (vm) {
-        r = kvm_vm_ioctl(env->kvm_state, KVM_S390_INTERRUPT, &kvmint);
+        r = kvm_vm_ioctl(KVM_S390_INTERRUPT, &kvmint);
     } else {
         r = kvm_vcpu_ioctl(env, KVM_S390_INTERRUPT, &kvmint);
     }
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 5/5] kvm: x86: Introduce kvmclock device to save/restore its state
  2010-12-31 12:01 [PATCH 0/5] [uq/master] MSR refactorings, KVMState elimination, kvmclock device Jan Kiszka
                   ` (3 preceding siblings ...)
  2010-12-31 12:01 ` [PATCH 4/5] kvm: Eliminate KVMState arguments Jan Kiszka
@ 2010-12-31 12:01 ` Jan Kiszka
  4 siblings, 0 replies; 6+ messages in thread
From: Jan Kiszka @ 2010-12-31 12:01 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, qemu-devel, Jan Kiszka, Glauber Costa

From: Jan Kiszka <jan.kiszka@siemens.com>

If kvmclock is used, which implies the kernel supports it, register a
kvmclock device with the sysbus. Its main purpose is to save and restore
the kernel state on migration, but this will also allow to visualize it
one day.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
CC: Glauber Costa <glommer@redhat.com>
---
 target-i386/kvm.c |   71 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 70 insertions(+), 1 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index a4ccc7f..32310fa 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -29,6 +29,7 @@
 #include "hw/apic.h"
 #include "ioport.h"
 #include "kvm_x86.h"
+#include "hw/sysbus.h"
 
 #ifdef CONFIG_KVM_PARA
 #include <linux/kvm_para.h>
@@ -309,6 +310,64 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
 #endif
 }
 
+#ifdef KVM_CAP_ADJUST_CLOCK
+typedef struct KVMClockState {
+    SysBusDevice busdev;
+    uint64_t clock;
+    struct kvm_clock_data data;
+} KVMClockState;
+
+static void kvmclock_pre_save(void *opaque)
+{
+    KVMClockState *s = opaque;
+    struct kvm_clock_data data;
+    int ret;
+
+    ret = kvm_vm_ioctl(KVM_GET_CLOCK, &data);
+    if (ret < 0) {
+        fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
+        data.clock = 0;
+    }
+    s->clock = data.clock;
+}
+
+static int kvmclock_post_load(void *opaque, int version_id)
+{
+    KVMClockState *s = opaque;
+    struct kvm_clock_data data;
+
+    data.clock = s->clock;
+    data.flags = 0;
+    return kvm_vm_ioctl(KVM_SET_CLOCK, &data);
+}
+
+static int kvmclock_init(SysBusDevice *dev)
+{
+    return 0;
+}
+
+static const VMStateDescription kvmclock_vmsd= {
+    .name = "kvmclock",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .pre_save = kvmclock_pre_save,
+    .post_load = kvmclock_post_load,
+    .fields = (VMStateField []) {
+        VMSTATE_UINT64(clock, KVMClockState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static SysBusDeviceInfo kvmclock_info = {
+    .qdev.name = "kvmclock",
+    .qdev.size = sizeof(KVMClockState),
+    .qdev.vmsd = &kvmclock_vmsd,
+    .qdev.no_user = 1,
+    .init = kvmclock_init,
+};
+#endif /* KVM_CAP_ADJUST_CLOCK */
+
 int kvm_arch_init_vcpu(CPUState *env)
 {
     struct {
@@ -337,7 +396,6 @@ int kvm_arch_init_vcpu(CPUState *env)
     env->cpuid_svm_features  &= kvm_x86_get_supported_cpuid(0x8000000A,
                                                             0, R_EDX);
 
-
     cpuid_i = 0;
 
 #ifdef CONFIG_KVM_PARA
@@ -444,6 +502,13 @@ int kvm_arch_init_vcpu(CPUState *env)
     }
 #endif
 
+#ifdef KVM_CAP_ADJUST_CLOCK
+    if (cpu_is_bsp(env) &&
+        (env->cpuid_kvm_features & (1ULL << KVM_FEATURE_CLOCKSOURCE))) {
+        sysbus_create_simple("kvmclock", -1, NULL);
+    }
+#endif
+
     return kvm_vcpu_ioctl(env, KVM_SET_CPUID2, &cpuid_data);
 }
 
@@ -535,6 +600,10 @@ int kvm_arch_init(int smp_cpus)
     int ret;
     struct utsname utsname;
 
+#ifdef KVM_CAP_ADJUST_CLOCK
+    sysbus_register_withprop(&kvmclock_info);
+#endif
+
     ret = kvm_get_supported_msrs();
     if (ret < 0) {
         return ret;
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2010-12-31 12:02 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-12-31 12:01 [PATCH 0/5] [uq/master] MSR refactorings, KVMState elimination, kvmclock device Jan Kiszka
2010-12-31 12:01 ` [PATCH 1/5] kvm: x86: Refactor msr_star/hsave_pa setup and checks Jan Kiszka
2010-12-31 12:01 ` [PATCH 2/5] kvm: x86: Reset paravirtual MSRs Jan Kiszka
2010-12-31 12:01 ` [PATCH 3/5] kvm: x86: Drop MCE MSRs write back restrictions Jan Kiszka
2010-12-31 12:01 ` [PATCH 4/5] kvm: Eliminate KVMState arguments Jan Kiszka
2010-12-31 12:01 ` [PATCH 5/5] kvm: x86: Introduce kvmclock device to save/restore its state Jan Kiszka

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.