* [Qemu-devel] [PATCH 0/5] [uq/master] MSR refactorings, KVMState elimination, kvmclock device
From: Jan Kiszka @ 2010-12-31 12:01 UTC
To: Avi Kivity, Marcelo Tosatti; +Cc: qemu-devel, kvm
More refactorings of the upstream KVM code to allow further unification
with qemu-kvm. See the patches for details.
Jan Kiszka (5):
kvm: x86: Refactor msr_star/hsave_pa setup and checks
kvm: x86: Reset paravirtual MSRs
kvm: x86: Drop MCE MSRs write back restrictions
kvm: Eliminate KVMState arguments
kvm: x86: Introduce kvmclock device to save/restore its state
cpu-defs.h | 2 -
kvm-all.c | 233 +++++++++++++++++++++----------------------------
kvm-stub.c | 2 +-
kvm.h | 15 +--
target-i386/cpuid.c | 9 +-
target-i386/kvm.c | 205 ++++++++++++++++++++++++++++---------------
target-i386/kvm_x86.h | 3 +
target-ppc/kvm.c | 12 ++--
target-s390x/kvm.c | 8 +--
9 files changed, 258 insertions(+), 231 deletions(-)
* [Qemu-devel] [PATCH 1/5] kvm: x86: Refactor msr_star/hsave_pa setup and checks
From: Jan Kiszka @ 2010-12-31 12:01 UTC
To: Avi Kivity, Marcelo Tosatti; +Cc: Jan Kiszka, qemu-devel, kvm
From: Jan Kiszka <jan.kiszka@siemens.com>
Simplify kvm_has_msr_star/hsave_pa to booleans and push their one-time
initialization into kvm_arch_init. Also handle potential errors of that
setup procedure.
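After the refactoring, the probing routine looks roughly like this
(sketch consolidated from the hunks below; the 1K safety allocation for
old kernel modules is elided):

    static bool has_msr_star;
    static bool has_msr_hsave_pa;

    static int kvm_get_supported_msrs(KVMState *s)
    {
        struct kvm_msr_list msr_list, *kvm_msr_list;
        int ret, i;

        /* First pass only asks for the list size; -E2BIG is expected. */
        msr_list.nmsrs = 0;
        ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, &msr_list);
        if (ret < 0 && ret != -E2BIG) {
            return ret;   /* propagate the error instead of swallowing it */
        }

        kvm_msr_list = qemu_mallocz(sizeof(msr_list) +
                                    msr_list.nmsrs *
                                    sizeof(msr_list.indices[0]));
        kvm_msr_list->nmsrs = msr_list.nmsrs;
        ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
        if (ret >= 0) {
            for (i = 0; i < kvm_msr_list->nmsrs; i++) {
                if (kvm_msr_list->indices[i] == MSR_STAR) {
                    has_msr_star = true;
                } else if (kvm_msr_list->indices[i] == MSR_VM_HSAVE_PA) {
                    has_msr_hsave_pa = true;
                }
            }
        }
        free(kvm_msr_list);
        return ret;
    }

kvm_arch_init() calls this once and fails initialization on error, so
the hot MSR read/write paths can simply test the plain booleans.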
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
target-i386/kvm.c | 47 +++++++++++++++++++----------------------------
1 files changed, 19 insertions(+), 28 deletions(-)
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 30aa51c..000f3b0 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -54,6 +54,8 @@
#define BUS_MCEERR_AO 5
#endif
+static bool has_msr_star;
+static bool has_msr_hsave_pa;
static int lm_capable_kernel;
#ifdef KVM_CAP_EXT_CPUID
@@ -460,13 +462,10 @@ void kvm_arch_reset_vcpu(CPUState *env)
}
}
-int has_msr_star;
-int has_msr_hsave_pa;
-
-static void kvm_supported_msrs(CPUState *env)
+static int kvm_get_supported_msrs(KVMState *s)
{
static int kvm_supported_msrs;
- int ret;
+ int ret = 0;
/* first time */
if (kvm_supported_msrs == 0) {
@@ -477,9 +476,9 @@ static void kvm_supported_msrs(CPUState *env)
/* Obtain MSR list from KVM. These are the MSRs that we must
* save/restore */
msr_list.nmsrs = 0;
- ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list);
+ ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, &msr_list);
if (ret < 0 && ret != -E2BIG) {
- return;
+ return ret;
}
/* Old kernel modules had a bug and could write beyond the provided
memory. Allocate at least a safe amount of 1K. */
@@ -488,17 +487,17 @@ static void kvm_supported_msrs(CPUState *env)
sizeof(msr_list.indices[0])));
kvm_msr_list->nmsrs = msr_list.nmsrs;
- ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
+ ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
if (ret >= 0) {
int i;
for (i = 0; i < kvm_msr_list->nmsrs; i++) {
if (kvm_msr_list->indices[i] == MSR_STAR) {
- has_msr_star = 1;
+ has_msr_star = true;
continue;
}
if (kvm_msr_list->indices[i] == MSR_VM_HSAVE_PA) {
- has_msr_hsave_pa = 1;
+ has_msr_hsave_pa = true;
continue;
}
}
@@ -507,19 +506,7 @@ static void kvm_supported_msrs(CPUState *env)
free(kvm_msr_list);
}
- return;
-}
-
-static int kvm_has_msr_hsave_pa(CPUState *env)
-{
- kvm_supported_msrs(env);
- return has_msr_hsave_pa;
-}
-
-static int kvm_has_msr_star(CPUState *env)
-{
- kvm_supported_msrs(env);
- return has_msr_star;
+ return ret;
}
static int kvm_init_identity_map_page(KVMState *s)
@@ -544,9 +531,13 @@ static int kvm_init_identity_map_page(KVMState *s)
int kvm_arch_init(KVMState *s, int smp_cpus)
{
int ret;
-
struct utsname utsname;
+ ret = kvm_get_supported_msrs(s);
+ if (ret < 0) {
+ return ret;
+ }
+
uname(&utsname);
lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0;
@@ -831,10 +822,10 @@ static int kvm_put_msrs(CPUState *env, int level)
kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
- if (kvm_has_msr_star(env)) {
+ if (has_msr_star) {
kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
}
- if (kvm_has_msr_hsave_pa(env)) {
+ if (has_msr_hsave_pa) {
kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave);
}
#ifdef TARGET_X86_64
@@ -1077,10 +1068,10 @@ static int kvm_get_msrs(CPUState *env)
msrs[n++].index = MSR_IA32_SYSENTER_CS;
msrs[n++].index = MSR_IA32_SYSENTER_ESP;
msrs[n++].index = MSR_IA32_SYSENTER_EIP;
- if (kvm_has_msr_star(env)) {
+ if (has_msr_star) {
msrs[n++].index = MSR_STAR;
}
- if (kvm_has_msr_hsave_pa(env)) {
+ if (has_msr_hsave_pa) {
msrs[n++].index = MSR_VM_HSAVE_PA;
}
msrs[n++].index = MSR_IA32_TSC;
--
1.7.1
* [Qemu-devel] [PATCH 2/5] kvm: x86: Reset paravirtual MSRs
From: Jan Kiszka @ 2010-12-31 12:01 UTC
To: Avi Kivity, Marcelo Tosatti; +Cc: Jan Kiszka, Glauber Costa, qemu-devel, kvm
From: Jan Kiszka <jan.kiszka@siemens.com>
Make sure to clear MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, and
MSR_KVM_ASYNC_PF_EN so that a freshly booted guest cannot be disturbed
by old values.
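The change has two halves (illustrative sketch; it relies on QEMU's
level ordering KVM_PUT_RUNTIME_STATE < KVM_PUT_RESET_STATE <
KVM_PUT_FULL_STATE):

    /* kvm_arch_reset_vcpu(): forget stale guest values */
    env->system_time_msr = 0;
    env->wall_clock_msr = 0;
    env->async_pf_en_msr = 0;

    /* kvm_put_msrs(): write these MSRs back only on reset or full state
     * updates, as setting them has side effects on the guest */
    if (level >= KVM_PUT_RESET_STATE) {
        kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME,
                          env->system_time_msr);
        kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK,
                          env->wall_clock_msr);
    }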
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
CC: Glauber Costa <glommer@redhat.com>
---
target-i386/kvm.c | 10 ++++++++++
1 files changed, 10 insertions(+), 0 deletions(-)
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 000f3b0..d05a2b9 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -454,6 +454,9 @@ void kvm_arch_reset_vcpu(CPUState *env)
env->interrupt_injected = -1;
env->nmi_injected = 0;
env->nmi_pending = 0;
+ env->system_time_msr = 0;
+ env->wall_clock_msr = 0;
+ env->async_pf_en_msr = 0;
if (kvm_irqchip_in_kernel()) {
env->mp_state = cpu_is_bsp(env) ? KVM_MP_STATE_RUNNABLE :
KVM_MP_STATE_UNINITIALIZED;
@@ -846,6 +849,13 @@ static int kvm_put_msrs(CPUState *env, int level)
if (smp_cpus == 1 || env->tsc != 0) {
kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
}
+ }
+ /*
+ * The following paravirtual MSRs have side effects on the guest or are
+ * too heavy for normal writeback. Limit them to reset or full state
+ * updates.
+ */
+ if (level >= KVM_PUT_RESET_STATE) {
kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME,
env->system_time_msr);
kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
--
1.7.1
* [Qemu-devel] [PATCH 3/5] kvm: x86: Drop MCE MSRs write back restrictions
From: Jan Kiszka @ 2010-12-31 12:01 UTC
To: Avi Kivity, Marcelo Tosatti; +Cc: Jan Kiszka, qemu-devel, kvm, Huang Ying
From: Jan Kiszka <jan.kiszka@siemens.com>
There is no need to restrict writing back MCE MSRs to reset or full
state updates, as setting their values has no side effects.
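For reference, the loop bound in the resulting code follows the MCA
register layout: the low byte of MCG_CAP holds the bank count, and each
bank exposes four consecutive MSRs starting at MSR_MC0_CTL:

    int banks = env->mcg_cap & 0xff;  /* MCG_CAP[7:0] = number of banks */

    /* each bank i contributes MCi_CTL, MCi_STATUS, MCi_ADDR, MCi_MISC */
    for (i = 0; i < banks * 4; i++) {
        kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]);
    }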
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
CC: Huang Ying <ying.huang@intel.com>
---
target-i386/kvm.c | 12 ++++--------
1 files changed, 4 insertions(+), 8 deletions(-)
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index d05a2b9..96c350e 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -867,14 +867,10 @@ static int kvm_put_msrs(CPUState *env, int level)
if (env->mcg_cap) {
int i;
- if (level == KVM_PUT_RESET_STATE) {
- kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
- } else if (level == KVM_PUT_FULL_STATE) {
- kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
- kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl);
- for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
- kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]);
- }
+ kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
+ kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl);
+ for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
+ kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]);
}
}
#endif
--
1.7.1
* [Qemu-devel] [PATCH 4/5] kvm: Eliminate KVMState arguments
From: Jan Kiszka @ 2010-12-31 12:01 UTC
To: Avi Kivity, Marcelo Tosatti; +Cc: Jan Kiszka, qemu-devel, kvm
From: Jan Kiszka <jan.kiszka@siemens.com>
QEMU supports only one VM, so there is only one kvm_state per process,
and we gain nothing by passing a reference to it around. Eliminate any
need to refer to it outside of kvm-all.c.
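In miniature, the conversion replaces the heap-allocated object with a
file-scope instance so that the ioctl wrappers can find it themselves
(sketch, fields abbreviated):

    static struct KVMState {
        int fd;     /* /dev/kvm */
        int vmfd;   /* per-VM file descriptor */
        /* capability flags, memory slots, ... */
    } kvm_state;

    int kvm_vm_ioctl(int type, ...)
    {
        va_list ap;
        void *arg;
        int ret;

        va_start(ap, type);
        arg = va_arg(ap, void *);
        va_end(ap);

        ret = ioctl(kvm_state.vmfd, type, arg);
        return ret == -1 ? -errno : ret;
    }

Call sites shrink from kvm_vm_ioctl(env->kvm_state, ...) to
kvm_vm_ioctl(...), and CPUState no longer needs a kvm_state
back-pointer.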
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
cpu-defs.h | 2 -
kvm-all.c | 233 +++++++++++++++++++++----------------------------
kvm-stub.c | 2 +-
kvm.h | 15 +--
target-i386/cpuid.c | 9 +-
target-i386/kvm.c | 73 ++++++++--------
target-i386/kvm_x86.h | 3 +
target-ppc/kvm.c | 12 ++--
target-s390x/kvm.c | 8 +--
9 files changed, 159 insertions(+), 198 deletions(-)
diff --git a/cpu-defs.h b/cpu-defs.h
index eaed43e..ada6629 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -131,7 +131,6 @@ typedef struct icount_decr_u16 {
#endif
struct kvm_run;
-struct KVMState;
struct qemu_work_item;
typedef struct CPUBreakpoint {
@@ -208,7 +207,6 @@ typedef struct CPUWatchpoint {
struct QemuCond *halt_cond; \
struct qemu_work_item *queued_work_first, *queued_work_last; \
const char *cpu_model_str; \
- struct KVMState *kvm_state; \
struct kvm_run *kvm_run; \
int kvm_fd; \
int kvm_vcpu_dirty;
diff --git a/kvm-all.c b/kvm-all.c
index 237c762..bd8f88c 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -52,8 +52,7 @@ typedef struct KVMSlot
typedef struct kvm_dirty_log KVMDirtyLog;
-struct KVMState
-{
+static struct KVMState {
KVMSlot slots[32];
int fd;
int vmfd;
@@ -72,21 +71,19 @@ struct KVMState
int irqchip_in_kernel;
int pit_in_kernel;
int xsave, xcrs;
-};
-
-static KVMState *kvm_state;
+} kvm_state;
-static KVMSlot *kvm_alloc_slot(KVMState *s)
+static KVMSlot *kvm_alloc_slot(void)
{
int i;
- for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
+ for (i = 0; i < ARRAY_SIZE(kvm_state.slots); i++) {
/* KVM private memory slots */
if (i >= 8 && i < 12) {
continue;
}
- if (s->slots[i].memory_size == 0) {
- return &s->slots[i];
+ if (kvm_state.slots[i].memory_size == 0) {
+ return &kvm_state.slots[i];
}
}
@@ -94,14 +91,13 @@ static KVMSlot *kvm_alloc_slot(KVMState *s)
abort();
}
-static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
- target_phys_addr_t start_addr,
+static KVMSlot *kvm_lookup_matching_slot(target_phys_addr_t start_addr,
target_phys_addr_t end_addr)
{
int i;
- for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
- KVMSlot *mem = &s->slots[i];
+ for (i = 0; i < ARRAY_SIZE(kvm_state.slots); i++) {
+ KVMSlot *mem = &kvm_state.slots[i];
if (start_addr == mem->start_addr &&
end_addr == mem->start_addr + mem->memory_size) {
@@ -115,15 +111,14 @@ static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
/*
* Find overlapping slot with lowest start address
*/
-static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
- target_phys_addr_t start_addr,
+static KVMSlot *kvm_lookup_overlapping_slot(target_phys_addr_t start_addr,
target_phys_addr_t end_addr)
{
KVMSlot *found = NULL;
int i;
- for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
- KVMSlot *mem = &s->slots[i];
+ for (i = 0; i < ARRAY_SIZE(kvm_state.slots); i++) {
+ KVMSlot *mem = &kvm_state.slots[i];
if (mem->memory_size == 0 ||
(found && found->start_addr < mem->start_addr)) {
@@ -139,13 +134,13 @@ static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
return found;
}
-int kvm_physical_memory_addr_from_ram(KVMState *s, ram_addr_t ram_addr,
+int kvm_physical_memory_addr_from_ram(ram_addr_t ram_addr,
target_phys_addr_t *phys_addr)
{
int i;
- for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
- KVMSlot *mem = &s->slots[i];
+ for (i = 0; i < ARRAY_SIZE(kvm_state.slots); i++) {
+ KVMSlot *mem = &kvm_state.slots[i];
if (ram_addr >= mem->phys_offset &&
ram_addr < mem->phys_offset + mem->memory_size) {
@@ -157,7 +152,7 @@ int kvm_physical_memory_addr_from_ram(KVMState *s, ram_addr_t ram_addr,
return 0;
}
-static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
+static int kvm_set_user_memory_region(KVMSlot *slot)
{
struct kvm_userspace_memory_region mem;
@@ -166,10 +161,10 @@ static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
mem.memory_size = slot->memory_size;
mem.userspace_addr = (unsigned long)qemu_safe_ram_ptr(slot->phys_offset);
mem.flags = slot->flags;
- if (s->migration_log) {
+ if (kvm_state.migration_log) {
mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
}
- return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+ return kvm_vm_ioctl(KVM_SET_USER_MEMORY_REGION, &mem);
}
static void kvm_reset_vcpu(void *opaque)
@@ -181,33 +176,31 @@ static void kvm_reset_vcpu(void *opaque)
int kvm_irqchip_in_kernel(void)
{
- return kvm_state->irqchip_in_kernel;
+ return kvm_state.irqchip_in_kernel;
}
int kvm_pit_in_kernel(void)
{
- return kvm_state->pit_in_kernel;
+ return kvm_state.pit_in_kernel;
}
int kvm_init_vcpu(CPUState *env)
{
- KVMState *s = kvm_state;
long mmap_size;
int ret;
DPRINTF("kvm_init_vcpu\n");
- ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
+ ret = kvm_vm_ioctl(KVM_CREATE_VCPU, env->cpu_index);
if (ret < 0) {
DPRINTF("kvm_create_vcpu failed\n");
goto err;
}
env->kvm_fd = ret;
- env->kvm_state = s;
- mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
+ mmap_size = kvm_ioctl(KVM_GET_VCPU_MMAP_SIZE, 0);
if (mmap_size < 0) {
DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
goto err;
@@ -222,9 +215,9 @@ int kvm_init_vcpu(CPUState *env)
}
#ifdef KVM_CAP_COALESCED_MMIO
- if (s->coalesced_mmio && !s->coalesced_mmio_ring) {
- s->coalesced_mmio_ring =
- (void *)env->kvm_run + s->coalesced_mmio * PAGE_SIZE;
+ if (kvm_state.coalesced_mmio && !kvm_state.coalesced_mmio_ring) {
+ kvm_state.coalesced_mmio_ring =
+ (void *)env->kvm_run + kvm_state.coalesced_mmio * PAGE_SIZE;
}
#endif
@@ -243,8 +236,7 @@ err:
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
ram_addr_t size, int flags, int mask)
{
- KVMState *s = kvm_state;
- KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
+ KVMSlot *mem = kvm_lookup_matching_slot(phys_addr, phys_addr + size);
int old_flags;
if (mem == NULL) {
@@ -260,14 +252,14 @@ static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
mem->flags = flags;
/* If nothing changed effectively, no need to issue ioctl */
- if (s->migration_log) {
+ if (kvm_state.migration_log) {
flags |= KVM_MEM_LOG_DIRTY_PAGES;
}
if (flags == old_flags) {
return 0;
}
- return kvm_set_user_memory_region(s, mem);
+ return kvm_set_user_memory_region(mem);
}
int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
@@ -284,14 +276,13 @@ int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
static int kvm_set_migration_log(int enable)
{
- KVMState *s = kvm_state;
KVMSlot *mem;
int i, err;
- s->migration_log = enable;
+ kvm_state.migration_log = enable;
- for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
- mem = &s->slots[i];
+ for (i = 0; i < ARRAY_SIZE(kvm_state.slots); i++) {
+ mem = &kvm_state.slots[i];
if (!mem->memory_size) {
continue;
@@ -299,7 +290,7 @@ static int kvm_set_migration_log(int enable)
if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
continue;
}
- err = kvm_set_user_memory_region(s, mem);
+ err = kvm_set_user_memory_region(mem);
if (err) {
return err;
}
@@ -353,7 +344,6 @@ static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
target_phys_addr_t end_addr)
{
- KVMState *s = kvm_state;
unsigned long size, allocated_size = 0;
KVMDirtyLog d;
KVMSlot *mem;
@@ -361,7 +351,7 @@ static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
d.dirty_bitmap = NULL;
while (start_addr < end_addr) {
- mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
+ mem = kvm_lookup_overlapping_slot(start_addr, end_addr);
if (mem == NULL) {
break;
}
@@ -377,7 +367,7 @@ static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
d.slot = mem->slot;
- if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
+ if (kvm_vm_ioctl(KVM_GET_DIRTY_LOG, &d) == -1) {
DPRINTF("ioctl failed %d\n", errno);
ret = -1;
break;
@@ -395,16 +385,15 @@ static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
int ret = -ENOSYS;
-#ifdef KVM_CAP_COALESCED_MMIO
- KVMState *s = kvm_state;
- if (s->coalesced_mmio) {
+#ifdef KVM_CAP_COALESCED_MMIO
+ if (kvm_state.coalesced_mmio) {
struct kvm_coalesced_mmio_zone zone;
zone.addr = start;
zone.size = size;
- ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
+ ret = kvm_vm_ioctl(KVM_REGISTER_COALESCED_MMIO, &zone);
}
#endif
@@ -414,27 +403,26 @@ int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
int ret = -ENOSYS;
-#ifdef KVM_CAP_COALESCED_MMIO
- KVMState *s = kvm_state;
- if (s->coalesced_mmio) {
+#ifdef KVM_CAP_COALESCED_MMIO
+ if (kvm_state.coalesced_mmio) {
struct kvm_coalesced_mmio_zone zone;
zone.addr = start;
zone.size = size;
- ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
+ ret = kvm_vm_ioctl(KVM_UNREGISTER_COALESCED_MMIO, &zone);
}
#endif
return ret;
}
-int kvm_check_extension(KVMState *s, unsigned int extension)
+int kvm_check_extension(unsigned int extension)
{
int ret;
- ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
+ ret = kvm_ioctl(KVM_CHECK_EXTENSION, extension);
if (ret < 0) {
ret = 0;
}
@@ -445,7 +433,6 @@ int kvm_check_extension(KVMState *s, unsigned int extension)
static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
ram_addr_t phys_offset)
{
- KVMState *s = kvm_state;
ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
KVMSlot *mem, old;
int err;
@@ -459,7 +446,7 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
phys_offset &= ~IO_MEM_ROM;
while (1) {
- mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
+ mem = kvm_lookup_overlapping_slot(start_addr, start_addr + size);
if (!mem) {
break;
}
@@ -476,7 +463,7 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
/* unregister the overlapping slot */
mem->memory_size = 0;
- err = kvm_set_user_memory_region(s, mem);
+ err = kvm_set_user_memory_region(mem);
if (err) {
fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
__func__, strerror(-err));
@@ -491,16 +478,16 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
* address as the first existing one. If not or if some overlapping
* slot comes around later, we will fail (not seen in practice so far)
* - and actually require a recent KVM version. */
- if (s->broken_set_mem_region &&
+ if (kvm_state.broken_set_mem_region &&
old.start_addr == start_addr && old.memory_size < size &&
flags < IO_MEM_UNASSIGNED) {
- mem = kvm_alloc_slot(s);
+ mem = kvm_alloc_slot();
mem->memory_size = old.memory_size;
mem->start_addr = old.start_addr;
mem->phys_offset = old.phys_offset;
mem->flags = 0;
- err = kvm_set_user_memory_region(s, mem);
+ err = kvm_set_user_memory_region(mem);
if (err) {
fprintf(stderr, "%s: error updating slot: %s\n", __func__,
strerror(-err));
@@ -515,13 +502,13 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
/* register prefix slot */
if (old.start_addr < start_addr) {
- mem = kvm_alloc_slot(s);
+ mem = kvm_alloc_slot();
mem->memory_size = start_addr - old.start_addr;
mem->start_addr = old.start_addr;
mem->phys_offset = old.phys_offset;
mem->flags = 0;
- err = kvm_set_user_memory_region(s, mem);
+ err = kvm_set_user_memory_region(mem);
if (err) {
fprintf(stderr, "%s: error registering prefix slot: %s\n",
__func__, strerror(-err));
@@ -533,14 +520,14 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
if (old.start_addr + old.memory_size > start_addr + size) {
ram_addr_t size_delta;
- mem = kvm_alloc_slot(s);
+ mem = kvm_alloc_slot();
mem->start_addr = start_addr + size;
size_delta = mem->start_addr - old.start_addr;
mem->memory_size = old.memory_size - size_delta;
mem->phys_offset = old.phys_offset + size_delta;
mem->flags = 0;
- err = kvm_set_user_memory_region(s, mem);
+ err = kvm_set_user_memory_region(mem);
if (err) {
fprintf(stderr, "%s: error registering suffix slot: %s\n",
__func__, strerror(-err));
@@ -557,13 +544,13 @@ static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
if (flags >= IO_MEM_UNASSIGNED) {
return;
}
- mem = kvm_alloc_slot(s);
+ mem = kvm_alloc_slot();
mem->memory_size = size;
mem->start_addr = start_addr;
mem->phys_offset = phys_offset;
mem->flags = 0;
- err = kvm_set_user_memory_region(s, mem);
+ err = kvm_set_user_memory_region(mem);
if (err) {
fprintf(stderr, "%s: error registering slot: %s\n", __func__,
strerror(-err));
@@ -602,27 +589,24 @@ int kvm_init(int smp_cpus)
static const char upgrade_note[] =
"Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
"(see http://sourceforge.net/projects/kvm).\n";
- KVMState *s;
int ret;
int i;
- s = qemu_mallocz(sizeof(KVMState));
-
#ifdef KVM_CAP_SET_GUEST_DEBUG
- QTAILQ_INIT(&s->kvm_sw_breakpoints);
+ QTAILQ_INIT(&kvm_state.kvm_sw_breakpoints);
#endif
- for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
- s->slots[i].slot = i;
+ for (i = 0; i < ARRAY_SIZE(kvm_state.slots); i++) {
+ kvm_state.slots[i].slot = i;
}
- s->vmfd = -1;
- s->fd = qemu_open("/dev/kvm", O_RDWR);
- if (s->fd == -1) {
+ kvm_state.vmfd = -1;
+ kvm_state.fd = qemu_open("/dev/kvm", O_RDWR);
+ if (kvm_state.fd == -1) {
fprintf(stderr, "Could not access KVM kernel module: %m\n");
ret = -errno;
goto err;
}
- ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
+ ret = kvm_ioctl(KVM_GET_API_VERSION, 0);
if (ret < KVM_API_VERSION) {
if (ret > 0) {
ret = -EINVAL;
@@ -637,8 +621,8 @@ int kvm_init(int smp_cpus)
goto err;
}
- s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
- if (s->vmfd < 0) {
+ kvm_state.vmfd = kvm_ioctl(KVM_CREATE_VM, 0);
+ if (kvm_state.vmfd < 0) {
#ifdef TARGET_S390X
fprintf(stderr, "Please add the 'switch_amode' kernel parameter to "
"your host kernel command line\n");
@@ -651,7 +635,7 @@ int kvm_init(int smp_cpus)
* just use a user allocated buffer so we can use regular pages
* unmodified. Make sure we have a sufficiently modern version of KVM.
*/
- if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
+ if (!kvm_check_extension(KVM_CAP_USER_MEMORY)) {
ret = -EINVAL;
fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s",
upgrade_note);
@@ -661,7 +645,7 @@ int kvm_init(int smp_cpus)
/* There was a nasty bug in < kvm-80 that prevents memory slots from being
* destroyed properly. Since we rely on this capability, refuse to work
* with any kernel without this capability. */
- if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
+ if (!kvm_check_extension(KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
ret = -EINVAL;
fprintf(stderr,
@@ -670,66 +654,56 @@ int kvm_init(int smp_cpus)
goto err;
}
- s->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
- s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
- s->coalesced_mmio_ring = NULL;
+ kvm_state.coalesced_mmio = kvm_check_extension(KVM_CAP_COALESCED_MMIO);
+ kvm_state.coalesced_mmio_ring = NULL;
#endif
- s->broken_set_mem_region = 1;
+ kvm_state.broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
- ret = kvm_check_extension(s, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
+ ret = kvm_check_extension(KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
if (ret > 0) {
- s->broken_set_mem_region = 0;
+ kvm_state.broken_set_mem_region = 0;
}
#endif
- s->vcpu_events = 0;
#ifdef KVM_CAP_VCPU_EVENTS
- s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
+ kvm_state.vcpu_events = kvm_check_extension(KVM_CAP_VCPU_EVENTS);
#endif
- s->robust_singlestep = 0;
#ifdef KVM_CAP_X86_ROBUST_SINGLESTEP
- s->robust_singlestep =
- kvm_check_extension(s, KVM_CAP_X86_ROBUST_SINGLESTEP);
+ kvm_state.robust_singlestep =
+ kvm_check_extension(KVM_CAP_X86_ROBUST_SINGLESTEP);
#endif
- s->debugregs = 0;
#ifdef KVM_CAP_DEBUGREGS
- s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
+ kvm_state.debugregs = kvm_check_extension(KVM_CAP_DEBUGREGS);
#endif
- s->xsave = 0;
#ifdef KVM_CAP_XSAVE
- s->xsave = kvm_check_extension(s, KVM_CAP_XSAVE);
+ kvm_state.xsave = kvm_check_extension(KVM_CAP_XSAVE);
#endif
- s->xcrs = 0;
#ifdef KVM_CAP_XCRS
- s->xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
+ kvm_state.xcrs = kvm_check_extension(KVM_CAP_XCRS);
#endif
- ret = kvm_arch_init(s, smp_cpus);
+ ret = kvm_arch_init(smp_cpus);
if (ret < 0) {
goto err;
}
- kvm_state = s;
cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);
return 0;
err:
- if (s) {
- if (s->vmfd != -1) {
- close(s->vmfd);
- }
- if (s->fd != -1) {
- close(s->fd);
- }
+ if (kvm_state.vmfd != -1) {
+ close(kvm_state.vmfd);
+ }
+ if (kvm_state.fd != -1) {
+ close(kvm_state.fd);
}
- qemu_free(s);
return ret;
}
@@ -777,7 +751,7 @@ static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
static int kvm_handle_internal_error(CPUState *env, struct kvm_run *run)
{
fprintf(stderr, "KVM internal error.");
- if (kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA)) {
+ if (kvm_check_extension(KVM_CAP_INTERNAL_ERROR_DATA)) {
int i;
fprintf(stderr, " Suberror: %d\n", run->internal.suberror);
@@ -805,9 +779,8 @@ static int kvm_handle_internal_error(CPUState *env, struct kvm_run *run)
void kvm_flush_coalesced_mmio_buffer(void)
{
#ifdef KVM_CAP_COALESCED_MMIO
- KVMState *s = kvm_state;
- if (s->coalesced_mmio_ring) {
- struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
+ if (kvm_state.coalesced_mmio_ring) {
+ struct kvm_coalesced_mmio_ring *ring = kvm_state.coalesced_mmio_ring;
while (ring->first != ring->last) {
struct kvm_coalesced_mmio *ent;
@@ -962,7 +935,7 @@ void kvm_cpu_exec(CPUState *env)
}
}
-int kvm_ioctl(KVMState *s, int type, ...)
+int kvm_ioctl(int type, ...)
{
int ret;
void *arg;
@@ -972,14 +945,14 @@ int kvm_ioctl(KVMState *s, int type, ...)
arg = va_arg(ap, void *);
va_end(ap);
- ret = ioctl(s->fd, type, arg);
+ ret = ioctl(kvm_state.fd, type, arg);
if (ret == -1) {
ret = -errno;
}
return ret;
}
-int kvm_vm_ioctl(KVMState *s, int type, ...)
+int kvm_vm_ioctl(int type, ...)
{
int ret;
void *arg;
@@ -989,7 +962,7 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
arg = va_arg(ap, void *);
va_end(ap);
- ret = ioctl(s->vmfd, type, arg);
+ ret = ioctl(kvm_state.vmfd, type, arg);
if (ret == -1) {
ret = -errno;
}
@@ -1016,9 +989,7 @@ int kvm_vcpu_ioctl(CPUState *env, int type, ...)
int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
- KVMState *s = kvm_state;
-
- return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
+ return kvm_check_extension(KVM_CAP_SYNC_MMU);
#else
return 0;
#endif
@@ -1026,27 +997,27 @@ int kvm_has_sync_mmu(void)
int kvm_has_vcpu_events(void)
{
- return kvm_state->vcpu_events;
+ return kvm_state.vcpu_events;
}
int kvm_has_robust_singlestep(void)
{
- return kvm_state->robust_singlestep;
+ return kvm_state.robust_singlestep;
}
int kvm_has_debugregs(void)
{
- return kvm_state->debugregs;
+ return kvm_state.debugregs;
}
int kvm_has_xsave(void)
{
- return kvm_state->xsave;
+ return kvm_state.xsave;
}
int kvm_has_xcrs(void)
{
- return kvm_state->xcrs;
+ return kvm_state.xcrs;
}
void kvm_setup_guest_memory(void *start, size_t size)
@@ -1069,7 +1040,7 @@ struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
{
struct kvm_sw_breakpoint *bp;
- QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
+ QTAILQ_FOREACH(bp, &kvm_state.kvm_sw_breakpoints, entry) {
if (bp->pc == pc) {
return bp;
}
@@ -1079,7 +1050,7 @@ struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
int kvm_sw_breakpoints_active(CPUState *env)
{
- return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
+ return !QTAILQ_EMPTY(&kvm_state.kvm_sw_breakpoints);
}
struct kvm_set_guest_debug_data {
@@ -1139,8 +1110,7 @@ int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
return err;
}
- QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
- bp, entry);
+ QTAILQ_INSERT_HEAD(&kvm_state.kvm_sw_breakpoints, bp, entry);
} else {
err = kvm_arch_insert_hw_breakpoint(addr, len, type);
if (err) {
@@ -1180,7 +1150,7 @@ int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
return err;
}
- QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
+ QTAILQ_REMOVE(&kvm_state.kvm_sw_breakpoints, bp, entry);
qemu_free(bp);
} else {
err = kvm_arch_remove_hw_breakpoint(addr, len, type);
@@ -1201,10 +1171,9 @@ int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
void kvm_remove_all_breakpoints(CPUState *current_env)
{
struct kvm_sw_breakpoint *bp, *next;
- KVMState *s = current_env->kvm_state;
CPUState *env;
- QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
+ QTAILQ_FOREACH_SAFE(bp, &kvm_state.kvm_sw_breakpoints, entry, next) {
if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
/* Try harder to find a CPU that currently sees the breakpoint. */
for (env = first_cpu; env != NULL; env = env->next_cpu) {
@@ -1284,7 +1253,7 @@ int kvm_set_ioeventfd_mmio_long(int fd, uint32_t addr, uint32_t val, bool assign
iofd.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
}
- ret = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &iofd);
+ ret = kvm_vm_ioctl(KVM_IOEVENTFD, &iofd);
if (ret < 0) {
return -errno;
@@ -1313,7 +1282,7 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
if (!assign) {
kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
}
- r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
+ r = kvm_vm_ioctl(KVM_IOEVENTFD, &kick);
if (r < 0) {
return r;
}
diff --git a/kvm-stub.c b/kvm-stub.c
index 352c6a6..3a058ad 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -53,7 +53,7 @@ int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
return -ENOSYS;
}
-int kvm_check_extension(KVMState *s, unsigned int extension)
+int kvm_check_extension(unsigned int extension)
{
return 0;
}
diff --git a/kvm.h b/kvm.h
index 51ad56f..26ca8c1 100644
--- a/kvm.h
+++ b/kvm.h
@@ -74,12 +74,9 @@ int kvm_irqchip_in_kernel(void);
/* internal API */
-struct KVMState;
-typedef struct KVMState KVMState;
+int kvm_ioctl(int type, ...);
-int kvm_ioctl(KVMState *s, int type, ...);
-
-int kvm_vm_ioctl(KVMState *s, int type, ...);
+int kvm_vm_ioctl(int type, ...);
int kvm_vcpu_ioctl(CPUState *env, int type, ...);
@@ -104,7 +101,7 @@ int kvm_arch_get_registers(CPUState *env);
int kvm_arch_put_registers(CPUState *env, int level);
-int kvm_arch_init(KVMState *s, int smp_cpus);
+int kvm_arch_init(int smp_cpus);
int kvm_arch_init_vcpu(CPUState *env);
@@ -146,10 +143,8 @@ void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg);
bool kvm_arch_stop_on_emulation_error(CPUState *env);
-int kvm_check_extension(KVMState *s, unsigned int extension);
+int kvm_check_extension(unsigned int extension);
-uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
- uint32_t index, int reg);
void kvm_cpu_synchronize_state(CPUState *env);
void kvm_cpu_synchronize_post_reset(CPUState *env);
void kvm_cpu_synchronize_post_init(CPUState *env);
@@ -179,7 +174,7 @@ static inline void cpu_synchronize_post_init(CPUState *env)
#if !defined(CONFIG_USER_ONLY)
-int kvm_physical_memory_addr_from_ram(KVMState *s, ram_addr_t ram_addr,
+int kvm_physical_memory_addr_from_ram(ram_addr_t ram_addr,
target_phys_addr_t *phys_addr);
#endif
diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
index 5382a28..17ab619 100644
--- a/target-i386/cpuid.c
+++ b/target-i386/cpuid.c
@@ -23,6 +23,7 @@
#include "cpu.h"
#include "kvm.h"
+#include "kvm_x86.h"
#include "qemu-option.h"
#include "qemu-config.h"
@@ -1138,10 +1139,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
break;
}
if (kvm_enabled()) {
- *eax = kvm_arch_get_supported_cpuid(env, 0xd, count, R_EAX);
- *ebx = kvm_arch_get_supported_cpuid(env, 0xd, count, R_EBX);
- *ecx = kvm_arch_get_supported_cpuid(env, 0xd, count, R_ECX);
- *edx = kvm_arch_get_supported_cpuid(env, 0xd, count, R_EDX);
+ *eax = kvm_x86_get_supported_cpuid(0xd, count, R_EAX);
+ *ebx = kvm_x86_get_supported_cpuid(0xd, count, R_EBX);
+ *ecx = kvm_x86_get_supported_cpuid(0xd, count, R_ECX);
+ *edx = kvm_x86_get_supported_cpuid(0xd, count, R_EDX);
} else {
*eax = 0;
*ebx = 0;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 96c350e..a4ccc7f 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -60,7 +60,7 @@ static int lm_capable_kernel;
#ifdef KVM_CAP_EXT_CPUID
-static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
+static struct kvm_cpuid2 *try_get_cpuid(int max)
{
struct kvm_cpuid2 *cpuid;
int r, size;
@@ -68,7 +68,7 @@ static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
size = sizeof(*cpuid) + max * sizeof(*cpuid->entries);
cpuid = (struct kvm_cpuid2 *)qemu_mallocz(size);
cpuid->nent = max;
- r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid);
+ r = kvm_ioctl(KVM_GET_SUPPORTED_CPUID, cpuid);
if (r == 0 && cpuid->nent >= max) {
r = -E2BIG;
}
@@ -85,20 +85,20 @@ static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
return cpuid;
}
-uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
- uint32_t index, int reg)
+uint32_t kvm_x86_get_supported_cpuid(uint32_t function, uint32_t index,
+ int reg)
{
struct kvm_cpuid2 *cpuid;
int i, max;
uint32_t ret = 0;
uint32_t cpuid_1_edx;
- if (!kvm_check_extension(env->kvm_state, KVM_CAP_EXT_CPUID)) {
+ if (!kvm_check_extension(KVM_CAP_EXT_CPUID)) {
return -1U;
}
max = 1;
- while ((cpuid = try_get_cpuid(env->kvm_state, max)) == NULL) {
+ while ((cpuid = try_get_cpuid(max)) == NULL) {
max *= 2;
}
@@ -126,7 +126,7 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
/* On Intel, kvm returns cpuid according to the Intel spec,
* so add missing bits according to the AMD spec:
*/
- cpuid_1_edx = kvm_arch_get_supported_cpuid(env, 1, 0, R_EDX);
+ cpuid_1_edx = kvm_x86_get_supported_cpuid(1, 0, R_EDX);
ret |= cpuid_1_edx & 0x183f7ff;
break;
}
@@ -142,8 +142,8 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
#else
-uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
- uint32_t index, int reg)
+uint32_t kvm_x86_get_supported_cpuid(uint32_t function, uint32_t index,
+ int reg)
{
return -1U;
}
@@ -175,7 +175,7 @@ static int get_para_features(CPUState *env)
int i, features = 0;
for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) {
- if (kvm_check_extension(env->kvm_state, para_features[i].cap)) {
+ if (kvm_check_extension(para_features[i].cap)) {
features |= (1 << para_features[i].feature);
}
}
@@ -184,15 +184,14 @@ static int get_para_features(CPUState *env)
#endif
#ifdef KVM_CAP_MCE
-static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap,
- int *max_banks)
+static int kvm_get_mce_cap_supported(uint64_t *mce_cap, int *max_banks)
{
int r;
- r = kvm_check_extension(s, KVM_CAP_MCE);
+ r = kvm_check_extension(KVM_CAP_MCE);
if (r > 0) {
*max_banks = r;
- return kvm_ioctl(s, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap);
+ return kvm_ioctl(KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap);
}
return -ENOSYS;
}
@@ -325,18 +324,18 @@ int kvm_arch_init_vcpu(CPUState *env)
env->mp_state = KVM_MP_STATE_RUNNABLE;
- env->cpuid_features &= kvm_arch_get_supported_cpuid(env, 1, 0, R_EDX);
+ env->cpuid_features &= kvm_x86_get_supported_cpuid(1, 0, R_EDX);
i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR;
- env->cpuid_ext_features &= kvm_arch_get_supported_cpuid(env, 1, 0, R_ECX);
+ env->cpuid_ext_features &= kvm_x86_get_supported_cpuid(1, 0, R_ECX);
env->cpuid_ext_features |= i;
- env->cpuid_ext2_features &= kvm_arch_get_supported_cpuid(env, 0x80000001,
- 0, R_EDX);
- env->cpuid_ext3_features &= kvm_arch_get_supported_cpuid(env, 0x80000001,
- 0, R_ECX);
- env->cpuid_svm_features &= kvm_arch_get_supported_cpuid(env, 0x8000000A,
- 0, R_EDX);
+ env->cpuid_ext2_features &= kvm_x86_get_supported_cpuid(0x80000001,
+ 0, R_EDX);
+ env->cpuid_ext3_features &= kvm_x86_get_supported_cpuid(0x80000001,
+ 0, R_ECX);
+ env->cpuid_svm_features &= kvm_x86_get_supported_cpuid(0x8000000A,
+ 0, R_EDX);
cpuid_i = 0;
@@ -425,11 +424,11 @@ int kvm_arch_init_vcpu(CPUState *env)
#ifdef KVM_CAP_MCE
if (((env->cpuid_version >> 8)&0xF) >= 6
&& (env->cpuid_features&(CPUID_MCE|CPUID_MCA)) == (CPUID_MCE|CPUID_MCA)
- && kvm_check_extension(env->kvm_state, KVM_CAP_MCE) > 0) {
+ && kvm_check_extension(KVM_CAP_MCE) > 0) {
uint64_t mcg_cap;
int banks;
- if (kvm_get_mce_cap_supported(env->kvm_state, &mcg_cap, &banks)) {
+ if (kvm_get_mce_cap_supported(&mcg_cap, &banks)) {
perror("kvm_get_mce_cap_supported FAILED");
} else {
if (banks > MCE_BANKS_DEF)
@@ -465,7 +464,7 @@ void kvm_arch_reset_vcpu(CPUState *env)
}
}
-static int kvm_get_supported_msrs(KVMState *s)
+static int kvm_get_supported_msrs(void)
{
static int kvm_supported_msrs;
int ret = 0;
@@ -479,7 +478,7 @@ static int kvm_get_supported_msrs(KVMState *s)
/* Obtain MSR list from KVM. These are the MSRs that we must
* save/restore */
msr_list.nmsrs = 0;
- ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, &msr_list);
+ ret = kvm_ioctl(KVM_GET_MSR_INDEX_LIST, &msr_list);
if (ret < 0 && ret != -E2BIG) {
return ret;
}
@@ -490,7 +489,7 @@ static int kvm_get_supported_msrs(KVMState *s)
sizeof(msr_list.indices[0])));
kvm_msr_list->nmsrs = msr_list.nmsrs;
- ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
+ ret = kvm_ioctl(KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
if (ret >= 0) {
int i;
@@ -512,17 +511,17 @@ static int kvm_get_supported_msrs(KVMState *s)
return ret;
}
-static int kvm_init_identity_map_page(KVMState *s)
+static int kvm_init_identity_map_page(void)
{
#ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR
int ret;
uint64_t addr = 0xfffbc000;
- if (!kvm_check_extension(s, KVM_CAP_SET_IDENTITY_MAP_ADDR)) {
+ if (!kvm_check_extension(KVM_CAP_SET_IDENTITY_MAP_ADDR)) {
return 0;
}
- ret = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &addr);
+ ret = kvm_vm_ioctl(KVM_SET_IDENTITY_MAP_ADDR, &addr);
if (ret < 0) {
fprintf(stderr, "kvm_set_identity_map_addr: %s\n", strerror(ret));
return ret;
@@ -531,12 +530,12 @@ static int kvm_init_identity_map_page(KVMState *s)
return 0;
}
-int kvm_arch_init(KVMState *s, int smp_cpus)
+int kvm_arch_init(int smp_cpus)
{
int ret;
struct utsname utsname;
- ret = kvm_get_supported_msrs(s);
+ ret = kvm_get_supported_msrs();
if (ret < 0) {
return ret;
}
@@ -550,7 +549,7 @@ int kvm_arch_init(KVMState *s, int smp_cpus)
* versions of KVM just assumed that it would be at the end of physical
* memory but that doesn't work with more than 4GB of memory. We simply
* refuse to work with those older versions of KVM. */
- ret = kvm_check_extension(s, KVM_CAP_SET_TSS_ADDR);
+ ret = kvm_check_extension(KVM_CAP_SET_TSS_ADDR);
if (ret <= 0) {
fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n");
return ret;
@@ -567,12 +566,12 @@ int kvm_arch_init(KVMState *s, int smp_cpus)
perror("e820_add_entry() table is full");
exit(1);
}
- ret = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, 0xfffbd000);
+ ret = kvm_vm_ioctl(KVM_SET_TSS_ADDR, 0xfffbd000);
if (ret < 0) {
return ret;
}
- return kvm_init_identity_map_page(s);
+ return kvm_init_identity_map_page();
}
static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
@@ -1867,7 +1866,7 @@ int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
|| code == BUS_MCEERR_AO)) {
vaddr = (void *)addr;
if (qemu_ram_addr_from_host(vaddr, &ram_addr) ||
- !kvm_physical_memory_addr_from_ram(env->kvm_state, ram_addr, &paddr)) {
+ !kvm_physical_memory_addr_from_ram(ram_addr, &paddr)) {
fprintf(stderr, "Hardware memory error for memory used by "
"QEMU itself instead of guest system!\n");
/* Hope we are lucky for AO MCE */
@@ -1916,7 +1915,7 @@ int kvm_on_sigbus(int code, void *addr)
/* Hope we are lucky for AO MCE */
vaddr = addr;
if (qemu_ram_addr_from_host(vaddr, &ram_addr) ||
- !kvm_physical_memory_addr_from_ram(first_cpu->kvm_state, ram_addr, &paddr)) {
+ !kvm_physical_memory_addr_from_ram(ram_addr, &paddr)) {
fprintf(stderr, "Hardware memory error for memory used by "
"QEMU itself instead of guest system!: %p\n", addr);
return 0;
diff --git a/target-i386/kvm_x86.h b/target-i386/kvm_x86.h
index 9d7b584..304d0cb 100644
--- a/target-i386/kvm_x86.h
+++ b/target-i386/kvm_x86.h
@@ -22,4 +22,7 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
uint64_t mcg_status, uint64_t addr, uint64_t misc,
int flag);
+uint32_t kvm_x86_get_supported_cpuid(uint32_t function, uint32_t index,
+ int reg);
+
#endif
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 849b404..56d30cc 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -56,13 +56,13 @@ static void kvm_kick_env(void *env)
qemu_cpu_kick(env);
}
-int kvm_arch_init(KVMState *s, int smp_cpus)
+int kvm_arch_init(int smp_cpus)
{
#ifdef KVM_CAP_PPC_UNSET_IRQ
- cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
+ cap_interrupt_unset = kvm_check_extension(KVM_CAP_PPC_UNSET_IRQ);
#endif
#ifdef KVM_CAP_PPC_IRQ_LEVEL
- cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
+ cap_interrupt_level = kvm_check_extension(KVM_CAP_PPC_IRQ_LEVEL);
#endif
if (!cap_interrupt_level) {
@@ -164,7 +164,7 @@ int kvm_arch_get_registers(CPUState *env)
env->gpr[i] = regs.gpr[i];
#ifdef KVM_CAP_PPC_SEGSTATE
- if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_SEGSTATE)) {
+ if (kvm_check_extension(KVM_CAP_PPC_SEGSTATE)) {
env->sdr1 = sregs.u.s.sdr1;
/* Sync SLB */
@@ -371,8 +371,8 @@ int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len)
#ifdef KVM_CAP_PPC_GET_PVINFO
struct kvm_ppc_pvinfo pvinfo;
- if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
- !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
+ if (kvm_check_extension(KVM_CAP_PPC_GET_PVINFO) &&
+ !kvm_vm_ioctl(KVM_PPC_GET_PVINFO, &pvinfo)) {
memcpy(buf, pvinfo.hcall, buf_len);
return 0;
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index adf4a9e..927a37e 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -70,7 +70,7 @@
#define SCLP_CMDW_READ_SCP_INFO 0x00020001
#define SCLP_CMDW_READ_SCP_INFO_FORCED 0x00120001
-int kvm_arch_init(KVMState *s, int smp_cpus)
+int kvm_arch_init(int smp_cpus)
{
return 0;
}
@@ -186,10 +186,6 @@ static void kvm_s390_interrupt_internal(CPUState *env, int type, uint32_t parm,
struct kvm_s390_interrupt kvmint;
int r;
- if (!env->kvm_state) {
- return;
- }
-
env->halted = 0;
env->exception_index = -1;
@@ -198,7 +194,7 @@ static void kvm_s390_interrupt_internal(CPUState *env, int type, uint32_t parm,
kvmint.parm64 = parm64;
if (vm) {
- r = kvm_vm_ioctl(env->kvm_state, KVM_S390_INTERRUPT, &kvmint);
+ r = kvm_vm_ioctl(KVM_S390_INTERRUPT, &kvmint);
} else {
r = kvm_vcpu_ioctl(env, KVM_S390_INTERRUPT, &kvmint);
}
--
1.7.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Qemu-devel] [PATCH 5/5] kvm: x86: Introduce kvmclock device to save/restore its state
2010-12-31 12:01 [Qemu-devel] [PATCH 0/5] [uq/master] MSR refactorings, KVMState elimination, kvmclock device Jan Kiszka
` (3 preceding siblings ...)
2010-12-31 12:01 ` [Qemu-devel] [PATCH 4/5] kvm: Eliminate KVMState arguments Jan Kiszka
@ 2010-12-31 12:01 ` Jan Kiszka
4 siblings, 0 replies; 6+ messages in thread
From: Jan Kiszka @ 2010-12-31 12:01 UTC (permalink / raw)
To: Avi Kivity, Marcelo Tosatti; +Cc: Jan Kiszka, Glauber Costa, qemu-devel, kvm
From: Jan Kiszka <jan.kiszka@siemens.com>
If kvmclock is used, which implies that the kernel supports it, register
a kvmclock device with the sysbus. Its main purpose is to save and
restore the kernel state on migration, but it will also allow the state
to be visualized one day.
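Schematically, migration drives the two vmstate hooks around the single
serialized field (sketch):

    /* source side, just before the "kvmclock" section is written: */
    kvmclock_pre_save(s);       /* KVM_GET_CLOCK -> s->clock */

    /* s->clock travels in the migration stream (VMSTATE_UINT64) */

    /* target side, right after the section has been read back: */
    kvmclock_post_load(s, 1);   /* s->clock -> KVM_SET_CLOCK */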
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
CC: Glauber Costa <glommer@redhat.com>
---
target-i386/kvm.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 70 insertions(+), 1 deletions(-)
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index a4ccc7f..32310fa 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -29,6 +29,7 @@
#include "hw/apic.h"
#include "ioport.h"
#include "kvm_x86.h"
+#include "hw/sysbus.h"
#ifdef CONFIG_KVM_PARA
#include <linux/kvm_para.h>
@@ -309,6 +310,64 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
#endif
}
+#ifdef KVM_CAP_ADJUST_CLOCK
+typedef struct KVMClockState {
+ SysBusDevice busdev;
+ uint64_t clock;
+ struct kvm_clock_data data;
+} KVMClockState;
+
+static void kvmclock_pre_save(void *opaque)
+{
+ KVMClockState *s = opaque;
+ struct kvm_clock_data data;
+ int ret;
+
+ ret = kvm_vm_ioctl(KVM_GET_CLOCK, &data);
+ if (ret < 0) {
+ fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
+ data.clock = 0;
+ }
+ s->clock = data.clock;
+}
+
+static int kvmclock_post_load(void *opaque, int version_id)
+{
+ KVMClockState *s = opaque;
+ struct kvm_clock_data data;
+
+ data.clock = s->clock;
+ data.flags = 0;
+ return kvm_vm_ioctl(KVM_SET_CLOCK, &data);
+}
+
+static int kvmclock_init(SysBusDevice *dev)
+{
+ return 0;
+}
+
+static const VMStateDescription kvmclock_vmsd = {
+ .name = "kvmclock",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .minimum_version_id_old = 1,
+ .pre_save = kvmclock_pre_save,
+ .post_load = kvmclock_post_load,
+ .fields = (VMStateField []) {
+ VMSTATE_UINT64(clock, KVMClockState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static SysBusDeviceInfo kvmclock_info = {
+ .qdev.name = "kvmclock",
+ .qdev.size = sizeof(KVMClockState),
+ .qdev.vmsd = &kvmclock_vmsd,
+ .qdev.no_user = 1,
+ .init = kvmclock_init,
+};
+#endif /* KVM_CAP_ADJUST_CLOCK */
+
int kvm_arch_init_vcpu(CPUState *env)
{
struct {
@@ -337,7 +396,6 @@ int kvm_arch_init_vcpu(CPUState *env)
env->cpuid_svm_features &= kvm_x86_get_supported_cpuid(0x8000000A,
0, R_EDX);
-
cpuid_i = 0;
#ifdef CONFIG_KVM_PARA
@@ -444,6 +502,13 @@ int kvm_arch_init_vcpu(CPUState *env)
}
#endif
+#ifdef KVM_CAP_ADJUST_CLOCK
+ if (cpu_is_bsp(env) &&
+ (env->cpuid_kvm_features & (1ULL << KVM_FEATURE_CLOCKSOURCE))) {
+ sysbus_create_simple("kvmclock", -1, NULL);
+ }
+#endif
+
return kvm_vcpu_ioctl(env, KVM_SET_CPUID2, &cpuid_data);
}
@@ -535,6 +600,10 @@ int kvm_arch_init(int smp_cpus)
int ret;
struct utsname utsname;
+#ifdef KVM_CAP_ADJUST_CLOCK
+ sysbus_register_withprop(&kvmclock_info);
+#endif
+
ret = kvm_get_supported_msrs();
if (ret < 0) {
return ret;
--
1.7.1
^ permalink raw reply related [flat|nested] 6+ messages in thread