public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Anthony Liguori <aliguori-NZpS4cJIG2HvQtjrzfazuQ@public.gmane.org>
To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
Subject: [PATCH] Support for QEMU's CVS
Date: Wed, 20 Dec 2006 23:17:41 -0600	[thread overview]
Message-ID: <458A18F5.10108@cs.utexas.edu> (raw)

[-- Attachment #1: Type: text/plain, Size: 758 bytes --]

Howdy,

The attached patch forward-ports the KVM patch to QEMU's CVS.  The only 
significant change needed was hacking the Bochs BIOS to dynamically 
disable SMM support if KVM is enabled.  This was done by using one of 
the Bochs DEBUG ports.  It is probably not the best long-term solution, 
but it works.

The patch is minimal (no VMDK changes, no migration, etc.).  This is to 
ease upstream integration.

I am able to boot both a Windows and FC5 guest (under SVM).

You'll need the BIOS (binary file can't be included in diff) from:

http://www.cs.utexas.edu/~aliguori/bios.bin

I reckon some changes still need to be made for CVS inclusion but I 
think it's at least appropriate to send to qemu-devel and begin the 
review process.

Regards,

Anthony Liguori

[-- Attachment #2: kvm-7-qemu-cvs.diff --]
[-- Type: text/x-patch, Size: 47088 bytes --]

diff -r 2eac80033ea0 Makefile.target
--- a/Makefile.target	Tue Dec 19 09:31:34 2006 +0000
+++ b/Makefile.target	Wed Dec 20 23:01:25 2006 -0600
@@ -204,8 +204,8 @@ OBJS+= libqemu.a
 OBJS+= libqemu.a
 
 # cpu emulator library
-LIBOBJS=exec.o kqemu.o translate-op.o translate-all.o cpu-exec.o\
-        translate.o op.o 
+LIBOBJS=exec.o kqemu.o qemu-kvm.o translate-op.o translate-all.o cpu-exec.o\
+        translate.o op.o
 ifdef CONFIG_SOFTFLOAT
 LIBOBJS+=fpu/softfloat.o
 else
@@ -331,6 +331,10 @@ SOUND_HW += fmopl.o adlib.o
 SOUND_HW += fmopl.o adlib.o
 endif
 AUDIODRV+= wavcapture.o
+ifdef CONFIG_KVM_KERNEL_INC
+DEFINES += -I $(CONFIG_KVM_KERNEL_INC)
+LIBS += -lkvm
+endif
 
 # SCSI layer
 VL_OBJS+= scsi-disk.o cdrom.o lsi53c895a.o
@@ -490,6 +494,9 @@ op_helper.o: op_helper.c
 op_helper.o: op_helper.c
 	$(CC) $(HELPER_CFLAGS) $(DEFINES) -c -o $@ $<
 endif
+
+qemu-kvm.o: qemu-kvm.c
+	$(CC) $(HELPER_CFLAGS) $(DEFINES) -c -o $@ $<
 
 cpu-exec.o: cpu-exec.c
 	$(CC) $(HELPER_CFLAGS) $(DEFINES) -c -o $@ $<
diff -r 2eac80033ea0 configure
--- a/configure	Tue Dec 19 09:31:34 2006 +0000
+++ b/configure	Wed Dec 20 23:01:25 2006 -0600
@@ -89,6 +89,7 @@ bsd="no"
 bsd="no"
 linux="no"
 kqemu="no"
+kvm="no"
 profiler="no"
 cocoa="no"
 check_gfx="yes"
@@ -222,6 +223,8 @@ for opt do
   ;;
   --disable-kqemu) kqemu="no"
   ;;
+  --enable-kvm) kvm="yes"
+  ;;
   --enable-profiler) profiler="yes"
   ;;
   --enable-cocoa) cocoa="yes" ; coreaudio="yes" ; sdl="no"
@@ -264,6 +267,7 @@ echo ""
 echo ""
 echo "kqemu kernel acceleration support:"
 echo "  --disable-kqemu          disable kqemu support"
+echo "  --enable-kvm             enable kernel virtual machine support"
 echo ""
 echo "Advanced options (experts only):"
 echo "  --source-path=PATH       path of source code [$source_path]"
@@ -576,6 +580,7 @@ fi
 fi
 echo "FMOD support      $fmod $fmod_support"
 echo "kqemu support     $kqemu"
+echo "kvm support       $kvm"
 echo "Documentation     $build_docs"
 [ ! -z "$uname_release" ] && \
 echo "uname -r          $uname_release"
@@ -799,6 +804,13 @@ interp_prefix1=`echo "$interp_prefix" | 
 interp_prefix1=`echo "$interp_prefix" | sed "s/%M/$target_cpu/g"`
 echo "#define CONFIG_QEMU_PREFIX \"$interp_prefix1\"" >> $config_h
 
+configure_kvm() { # emit the KVM settings only when all three checks below hold
+  if test $kvm = "yes" -a "$target_softmmu" = "yes" -a $cpu = "$target_cpu" ; then
+    echo "#define USE_KVM 1" >> $config_h # the C sources test this with #ifdef USE_KVM
+    echo "CONFIG_KVM_KERNEL_INC=$kernel_path/include" >> $config_mak # Makefile.target keys its -I and -lkvm additions on this
+  fi
+}
+
 if test "$target_cpu" = "i386" ; then
   echo "TARGET_ARCH=i386" >> $config_mak
   echo "#define TARGET_ARCH \"i386\"" >> $config_h
@@ -806,6 +818,7 @@ if test "$target_cpu" = "i386" ; then
   if test $kqemu = "yes" -a "$target_softmmu" = "yes" -a $cpu = "i386" ; then
     echo "#define USE_KQEMU 1" >> $config_h
   fi
+  configure_kvm
 elif test "$target_cpu" = "arm" -o "$target_cpu" = "armeb" ; then
   echo "TARGET_ARCH=arm" >> $config_mak
   echo "#define TARGET_ARCH \"arm\"" >> $config_h
@@ -837,6 +850,7 @@ elif test "$target_cpu" = "x86_64" ; the
   if test $kqemu = "yes" -a "$target_softmmu" = "yes" -a $cpu = "x86_64"  ; then
     echo "#define USE_KQEMU 1" >> $config_h
   fi
+  configure_kvm
 elif test "$target_cpu" = "mips" -o "$target_cpu" = "mipsel" ; then
   echo "TARGET_ARCH=mips" >> $config_mak
   echo "#define TARGET_ARCH \"mips\"" >> $config_h
diff -r 2eac80033ea0 cpu-all.h
--- a/cpu-all.h	Tue Dec 19 09:31:34 2006 +0000
+++ b/cpu-all.h	Wed Dec 20 23:01:25 2006 -0600
@@ -826,6 +826,7 @@ extern int phys_ram_fd;
 extern int phys_ram_fd;
 extern uint8_t *phys_ram_base;
 extern uint8_t *phys_ram_dirty;
+extern uint8_t *bios_mem;
 
 /* physical memory access */
 #define TLB_INVALID_MASK   (1 << 3)
diff -r 2eac80033ea0 cpu-exec.c
--- a/cpu-exec.c	Tue Dec 19 09:31:34 2006 +0000
+++ b/cpu-exec.c	Wed Dec 20 23:01:25 2006 -0600
@@ -33,6 +33,11 @@
 #undef EIP
 #include <signal.h>
 #include <sys/ucontext.h>
+#endif
+
+#ifdef USE_KVM
+#include "qemu-kvm.h"
+extern int kvm_allowed;
 #endif
 
 int tb_invalidated_flag;
@@ -457,6 +462,12 @@ int cpu_exec(CPUState *env1)
             }
 #endif
 
+#ifdef USE_KVM
+            if (kvm_allowed) {
+                kvm_cpu_exec(env);
+                longjmp(env->jmp_env, 1);
+            }
+#endif
             T0 = 0; /* force lookup of first TB */
             for(;;) {
 #if defined(__sparc__) && !defined(HOST_SOLARIS)
diff -r 2eac80033ea0 exec.c
--- a/exec.c	Tue Dec 19 09:31:34 2006 +0000
+++ b/exec.c	Wed Dec 20 23:01:25 2006 -0600
@@ -69,6 +69,10 @@
 #define TARGET_PHYS_ADDR_SPACE_BITS 32
 #endif
 
+#ifdef USE_KVM
+extern int kvm_allowed;
+#endif
+
 TranslationBlock tbs[CODE_GEN_MAX_BLOCKS];
 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
 int nb_tbs;
@@ -82,6 +86,7 @@ int phys_ram_fd;
 int phys_ram_fd;
 uint8_t *phys_ram_base;
 uint8_t *phys_ram_dirty;
+uint8_t *bios_mem;
 
 CPUState *first_cpu;
 /* current CPU in the current thread. It is only valid inside
@@ -1043,6 +1048,11 @@ int cpu_breakpoint_insert(CPUState *env,
     if (env->nb_breakpoints >= MAX_BREAKPOINTS)
         return -1;
     env->breakpoints[env->nb_breakpoints++] = pc;
+
+#ifdef USE_KVM
+    if (kvm_allowed)
+	kvm_update_debugger(env);
+#endif
     
     breakpoint_invalidate(env, pc);
     return 0;
@@ -1066,6 +1076,11 @@ int cpu_breakpoint_remove(CPUState *env,
     if (i < env->nb_breakpoints)
       env->breakpoints[i] = env->breakpoints[env->nb_breakpoints];
 
+#ifdef USE_KVM
+    if (kvm_allowed)
+	kvm_update_debugger(env);
+#endif
+    
     breakpoint_invalidate(env, pc);
     return 0;
 #else
@@ -1084,6 +1099,10 @@ void cpu_single_step(CPUState *env, int 
         /* XXX: only flush what is necessary */
         tb_flush(env);
     }
+#ifdef USE_KVM
+    if (kvm_allowed)
+	kvm_update_debugger(env);
+#endif
 #endif
 }
 
diff -r 2eac80033ea0 hw/cirrus_vga.c
--- a/hw/cirrus_vga.c	Tue Dec 19 09:31:34 2006 +0000
+++ b/hw/cirrus_vga.c	Wed Dec 20 23:01:25 2006 -0600
@@ -28,6 +28,9 @@
  */
 #include "vl.h"
 #include "vga_int.h"
+#ifndef _WIN32
+#include <sys/mman.h>
+#endif
 
 /*
  * TODO:
@@ -231,6 +234,10 @@ typedef struct CirrusVGAState {
     int cirrus_linear_io_addr;
     int cirrus_linear_bitblt_io_addr;
     int cirrus_mmio_io_addr;
+#ifdef USE_KVM
+    unsigned long cirrus_lfb_addr;
+    unsigned long cirrus_lfb_end;
+#endif
     uint32_t cirrus_addr_mask;
     uint32_t linear_mmio_mask;
     uint8_t cirrus_shadow_gr0;
@@ -267,6 +274,10 @@ typedef struct CirrusVGAState {
     int last_hw_cursor_y_end;
     int real_vram_size; /* XXX: suppress that */
     CPUWriteMemoryFunc **cirrus_linear_write;
+#ifdef USE_KVM
+    unsigned long map_addr;
+    unsigned long map_end;
+#endif
 } CirrusVGAState;
 
 typedef struct PCICirrusVGAState {
@@ -2525,6 +2536,48 @@ static CPUWriteMemoryFunc *cirrus_linear
     cirrus_linear_bitblt_writel,
 };
 
+#ifdef USE_KVM
+
+#include "qemu-kvm.h"
+
+extern kvm_context_t kvm_context;
+/* Calls kvm_create_phys_mem() for the page-aligned VGA_RAM_SIZE window at begin; zero-fills and returns the result, or NULL on failure. */
+static void *set_vram_mapping(unsigned long begin, unsigned long end)
+{
+    void *vram_pointer = NULL;
+
+    /* page-align begin; end is recomputed from begin, so the end argument is ignored */
+    begin = begin & TARGET_PAGE_MASK;
+    end = begin + VGA_RAM_SIZE;
+    end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
+
+    vram_pointer = kvm_create_phys_mem(kvm_context, begin, end - begin, 1, 
+				       1, 1);
+
+    if (vram_pointer == NULL) {
+        printf("set_vram_mapping: cannot allocate memory: %m\n");
+        return NULL;
+    }
+
+    memset(vram_pointer, 0, end - begin);
+
+    return vram_pointer;
+}
+/* Calls kvm_destroy_phys_mem() on the page-aligned VGA_RAM_SIZE window at begin; always returns 0. */
+static int unset_vram_mapping(unsigned long begin, unsigned long end)
+{
+    /* same order as set_vram_mapping(): align begin first, then derive end (the end argument is ignored) */
+    begin = begin & TARGET_PAGE_MASK;
+    end = begin + VGA_RAM_SIZE;
+    end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
+
+    kvm_destroy_phys_mem(kvm_context, begin, end - begin);
+
+    return 0;
+}
+
+#endif
+
 /* Compute the memory access functions */
 static void cirrus_update_memory_access(CirrusVGAState *s)
 {
@@ -2543,11 +2596,45 @@ static void cirrus_update_memory_access(
         
 	mode = s->gr[0x05] & 0x7;
 	if (mode < 4 || mode > 5 || ((s->gr[0x0B] & 0x4) == 0)) {
+#ifdef USE_KVM
+            if (kvm_allowed && s->cirrus_lfb_addr && s->cirrus_lfb_end &&
+		!s->map_addr) {
+                void *vram_pointer, *old_vram;
+
+                vram_pointer = set_vram_mapping(s->cirrus_lfb_addr,
+                                                s->cirrus_lfb_end);
+                if (!vram_pointer)
+                    fprintf(stderr, "NULL vram_pointer\n");
+                else {
+                    old_vram = vga_update_vram((VGAState *)s, vram_pointer,
+                                               VGA_RAM_SIZE);
+                    qemu_free(old_vram);
+                }
+                s->map_addr = s->cirrus_lfb_addr;
+                s->map_end = s->cirrus_lfb_end;
+            }
+#endif
             s->cirrus_linear_write[0] = cirrus_linear_mem_writeb;
             s->cirrus_linear_write[1] = cirrus_linear_mem_writew;
             s->cirrus_linear_write[2] = cirrus_linear_mem_writel;
         } else {
         generic_io:
+#ifdef USE_KVM
+            if (kvm_allowed && s->cirrus_lfb_addr && s->cirrus_lfb_end &&
+                s->map_addr) {
+                int error;
+                void *old_vram = NULL;
+                /* generic I/O path: drop the KVM mapping, copy VRAM into a qemu_malloc'd buffer */
+                error = unset_vram_mapping(s->cirrus_lfb_addr,
+                                           s->cirrus_lfb_end);
+                if (!error)
+                    old_vram = vga_update_vram((VGAState *)s, NULL,
+                                               VGA_RAM_SIZE);
+                if (old_vram) /* length is end - start; the reverse order goes negative */
+                    munmap(old_vram, s->map_end - s->map_addr);
+                s->map_addr = s->map_end = 0;
+            }
+#endif
             s->cirrus_linear_write[0] = cirrus_linear_writeb;
             s->cirrus_linear_write[1] = cirrus_linear_writew;
             s->cirrus_linear_write[2] = cirrus_linear_writel;
@@ -2946,6 +3033,13 @@ static void cirrus_vga_save(QEMUFile *f,
     qemu_put_be32s(f, &s->hw_cursor_y);
     /* XXX: we do not save the bitblt state - we assume we do not save
        the state when the blitter is active */
+
+#ifdef USE_KVM
+    if (kvm_allowed) { /* XXX: KVM images ought to be loadable in QEMU */
+	qemu_put_be32s(f, &s->real_vram_size);
+	qemu_put_buffer(f, s->vram_ptr, s->real_vram_size);
+    }
+#endif
 }
 
 static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id)
@@ -2995,6 +3089,22 @@ static int cirrus_vga_load(QEMUFile *f, 
 
     qemu_get_be32s(f, &s->hw_cursor_x);
     qemu_get_be32s(f, &s->hw_cursor_y);
+
+#ifdef USE_KVM
+    if (kvm_allowed) {
+        int real_vram_size;
+        qemu_get_be32s(f, &real_vram_size);
+        if (real_vram_size != s->real_vram_size) { /* log the saved size before clamping it */
+            printf("%s: REAL_VRAM_SIZE MISMATCH !!!!!! SAVED=%d CURRENT=%d\n",
+                   __FUNCTION__, real_vram_size, s->real_vram_size);
+            if (real_vram_size > s->real_vram_size)
+                real_vram_size = s->real_vram_size; /* never read past the current buffer */
+        }
+        qemu_get_buffer(f, s->vram_ptr, real_vram_size);
+        cirrus_update_memory_access(s);
+    }
+#endif
+
 
     /* force refresh */
     s->graphic_mode = -1;
@@ -3151,6 +3261,13 @@ static void cirrus_pci_lfb_map(PCIDevice
     /* XXX: add byte swapping apertures */
     cpu_register_physical_memory(addr, s->vram_size,
 				 s->cirrus_linear_io_addr);
+#ifdef USE_KVM
+    if (kvm_allowed) {
+	s->cirrus_lfb_addr = addr;
+	s->cirrus_lfb_end = addr + VGA_RAM_SIZE;
+    }
+#endif
+
     cpu_register_physical_memory(addr + 0x1000000, 0x400000,
 				 s->cirrus_linear_bitblt_io_addr);
 }
diff -r 2eac80033ea0 hw/pc.c
--- a/hw/pc.c	Tue Dec 19 09:31:34 2006 +0000
+++ b/hw/pc.c	Wed Dec 20 23:01:25 2006 -0600
@@ -22,6 +22,10 @@
  * THE SOFTWARE.
  */
 #include "vl.h"
+#ifdef USE_KVM
+#include "qemu-kvm.h"
+extern int kvm_allowed;
+#endif
 
 /* output Bochs bios info messages */
 //#define DEBUG_BIOS
@@ -289,6 +293,21 @@ static uint32_t ioport92_read(void *opaq
 /***********************************************************/
 /* Bochs BIOS debug ports */
 
+static uint32_t bochs_bios_read(void *opaque, uint32_t addr)
+{
+    switch (addr) {
+    case 0x402: /* SMI enable: reads 0 when KVM is in use, 1 otherwise */
+#ifdef USE_KVM
+	if (kvm_allowed)
+	    return 0;
+	else
+#endif
+	    return 1;
+    }
+    /* any other port reads back as 0xFF */
+    return 0xFF;
+}
+
 void bochs_bios_write(void *opaque, uint32_t addr, uint32_t val)
 {
     static const char shutdown_str[8] = "Shutdown";
@@ -335,6 +354,8 @@ void bochs_bios_write(void *opaque, uint
 
 void bochs_bios_init(void)
 {
+    register_ioport_read(0x402, 1, 1, bochs_bios_read, NULL);
+
     register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL);
     register_ioport_write(0x401, 1, 2, bochs_bios_write, NULL);
     register_ioport_write(0x402, 1, 1, bochs_bios_write, NULL);
@@ -442,6 +463,11 @@ static void pc_init_ne2k_isa(NICInfo *nd
     nb_ne2k++;
 }
 
+#ifdef USE_KVM
+extern kvm_context_t kvm_context;
+extern int kvm_allowed;
+#endif
+
 /* PC hardware initialisation */
 static void pc_init1(int ram_size, int vga_ram_size, int boot_device,
                      DisplayState *ds, const char **fd_filename, int snapshot,
@@ -508,6 +534,11 @@ static void pc_init1(int ram_size, int v
     /* setup basic memory access */
     cpu_register_physical_memory(0xc0000, 0x10000, 
                                  vga_bios_offset | IO_MEM_ROM);
+#ifdef USE_KVM
+    if (kvm_allowed)
+	    memcpy(phys_ram_base + 0xc0000, phys_ram_base + vga_bios_offset,
+		   0x10000);
+#endif
 
     /* map the last 128KB of the BIOS in ISA space */
     isa_bios_size = bios_size;
@@ -518,9 +549,28 @@ static void pc_init1(int ram_size, int v
     cpu_register_physical_memory(0x100000 - isa_bios_size, 
                                  isa_bios_size, 
                                  (bios_offset + bios_size - isa_bios_size) | IO_MEM_ROM);
+#ifdef USE_KVM
+    if (kvm_allowed)
+	    memcpy(phys_ram_base + 0x100000 - isa_bios_size,
+		   phys_ram_base + (bios_offset + bios_size - isa_bios_size),
+		   isa_bios_size);
+#endif
     /* map all the bios at the top of memory */
     cpu_register_physical_memory((uint32_t)(-bios_size), 
                                  bios_size, bios_offset | IO_MEM_ROM);
+#ifdef USE_KVM
+    if (kvm_allowed) {
+	    bios_mem = kvm_create_phys_mem(kvm_context, (uint32_t)(-bios_size),
+					   bios_size, 2, 0, 1);
+	    if (!bios_mem)
+		    exit(1);
+	    memcpy(bios_mem, phys_ram_base + bios_offset, bios_size);
+
+	    cpu_register_physical_memory(phys_ram_size - KVM_EXTRA_PAGES * 4096, KVM_EXTRA_PAGES * 4096,
+					 (phys_ram_size - KVM_EXTRA_PAGES * 4096) | IO_MEM_ROM);
+    }
+    
+#endif
     
     bochs_bios_init();
 
diff -r 2eac80033ea0 hw/usb-hid.c
--- a/hw/usb-hid.c	Tue Dec 19 09:31:34 2006 +0000
+++ b/hw/usb-hid.c	Wed Dec 20 23:01:25 2006 -0600
@@ -169,7 +169,9 @@ static const uint8_t qemu_tablet_config_
 	0x81,       /*  u8  ep_bEndpointAddress; IN Endpoint 1 */
  	0x03,       /*  u8  ep_bmAttributes; Interrupt */
  	0x08, 0x00, /*  u16 ep_wMaxPacketSize; */
-	0x03,       /*  u8  ep_bInterval; (255ms -- usb 2.0 spec) */
+	/* Temporarily increase usb polling interval to prevent cpu
+	 * saturation (3 ms is way too often for kvm) */
+	0x33,       /*  u8  ep_bInterval; (255ms -- usb 2.0 spec) */
 };
 
 static const uint8_t qemu_mouse_hid_report_descriptor[] = {
diff -r 2eac80033ea0 hw/vga.c
--- a/hw/vga.c	Tue Dec 19 09:31:34 2006 +0000
+++ b/hw/vga.c	Wed Dec 20 23:01:25 2006 -0600
@@ -1373,6 +1373,26 @@ void vga_invalidate_scanlines(VGAState *
     }
 }
 
+#ifdef USE_KVM
+
+#include "kvmctl.h"
+extern kvm_context_t kvm_context;
+/* Return bit nr (0 or 1) of a bitmap stored as an array of unsigned long words. */
+static int bitmap_get_dirty(unsigned long *bitmap, unsigned nr)
+{
+    unsigned word = nr / ((sizeof bitmap[0]) * 8); /* index of the word holding the bit */
+    unsigned bit = nr % ((sizeof bitmap[0]) * 8); /* position of the bit inside that word */
+
+    //printf("%x -> %ld\n", nr, (bitmap[word] >> bit) & 1);
+    return (bitmap[word] >> bit) & 1;
+}
+
+#endif
+
+#ifdef USE_KVM
+extern int kvm_allowed;
+#endif
+
 /* 
  * graphic modes
  */
@@ -1385,6 +1405,16 @@ static void vga_draw_graphic(VGAState *s
     uint32_t v, addr1, addr;
     vga_draw_line_func *vga_draw_line;
     
+#ifdef USE_KVM
+
+    /* HACK ALERT */
+#define BITMAP_SIZE ((8*1024*1024) / 4096 / 8 / sizeof(long))
+    unsigned long bitmap[BITMAP_SIZE];
+
+    if (kvm_allowed)
+	    kvm_get_dirty_pages(kvm_context, 1, &bitmap);
+#endif
+
     full_update |= update_basic_params(s);
 
     s->get_resolution(s, &width, &height);
@@ -1491,10 +1521,20 @@ static void vga_draw_graphic(VGAState *s
         update = full_update | 
             cpu_physical_memory_get_dirty(page0, VGA_DIRTY_FLAG) |
             cpu_physical_memory_get_dirty(page1, VGA_DIRTY_FLAG);
+#ifdef USE_KVM
+	if (kvm_allowed) {
+		update |= bitmap_get_dirty(bitmap, (page0 - s->vram_offset) >> TARGET_PAGE_BITS);
+		update |= bitmap_get_dirty(bitmap, (page1 - s->vram_offset) >> TARGET_PAGE_BITS);
+	}
+#endif
         if ((page1 - page0) > TARGET_PAGE_SIZE) {
             /* if wide line, can use another page */
             update |= cpu_physical_memory_get_dirty(page0 + TARGET_PAGE_SIZE, 
                                                     VGA_DIRTY_FLAG);
+#ifdef USE_KVM
+            if (kvm_allowed) /* wide line: test the page after page0, like the non-KVM check just above */
+                update |= bitmap_get_dirty(bitmap, (page0 + TARGET_PAGE_SIZE - s->vram_offset) >> TARGET_PAGE_BITS);
+#endif
         }
         /* explicit invalidation for the hardware cursor */
         update |= (s->invalidated_y_table[y >> 5] >> (y & 0x1f)) & 1;
@@ -1751,6 +1791,7 @@ static void vga_map(PCIDevice *pci_dev, 
     }
 }
 
+/* when used on xen/kvm environment, the vga_ram_base is not used */
 void vga_common_init(VGAState *s, DisplayState *ds, uint8_t *vga_ram_base, 
                      unsigned long vga_ram_offset, int vga_ram_size)
 {
@@ -1781,7 +1822,14 @@ void vga_common_init(VGAState *s, Displa
 
     vga_reset(s);
 
+#ifndef USE_KVM
     s->vram_ptr = vga_ram_base;
+#else
+    if (kvm_allowed)
+	    s->vram_ptr = qemu_malloc(vga_ram_size);
+    else
+	    s->vram_ptr = vga_ram_base;
+#endif
     s->vram_offset = vga_ram_offset;
     s->vram_size = vga_ram_size;
     s->ds = ds;
@@ -1909,6 +1957,31 @@ int pci_vga_init(PCIBus *bus, DisplaySta
     return 0;
 }
 
+void *vga_update_vram(VGAState *s, void *vga_ram_base, int vga_ram_size)
+{
+    uint8_t *old_pointer;
+    /* Switch s->vram_ptr to vga_ram_base, copying the current contents across. */
+    if (s->vram_size != vga_ram_size) {
+        fprintf(stderr, "No support to change vga_ram_size\n");
+        return NULL;
+    }
+    /* a NULL vga_ram_base asks for a fresh qemu_malloc'd buffer */
+    if (!vga_ram_base) {
+        vga_ram_base = qemu_malloc(vga_ram_size);
+        if (!vga_ram_base) {
+            fprintf(stderr, "reallocate error\n");
+            return NULL;
+        }
+    }
+
+    /* XXX lock needed? */
+    memcpy(vga_ram_base, s->vram_ptr, vga_ram_size);
+    old_pointer = s->vram_ptr;
+    s->vram_ptr = vga_ram_base;
+    /* hand the previous buffer back; releasing it is left to the caller */
+    return old_pointer;
+}
+
 /********************************************************/
 /* vga screen dump */
 
diff -r 2eac80033ea0 hw/vga_int.h
--- a/hw/vga_int.h	Tue Dec 19 09:31:34 2006 +0000
+++ b/hw/vga_int.h	Wed Dec 20 23:01:25 2006 -0600
@@ -174,5 +174,6 @@ void vga_draw_cursor_line_32(uint8_t *d1
                              unsigned int color0, unsigned int color1,
                              unsigned int color_xor);
 
+void *vga_update_vram(VGAState *s, void *vga_ram_base, int vga_ram_size);
 extern const uint8_t sr_mask[8];
 extern const uint8_t gr_mask[16];
diff -r 2eac80033ea0 pc-bios/bios.bin
Binary file pc-bios/bios.bin has changed
diff -r 2eac80033ea0 pc-bios/bios.diff
--- a/pc-bios/bios.diff	Tue Dec 19 09:31:34 2006 +0000
+++ b/pc-bios/bios.diff	Wed Dec 20 23:01:47 2006 -0600
@@ -4,7 +4,7 @@ retrieving revision 1.3
 retrieving revision 1.3
 diff -u -w -r1.3 rombios.h
 --- rombios.h	3 Oct 2006 20:27:30 -0000	1.3
-+++ rombios.h	1 Nov 2006 19:16:34 -0000
++++ rombios.h	21 Dec 2006 04:57:58 -0000
 @@ -19,7 +19,7 @@
  //  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
  
@@ -20,14 +20,22 @@ retrieving revision 1.8
 retrieving revision 1.8
 diff -u -w -r1.8 rombios32.c
 --- rombios32.c	3 Oct 2006 20:27:30 -0000	1.8
-+++ rombios32.c	1 Nov 2006 19:16:34 -0000
-@@ -852,6 +852,11 @@
++++ rombios32.c	21 Dec 2006 04:57:59 -0000
+@@ -757,6 +757,7 @@
+         pci_config_writeb(d, 0x80, 0x01); /* enable PM io space */
+         pm_sci_int = pci_config_readb(d, PCI_INTERRUPT_LINE);
+ #ifdef BX_USE_SMM
++	if (inb(INFO_PORT) == 1)
+         smm_init(d);
+ #endif
+         acpi_enabled = 1;
+@@ -852,6 +853,11 @@
      int ioapic_id, i, len;
      int mp_config_table_size;
  
 +#ifdef BX_QEMU
 +    if (smp_cpus <= 1)
-+        return;
++	return;
 +#endif
 +
  #ifdef BX_USE_EBDA_TABLES
diff -r 2eac80033ea0 target-i386/cpu.h
--- a/target-i386/cpu.h	Tue Dec 19 09:31:34 2006 +0000
+++ b/target-i386/cpu.h	Wed Dec 20 23:01:25 2006 -0600
@@ -155,14 +155,18 @@
 #define HF_MP_MASK           (1 << HF_MP_SHIFT)
 #define HF_EM_MASK           (1 << HF_EM_SHIFT)
 #define HF_TS_MASK           (1 << HF_TS_SHIFT)
+#define HF_IOPL_MASK         (3 << HF_IOPL_SHIFT)
 #define HF_LMA_MASK          (1 << HF_LMA_SHIFT)
 #define HF_CS64_MASK         (1 << HF_CS64_SHIFT)
 #define HF_OSFXSR_MASK       (1 << HF_OSFXSR_SHIFT)
+#define HF_VM_MASK           (1 << HF_VM_SHIFT)
 #define HF_HALTED_MASK       (1 << HF_HALTED_SHIFT)
 #define HF_SMM_MASK          (1 << HF_SMM_SHIFT)
 
-#define CR0_PE_MASK  (1 << 0)
-#define CR0_MP_MASK  (1 << 1)
+#define CR0_PE_SHIFT 0
+#define CR0_PE_MASK  (1 << CR0_PE_SHIFT)
+#define CR0_MP_SHIFT 1
+#define CR0_MP_MASK  (1 << CR0_MP_SHIFT)
 #define CR0_EM_MASK  (1 << 2)
 #define CR0_TS_MASK  (1 << 3)
 #define CR0_ET_MASK  (1 << 4)
@@ -179,7 +183,8 @@
 #define CR4_PAE_MASK  (1 << 5)
 #define CR4_PGE_MASK  (1 << 7)
 #define CR4_PCE_MASK  (1 << 8)
-#define CR4_OSFXSR_MASK (1 << 9)
+#define CR4_OSFXSR_SHIFT 9
+#define CR4_OSFXSR_MASK (1 << CR4_OSFXSR_SHIFT)
 #define CR4_OSXMMEXCPT_MASK  (1 << 10)
 
 #define PG_PRESENT_BIT	0
@@ -490,6 +495,9 @@ typedef struct CPUX86State {
     target_ulong kernelgsbase;
 #endif
 
+#ifdef USE_KVM
+    uint64_t tsc; /* time stamp counter */
+#endif
     uint64_t pat;
 
     /* temporary data for USE_CODE_COPY mode */
@@ -528,6 +536,13 @@ typedef struct CPUX86State {
     int kqemu_enabled;
     int last_io_time;
 #endif
+
+#ifdef USE_KVM
+#define BITS_PER_LONG (8 * sizeof (long))
+#define NR_IRQ_WORDS (256/ BITS_PER_LONG)
+    unsigned long kvm_interrupt_bitmap[NR_IRQ_WORDS];
+#endif
+
     /* in order to simplify APIC support, we leave this pointer to the
        user */
     struct APICState *apic_state;
diff -r 2eac80033ea0 target-i386/helper.c
--- a/target-i386/helper.c	Tue Dec 19 09:31:34 2006 +0000
+++ b/target-i386/helper.c	Wed Dec 20 23:01:25 2006 -0600
@@ -18,7 +18,9 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 #include "exec.h"
-
+#ifdef USE_KVM
+extern int kvm_allowed;
+#endif
 //#define DEBUG_PCALL
 
 #if 0
@@ -184,7 +186,15 @@ static inline void get_ss_esp_from_tss(u
     if (!(env->tr.flags & DESC_P_MASK))
         cpu_abort(env, "invalid tss");
     type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+#ifdef USE_KVM
+    /*
+     * Bit 1 of the type is the Busy bit.  We believe it is legal to interrupt
+     * into a busy TSS, so under KVM ignore that bit; otherwise require it clear.
+     */
+    if (kvm_allowed ? (type & 5) != 1 : (type & 7) != 1)
+#else
     if ((type & 7) != 1)
+#endif
         cpu_abort(env, "invalid tss type");
     shift = type >> 3;
     index = (dpl * 4 + 2) << shift;
@@ -497,7 +507,12 @@ static inline void check_io(int addr, in
     
     /* TSS must be a valid 32 bit one */
     if (!(env->tr.flags & DESC_P_MASK) ||
+#ifdef USE_KVM
+	/* Probable qemu bug: 11 is a valid segment type */
+        ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xd) != 9 ||
+#else
         ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 9 ||
+#endif
         env->tr.limit < 103)
         goto fail;
     io_offset = lduw_kernel(env->tr.base + 0x66);
@@ -839,6 +854,13 @@ static void do_interrupt64(int intno, in
     uint32_t e1, e2, e3, ss;
     target_ulong old_eip, esp, offset;
 
+#ifdef USE_KVM
+    if (kvm_allowed) {
+	    printf("%s: unexpect\n", __FUNCTION__);
+	    exit(-1);
+    }
+#endif
+
     has_error_code = 0;
     if (!is_int && !is_hw) {
         switch(intno) {
@@ -1122,6 +1144,12 @@ void do_interrupt_user(int intno, int is
     int dpl, cpl;
     uint32_t e2;
 
+#ifdef USE_KVM
+    if (kvm_allowed) {
+	    printf("%s: unexpect\n", __FUNCTION__);
+	    exit(-1);
+    }
+#endif
     dt = &env->idt;
     ptr = dt->base + (intno * 8);
     e2 = ldl_kernel(ptr + 4);
@@ -1147,6 +1175,12 @@ void do_interrupt(int intno, int is_int,
 void do_interrupt(int intno, int is_int, int error_code, 
                   target_ulong next_eip, int is_hw)
 {
+#ifdef USE_KVM
+    if (kvm_allowed) {
+	printf("%s: unexpect\n", __FUNCTION__);
+	exit(-1);
+    }
+#endif
     if (loglevel & CPU_LOG_INT) {
         if ((env->cr[0] & CR0_PE_MASK)) {
             static int count;
@@ -1958,6 +1992,12 @@ void helper_ljmp_protected_T0_T1(int nex
         cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
                        get_seg_base(e1, e2), limit, e2);
         EIP = new_eip;
+#ifdef USE_KVM
+        if (kvm_allowed && (e2 & DESC_L_MASK)) {
+            env->exception_index = -1;
+            cpu_loop_exit();   
+        }       
+#endif
     } else {
         /* jump to call or task gate */
         dpl = (e2 >> DESC_DPL_SHIFT) & 3;
diff -r 2eac80033ea0 vl.c
--- a/vl.c	Tue Dec 19 09:31:34 2006 +0000
+++ b/vl.c	Wed Dec 20 23:01:25 2006 -0600
@@ -87,6 +87,10 @@
 #include "disas.h"
 
 #include "exec-all.h"
+
+#if USE_KVM
+#include "qemu-kvm.h"
+#endif
 
 #define DEFAULT_NETWORK_SCRIPT "/etc/qemu-ifup"
 
@@ -5318,6 +5322,10 @@ static void ram_save(QEMUFile *f, void *
     if (ram_compress_open(s, f) < 0)
         return;
     for(i = 0; i < phys_ram_size; i+= BDRV_HASH_BLOCK_SIZE) {
+#ifdef USE_KVM
+        if (kvm_allowed && (i>=0xa0000) && (i<0xc0000)) /* do not access video-addresses */
+            continue;
+#endif
 #if 0
         if (tight_savevm_enabled) {
             int64_t sector_num;
@@ -5367,6 +5375,10 @@ static int ram_load(QEMUFile *f, void *o
     if (ram_decompress_open(s, f) < 0)
         return -EINVAL;
     for(i = 0; i < phys_ram_size; i+= BDRV_HASH_BLOCK_SIZE) {
+#ifdef USE_KVM
+	if (kvm_allowed && (i>=0xa0000) && (i<0xc0000)) /* do not access video-addresses */
+	    continue;
+#endif
         if (ram_decompress_buf(s, buf, 1) < 0) {
             fprintf(stderr, "Error while reading ram block header\n");
             goto error;
@@ -5727,6 +5739,9 @@ void main_loop_wait(int timeout)
             if (FD_ISSET(ioh->fd, &rfds)) {
                 ioh->fd_read(ioh->opaque);
             }
+        }
+        for(ioh = first_io_handler; ioh != NULL; ioh = ioh_next) {
+            ioh_next = ioh->next;
             if (FD_ISSET(ioh->fd, &wfds)) {
                 ioh->fd_write(ioh->opaque);
             }
@@ -5930,6 +5945,9 @@ void help(void)
 #ifdef USE_KQEMU
            "-kernel-kqemu   enable KQEMU full virtualization (default is user mode only)\n"
            "-no-kqemu       disable KQEMU kernel module usage\n"
+#endif
+#ifdef USE_KVM
+	   "-no-kvm         disable KVM hardware virtualization\n"
 #endif
 #ifdef USE_CODE_COPY
            "-no-code-copy   disable code copy acceleration\n"
@@ -6022,6 +6040,7 @@ enum {
     QEMU_OPTION_vnc,
     QEMU_OPTION_no_acpi,
     QEMU_OPTION_no_reboot,
+    QEMU_OPTION_no_kvm,
 };
 
 typedef struct QEMUOption {
@@ -6077,6 +6096,9 @@ const QEMUOption qemu_options[] = {
 #ifdef USE_KQEMU
     { "no-kqemu", 0, QEMU_OPTION_no_kqemu },
     { "kernel-kqemu", 0, QEMU_OPTION_kernel_kqemu },
+#endif
+#ifdef USE_KVM
+    { "no-kvm", 0, QEMU_OPTION_no_kvm },
 #endif
 #if defined(TARGET_PPC) || defined(TARGET_SPARC)
     { "g", 1, QEMU_OPTION_g },
@@ -6720,6 +6742,11 @@ int main(int argc, char **argv)
                 kqemu_allowed = 2;
                 break;
 #endif
+#ifdef USE_KVM
+	    case QEMU_OPTION_no_kvm:
+		kvm_allowed = 0;
+		break;
+#endif
             case QEMU_OPTION_usb:
                 usb_enabled = 1;
                 break;
@@ -6758,6 +6785,15 @@ int main(int argc, char **argv)
         }
     }
 
+#if USE_KVM
+    if (kvm_allowed) {
+	if (kvm_qemu_init() < 0) {
+	    fprintf(stderr, "Could not initialize KVM, will disable KVM support\n");
+	    kvm_allowed = 0;
+	}
+    }
+#endif
+
 #ifdef USE_KQEMU
     if (smp_cpus > 1)
         kqemu_allowed = 0;
@@ -6805,12 +6841,28 @@ int main(int argc, char **argv)
 
     /* init the memory */
     phys_ram_size = ram_size + vga_ram_size + bios_size;
-
+#if USE_KVM
+    /* Initialize kvm */
+    if (kvm_allowed) {
+	    phys_ram_size += KVM_EXTRA_PAGES * 4096;
+	    if (kvm_qemu_create_context() < 0) {
+		    fprintf(stderr, "Could not create KVM context\n");
+		    exit(1);
+	    }
+    } else {
+	    phys_ram_base = qemu_vmalloc(phys_ram_size);
+	    if (!phys_ram_base) {
+		    fprintf(stderr, "Could not allocate physical memory\n");
+		    exit(1);
+	    }
+    }
+#else
     phys_ram_base = qemu_vmalloc(phys_ram_size);
     if (!phys_ram_base) {
         fprintf(stderr, "Could not allocate physical memory\n");
         exit(1);
     }
+#endif
 
     /* we always create the cdrom drive, even if no disk is there */
     bdrv_init();
diff -r 2eac80033ea0 vl.h
--- a/vl.h	Tue Dec 19 09:31:34 2006 +0000
+++ b/vl.h	Wed Dec 20 23:01:25 2006 -0600
@@ -149,6 +149,7 @@ extern int graphic_depth;
 extern int graphic_depth;
 extern const char *keyboard_layout;
 extern int kqemu_allowed;
+extern int kvm_allowed;
 extern int win2k_install_hack;
 extern int usb_enabled;
 extern int smp_cpus;
@@ -161,6 +162,10 @@ extern int no_quit;
 #define BIOS_SIZE (128 * 1024)
 #else
 #define BIOS_SIZE ((256 + 64) * 1024)
+#endif
+
+#if USE_KVM
+#define KVM_EXTRA_PAGES 3
 #endif
 
 /* keyboard/mouse support */
diff -r 2eac80033ea0 qemu-kvm.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qemu-kvm.c	Wed Dec 20 23:03:49 2006 -0600
@@ -0,0 +1,648 @@
+
+#include "config.h"
+#include "config-host.h"
+
+#ifdef USE_KVM
+
+#include "exec.h"
+
+#include "qemu-kvm.h"
+#include <kvmctl.h>
+#include <string.h>
+
+#define MSR_IA32_TSC		0x10 /* MSR index paired with env->tsc in load_regs()/get_msr_entry() */
+
+extern void perror(const char *s);
+
+int kvm_allowed = 1; /* on by default; vl.c clears it for QEMU_OPTION_no_kvm and when kvm_qemu_init() fails */
+kvm_context_t kvm_context; /* handle returned by kvm_init(); passed to the kvmctl calls below */
+static struct kvm_msr_list *kvm_msr_list; /* result of kvm_get_msr_list(), fetched in kvm_qemu_create_context() */
+static int kvm_has_msr_star; /* set to 1 when MSR_STAR appears in kvm_msr_list */
+
+#define NR_CPU 16
+static CPUState *saved_env[NR_CPU]; /* given to kvm_init() as the callbacks' opaque pointer; only slot 0 is ever filled */
+
+static void set_msr_entry(struct kvm_msr_entry *entry, uint32_t index,
+                          uint64_t data)
+{   /* Record an MSR number and the value that goes with it in *entry. */
+    entry->data  = data;
+    entry->index = index;
+}
+
+/* Copy entry->data into the CPUState field matching entry->index; returns 0 on success, 1 (after a warning) for an unhandled index */
+static int get_msr_entry(struct kvm_msr_entry *entry, CPUState *env)
+{
+        switch (entry->index) {
+        case MSR_IA32_SYSENTER_CS:  
+            env->sysenter_cs  = entry->data;
+            break;
+        case MSR_IA32_SYSENTER_ESP:
+            env->sysenter_esp = entry->data;
+            break;
+        case MSR_IA32_SYSENTER_EIP:
+            env->sysenter_eip = entry->data;
+            break;
+        case MSR_STAR:
+            env->star         = entry->data;
+            break;
+#ifdef TARGET_X86_64
+        case MSR_CSTAR:
+            env->cstar        = entry->data;
+            break;
+        case MSR_KERNELGSBASE:
+            env->kernelgsbase = entry->data;
+            break;
+        case MSR_FMASK:
+            env->fmask        = entry->data;
+            break;
+        case MSR_LSTAR:
+            env->lstar        = entry->data;
+            break;
+#endif
+        case MSR_IA32_TSC:
+            env->tsc          = entry->data;
+            break;
+        default: /* index not handled above: warn and report failure; save_regs() stops at the first such entry */
+            printf("Warning unknown msr index 0x%x\n", entry->index);
+            return 1;
+        }
+        return 0;
+}
+
+#ifdef TARGET_X86_64
+#define MSR_COUNT 9 /* SYSENTER_CS/ESP/EIP, STAR, TSC plus CSTAR, KERNELGSBASE, FMASK, LSTAR */
+#else
+#define MSR_COUNT 5 /* SYSENTER_CS/ESP/EIP, STAR, TSC: the most load_regs()/save_regs() ever queue */
+#endif
+
+static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
+{   /* Used by load_regs() when eflags has VM_MASK set: copy selector/base/limit, force every attribute to a constant. */
+    lhs->selector = rhs->selector;
+    lhs->base = rhs->base;
+    lhs->limit = rhs->limit;
+    lhs->type = 3; /* presumably "read/write data, accessed" in x86 descriptor encoding -- TODO confirm */
+    lhs->present = 1;
+    lhs->dpl = 3;
+    lhs->db = 0;
+    lhs->s = 1;
+    lhs->l = 0;
+    lhs->g = 0;
+    lhs->avl = 0;
+    lhs->unusable = 0;
+}
+
+static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
+{   /* Translate a QEMU SegmentCache into a kvm_segment by unpacking the attribute bits kept in rhs->flags. */
+    unsigned flags = rhs->flags;
+    lhs->selector = rhs->selector;
+    lhs->base = rhs->base;
+    lhs->limit = rhs->limit;
+    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
+    lhs->present = (flags & DESC_P_MASK) != 0;
+    lhs->dpl = rhs->selector & 3; /* NOTE(review): low two selector bits, not the DPL bits of flags that get_seg() writes -- confirm intended */
+    lhs->db = (flags >> DESC_B_SHIFT) & 1;
+    lhs->s = (flags & DESC_S_MASK) != 0;
+    lhs->l = (flags >> DESC_L_SHIFT) & 1;
+    lhs->g = (flags & DESC_G_MASK) != 0;
+    lhs->avl = (flags & DESC_AVL_MASK) != 0;
+    lhs->unusable = 0; /* always marked usable, whatever the present bit says */
+}
+
+static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
+{   /* Reverse direction of set_seg(): pack kvm_segment attribute fields into SegmentCache flags; rhs->unusable is not read. */
+    lhs->selector = rhs->selector;
+    lhs->base = rhs->base;
+    lhs->limit = rhs->limit;
+    lhs->flags =
+	(rhs->type << DESC_TYPE_SHIFT)
+	| (rhs->present * DESC_P_MASK) /* multiplying by the mask sets it only when the field is 1 (assumes a 0/1 field) */
+	| (rhs->dpl << DESC_DPL_SHIFT)
+	| (rhs->db << DESC_B_SHIFT)
+	| (rhs->s * DESC_S_MASK)
+	| (rhs->l << DESC_L_SHIFT)
+	| (rhs->g * DESC_G_MASK)
+	| (rhs->avl * DESC_AVL_MASK);
+}
+
+static void load_regs(CPUState *env)
+{   /* Push QEMU's view of the CPU (GPRs, segment/control registers, MSRs) from env into KVM vcpu 0. */
+    struct kvm_regs regs = { 0 }; /* zeroed: r8..r15 are only assigned on TARGET_X86_64 builds */
+    struct kvm_sregs sregs;
+    struct kvm_msr_entry msrs[MSR_COUNT];
+    int rc, n;
+
+    /* hack: save env */
+    if (!saved_env[0])
+	saved_env[0] = env;
+
+    regs.rax = env->regs[R_EAX];
+    regs.rbx = env->regs[R_EBX];
+    regs.rcx = env->regs[R_ECX];
+    regs.rdx = env->regs[R_EDX];
+    regs.rsi = env->regs[R_ESI];
+    regs.rdi = env->regs[R_EDI];
+    regs.rsp = env->regs[R_ESP];
+    regs.rbp = env->regs[R_EBP];
+#ifdef TARGET_X86_64
+    regs.r8 = env->regs[8];
+    regs.r9 = env->regs[9];
+    regs.r10 = env->regs[10];
+    regs.r11 = env->regs[11];
+    regs.r12 = env->regs[12];
+    regs.r13 = env->regs[13];
+    regs.r14 = env->regs[14];
+    regs.r15 = env->regs[15];
+#endif
+
+    regs.rflags = env->eflags;
+    regs.rip = env->eip;
+
+    kvm_set_regs(kvm_context, 0, &regs); /* return value is not checked */
+
+    memcpy(sregs.interrupt_bitmap, env->kvm_interrupt_bitmap, sizeof(sregs.interrupt_bitmap));
+
+    if ((env->eflags & VM_MASK)) { /* VM flag set: use the fixed-attribute segment form */
+	    set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
+	    set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
+	    set_v8086_seg(&sregs.es, &env->segs[R_ES]);
+	    set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
+	    set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
+	    set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
+    } else {
+	    set_seg(&sregs.cs, &env->segs[R_CS]);
+	    set_seg(&sregs.ds, &env->segs[R_DS]);
+	    set_seg(&sregs.es, &env->segs[R_ES]);
+	    set_seg(&sregs.fs, &env->segs[R_FS]);
+	    set_seg(&sregs.gs, &env->segs[R_GS]);
+	    set_seg(&sregs.ss, &env->segs[R_SS]);
+
+	    if (env->cr[0] & CR0_PE_MASK) {
+		/* force ss cpl to cs cpl */
+		sregs.ss.selector = (sregs.ss.selector & ~3) |
+			(sregs.cs.selector & 3);
+		sregs.ss.dpl = sregs.ss.selector & 3;
+	    }
+    }
+
+    set_seg(&sregs.tr, &env->tr);
+    set_seg(&sregs.ldt, &env->ldt);
+
+    sregs.idt.limit = env->idt.limit;
+    sregs.idt.base = env->idt.base;
+    sregs.gdt.limit = env->gdt.limit;
+    sregs.gdt.base = env->gdt.base;
+
+    sregs.cr0 = env->cr[0];
+    sregs.cr2 = env->cr[2];
+    sregs.cr3 = env->cr[3];
+    sregs.cr4 = env->cr[4];
+    sregs.cr8 = cpu_get_apic_tpr(env);
+    sregs.apic_base = cpu_get_apic_base(env);
+    sregs.efer = env->efer;
+
+    kvm_set_sregs(kvm_context, 0, &sregs); /* return value is not checked */
+
+    /* msrs: at most MSR_COUNT entries are queued below */
+    n = 0;
+    set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_CS,  env->sysenter_cs);
+    set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
+    set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
+    if (kvm_has_msr_star) /* only when kvm_get_msr_list() reported MSR_STAR */
+	set_msr_entry(&msrs[n++], MSR_STAR,              env->star);
+    set_msr_entry(&msrs[n++], MSR_IA32_TSC, env->tsc);
+#ifdef TARGET_X86_64
+    set_msr_entry(&msrs[n++], MSR_CSTAR,             env->cstar);
+    set_msr_entry(&msrs[n++], MSR_KERNELGSBASE,      env->kernelgsbase);
+    set_msr_entry(&msrs[n++], MSR_FMASK,             env->fmask);
+    set_msr_entry(&msrs[n++], MSR_LSTAR  ,           env->lstar);
+#endif
+
+    rc = kvm_set_msrs(kvm_context, 0, msrs, n);
+    if (rc == -1)
+        perror("kvm_set_msrs FAILED");
+}
+
+static void save_regs(CPUState *env)
+{   /* Read vcpu 0's state back from KVM into env: GPRs, segment/control registers, rebuilt hflags, split eflags, MSRs. */
+    struct kvm_regs regs;
+    struct kvm_sregs sregs;
+    struct kvm_msr_entry msrs[MSR_COUNT];
+    uint32_t hflags;
+    int i, n, rc; /* signed: a negative kvm_get_msrs() result must never be used as an entry count */
+
+    kvm_get_regs(kvm_context, 0, &regs); /* return value is not checked */
+
+    env->regs[R_EAX] = regs.rax;
+    env->regs[R_EBX] = regs.rbx;
+    env->regs[R_ECX] = regs.rcx;
+    env->regs[R_EDX] = regs.rdx;
+    env->regs[R_ESI] = regs.rsi;
+    env->regs[R_EDI] = regs.rdi;
+    env->regs[R_ESP] = regs.rsp;
+    env->regs[R_EBP] = regs.rbp;
+#ifdef TARGET_X86_64
+    env->regs[8] = regs.r8;
+    env->regs[9] = regs.r9;
+    env->regs[10] = regs.r10;
+    env->regs[11] = regs.r11;
+    env->regs[12] = regs.r12;
+    env->regs[13] = regs.r13;
+    env->regs[14] = regs.r14;
+    env->regs[15] = regs.r15;
+#endif
+
+    env->eflags = regs.rflags;
+    env->eip = regs.rip;
+
+    kvm_get_sregs(kvm_context, 0, &sregs); /* return value is not checked */
+
+    memcpy(env->kvm_interrupt_bitmap, sregs.interrupt_bitmap, sizeof(env->kvm_interrupt_bitmap));
+
+    get_seg(&env->segs[R_CS], &sregs.cs);
+    get_seg(&env->segs[R_DS], &sregs.ds);
+    get_seg(&env->segs[R_ES], &sregs.es);
+    get_seg(&env->segs[R_FS], &sregs.fs);
+    get_seg(&env->segs[R_GS], &sregs.gs);
+    get_seg(&env->segs[R_SS], &sregs.ss);
+
+    get_seg(&env->tr, &sregs.tr);
+    get_seg(&env->ldt, &sregs.ldt);
+
+    env->idt.limit = sregs.idt.limit;
+    env->idt.base = sregs.idt.base;
+    env->gdt.limit = sregs.gdt.limit;
+    env->gdt.base = sregs.gdt.base;
+
+    env->cr[0] = sregs.cr0;
+    env->cr[2] = sregs.cr2;
+    env->cr[3] = sregs.cr3;
+    env->cr[4] = sregs.cr4;
+
+    cpu_set_apic_tpr(env, sregs.cr8);
+    cpu_set_apic_base(env, sregs.apic_base);
+
+    env->efer = sregs.efer;
+
+#define HFLAG_COPY_MASK ~( \
+			HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
+			HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
+			HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
+			HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
+
+    /* Recompute the hflags bits excluded by HFLAG_COPY_MASK from the state just read; all other bits are kept. */
+
+    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
+    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
+	    (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
+    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
+    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
+	    (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
+
+    if (env->efer & MSR_EFER_LMA) {
+        hflags |= HF_LMA_MASK;
+    }
+
+    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
+        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
+    } else {
+        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
+		(DESC_B_SHIFT - HF_CS32_SHIFT);
+        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
+		(DESC_B_SHIFT - HF_SS32_SHIFT);
+        if (!(env->cr[0] & CR0_PE_MASK) ||
+                   (env->eflags & VM_MASK) ||
+                   !(hflags & HF_CS32_MASK)) {
+                hflags |= HF_ADDSEG_MASK;
+            } else { /* ADDSEG only if DS, ES or SS has a non-zero base */
+                hflags |= ((env->segs[R_DS].base |
+                                env->segs[R_ES].base |
+                                env->segs[R_SS].base) != 0) <<
+                    HF_ADDSEG_SHIFT;
+            }
+    }
+    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
+    CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); /* condition-code bits go to CC_SRC */
+    DF = 1 - (2 * ((env->eflags >> 10) & 1)); /* eflags bit 10 clear -> +1, set -> -1 */
+    CC_OP = CC_OP_EFLAGS;
+    env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); /* those bits now live in DF/CC_SRC */
+
+    tlb_flush(env, 1);
+
+    /* msrs: request the same set load_regs() writes */
+    n = 0;
+    msrs[n++].index = MSR_IA32_SYSENTER_CS;
+    msrs[n++].index = MSR_IA32_SYSENTER_ESP;
+    msrs[n++].index = MSR_IA32_SYSENTER_EIP;
+    if (kvm_has_msr_star)
+	msrs[n++].index = MSR_STAR;
+    msrs[n++].index = MSR_IA32_TSC;
+#ifdef TARGET_X86_64
+    msrs[n++].index = MSR_CSTAR;
+    msrs[n++].index = MSR_KERNELGSBASE;
+    msrs[n++].index = MSR_FMASK;
+    msrs[n++].index = MSR_LSTAR;
+#endif
+    rc = kvm_get_msrs(kvm_context, 0, msrs, n);
+    if (rc < 0) { /* any negative result is a failure, not only -1 */
+        perror("kvm_get_msrs FAILED");
+    }
+    else {
+        n = rc; /* actual number of MSRs */
+        for (i=0 ; i<n; i++) {
+            if (get_msr_entry(&msrs[i], env))
+                return;
+        }
+    }
+}
+
+#include <signal.h>
+
+static int kvm_interrupt_pending(CPUState *env)
+{   /* Returns 1 if any word of env->kvm_interrupt_bitmap is non-zero, otherwise 0. */
+    int word = NR_IRQ_WORDS;
+
+    while (word-- > 0)
+	if (env->kvm_interrupt_bitmap[word] != 0)
+	    return 1;
+    return 0;
+}
+
+static inline void push_interrupts(CPUState *env)
+{   /* Called by kvm_cpu_exec() before kvm_run(): hand pending PIC interrupts to KVM, or leave the CPU loop if an exit was requested. */
+    if (!(env->interrupt_request & CPU_INTERRUPT_HARD) ||
+	!(env->eflags & IF_MASK) || kvm_interrupt_pending(env)) { /* nothing to inject: no request, IF clear, or bitmap already non-empty */
+    	if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) {
+	    env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
+	    env->exception_index = EXCP_INTERRUPT;
+	    cpu_loop_exit(); /* presumably does not return (unwinds to the CPU loop) -- TODO confirm */
+        }
+        return;
+    }
+
+    do {
+        env->interrupt_request &= ~CPU_INTERRUPT_HARD;
+
+        // for now using cpu 0 (vcpu index is hard-coded)
+	kvm_inject_irq(kvm_context, 0, cpu_get_pic_interrupt(env)); 
+    } while ( (env->interrupt_request & CPU_INTERRUPT_HARD) && (env->cr[0] & CR0_PG_MASK) ); /* repeats only if HARD was raised again and CR0.PG is set */
+}
+
+void kvm_load_registers(CPUState *env)
+{   /* Public entry point (declared in qemu-kvm.h): just calls the file-local load_regs(). */
+    load_regs(env);
+}
+
+int kvm_cpu_exec(CPUState *env)
+{   /* One KVM run: inject pending interrupts, run vcpu 0, then copy its state back into env. Always returns 0. */
+
+    push_interrupts(env);
+
+    if (!saved_env[0]) /* make env reachable from the kvm callbacks via their opaque pointer */
+	saved_env[0] = env;
+
+    kvm_run(kvm_context, 0); /* return value is discarded */
+
+    save_regs(env);
+
+    return 0;
+}
+
+
+static int kvm_cpuid(void *opaque, uint64_t *rax, uint64_t *rbx, 
+		      uint64_t *rcx, uint64_t *rdx)
+{   /* CPUID callback: run helper_cpuid() on the caller's register values and return the results through the same pointers. */
+    CPUState **envs = opaque;
+    CPUState *saved_env; /* NOTE: shadows the file-scope saved_env[] array inside this function */
+    uint32_t eax = *rax; /* requested leaf, kept because *rax is overwritten below */
+
+    saved_env = env; /* env is not declared here: an outer-scope CPU pointer, presumably what helper_cpuid() works on -- TODO confirm */
+    env = envs[0];
+
+    env->regs[R_EAX] = *rax;
+    env->regs[R_EBX] = *rbx;
+    env->regs[R_ECX] = *rcx;
+    env->regs[R_EDX] = *rdx;
+    helper_cpuid();
+    *rdx = env->regs[R_EDX];
+    *rcx = env->regs[R_ECX];
+    *rbx = env->regs[R_EBX];
+    *rax = env->regs[R_EAX];
+    // don't report long mode/syscall if no native support
+    if (eax == 0x80000001) {
+	unsigned long h_eax = eax, h_edx;
+
+
+	// push/pop hack to workaround gcc 3 register pressure trouble (rbx/rcx are saved by hand, not listed as clobbers)
+	asm (
+#ifdef __x86_64__
+	     "push %%rbx; push %%rcx; cpuid; pop %%rcx; pop %%rbx"
+#else
+	     "push %%ebx; push %%ecx; cpuid; pop %%ecx; pop %%ebx"
+#endif
+	     : "+a"(h_eax), "=d"(h_edx));
+
+	// long mode: clear bit 29 of the reported edx when the host's edx lacks it
+	if ((h_edx & 0x20000000) == 0)
+	    *rdx &= ~0x20000000ull;
+	// syscall: likewise for bit 11
+	if ((h_edx & 0x00000800) == 0)
+	    *rdx &= ~0x00000800ull;
+    }
+    env = saved_env; /* restore the outer env saved on entry */
+    return 0;
+}
+
+static int kvm_debug(void *opaque, int vcpu)
+{   /* Debug callback: sync state from KVM and flag EXCP_DEBUG; vcpu is unused; always returns 1. */
+    CPUState **envs = opaque;
+
+    env = envs[0]; /* NOTE(review): overwrites the outer-scope env without restoring it, unlike kvm_cpuid(); kvm_halt() uses a local instead */
+    save_regs(env);
+    env->exception_index = EXCP_DEBUG;
+    return 1;
+}
+
+static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
+{   /* 8-bit port-read callback: stores cpu_inb(0, addr) in *data; opaque is unused; always returns 0. */
+    *data = cpu_inb(0, addr);
+    return 0;
+}
+
+static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
+{   /* 16-bit port-read callback: stores cpu_inw(0, addr) in *data; opaque is unused; always returns 0. */
+    *data = cpu_inw(0, addr);
+    return 0;
+}
+
+static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
+{   /* 32-bit port-read callback: stores cpu_inl(0, addr) in *data; opaque is unused; always returns 0. */
+    *data = cpu_inl(0, addr);
+    return 0;
+}
+
+static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
+{   /* 8-bit port-write callback: forwards to cpu_outb(0, addr, data); opaque is unused; always returns 0. */
+    cpu_outb(0, addr, data);
+    return 0;
+}
+
+static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
+{   /* 16-bit port-write callback: forwards to cpu_outw(0, addr, data); opaque is unused; always returns 0. */
+    cpu_outw(0, addr, data);
+    return 0;
+}
+
+static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
+{   /* 32-bit port-write callback: forwards to cpu_outl(0, addr, data); opaque is unused; always returns 0. */
+    cpu_outl(0, addr, data);
+    return 0;
+}
+
+static int kvm_readb(void *opaque, uint64_t addr, uint8_t *data)
+{   /* 8-bit memory-read callback: stores ldub_phys(addr) in *data; opaque is unused; always returns 0. */
+    *data = ldub_phys(addr);
+    return 0;
+}
+ 
+static int kvm_readw(void *opaque, uint64_t addr, uint16_t *data)
+{   /* 16-bit memory-read callback: stores lduw_phys(addr) in *data; opaque is unused; always returns 0. */
+    *data = lduw_phys(addr);
+    return 0;
+}
+
+static int kvm_readl(void *opaque, uint64_t addr, uint32_t *data)
+{   /* 32-bit memory-read callback: stores ldl_phys(addr) in *data; opaque is unused; always returns 0. */
+    *data = ldl_phys(addr);
+    return 0;
+}
+
+static int kvm_readq(void *opaque, uint64_t addr, uint64_t *data)
+{   /* 64-bit memory-read callback: stores ldq_phys(addr) in *data; opaque is unused; always returns 0. */
+    *data = ldq_phys(addr);
+    return 0;
+}
+
+static int kvm_writeb(void *opaque, uint64_t addr, uint8_t data)
+{   /* 8-bit memory-write callback: forwards to stb_phys(addr, data); opaque is unused; always returns 0. */
+    stb_phys(addr, data);
+    return 0;
+}
+
+static int kvm_writew(void *opaque, uint64_t addr, uint16_t data)
+{   /* 16-bit memory-write callback: forwards to stw_phys(addr, data); opaque is unused; always returns 0. */
+    stw_phys(addr, data);
+    return 0;
+}
+
+static int kvm_writel(void *opaque, uint64_t addr, uint32_t data)
+{   /* 32-bit memory-write callback: forwards to stl_phys(addr, data); opaque is unused; always returns 0. */
+    stl_phys(addr, data);
+    return 0;
+}
+
+static int kvm_writeq(void *opaque, uint64_t addr, uint64_t data)
+{   /* 64-bit memory-write callback: forwards to stq_phys(addr, data); opaque is unused; always returns 0. */
+    stq_phys(addr, data);
+    return 0;
+}
+
+static int kvm_io_window(void *opaque)
+{   /* io_window callback: unconditionally returns 1; what kvmctl does with that value is not visible here -- TODO confirm. */
+    return 1;
+}
+
+ 
+static int kvm_halt(void *opaque, int vcpu)
+{   /* Halt callback: sync state from KVM, then mark the CPU halted unless an interrupt can be taken now; vcpu is unused; always returns 1. */
+    CPUState **envs = opaque, *env; /* local env, so the outer-scope env is left untouched */
+
+    env = envs[0];
+    save_regs(env);
+
+    if (!((kvm_interrupt_pending(env) || 
+	   (env->interrupt_request & CPU_INTERRUPT_HARD)) && 
+	  (env->eflags & IF_MASK))) { /* i.e. no interrupt queued or requested, or IF is clear */
+	    env->hflags |= HF_HALTED_MASK;
+	    env->exception_index = EXCP_HLT;
+    }
+    return 1;
+}
+ 
+static struct kvm_callbacks qemu_kvm_ops = { /* callback table handed to kvm_init() in kvm_qemu_init() */
+    .cpuid = kvm_cpuid,
+    .debug = kvm_debug,
+    .inb   = kvm_inb, /* port reads */
+    .inw   = kvm_inw,
+    .inl   = kvm_inl,
+    .outb  = kvm_outb, /* port writes */
+    .outw  = kvm_outw,
+    .outl  = kvm_outl,
+    .readb = kvm_readb, /* physical-memory reads via ld*_phys */
+    .readw = kvm_readw,
+    .readl = kvm_readl,
+    .readq = kvm_readq,
+    .writeb = kvm_writeb, /* physical-memory writes via st*_phys */
+    .writew = kvm_writew,
+    .writel = kvm_writel,
+    .writeq = kvm_writeq,
+    .halt  = kvm_halt,
+    .io_window = kvm_io_window,
+};
+
+int kvm_qemu_init(void)
+{   /* Create the global kvm_context; returns 0 on success, -1 if kvm_init() yields no context. */
+    /* Try to initialize kvm; saved_env becomes the opaque pointer every callback receives */
+    kvm_context = kvm_init(&qemu_kvm_ops, saved_env);
+    if (!kvm_context) {
+	return -1;
+    }
+
+    return 0;
+}
+
+int kvm_qemu_create_context(void)
+{   /* Create the VM (kvm_create fills phys_ram_base) and probe the MSR list for MSR_STAR; returns 0 on success, -1 after teardown on failure. */
+    int i;
+
+    if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
+	printf("qemu-kvm: kvm_create failed\n");
+	kvm_qemu_destroy();
+	return -1;
+    }
+    kvm_msr_list = kvm_get_msr_list(kvm_context);
+    if (!kvm_msr_list) {
+	printf("qemu-kvm: kvm_get_msr_list failed: %m\n"); /* %m is the glibc errno text; report before teardown can change errno */
+	kvm_qemu_destroy();
+	return -1;
+    }
+    for (i = 0; i < kvm_msr_list->nmsrs; ++i)
+	if (kvm_msr_list->indices[i] == MSR_STAR)
+	    kvm_has_msr_star = 1; /* load_regs()/save_regs() only touch MSR_STAR when this is set */
+    return 0;
+}
+
+void kvm_qemu_destroy(void)
+{   /* Release the global kvm_context via kvm_finalize(); also used on the failure paths of kvm_qemu_create_context(). */
+    kvm_finalize(kvm_context);
+}
+
+int kvm_update_debugger(CPUState *env)
+{   /* Send env's breakpoint/single-step settings to KVM for vcpu 0; returns kvm_guest_debug()'s result. */
+    struct kvm_debug_guest dbg;
+    int i;
+
+    memset(&dbg, 0, sizeof(dbg)); /* clears enabled, singlestep and every breakpoint slot not filled below */
+    if (env->nb_breakpoints || env->singlestep_enabled) {
+	dbg.enabled = 1;
+	for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) { /* at most the first 4 breakpoints are forwarded */
+	    dbg.breakpoints[i].enabled = 1;
+	    dbg.breakpoints[i].address = env->breakpoints[i];
+	}
+	dbg.singlestep = env->singlestep_enabled;
+    }
+    return kvm_guest_debug(kvm_context, 0, &dbg);
+}
+
+
+#endif
diff -r 2eac80033ea0 qemu-kvm.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qemu-kvm.h	Wed Dec 20 23:03:49 2006 -0600
@@ -0,0 +1,13 @@
+#ifndef QEMU_KVM_H
+#define QEMU_KVM_H
+
+#include "kvmctl.h"
+
+int kvm_qemu_init(void); /* create the global kvm context; 0 on success, -1 on failure */
+int kvm_qemu_create_context(void); /* create the VM and probe its MSR list; 0 on success, -1 on failure */
+void kvm_qemu_destroy(void); /* finalize the global kvm context */
+void kvm_load_registers(CPUState *env); /* copy env's register state into KVM; CPUState must already be declared by the includer */
+int kvm_cpu_exec(CPUState *env); /* inject interrupts, run the vcpu once, sync state back; returns 0 */
+int kvm_update_debugger(CPUState *env); /* push env's breakpoints/single-step setting to KVM */
+
+#endif

[-- Attachment #3: Type: text/plain, Size: 347 bytes --]

-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV

[-- Attachment #4: Type: text/plain, Size: 186 bytes --]

_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel

             reply	other threads:[~2006-12-21  5:17 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-12-21  5:17 Anthony Liguori [this message]
     [not found] ` <458A18F5.10108-NZpS4cJIG2HvQtjrzfazuQ@public.gmane.org>
2006-12-21  8:49   ` [PATCH] Support for QEMU's CVS Avi Kivity
     [not found]     ` <458A4AA7.1000705-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2006-12-21 14:17       ` James Morris
2006-12-21 16:20       ` Anthony Liguori
     [not found]         ` <458AB453.3030701-NZpS4cJIG2HvQtjrzfazuQ@public.gmane.org>
2006-12-21 16:45           ` Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=458A18F5.10108@cs.utexas.edu \
    --to=aliguori-nzps4cjig2hvqtjrzfazuq@public.gmane.org \
    --cc=kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox