All of lore.kernel.org
 help / color / mirror / Atom feed
* [Patch] Qemu map cache
@ 2006-12-04 17:33 Cui, Dexuan
  2006-12-04 18:26 ` Anthony Liguori
  2006-12-04 22:58 ` Anthony Liguori
  0 siblings, 2 replies; 8+ messages in thread
From: Cui, Dexuan @ 2006-12-04 17:33 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 495 bytes --]

On IA32 host or IA32 PAE host, at present, generally, we can't create an
HVM guest with more than 2G memory, because generally it's almost
impossible for Qemu to find a large enough and consecutive virtual
address space to map an HVM guest's whole physical address space.
The attached patch fixes this issue using dynamic mapping based on
little blocks of memory.

-- Dexuan Cui

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>


[-- Attachment #2: qemu-map-cache-2.diff --]
[-- Type: application/octet-stream, Size: 8322 bytes --]

diff -r fd28a1b139de tools/ioemu/cpu-defs.h
--- a/tools/ioemu/cpu-defs.h	Mon Dec 04 09:29:26 2006 +0000
+++ b/tools/ioemu/cpu-defs.h	Tue Dec 05 00:11:49 2006 +0800
@@ -28,6 +28,8 @@
 #ifndef TARGET_LONG_BITS
 #error TARGET_LONG_BITS must be defined before including this header
 #endif
+
+#define TARGET_PHYS_ADDR_BITS 64
 
 #ifndef TARGET_PHYS_ADDR_BITS 
 #if TARGET_LONG_BITS >= HOST_LONG_BITS
diff -r fd28a1b139de tools/ioemu/target-i386-dm/cpu.h
--- a/tools/ioemu/target-i386-dm/cpu.h	Mon Dec 04 09:29:26 2006 +0000
+++ b/tools/ioemu/target-i386-dm/cpu.h	Tue Dec 05 00:11:49 2006 +0800
@@ -25,7 +25,8 @@
 #ifdef TARGET_X86_64
 #define TARGET_LONG_BITS 64
 #else
-#define TARGET_LONG_BITS 32
+/* #define TARGET_LONG_BITS 32 */
+#define TARGET_LONG_BITS 64 /* for Qemu map cache */
 #endif
 
 /* target supports implicit self modifying code */
diff -r fd28a1b139de tools/ioemu/target-i386-dm/exec-dm.c
--- a/tools/ioemu/target-i386-dm/exec-dm.c	Mon Dec 04 09:29:26 2006 +0000
+++ b/tools/ioemu/target-i386-dm/exec-dm.c	Tue Dec 05 00:11:49 2006 +0800
@@ -36,6 +36,7 @@
 
 #include "cpu.h"
 #include "exec-all.h"
+#include "vl.h"
 
 //#define DEBUG_TB_INVALIDATE
 //#define DEBUG_FLUSH
@@ -426,6 +427,12 @@ static inline int paddr_is_ram(target_ph
 #endif
 }
 
+#if defined(__i386__) || defined(__x86_64__)
+#define phys_ram_addr(x) (qemu_map_cache(x))
+#elif defined(__ia64__)
+#define phys_ram_addr(x) (phys_ram_base + (x))
+#endif
+
 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, 
                             int len, int is_write)
 {
@@ -438,7 +445,7 @@ void cpu_physical_memory_rw(target_phys_
         l = TARGET_PAGE_SIZE - (addr & ~TARGET_PAGE_MASK); 
         if (l > len)
             l = len;
-	
+
         io_index = iomem_index(addr);
         if (is_write) {
             if (io_index) {
@@ -460,9 +467,10 @@ void cpu_physical_memory_rw(target_phys_
                 }
             } else if (paddr_is_ram(addr)) {
                 /* Reading from RAM */
-                memcpy(phys_ram_base + addr, buf, l);
+                ptr = phys_ram_addr(addr);
+                memcpy(ptr, buf, l);
 #ifdef __ia64__
-                sync_icache((unsigned long)(phys_ram_base + addr), l);
+                sync_icache(ptr, l);
 #endif 
             }
         } else {
@@ -485,7 +493,8 @@ void cpu_physical_memory_rw(target_phys_
                 }
             } else if (paddr_is_ram(addr)) {
                 /* Reading from RAM */
-                memcpy(buf, phys_ram_base + addr, l);
+                ptr = phys_ram_addr(addr);
+                memcpy(buf, ptr, l);
             } else {
                 /* Neither RAM nor known MMIO space */
                 memset(buf, 0xff, len); 
diff -r fd28a1b139de tools/ioemu/vl.c
--- a/tools/ioemu/vl.c	Mon Dec 04 09:29:26 2006 +0000
+++ b/tools/ioemu/vl.c	Tue Dec 05 00:11:49 2006 +0800
@@ -5808,6 +5808,101 @@ int set_mm_mapping(int xc_handle, uint32
     return 0;
 }
 
+static xen_pfn_t *page_array;
+
+#if defined(__i386__) || defined(__x86_64__)
+static struct map_cache *mapcache_entry;
+static unsigned long nr_buckets;
+
+static inline struct map_cache* get_hash_bucket(target_phys_addr_t addr)
+{
+    return &mapcache_entry[(addr >> MCACHE_BUCKET_SHIFT) % nr_buckets];
+}
+
+/*
+ * For most cases (>99.9%), the page address is the same.
+ */
+static target_phys_addr_t     last_bucket = ~0LL;
+static uint8_t               *last_bucket_mapped;
+
+#define likely(x)     __builtin_expect((x),1)
+
+static int qemu_map_cache_init(unsigned long nr_pages)
+{
+    unsigned long max_pages = MAX_MCACHE_SIZE >> PAGE_SHIFT;
+    int i;
+
+    if ( nr_pages < max_pages )
+        max_pages = nr_pages;
+
+    nr_buckets = (max_pages << PAGE_SHIFT) >> MCACHE_BUCKET_SHIFT;
+
+    fprintf(logfile, "qemu_map_cache_init nr_buckets = %lx\n", nr_buckets);
+
+    mapcache_entry = 
+        (struct map_cache *)malloc(nr_buckets * sizeof(struct map_cache));
+    if ( mapcache_entry == NULL )
+        return errno = ENOMEM;
+
+    memset(mapcache_entry, 0, nr_buckets * sizeof(struct map_cache));
+
+    /*
+     * To avoid ENOMEM from xc_map_foreign_batch() at runtime, we
+     * pre-fill all the map caches in advance.
+     */
+    for (i = 0; i < nr_buckets; i++)
+       (void)qemu_map_cache( ((target_phys_addr_t)i) << MCACHE_BUCKET_SHIFT );
+
+    return 0;
+}
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr)
+{
+    struct map_cache *entry;
+    target_phys_addr_t bucket = phys_addr &  MCACHE_BUCKET_MASK;
+    target_phys_addr_t offset = phys_addr & ~MCACHE_BUCKET_MASK;
+
+    if ( likely(bucket == last_bucket) )
+        return last_bucket_mapped + offset;
+
+    entry = get_hash_bucket(bucket);
+
+    if ( entry->mc_addr == 0 || entry->phys_addr != bucket )
+    { 
+         /* We need to remap the existing mapping. First unmap if existing 
+          * mapping is there. 
+          */
+        uint8_t *mc_addr;
+        unsigned long pfn = bucket >> PAGE_SHIFT;
+
+        if ( entry->mc_addr ) {
+            errno = munmap(entry->mc_addr, MCACHE_BUCKET_SIZE);
+            if ( errno ) {
+                fprintf(logfile, "unmap fails %d\n", errno);
+                exit(-1);
+            }
+        }
+
+        mc_addr = xc_map_foreign_batch(xc_handle, domid, PROT_READ|PROT_WRITE,
+                  &page_array[pfn], (MCACHE_BUCKET_SIZE >> PAGE_SHIFT));
+        if(mc_addr == 0) {
+            fprintf(logfile, "xc_map_foreign_batch returned error %d\n", errno);
+            exit(-1);
+        }
+
+        entry->mc_addr = mc_addr;
+        entry->phys_addr = bucket;
+        entry->map_count++;
+    }
+
+    last_bucket = bucket;
+    last_bucket_mapped = entry->mc_addr;
+
+    return last_bucket_mapped + offset;
+}
+#endif
+
+
 int main(int argc, char **argv)
 {
 #ifdef CONFIG_GDBSTUB
@@ -5842,7 +5937,6 @@ int main(int argc, char **argv)
     char usb_devices[MAX_USB_CMDLINE][128];
     int usb_devices_index;
     unsigned long nr_pages, tmp_nr_pages, shared_page_nr;
-    xen_pfn_t *page_array;
     extern void *shared_page;
     extern void *buffered_io_page;
 
@@ -6130,6 +6224,7 @@ int main(int argc, char **argv)
                 break;
             case QEMU_OPTION_m:
                 ram_size = atol(optarg) * 1024 * 1024;
+                ram_size = (uint64_t)atol(optarg) * 1024 * 1024;
                 if (ram_size <= 0)
                     help();
 #ifndef CONFIG_DM
@@ -6410,11 +6505,9 @@ int main(int argc, char **argv)
     for ( i = 0; i < tmp_nr_pages; i++)
         page_array[i] = i;
 
-    phys_ram_base = xc_map_foreign_batch(xc_handle, domid,
-                                         PROT_READ|PROT_WRITE, page_array,
-                                         tmp_nr_pages);
-    if (phys_ram_base == NULL) {
-        fprintf(logfile, "batch map guest memory returned error %d\n", errno);
+    if ( qemu_map_cache_init(tmp_nr_pages) )
+    {
+        fprintf(logfile, "qemu_map_cache_init returned: error %d\n", errno);
         exit(-1);
     }
 
@@ -6440,7 +6533,8 @@ int main(int argc, char **argv)
     fprintf(logfile, "buffered io page at pfn:%lx, mfn: %"PRIx64"\n",
             shared_page_nr - 2, (uint64_t)(page_array[shared_page_nr - 2]));
 
-    free(page_array);
+    /* free(page_array); */
+    /* Qemu map cache needs page_array to get mfn from pfn. */
 
 #elif defined(__ia64__)
   
diff -r fd28a1b139de tools/ioemu/vl.h
--- a/tools/ioemu/vl.h	Mon Dec 04 09:29:26 2006 +0000
+++ b/tools/ioemu/vl.h	Tue Dec 05 00:11:49 2006 +0800
@@ -156,6 +156,29 @@ extern void *shared_vram;
 
 extern FILE *logfile;
 
+
+#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) 
+#define MAX_MCACHE_SIZE    0x40000000 /* 1GB max for x86 */
+#define MCACHE_BUCKET_SHIFT 16
+#elif defined(__x86_64__)
+#define MAX_MCACHE_SIZE    0x1000000000 /* 64GB max for x86_64 */
+#define MCACHE_BUCKET_SHIFT 20
+#endif
+
+#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
+#define MCACHE_BUCKET_MASK ~(MCACHE_BUCKET_SIZE - 1)
+
+struct map_cache
+{
+    target_phys_addr_t phys_addr; /* MCACHE_BUCKET_SIZE unit  */
+    uint8_t *mc_addr;             /* virtual address          */
+    unsigned int map_count;       /* how many times remapped? */
+};
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr);
+#endif
+
 extern int xc_handle;
 extern int domid;
 

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread
* RE: [Patch] Qemu map cache
@ 2006-12-07  2:08 Cui, Dexuan
  2006-12-07 10:35 ` Keir Fraser
  0 siblings, 1 reply; 8+ messages in thread
From: Cui, Dexuan @ 2006-12-07  2:08 UTC (permalink / raw)
  To: xen-devel

Hi Keir,
There is a minor mistake in the patch I posted previously.
The "1UL" in the following line should be "1ULL":
#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)

Sorry for that.

Btw, it seems the patch has not been checked in. Could you give some comments?
Thanks!

-- Dexuan

-----Original Message-----
From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Cui, Dexuan
Sent: 2006年12月5日 1:34
To: xen-devel@lists.xensource.com
Subject: [Xen-devel] [Patch] Qemu map cache

On IA32 host or IA32 PAE host, at present, generally, we can't create an
HVM guest with more than 2G memory, because generally it's almost
impossible for Qemu to find a large enough and consecutive virtual
address space to map an HVM guest's whole physical address space.
The attached patch fixes this issue using dynamic mapping based on
little blocks of memory.

-- Dexuan Cui

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2006-12-07 10:35 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2006-12-04 17:33 [Patch] Qemu map cache Cui, Dexuan
2006-12-04 18:26 ` Anthony Liguori
2006-12-04 22:58 ` Anthony Liguori
2006-12-05 20:28   ` Ian Pratt
2006-12-05 20:48     ` Anthony Liguori
2006-12-06  0:33       ` Ian Pratt
  -- strict thread matches above, loose matches on Subject: below --
2006-12-07  2:08 Cui, Dexuan
2006-12-07 10:35 ` Keir Fraser

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.