[Qemu-devel] [PULL 03/48] hax: Support guest RAM sizes of 4GB or more

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Cc: Yu Ning <yu.ning@intel.com>
Subject: [Qemu-devel] [PULL 03/48] hax: Support guest RAM sizes of 4GB or more
Date: Tue, 13 Feb 2018 13:00:07 +0100	[thread overview]
Message-ID: <1518523252-49106-4-git-send-email-pbonzini@redhat.com> (raw)
In-Reply-To: <1518523252-49106-1-git-send-email-pbonzini@redhat.com>

From: Yu Ning <yu.ning@intel.com>

Since HAX_VM_IOCTL_ALLOC_RAM takes a 32-bit size, it cannot handle
RAM blocks of 4GB or larger, which is why HAXM can only run guests
with less than 4GB of RAM. Solve this problem by utilizing the new
HAXM API, HAX_VM_IOCTL_ADD_RAMBLOCK, which takes a 64-bit size, to
register RAM blocks with the HAXM kernel module. The new API was
first added in HAXM 7.0.0, and its availability can be confirmed
by the presence of the HAX_CAP_64BIT_RAMBLOCK capability flag.

When the guest RAM size reaches 7GB, QEMU will ask HAXM to set up a
memory mapping that covers a 4GB region, which will fail, because
HAX_VM_IOCTL_SET_RAM also takes a 32-bit size. Work around this
limitation by splitting the large mapping into small ones and
calling HAX_VM_IOCTL_SET_RAM multiple times.

Bug: https://bugs.launchpad.net/qemu/+bug/1735576

Signed-off-by: Yu Ning <yu.ning@intel.com>
Message-Id: <1515752555-12784-1-git-send-email-yu.ning@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/sysemu/hax.h        |  2 +-
 target/i386/hax-all.c       |  2 ++
 target/i386/hax-darwin.c    | 27 +++++++++++++++++++++------
 target/i386/hax-darwin.h    |  1 +
 target/i386/hax-i386.h      |  1 +
 target/i386/hax-interface.h |  8 ++++++++
 target/i386/hax-mem.c       | 34 ++++++++++++++++++++++++++--------
 target/i386/hax-windows.c   | 38 +++++++++++++++++++++++++++-----------
 target/i386/hax-windows.h   |  2 ++
 9 files changed, 89 insertions(+), 26 deletions(-)

diff --git a/include/sysemu/hax.h b/include/sysemu/hax.h
index f252399..1f6c461 100644
--- a/include/sysemu/hax.h
+++ b/include/sysemu/hax.h
@@ -27,7 +27,7 @@
 int hax_sync_vcpus(void);
 int hax_init_vcpu(CPUState *cpu);
 int hax_smp_cpu_exec(CPUState *cpu);
-int hax_populate_ram(uint64_t va, uint32_t size);
+int hax_populate_ram(uint64_t va, uint64_t size);
 
 void hax_cpu_synchronize_state(CPUState *cpu);
 void hax_cpu_synchronize_post_reset(CPUState *cpu);
diff --git a/target/i386/hax-all.c b/target/i386/hax-all.c
index bc9a12c..cad7531 100644
--- a/target/i386/hax-all.c
+++ b/target/i386/hax-all.c
@@ -103,6 +103,8 @@ static int hax_get_capability(struct hax_state *hax)
         return -ENOTSUP;
     }
 
+    hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK);
+
     if (cap->wstatus & HAX_CAP_MEMQUOTA) {
         if (cap->mem_quota < hax->mem_quota) {
             fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
diff --git a/target/i386/hax-darwin.c b/target/i386/hax-darwin.c
index ee94174..acdde47 100644
--- a/target/i386/hax-darwin.c
+++ b/target/i386/hax-darwin.c
@@ -28,21 +28,36 @@ hax_fd hax_mod_open(void)
     return fd;
 }
 
-int hax_populate_ram(uint64_t va, uint32_t size)
+int hax_populate_ram(uint64_t va, uint64_t size)
 {
     int ret;
-    struct hax_alloc_ram_info info;
 
     if (!hax_global.vm || !hax_global.vm->fd) {
         fprintf(stderr, "Allocate memory before vm create?\n");
         return -EINVAL;
     }
 
-    info.size = size;
-    info.va = va;
-    ret = ioctl(hax_global.vm->fd, HAX_VM_IOCTL_ALLOC_RAM, &info);
+    if (hax_global.supports_64bit_ramblock) {
+        struct hax_ramblock_info ramblock = {
+            .start_va = va,
+            .size = size,
+            .reserved = 0
+        };
+
+        ret = ioctl(hax_global.vm->fd, HAX_VM_IOCTL_ADD_RAMBLOCK, &ramblock);
+    } else {
+        struct hax_alloc_ram_info info = {
+            .size = (uint32_t)size,
+            .pad = 0,
+            .va = va
+        };
+
+        ret = ioctl(hax_global.vm->fd, HAX_VM_IOCTL_ALLOC_RAM, &info);
+    }
     if (ret < 0) {
-        fprintf(stderr, "Failed to allocate %x memory\n", size);
+        fprintf(stderr, "Failed to register RAM block: ret=%d, va=0x%" PRIx64
+                ", size=0x%" PRIx64 ", method=%s\n", ret, va, size,
+                hax_global.supports_64bit_ramblock ? "new" : "legacy");
         return ret;
     }
     return 0;
diff --git a/target/i386/hax-darwin.h b/target/i386/hax-darwin.h
index fb8e25a..51af0e8 100644
--- a/target/i386/hax-darwin.h
+++ b/target/i386/hax-darwin.h
@@ -44,6 +44,7 @@ static inline void hax_close_fd(hax_fd fd)
 #define HAX_VM_IOCTL_SET_RAM _IOWR(0, 0x82, struct hax_set_ram_info)
 #define HAX_VM_IOCTL_VCPU_DESTROY _IOW(0, 0x83, uint32_t)
 #define HAX_VM_IOCTL_NOTIFY_QEMU_VERSION _IOW(0, 0x84, struct hax_qemu_version)
+#define HAX_VM_IOCTL_ADD_RAMBLOCK _IOW(0, 0x85, struct hax_ramblock_info)
 
 #define HAX_VCPU_IOCTL_RUN  _IO(0, 0xc0)
 #define HAX_VCPU_IOCTL_SET_MSRS _IOWR(0, 0xc1, struct hax_msr_data)
diff --git a/target/i386/hax-i386.h b/target/i386/hax-i386.h
index 8ffe91f..6abc156 100644
--- a/target/i386/hax-i386.h
+++ b/target/i386/hax-i386.h
@@ -37,6 +37,7 @@ struct hax_state {
     uint32_t version;
     struct hax_vm *vm;
     uint64_t mem_quota;
+    bool supports_64bit_ramblock;
 };
 
 #define HAX_MAX_VCPU 0x10
diff --git a/target/i386/hax-interface.h b/target/i386/hax-interface.h
index d141308..93d5fcb 100644
--- a/target/i386/hax-interface.h
+++ b/target/i386/hax-interface.h
@@ -308,6 +308,13 @@ struct hax_alloc_ram_info {
     uint32_t pad;
     uint64_t va;
 } __attribute__ ((__packed__));
+
+struct hax_ramblock_info {
+    uint64_t start_va;
+    uint64_t size;
+    uint64_t reserved;
+} __attribute__ ((__packed__));
+
 #define HAX_RAM_INFO_ROM     0x01 /* Read-Only */
 #define HAX_RAM_INFO_INVALID 0x80 /* Unmapped, usually used for MMIO */
 struct hax_set_ram_info {
@@ -327,6 +334,7 @@ struct hax_set_ram_info {
 
 #define HAX_CAP_MEMQUOTA           0x2
 #define HAX_CAP_UG                 0x4
+#define HAX_CAP_64BIT_RAMBLOCK     0x8
 
 struct hax_capabilityinfo {
     /* bit 0: 1 - working
diff --git a/target/i386/hax-mem.c b/target/i386/hax-mem.c
index 27a0d21..f46e855 100644
--- a/target/i386/hax-mem.c
+++ b/target/i386/hax-mem.c
@@ -174,6 +174,7 @@ static void hax_process_section(MemoryRegionSection *section, uint8_t flags)
     ram_addr_t size = int128_get64(section->size);
     unsigned int delta;
     uint64_t host_va;
+    uint32_t max_mapping_size;
 
     /* We only care about RAM and ROM regions */
     if (!memory_region_is_ram(mr)) {
@@ -206,10 +207,23 @@ static void hax_process_section(MemoryRegionSection *section, uint8_t flags)
         flags |= HAX_RAM_INFO_ROM;
     }
 
-    /* the kernel module interface uses 32-bit sizes (but we could split...) */
-    g_assert(size <= UINT32_MAX);
-
-    hax_update_mapping(start_pa, size, host_va, flags);
+    /*
+     * The kernel module interface uses 32-bit sizes:
+     * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_set_ram
+     *
+     * If the mapping size is longer than 32 bits, we can't process it in one
+     * call into the kernel. Instead, we split the mapping into smaller ones,
+     * and call hax_update_mapping() on each.
+     */
+    max_mapping_size = UINT32_MAX & qemu_real_host_page_mask;
+    while (size > max_mapping_size) {
+        hax_update_mapping(start_pa, max_mapping_size, host_va, flags);
+        start_pa += max_mapping_size;
+        size -= max_mapping_size;
+        host_va += max_mapping_size;
+    }
+    /* Now size <= max_mapping_size */
+    hax_update_mapping(start_pa, (uint32_t)size, host_va, flags);
 }
 
 static void hax_region_add(MemoryListener *listener,
@@ -283,12 +297,16 @@ static MemoryListener hax_memory_listener = {
 static void hax_ram_block_added(RAMBlockNotifier *n, void *host, size_t size)
 {
     /*
-     * In HAX, QEMU allocates the virtual address, and HAX kernel
-     * populates the memory with physical memory. Currently we have no
-     * paging, so user should make sure enough free memory in advance.
+     * We must register each RAM block with the HAXM kernel module, or
+     * hax_set_ram() will fail for any mapping into the RAM block:
+     * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_alloc_ram
+     *
+     * Old versions of the HAXM kernel module (< 6.2.0) used to preallocate all
+     * host physical pages for the RAM block as part of this registration
+     * process, hence the name hax_populate_ram().
      */
     if (hax_populate_ram((uint64_t)(uintptr_t)host, size) < 0) {
-        fprintf(stderr, "HAX failed to populate RAM");
+        fprintf(stderr, "HAX failed to populate RAM\n");
         abort();
     }
 }
diff --git a/target/i386/hax-windows.c b/target/i386/hax-windows.c
index 15a180b..b1ac737 100644
--- a/target/i386/hax-windows.c
+++ b/target/i386/hax-windows.c
@@ -58,10 +58,9 @@ static int hax_open_device(hax_fd *fd)
     return fd;
 }
 
-int hax_populate_ram(uint64_t va, uint32_t size)
+int hax_populate_ram(uint64_t va, uint64_t size)
 {
     int ret;
-    struct hax_alloc_ram_info info;
     HANDLE hDeviceVM;
     DWORD dSize = 0;
 
@@ -70,18 +69,35 @@ int hax_populate_ram(uint64_t va, uint32_t size)
         return -EINVAL;
     }
 
-    info.size = size;
-    info.va = va;
-
     hDeviceVM = hax_global.vm->fd;
-
-    ret = DeviceIoControl(hDeviceVM,
-                          HAX_VM_IOCTL_ALLOC_RAM,
-                          &info, sizeof(info), NULL, 0, &dSize,
-                          (LPOVERLAPPED) NULL);
+    if (hax_global.supports_64bit_ramblock) {
+        struct hax_ramblock_info ramblock = {
+            .start_va = va,
+            .size = size,
+            .reserved = 0
+        };
+
+        ret = DeviceIoControl(hDeviceVM,
+                              HAX_VM_IOCTL_ADD_RAMBLOCK,
+                              &ramblock, sizeof(ramblock), NULL, 0, &dSize,
+                              (LPOVERLAPPED) NULL);
+    } else {
+        struct hax_alloc_ram_info info = {
+            .size = (uint32_t) size,
+            .pad = 0,
+            .va = va
+        };
+
+        ret = DeviceIoControl(hDeviceVM,
+                              HAX_VM_IOCTL_ALLOC_RAM,
+                              &info, sizeof(info), NULL, 0, &dSize,
+                              (LPOVERLAPPED) NULL);
+    }
 
     if (!ret) {
-        fprintf(stderr, "Failed to allocate %x memory\n", size);
+        fprintf(stderr, "Failed to register RAM block: va=0x%" PRIx64
+                ", size=0x%" PRIx64 ", method=%s\n", va, size,
+                hax_global.supports_64bit_ramblock ? "new" : "legacy");
         return ret;
     }
 
diff --git a/target/i386/hax-windows.h b/target/i386/hax-windows.h
index 20e2f85..12cbd81 100644
--- a/target/i386/hax-windows.h
+++ b/target/i386/hax-windows.h
@@ -57,6 +57,8 @@ static inline int hax_invalid_fd(hax_fd fd)
                                             METHOD_BUFFERED, FILE_ANY_ACCESS)
 #define HAX_VM_IOCTL_VCPU_DESTROY  CTL_CODE(HAX_DEVICE_TYPE, 0x905, \
                                             METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VM_IOCTL_ADD_RAMBLOCK  CTL_CODE(HAX_DEVICE_TYPE, 0x913, \
+                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
 
 #define HAX_VCPU_IOCTL_RUN      CTL_CODE(HAX_DEVICE_TYPE, 0x906, \
                                          METHOD_BUFFERED, FILE_ANY_ACCESS)
-- 
1.8.3.1

next prev parent reply	other threads:[~2018-02-13 12:01 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-02-13 12:00 [Qemu-devel] [PULL 00/48] Misc patches for 2018-02-13 Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 01/48] Revert "build-sys: silence make by default or V=0" Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 02/48] make: fix help message reference to bogus V=0 variable Paolo Bonzini
2018-02-13 12:00 ` Paolo Bonzini [this message]
2018-02-13 12:00 ` [Qemu-devel] [PULL 04/48] net/can: simple messages transport implementation for QEMU Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 05/48] net/can: support for connecting to Linux host SocketCAN interface Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 06/48] hw/net/can: SJA1000 chip register level emulation for QEMU Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 07/48] hw/net/can: Kvaser PCI CAN-S (single SJA1000 channel) emulation Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 08/48] hw/net/can: PCM-3680I PCI (dual " Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 09/48] hw/net/can: MIOe-3680 " Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 10/48] net/can: documentation Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 11/48] hw/net/can: interrupt cleanup Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 12/48] build-sys: remove useless extra*flags variables Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 13/48] build-sys: check static linking of UBSAN Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 14/48] sdhci: use error_propagate(local_err) in realize() Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 15/48] sdhci: add qtest to check the SD capabilities register Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 16/48] sdhci: add check_capab_readonly() qtest Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 17/48] sdhci: add a check_capab_baseclock() qtest Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 18/48] sdhci: add a check_capab_sdma() qtest Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 19/48] sdhci: add qtest to check the SD Spec version Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 20/48] sdhci: add a 'spec_version property' (default to v2) Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 21/48] sdhci: use a numeric value for the default CAPAB register Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 22/48] sdhci: simplify sdhci_get_fifolen() Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 23/48] sdhci: check the Spec v1 capabilities correctness Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 25/48] sdhci: Fix 64-bit ADMA2 Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 27/48] hw/arm/exynos4210: access the 64-bit capareg with qdev_prop_set_uint64() Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 28/48] hw/arm/exynos4210: add a comment about a very similar SDHCI (Spec. v2) Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 29/48] hw/arm/xilinx_zynq: fix the capabilities register to match the datasheet Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 31/48] sdhci: rename the hostctl1 register Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 32/48] sdhci: implement the Host Control 2 register (tuning sequence) Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 33/48] sdbus: add trace events Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 34/48] sdhci: implement UHS-I voltage switch Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 35/48] sdhci: implement CMD/DAT[] fields in the Present State register Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 36/48] hw/arm/bcm2835_peripherals: implement SDHCI Spec v3 Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 37/48] hw/arm/bcm2835_peripherals: change maximum block size to 1kB Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 38/48] hw/arm/fsl-imx6: implement SDHCI Spec. v3 Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 39/48] hw/arm/xilinx_zynqmp: fix the capabilities/spec version to match the datasheet Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 40/48] hw/arm/xilinx_zynqmp: enable the UHS-I mode Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 41/48] sdhci: check Spec v3 capabilities qtest Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 42/48] sdhci: add a check_capab_v3() qtest Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 43/48] sdhci: add Spec v4.2 register definitions Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 44/48] g364fb: switch to using DirtyBitmapSnapshot Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 45/48] memory: remove memory_region_test_and_clear_dirty Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 46/48] memory: hide memory_region_sync_dirty_bitmap behind DirtyBitmapSnapshot Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 47/48] memory: unify loops to sync dirty log bitmap Paolo Bonzini
2018-02-13 12:00 ` [Qemu-devel] [PULL 48/48] travis: use libgcc-4.8-dev (libgcc-6-dev is not available on Ubuntu 14.04) Paolo Bonzini
2018-02-13 14:37 ` [Qemu-devel] [PULL 00/48] Misc patches for 2018-02-13 Peter Maydell
2018-02-13 15:10   ` Paolo Bonzini

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:f252399 dfblob:1f6c461 dfblob:bc9a12c dfblob:cad7531
dfblob:ee94174 dfblob:acdde47 dfblob:fb8e25a dfblob:51af0e8
dfblob:8ffe91f dfblob:6abc156 dfblob:d141308 dfblob:93d5fcb
dfblob:27a0d21 dfblob:f46e855 dfblob:15a180b dfblob:b1ac737
dfblob:20e2f85 dfblob:12cbd81 )
 OR (
bs:"[Qemu-devel] [PULL 03/48] hax: Support guest RAM sizes of 4GB or more" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1518523252-49106-4-git-send-email-pbonzini@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=yu.ning@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).