From: Paolo Bonzini <pbonzini@redhat.com>
To: Yu Ning <yu.ning@linux.intel.com>, qemu-devel@nongnu.org
Cc: Yu Ning <yu.ning@intel.com>, Richard Henderson <rth@twiddle.net>,
Eduardo Habkost <ehabkost@redhat.com>
Subject: Re: [Qemu-devel] [PATCH] hax: Support guest RAM sizes of 4GB or more
Date: Wed, 7 Feb 2018 16:36:02 +0100 [thread overview]
Message-ID: <8a183bbd-4c98-a342-d1e5-dfcd4e0bc8e8@redhat.com> (raw)
In-Reply-To: <1515752555-12784-1-git-send-email-yu.ning@linux.intel.com>
On 12/01/2018 11:22, Yu Ning wrote:
> From: Yu Ning <yu.ning@intel.com>
>
> Since HAX_VM_IOCTL_ALLOC_RAM takes a 32-bit size, it cannot handle
> RAM blocks of 4GB or larger, which is why HAXM can only run guests
> with less than 4GB of RAM. Solve this problem by utilizing the new
> HAXM API, HAX_VM_IOCTL_ADD_RAMBLOCK, which takes a 64-bit size, to
> register RAM blocks with the HAXM kernel module. The new API is
> first added in HAXM 7.0.0, and its availability can be confirmed
> by the presence of the HAX_CAP_64BIT_RAMBLOCK capability flag.
>
> When the guest RAM size reaches 7GB, QEMU will ask HAXM to set up a
> memory mapping that covers a 4GB region, which will fail, because
> HAX_VM_IOCTL_SET_RAM also takes a 32-bit size. Work around this
> limitation by splitting the large mapping into small ones and
> calling HAX_VM_IOCTL_SET_RAM multiple times.
>
> Bug: https://bugs.launchpad.net/qemu/+bug/1735576
>
> Signed-off-by: Yu Ning <yu.ning@intel.com>
> ---
> include/sysemu/hax.h | 2 +-
> target/i386/hax-all.c | 2 ++
> target/i386/hax-darwin.c | 27 +++++++++++++++++++++------
> target/i386/hax-darwin.h | 1 +
> target/i386/hax-i386.h | 1 +
> target/i386/hax-interface.h | 8 ++++++++
> target/i386/hax-mem.c | 34 ++++++++++++++++++++++++++--------
> target/i386/hax-windows.c | 38 +++++++++++++++++++++++++++-----------
> target/i386/hax-windows.h | 2 ++
> 9 files changed, 89 insertions(+), 26 deletions(-)
>
> diff --git a/include/sysemu/hax.h b/include/sysemu/hax.h
> index f252399..1f6c461 100644
> --- a/include/sysemu/hax.h
> +++ b/include/sysemu/hax.h
> @@ -27,7 +27,7 @@
> int hax_sync_vcpus(void);
> int hax_init_vcpu(CPUState *cpu);
> int hax_smp_cpu_exec(CPUState *cpu);
> -int hax_populate_ram(uint64_t va, uint32_t size);
> +int hax_populate_ram(uint64_t va, uint64_t size);
>
> void hax_cpu_synchronize_state(CPUState *cpu);
> void hax_cpu_synchronize_post_reset(CPUState *cpu);
> diff --git a/target/i386/hax-all.c b/target/i386/hax-all.c
> index 3ce6950..57921e7 100644
> --- a/target/i386/hax-all.c
> +++ b/target/i386/hax-all.c
> @@ -104,6 +104,8 @@ static int hax_get_capability(struct hax_state *hax)
> return -ENOTSUP;
> }
>
> + hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK);
> +
> if (cap->wstatus & HAX_CAP_MEMQUOTA) {
> if (cap->mem_quota < hax->mem_quota) {
> fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
> diff --git a/target/i386/hax-darwin.c b/target/i386/hax-darwin.c
> index ee94174..acdde47 100644
> --- a/target/i386/hax-darwin.c
> +++ b/target/i386/hax-darwin.c
> @@ -28,21 +28,36 @@ hax_fd hax_mod_open(void)
> return fd;
> }
>
> -int hax_populate_ram(uint64_t va, uint32_t size)
> +int hax_populate_ram(uint64_t va, uint64_t size)
> {
> int ret;
> - struct hax_alloc_ram_info info;
>
> if (!hax_global.vm || !hax_global.vm->fd) {
> fprintf(stderr, "Allocate memory before vm create?\n");
> return -EINVAL;
> }
>
> - info.size = size;
> - info.va = va;
> - ret = ioctl(hax_global.vm->fd, HAX_VM_IOCTL_ALLOC_RAM, &info);
> + if (hax_global.supports_64bit_ramblock) {
> + struct hax_ramblock_info ramblock = {
> + .start_va = va,
> + .size = size,
> + .reserved = 0
> + };
> +
> + ret = ioctl(hax_global.vm->fd, HAX_VM_IOCTL_ADD_RAMBLOCK, &ramblock);
> + } else {
> + struct hax_alloc_ram_info info = {
> + .size = (uint32_t)size,
> + .pad = 0,
> + .va = va
> + };
> +
> + ret = ioctl(hax_global.vm->fd, HAX_VM_IOCTL_ALLOC_RAM, &info);
> + }
> if (ret < 0) {
> - fprintf(stderr, "Failed to allocate %x memory\n", size);
> + fprintf(stderr, "Failed to register RAM block: ret=%d, va=0x%" PRIx64
> + ", size=0x%" PRIx64 ", method=%s\n", ret, va, size,
> + hax_global.supports_64bit_ramblock ? "new" : "legacy");
> return ret;
> }
> return 0;
> diff --git a/target/i386/hax-darwin.h b/target/i386/hax-darwin.h
> index fb8e25a..51af0e8 100644
> --- a/target/i386/hax-darwin.h
> +++ b/target/i386/hax-darwin.h
> @@ -44,6 +44,7 @@ static inline void hax_close_fd(hax_fd fd)
> #define HAX_VM_IOCTL_SET_RAM _IOWR(0, 0x82, struct hax_set_ram_info)
> #define HAX_VM_IOCTL_VCPU_DESTROY _IOW(0, 0x83, uint32_t)
> #define HAX_VM_IOCTL_NOTIFY_QEMU_VERSION _IOW(0, 0x84, struct hax_qemu_version)
> +#define HAX_VM_IOCTL_ADD_RAMBLOCK _IOW(0, 0x85, struct hax_ramblock_info)
>
> #define HAX_VCPU_IOCTL_RUN _IO(0, 0xc0)
> #define HAX_VCPU_IOCTL_SET_MSRS _IOWR(0, 0xc1, struct hax_msr_data)
> diff --git a/target/i386/hax-i386.h b/target/i386/hax-i386.h
> index 8ffe91f..6abc156 100644
> --- a/target/i386/hax-i386.h
> +++ b/target/i386/hax-i386.h
> @@ -37,6 +37,7 @@ struct hax_state {
> uint32_t version;
> struct hax_vm *vm;
> uint64_t mem_quota;
> + bool supports_64bit_ramblock;
> };
>
> #define HAX_MAX_VCPU 0x10
> diff --git a/target/i386/hax-interface.h b/target/i386/hax-interface.h
> index d141308..93d5fcb 100644
> --- a/target/i386/hax-interface.h
> +++ b/target/i386/hax-interface.h
> @@ -308,6 +308,13 @@ struct hax_alloc_ram_info {
> uint32_t pad;
> uint64_t va;
> } __attribute__ ((__packed__));
> +
> +struct hax_ramblock_info {
> + uint64_t start_va;
> + uint64_t size;
> + uint64_t reserved;
> +} __attribute__ ((__packed__));
> +
> #define HAX_RAM_INFO_ROM 0x01 /* Read-Only */
> #define HAX_RAM_INFO_INVALID 0x80 /* Unmapped, usually used for MMIO */
> struct hax_set_ram_info {
> @@ -327,6 +334,7 @@ struct hax_set_ram_info {
>
> #define HAX_CAP_MEMQUOTA 0x2
> #define HAX_CAP_UG 0x4
> +#define HAX_CAP_64BIT_RAMBLOCK 0x8
>
> struct hax_capabilityinfo {
> /* bit 0: 1 - working
> diff --git a/target/i386/hax-mem.c b/target/i386/hax-mem.c
> index 27a0d21..f46e855 100644
> --- a/target/i386/hax-mem.c
> +++ b/target/i386/hax-mem.c
> @@ -174,6 +174,7 @@ static void hax_process_section(MemoryRegionSection *section, uint8_t flags)
> ram_addr_t size = int128_get64(section->size);
> unsigned int delta;
> uint64_t host_va;
> + uint32_t max_mapping_size;
>
> /* We only care about RAM and ROM regions */
> if (!memory_region_is_ram(mr)) {
> @@ -206,10 +207,23 @@ static void hax_process_section(MemoryRegionSection *section, uint8_t flags)
> flags |= HAX_RAM_INFO_ROM;
> }
>
> - /* the kernel module interface uses 32-bit sizes (but we could split...) */
> - g_assert(size <= UINT32_MAX);
> -
> - hax_update_mapping(start_pa, size, host_va, flags);
> + /*
> + * The kernel module interface uses 32-bit sizes:
> + * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_set_ram
> + *
> + * If the mapping size is longer than 32 bits, we can't process it in one
> + * call into the kernel. Instead, we split the mapping into smaller ones,
> + * and call hax_update_mapping() on each.
> + */
> + max_mapping_size = UINT32_MAX & qemu_real_host_page_mask;
> + while (size > max_mapping_size) {
> + hax_update_mapping(start_pa, max_mapping_size, host_va, flags);
> + start_pa += max_mapping_size;
> + size -= max_mapping_size;
> + host_va += max_mapping_size;
> + }
> + /* Now size <= max_mapping_size */
> + hax_update_mapping(start_pa, (uint32_t)size, host_va, flags);
> }
>
> static void hax_region_add(MemoryListener *listener,
> @@ -283,12 +297,16 @@ static MemoryListener hax_memory_listener = {
> static void hax_ram_block_added(RAMBlockNotifier *n, void *host, size_t size)
> {
> /*
> - * In HAX, QEMU allocates the virtual address, and HAX kernel
> - * populates the memory with physical memory. Currently we have no
> - * paging, so user should make sure enough free memory in advance.
> + * We must register each RAM block with the HAXM kernel module, or
> + * hax_set_ram() will fail for any mapping into the RAM block:
> + * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_alloc_ram
> + *
> + * Old versions of the HAXM kernel module (< 6.2.0) used to preallocate all
> + * host physical pages for the RAM block as part of this registration
> + * process, hence the name hax_populate_ram().
> */
> if (hax_populate_ram((uint64_t)(uintptr_t)host, size) < 0) {
> - fprintf(stderr, "HAX failed to populate RAM");
> + fprintf(stderr, "HAX failed to populate RAM\n");
> abort();
> }
> }
> diff --git a/target/i386/hax-windows.c b/target/i386/hax-windows.c
> index 15a180b..b1ac737 100644
> --- a/target/i386/hax-windows.c
> +++ b/target/i386/hax-windows.c
> @@ -58,10 +58,9 @@ static int hax_open_device(hax_fd *fd)
> return fd;
> }
>
> -int hax_populate_ram(uint64_t va, uint32_t size)
> +int hax_populate_ram(uint64_t va, uint64_t size)
> {
> int ret;
> - struct hax_alloc_ram_info info;
> HANDLE hDeviceVM;
> DWORD dSize = 0;
>
> @@ -70,18 +69,35 @@ int hax_populate_ram(uint64_t va, uint32_t size)
> return -EINVAL;
> }
>
> - info.size = size;
> - info.va = va;
> -
> hDeviceVM = hax_global.vm->fd;
> -
> - ret = DeviceIoControl(hDeviceVM,
> - HAX_VM_IOCTL_ALLOC_RAM,
> - &info, sizeof(info), NULL, 0, &dSize,
> - (LPOVERLAPPED) NULL);
> + if (hax_global.supports_64bit_ramblock) {
> + struct hax_ramblock_info ramblock = {
> + .start_va = va,
> + .size = size,
> + .reserved = 0
> + };
> +
> + ret = DeviceIoControl(hDeviceVM,
> + HAX_VM_IOCTL_ADD_RAMBLOCK,
> + &ramblock, sizeof(ramblock), NULL, 0, &dSize,
> + (LPOVERLAPPED) NULL);
> + } else {
> + struct hax_alloc_ram_info info = {
> + .size = (uint32_t) size,
> + .pad = 0,
> + .va = va
> + };
> +
> + ret = DeviceIoControl(hDeviceVM,
> + HAX_VM_IOCTL_ALLOC_RAM,
> + &info, sizeof(info), NULL, 0, &dSize,
> + (LPOVERLAPPED) NULL);
> + }
>
> if (!ret) {
> - fprintf(stderr, "Failed to allocate %x memory\n", size);
> + fprintf(stderr, "Failed to register RAM block: va=0x%" PRIx64
> + ", size=0x%" PRIx64 ", method=%s\n", va, size,
> + hax_global.supports_64bit_ramblock ? "new" : "legacy");
> return ret;
> }
>
> diff --git a/target/i386/hax-windows.h b/target/i386/hax-windows.h
> index 004f867..8491417 100644
> --- a/target/i386/hax-windows.h
> +++ b/target/i386/hax-windows.h
> @@ -59,6 +59,8 @@ static inline int hax_invalid_fd(hax_fd fd)
> METHOD_BUFFERED, FILE_ANY_ACCESS)
> #define HAX_VM_IOCTL_VCPU_DESTROY CTL_CODE(HAX_DEVICE_TYPE, 0x905, \
> METHOD_BUFFERED, FILE_ANY_ACCESS)
> +#define HAX_VM_IOCTL_ADD_RAMBLOCK CTL_CODE(HAX_DEVICE_TYPE, 0x913, \
> + METHOD_BUFFERED, FILE_ANY_ACCESS)
>
> #define HAX_VCPU_IOCTL_RUN CTL_CODE(HAX_DEVICE_TYPE, 0x906, \
> METHOD_BUFFERED, FILE_ANY_ACCESS)
>
Queued, thanks. Sorry for the delay!
Paolo
next prev parent reply other threads:[~2018-02-07 15:36 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-01-12 10:22 [Qemu-devel] [PATCH] hax: Support guest RAM sizes of 4GB or more Yu Ning
2018-01-22 8:52 ` Yu Ning
2018-02-07 15:36 ` Paolo Bonzini [this message]
2018-02-07 23:46 ` Yu Ning
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=8a183bbd-4c98-a342-d1e5-dfcd4e0bc8e8@redhat.com \
--to=pbonzini@redhat.com \
--cc=ehabkost@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=rth@twiddle.net \
--cc=yu.ning@intel.com \
--cc=yu.ning@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).