From: "Michael S. Tsirkin" <mst@redhat.com>
To: Igor Mammedov <imammedo@redhat.com>
Cc: pbonzini@redhat.com, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH v4 4/7] pc: fix QEMU crashing when more than ~50 memory hotplugged
Date: Thu, 9 Jul 2015 16:06:14 +0300	[thread overview]
Message-ID: <20150709155919-mutt-send-email-mst@redhat.com> (raw)
In-Reply-To: <1436442444-132020-5-git-send-email-imammedo@redhat.com>

On Thu, Jul 09, 2015 at 01:47:21PM +0200, Igor Mammedov wrote:
> QEMU asserts in vhost due to hitting vhost backend limit
> on number of supported memory regions.
> 
> Describe all hotplugged memory as one contiguous range
> to vhost, with a linear 1:1 HVA->GPA mapping in the backend.
> 
> Signed-off-by: Igor Mammedov <imammedo@redhat.com>

Hmm - a bunch of work here to recombine MRs that the memory listener
interface breaks up.  In particular, KVM could benefit from this too (on
workloads that change the table a lot).  Can't we teach the memory core to
pass the HVA range as a single contiguous range to memory listeners?
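
For illustration, here is a minimal standalone sketch (not QEMU code; the
types, names and values are made up) of the recombination step that
vhost_set_mem_table() in the patch below performs: entries whose GPA falls
inside one reserved, contiguous HVA window are dropped and replaced by a
single entry covering the whole window, keeping the region count handed to
the vhost backend small.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct region {          /* made-up stand-in for vhost_memory_region */
    uint64_t gpa;        /* guest physical address */
    uint64_t size;
    uint64_t hva;        /* host virtual address backing gpa */
};

/* Drop every entry whose GPA lies inside [win_gpa, win_gpa + win_size)
 * and, if any were dropped, append one entry for the whole window. */
static int coalesce(struct region *regs, int n, uint64_t win_gpa,
                    uint64_t win_size, uint64_t win_hva)
{
    int i, orig = n;

    for (i = 0; i < n; i++) {
        if (regs[i].gpa >= win_gpa && regs[i].gpa < win_gpa + win_size) {
            n--;
            memmove(&regs[i], &regs[i + 1], (n - i) * sizeof regs[0]);
            i--;    /* re-check the entry that slid into slot i */
        }
    }
    if (orig > n) {
        regs[n].gpa = win_gpa;
        regs[n].size = win_size;
        regs[n].hva = win_hva;
        n++;
    }
    return n;
}

int main(void)
{
    /* boot RAM plus two hotplugged DIMMs inside a 1 GiB window at 4 GiB */
    struct region regs[4] = {
        { 0x0,            0x80000000, 0x7f0000000000ULL },
        { 0x100000000ULL, 0x10000000, 0x7f8000000000ULL },
        { 0x110000000ULL, 0x10000000, 0x7f8010000000ULL },
    };
    int n = coalesce(regs, 3, 0x100000000ULL, 0x40000000, 0x7f8000000000ULL);

    for (int i = 0; i < n; i++) {
        printf("gpa=0x%llx size=0x%llx hva=0x%llx\n",
               (unsigned long long)regs[i].gpa,
               (unsigned long long)regs[i].size,
               (unsigned long long)regs[i].hva);
    }
    return 0;
}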

> ---
>  hw/virtio/vhost.c         | 47 ++++++++++++++++++++++++++++++++++++++++++++---
>  include/hw/virtio/vhost.h |  1 +
>  2 files changed, 45 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index 2712c6f..7bc27f0 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -432,6 +432,10 @@ static void vhost_set_memory(MemoryListener *listener,
>  
>      assert(size);
>  
> +    if (!dev->rsvd_hva.mr) {
> +        dev->rsvd_hva = memory_region_find_hva_range(section->mr);
> +    }
> +
>      /* Optimize no-change case. At least cirrus_vga does this a lot at this time. */
>      ram = memory_region_get_ram_ptr(section->mr) + section->offset_within_region;
>      if (add) {
> @@ -472,6 +476,42 @@ static void vhost_begin(MemoryListener *listener)
>      dev->mem_changed_start_addr = -1;
>  }
>  
> +static int vhost_set_mem_table(struct vhost_dev *dev)
> +{
> +    hwaddr start_addr = 0;
> +    ram_addr_t size = 0;
> +    struct vhost_memory *mem;
> +    int r, i;
> +
> +    /* drop memory ranges covered by the contiguous HVA area */
> +    mem = g_memdup(dev->mem, offsetof(struct vhost_memory, regions) +
> +                       dev->mem->nregions * sizeof dev->mem->regions[0]);
> +    start_addr = dev->rsvd_hva.offset_within_address_space;
> +    size = int128_get64(dev->rsvd_hva.size);
> +    for (i = 0; i < mem->nregions; i++) {
> +        if (mem->regions[i].guest_phys_addr >= start_addr &&
> +            mem->regions[i].guest_phys_addr < start_addr + size) {
> +            mem->nregions--;
> +            memmove(&mem->regions[i], &mem->regions[i + 1],
> +                    (mem->nregions - i) * sizeof mem->regions[0]);
> +        }
> +    }
> +    /* add one contiguous HVA entry if memory ranges from it are present */
> +    if (dev->mem->nregions > mem->nregions) {
> +        struct vhost_memory_region *reg = &mem->regions[mem->nregions];
> +
> +        reg->guest_phys_addr = start_addr;
> +        reg->memory_size = size;
> +        reg->userspace_addr =
> +            (__u64)memory_region_get_ram_ptr(dev->rsvd_hva.mr);
> +        mem->nregions++;
> +    }
> +
> +    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, mem);
> +    g_free(mem);
> +    return r;
> +}
> +
>  static void vhost_commit(MemoryListener *listener)
>  {
>      struct vhost_dev *dev = container_of(listener, struct vhost_dev,
> @@ -500,7 +540,7 @@ static void vhost_commit(MemoryListener *listener)
>      }
>  
>      if (!dev->log_enabled) {
> -        r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem);
> +        r = vhost_set_mem_table(dev);
>          assert(r >= 0);
>          dev->memory_changed = false;
>          return;
> @@ -513,7 +553,7 @@ static void vhost_commit(MemoryListener *listener)
>      if (dev->log_size < log_size) {
>          vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
>      }
> -    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem);
> +    r = vhost_set_mem_table(dev);
>      assert(r >= 0);
>      /* To log less, can only decrease log size after table update. */
>      if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
> @@ -956,6 +996,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
>          migrate_add_blocker(hdev->migration_blocker);
>      }
>      hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
> +    memset(&hdev->rsvd_hva, 0, sizeof hdev->rsvd_hva);
>      hdev->n_mem_sections = 0;
>      hdev->mem_sections = NULL;
>      hdev->log = NULL;
> @@ -1119,7 +1160,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
>      if (r < 0) {
>          goto fail_features;
>      }
> -    r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem);
> +    r = vhost_set_mem_table(hdev);
>      if (r < 0) {
>          r = -errno;
>          goto fail_mem;
> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
> index dd51050..d41bf2f 100644
> --- a/include/hw/virtio/vhost.h
> +++ b/include/hw/virtio/vhost.h
> @@ -40,6 +40,7 @@ struct vhost_dev {
>      struct vhost_memory *mem;
>      int n_mem_sections;
>      MemoryRegionSection *mem_sections;
> +    MemoryRegionSection rsvd_hva;
>      struct vhost_virtqueue *vqs;
>      int nvqs;
>      /* the first virtqueue which would be used by this vhost dev */
> -- 
> 1.8.3.1

Thread overview: 24+ messages
2015-07-09 11:47 [Qemu-devel] [PATCH v4 0/7] Fix QEMU crash during memory hotplug with vhost=on Igor Mammedov
2015-07-09 11:47 ` [Qemu-devel] [PATCH v4 1/7] memory: get rid of memory_region_destructor_ram_from_ptr() Igor Mammedov
2015-07-09 11:47 ` [Qemu-devel] [PATCH v4 2/7] memory: introduce MemoryRegion container with reserved HVA range Igor Mammedov
2015-07-09 11:47 ` [Qemu-devel] [PATCH v4 3/7] pc: reserve hotpluggable memory range with memory_region_init_hva_range() Igor Mammedov
2015-07-09 11:47 ` [Qemu-devel] [PATCH v4 4/7] pc: fix QEMU crashing when more than ~50 memory hotplugged Igor Mammedov
2015-07-09 13:06   ` Michael S. Tsirkin [this message]
2015-07-09 13:43     ` Paolo Bonzini
2015-07-09 13:46       ` Michael S. Tsirkin
2015-07-10 10:12         ` Igor Mammedov
2015-07-13  6:55           ` Michael S. Tsirkin
2015-07-13 18:55             ` Igor Mammedov
2015-07-13 20:14               ` Michael S. Tsirkin
2015-07-14 13:02                 ` Igor Mammedov
2015-07-14 13:14                   ` Michael S. Tsirkin
2015-07-09 11:47 ` [Qemu-devel] [PATCH v4 5/7] exec: make sure that RAMBlock descriptor won't be leaked Igor Mammedov
2015-07-09 11:47 ` [Qemu-devel] [PATCH v4 6/7] exec: add qemu_ram_unmap_hva() API for unmapping memory from HVA area Igor Mammedov
2015-07-09 11:47 ` [Qemu-devel] [PATCH v4 7/7] memory: add support for deleting HVA mapped MemoryRegion Igor Mammedov
2015-07-15 15:12 ` [Qemu-devel] [PATCH v4 0/7] Fix QEMU crash during memory hotplug with vhost=on Igor Mammedov
2015-07-15 16:32   ` Michael S. Tsirkin
2015-07-16  7:26     ` Igor Mammedov
2015-07-16  7:35       ` Michael S. Tsirkin
2015-07-16  9:42         ` Igor Mammedov
2015-07-16 10:24           ` Michael S. Tsirkin
2015-07-16 11:11             ` Igor Mammedov
