All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: haifeng.lin@huawei.com, jerry.lilijun@huawei.com,
	n.nikolaev@virtualopensystems.com, qemu-devel@nongnu.org,
	milo.raofei@huawei.com, arei.gonglei@huawei.com
Subject: Re: [Qemu-devel] [PATCH v1] vhost-user: fix not send all hugepage files to vhost-user
Date: Wed, 4 Feb 2015 14:52:00 +0100	[thread overview]
Message-ID: <20150204135200.GA16345@redhat.com> (raw)
In-Reply-To: <54CEB6EB.1090202@redhat.com>

On Mon, Feb 02, 2015 at 12:29:47AM +0100, Paolo Bonzini wrote:
> 
> 
> On 17/12/2014 07:02, haifeng.lin@huawei.com wrote:
> > From: linhaifeng <haifeng.lin@huawei.com>
> > 
> > If we create VM with two or more numa nodes qemu will create two
> > or more hugepage files but qemu only send one hugepage file fd
> > to vhost-user when VM's memory size is 2G and with two numa nodes.
> > 
> > Signed-off-by: linhaifeng <haifeng.lin@huawei.com>
> 
> The bug is in vhost_dev_assign_memory.  It doesn't check that the file
> descriptor matches when merging regions.  Michael, does the merging
> trigger in practice?  Can we just eliminate it?
> 
> Paolo

I'm not sure: does memory core ever give us two adjacent
RAM segments that we *can* merge?
If yes it would trigger, and extra memory slots slow down lookups
linearly so they aren't free.

> > ---
> >  hw/virtio/vhost-user.c      | 78 ++++++++++++++++++++++++++++++---------------
> >  hw/virtio/vhost.c           | 13 ++++++++
> >  linux-headers/linux/vhost.h |  7 ++++
> >  3 files changed, 73 insertions(+), 25 deletions(-)
> > 
> > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> > index aefe0bb..439cbba 100644
> > --- a/hw/virtio/vhost-user.c
> > +++ b/hw/virtio/vhost-user.c
> > @@ -24,6 +24,10 @@
> >  #include <linux/vhost.h>
> >  
> >  #define VHOST_MEMORY_MAX_NREGIONS    8
> > +/* FIXME: same as the max number of numa node?*/
> > +#define HUGEPAGE_MAX_FILES           8
> > +
> > +#define RAM_SHARED     (1 << 1)
> >  
> >  typedef enum VhostUserRequest {
> >      VHOST_USER_NONE = 0,
> > @@ -41,14 +45,15 @@ typedef enum VhostUserRequest {
> >      VHOST_USER_SET_VRING_KICK = 12,
> >      VHOST_USER_SET_VRING_CALL = 13,
> >      VHOST_USER_SET_VRING_ERR = 14,
> > -    VHOST_USER_MAX
> > +    VHOST_USER_MMAP_HUGEPAGE_FILE = 15,
> > +    VHOST_USER_UNMAP_HUGEPAGE_FILE = 16,
> > +    VHOST_USER_MAX,
> >  } VhostUserRequest;
> >  
> >  typedef struct VhostUserMemoryRegion {
> >      uint64_t guest_phys_addr;
> >      uint64_t memory_size;
> >      uint64_t userspace_addr;
> > -    uint64_t mmap_offset;
> >  } VhostUserMemoryRegion;
> >  
> >  typedef struct VhostUserMemory {
> > @@ -57,6 +62,16 @@ typedef struct VhostUserMemory {
> >      VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> >  } VhostUserMemory;
> >  
> > +typedef struct HugepageMemoryInfo {
> > +    uint64_t base_addr;
> > +    uint64_t size;
> > +}HugeMemInfo;
> > +
> > +typedef struct HugepageInfo {
> > +    uint32_t num;
> > +    HugeMemInfo files[HUGEPAGE_MAX_FILES];
> > +}HugepageInfo;
> > +
> >  typedef struct VhostUserMsg {
> >      VhostUserRequest request;
> >  
> > @@ -71,6 +86,7 @@ typedef struct VhostUserMsg {
> >          struct vhost_vring_state state;
> >          struct vhost_vring_addr addr;
> >          VhostUserMemory memory;
> > +        HugepageInfo huge_info;
> >      };
> >  } QEMU_PACKED VhostUserMsg;
> >  
> > @@ -104,7 +120,9 @@ static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
> >      VHOST_GET_VRING_BASE,   /* VHOST_USER_GET_VRING_BASE */
> >      VHOST_SET_VRING_KICK,   /* VHOST_USER_SET_VRING_KICK */
> >      VHOST_SET_VRING_CALL,   /* VHOST_USER_SET_VRING_CALL */
> > -    VHOST_SET_VRING_ERR     /* VHOST_USER_SET_VRING_ERR */
> > +    VHOST_SET_VRING_ERR,    /* VHOST_USER_SET_VRING_ERR */
> > +    VHOST_MMAP_HUGEPAGE_FILE,  /* VHOST_USER_MMAP_HUGEPAGE_FILE */
> > +    VHOST_UNMAP_HUGEPAGE_FILE, /* VHOST_USER_UNMAP_HUGEPAGE_FILE */
> >  };
> >  
> >  static VhostUserRequest vhost_user_request_translate(unsigned long int request)
> > @@ -190,6 +208,7 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
> >      int fds[VHOST_MEMORY_MAX_NREGIONS];
> >      int i, fd;
> >      size_t fd_num = 0;
> > +    RAMBlock *block;
> >  
> >      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> >  
> > @@ -213,37 +232,46 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
> >      case VHOST_RESET_OWNER:
> >          break;
> >  
> > -    case VHOST_SET_MEM_TABLE:
> > -        for (i = 0; i < dev->mem->nregions; ++i) {
> > -            struct vhost_memory_region *reg = dev->mem->regions + i;
> > -            ram_addr_t ram_addr;
> > +    case VHOST_MMAP_HUGEPAGE_FILE:
> > +        qemu_mutex_lock_ramlist();
> >  
> > -            assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
> > -            qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, &ram_addr);
> > -            fd = qemu_get_ram_fd(ram_addr);
> > -            if (fd > 0) {
> > -                msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
> > -                msg.memory.regions[fd_num].memory_size  = reg->memory_size;
> > -                msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
> > -                msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
> > -                    (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
> > -                assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
> > -                fds[fd_num++] = fd;
> > +        /* Get hugepage file informations */
> > +        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> > +            if (block->flags & RAM_SHARED && block->fd > 0) {
> > +                msg.huge_info.files[fd_num].size = block->length;
> > +                msg.huge_info.files[fd_num].base_addr = block->host;
> > +                fds[fd_num++] = block->fd;
> >              }
> >          }
> > +        msg.huge_info.num = fd_num;
> >  
> > -        msg.memory.nregions = fd_num;
> > +        /* Calculate msg size */
> > +        msg.size = sizeof(m.huge_info.num);
> > +        msg.size += fd_num * sizeof(HugeMemInfo);
> > +        
> > +        qemu_mutex_unlock_ramlist();        
> > +        break;
> >  
> > -        if (!fd_num) {
> > -            error_report("Failed initializing vhost-user memory map\n"
> > -                    "consider using -object memory-backend-file share=on\n");
> > -            return -1;
> > +    case VHOST_UNMAP_HUGEPAGE_FILE:
> > +        /* Tell vhost-user to unmap all hugepage files. */
> > +        break;
> > +
> > +    case VHOST_SET_MEM_TABLE:
> > +        for (i = 0; i < dev->mem->nregions; i++) {
> > +            struct vhost_memory_region *reg = dev->mem->regions + i;
> > +
> > +            assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
> > +
> > +            msg.memory.regions[i].userspace_addr = reg->userspace_addr;
> > +            msg.memory.regions[i].memory_size  = reg->memory_size;
> > +            msg.memory.regions[i].guest_phys_addr = reg->guest_phys_addr;
> > +            assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
> >          }
> >  
> > +        msg.memory.nregions = i;
> >          msg.size = sizeof(m.memory.nregions);
> >          msg.size += sizeof(m.memory.padding);
> > -        msg.size += fd_num * sizeof(VhostUserMemoryRegion);
> > -
> > +        msg.size += i * sizeof(VhostUserMemoryRegion);
> >          break;
> >  
> >      case VHOST_SET_LOG_FD:
> > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> > index 5a12861..b8eb341 100644
> > --- a/hw/virtio/vhost.c
> > +++ b/hw/virtio/vhost.c
> > @@ -1041,6 +1041,14 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
> >      if (r < 0) {
> >          goto fail_features;
> >      }
> > +    if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
> > +        r = hdev->vhost_ops->vhost_call(hdev, VHOST_MMAP_HUGEPAGE_FILE, 
> > +            NULL);
> > +        if (r < 0) {
> > +            r = -errno;
> > +            goto fail_mem;
> > +        }    
> > +    }
> >      r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem);
> >      if (r < 0) {
> >          r = -errno;
> > @@ -1101,5 +1109,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
> >      g_free(hdev->log);
> >      hdev->log = NULL;
> >      hdev->log_size = 0;
> > +
> > +    if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
> > +        (void)hdev->vhost_ops->vhost_call(hdev, VHOST_MMAP_HUGEPAGE_FILE, 
> > +            NULL);   
> > +    }    
> >  }
> >  
> > diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h
> > index c656f61..bb72811 100644
> > --- a/linux-headers/linux/vhost.h
> > +++ b/linux-headers/linux/vhost.h
> > @@ -113,6 +113,13 @@ struct vhost_memory {
> >  /* Set eventfd to signal an error */
> >  #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
> >  
> > +/* Tell vhost-user to mmap hugepage file */
> > +#define VHOST_MMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x23, int)
> > +/* Tell vhost-user to unmap hugepage file */
> > +#define VHOST_UNMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x24, int)
> > +
> > +#define VHOST_THREAD_ID _IOR(VHOST_VIRTIO, 0x25, struct vhost_vring_thread)
> > +
> >  /* VHOST_NET specific defines */
> >  
> >  /* Attach virtio net ring to a raw socket, or tap device.
> > 

  parent reply	other threads:[~2015-02-04 13:52 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-12-17  6:02 [Qemu-devel] [PATCH v1] vhost-user: fix not send all hugepage files to vhost-user haifeng.lin
2014-12-18  5:06 ` Linhaifeng
2015-01-29  3:58   ` Linhaifeng
2015-01-29 10:51     ` Michael S. Tsirkin
2015-01-29 13:02       ` Linhaifeng
2015-02-01 23:29 ` Paolo Bonzini
2015-02-03 13:09   ` Linhaifeng
2015-02-04 13:52   ` Michael S. Tsirkin [this message]
2015-02-04 14:02     ` Paolo Bonzini
2015-02-15 12:07   ` Linhaifeng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150204135200.GA16345@redhat.com \
    --to=mst@redhat.com \
    --cc=arei.gonglei@huawei.com \
    --cc=haifeng.lin@huawei.com \
    --cc=jerry.lilijun@huawei.com \
    --cc=milo.raofei@huawei.com \
    --cc=n.nikolaev@virtualopensystems.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.