qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2 0/6] proposal to make hostmem listener RAM unplug safe
@ 2013-05-03  2:45 Liu Ping Fan
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 1/6] hostmem: make hostmem single, not per Vring related Liu Ping Fan
                   ` (6 more replies)
  0 siblings, 7 replies; 19+ messages in thread
From: Liu Ping Fan @ 2013-05-03  2:45 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, Vasilis Liaskovitis,
	Stefan Hajnoczi, Paolo Bonzini

v1->v2:
  1.split RCU prepared style update and monitor the RAM-Device refcnt into two patches (patch 2,4)
  2.introduce AddrSpaceMem, which is similar to HostMem, but based on address space, while
    the original HostMem only server system memory address space

Liu Ping Fan (6):
  hostmem: make hostmem single, not per Vring related
  hostmem: AddressSpace has its own map and maintained by RCU prepared
    style
  memory: add ref/unref interface for MemroyRegionOps
  hostmem: hostmem listener pin RAM-Device by refcnt
  Vring: use hostmem's RAM safe api
  virtio-blk: release reference to RAM's memoryRegion

 exec.c                                |    1 +
 hw/block/dataplane/virtio-blk.c       |   52 ++++++++---
 hw/virtio/dataplane/hostmem.c         |  168 +++++++++++++++++++++++++--------
 hw/virtio/dataplane/vring.c           |  100 +++++++++++++++-----
 include/exec/memory.h                 |   11 ++
 include/hw/virtio/dataplane/hostmem.h |   25 +++---
 include/hw/virtio/dataplane/vring.h   |    5 +-
 memory.c                              |   18 ++++
 8 files changed, 289 insertions(+), 91 deletions(-)

-- 
1.7.4.4

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH v2 1/6] hostmem: make hostmem single, not per Vring related
  2013-05-03  2:45 [Qemu-devel] [PATCH v2 0/6] proposal to make hostmem listener RAM unplug safe Liu Ping Fan
@ 2013-05-03  2:45 ` Liu Ping Fan
  2013-05-03  8:54   ` Stefan Hajnoczi
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 2/6] hostmem: AddressSpace has its own map and maintained by RCU prepared style Liu Ping Fan
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 19+ messages in thread
From: Liu Ping Fan @ 2013-05-03  2:45 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, Vasilis Liaskovitis,
	Stefan Hajnoczi, Paolo Bonzini

From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

The hwaddr and hva mapping relation is system wide, no need to
be created for each Vring

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
 exec.c                                |    1 +
 hw/virtio/dataplane/hostmem.c         |   33 +++++++++++++++++++--------------
 hw/virtio/dataplane/vring.c           |   11 ++++-------
 include/exec/memory.h                 |    1 +
 include/hw/virtio/dataplane/hostmem.h |    5 +----
 include/hw/virtio/dataplane/vring.h   |    1 -
 6 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/exec.c b/exec.c
index fa1e0c3..df01f08 100644
--- a/exec.c
+++ b/exec.c
@@ -1809,6 +1809,7 @@ static void memory_map_init(void)
     memory_listener_register(&core_memory_listener, &address_space_memory);
     memory_listener_register(&io_memory_listener, &address_space_io);
     memory_listener_register(&tcg_memory_listener, &address_space_memory);
+    hostmem_init();
 
     dma_context_init(&dma_context_memory, &address_space_memory,
                      NULL, NULL, NULL);
diff --git a/hw/virtio/dataplane/hostmem.c b/hw/virtio/dataplane/hostmem.c
index 37292ff..756b09f 100644
--- a/hw/virtio/dataplane/hostmem.c
+++ b/hw/virtio/dataplane/hostmem.c
@@ -14,6 +14,10 @@
 #include "exec/address-spaces.h"
 #include "hw/virtio/dataplane/hostmem.h"
 
+HostMem *system_mem;
+
+static void hostmem_finalize(void);
+
 static int hostmem_lookup_cmp(const void *phys_, const void *region_)
 {
     hwaddr phys = *(const hwaddr *)phys_;
@@ -31,11 +35,12 @@ static int hostmem_lookup_cmp(const void *phys_, const void *region_)
 /**
  * Map guest physical address to host pointer
  */
-void *hostmem_lookup(HostMem *hostmem, hwaddr phys, hwaddr len, bool is_write)
+void *hostmem_lookup(hwaddr phys, hwaddr len, bool is_write)
 {
     HostMemRegion *region;
     void *host_addr = NULL;
     hwaddr offset_within_region;
+    HostMem *hostmem = system_mem;
 
     qemu_mutex_lock(&hostmem->current_regions_lock);
     region = bsearch(&phys, hostmem->current_regions,
@@ -137,13 +142,12 @@ static void hostmem_listener_coalesced_mmio_dummy(MemoryListener *listener,
 {
 }
 
-void hostmem_init(HostMem *hostmem)
+void hostmem_init(void)
 {
-    memset(hostmem, 0, sizeof(*hostmem));
+    system_mem = g_new0(HostMem, 1);
+    qemu_mutex_init(&system_mem->current_regions_lock);
 
-    qemu_mutex_init(&hostmem->current_regions_lock);
-
-    hostmem->listener = (MemoryListener){
+    system_mem->listener = (MemoryListener) {
         .begin = hostmem_listener_dummy,
         .commit = hostmem_listener_commit,
         .region_add = hostmem_listener_append_region,
@@ -161,16 +165,17 @@ void hostmem_init(HostMem *hostmem)
         .priority = 10,
     };
 
-    memory_listener_register(&hostmem->listener, &address_space_memory);
-    if (hostmem->num_new_regions > 0) {
-        hostmem_listener_commit(&hostmem->listener);
+    memory_listener_register(&system_mem->listener, &address_space_memory);
+    if (system_mem->num_new_regions > 0) {
+        hostmem_listener_commit(&system_mem->listener);
     }
+    atexit(hostmem_finalize);
 }
 
-void hostmem_finalize(HostMem *hostmem)
+static void hostmem_finalize(void)
 {
-    memory_listener_unregister(&hostmem->listener);
-    g_free(hostmem->new_regions);
-    g_free(hostmem->current_regions);
-    qemu_mutex_destroy(&hostmem->current_regions_lock);
+    memory_listener_unregister(&system_mem->listener);
+    g_free(system_mem->new_regions);
+    g_free(system_mem->current_regions);
+    qemu_mutex_destroy(&system_mem->current_regions_lock);
 }
diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c
index e0d6e83..4d6d735 100644
--- a/hw/virtio/dataplane/vring.c
+++ b/hw/virtio/dataplane/vring.c
@@ -27,8 +27,7 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
 
     vring->broken = false;
 
-    hostmem_init(&vring->hostmem);
-    vring_ptr = hostmem_lookup(&vring->hostmem, vring_addr, vring_size, true);
+    vring_ptr = hostmem_lookup(vring_addr, vring_size, true);
     if (!vring_ptr) {
         error_report("Failed to map vring "
                      "addr %#" HWADDR_PRIx " size %" HWADDR_PRIu,
@@ -51,7 +50,6 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
 
 void vring_teardown(Vring *vring)
 {
-    hostmem_finalize(&vring->hostmem);
 }
 
 /* Disable guest->host notifies */
@@ -138,8 +136,7 @@ static int get_indirect(Vring *vring,
         struct vring_desc *desc_ptr;
 
         /* Translate indirect descriptor */
-        desc_ptr = hostmem_lookup(&vring->hostmem,
-                                  indirect->addr + found * sizeof(desc),
+        desc_ptr = hostmem_lookup(indirect->addr + found * sizeof(desc),
                                   sizeof(desc), false);
         if (!desc_ptr) {
             error_report("Failed to map indirect descriptor "
@@ -172,7 +169,7 @@ static int get_indirect(Vring *vring,
             return -ENOBUFS;
         }
 
-        iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len,
+        iov->iov_base = hostmem_lookup(desc.addr, desc.len,
                                        desc.flags & VRING_DESC_F_WRITE);
         if (!iov->iov_base) {
             error_report("Failed to map indirect descriptor"
@@ -300,7 +297,7 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
         }
 
         /* TODO handle non-contiguous memory across region boundaries */
-        iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len,
+        iov->iov_base = hostmem_lookup(desc.addr, desc.len,
                                        desc.flags & VRING_DESC_F_WRITE);
         if (!iov->iov_base) {
             error_report("Failed to map vring desc addr %#" PRIx64 " len %u",
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 9e88320..2761668 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -887,6 +887,7 @@ void *address_space_map(AddressSpace *as, hwaddr addr,
 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                          int is_write, hwaddr access_len);
 
+void hostmem_init(void);
 
 #endif
 
diff --git a/include/hw/virtio/dataplane/hostmem.h b/include/hw/virtio/dataplane/hostmem.h
index b2cf093..7f967a5 100644
--- a/include/hw/virtio/dataplane/hostmem.h
+++ b/include/hw/virtio/dataplane/hostmem.h
@@ -41,9 +41,6 @@ typedef struct {
     size_t num_current_regions;
 } HostMem;
 
-void hostmem_init(HostMem *hostmem);
-void hostmem_finalize(HostMem *hostmem);
-
 /**
  * Map a guest physical address to a pointer
  *
@@ -52,6 +49,6 @@ void hostmem_finalize(HostMem *hostmem);
  * can be done with other mechanisms like bdrv_drain_all() that quiesce
  * in-flight I/O.
  */
-void *hostmem_lookup(HostMem *hostmem, hwaddr phys, hwaddr len, bool is_write);
+void *hostmem_lookup(hwaddr phys, hwaddr len, bool is_write);
 
 #endif /* HOSTMEM_H */
diff --git a/include/hw/virtio/dataplane/vring.h b/include/hw/virtio/dataplane/vring.h
index 9380cb5..56acffb 100644
--- a/include/hw/virtio/dataplane/vring.h
+++ b/include/hw/virtio/dataplane/vring.h
@@ -23,7 +23,6 @@
 #include "hw/virtio/virtio.h"
 
 typedef struct {
-    HostMem hostmem;                /* guest memory mapper */
     struct vring vr;                /* virtqueue vring mapped to host memory */
     uint16_t last_avail_idx;        /* last processed avail ring index */
     uint16_t last_used_idx;         /* last processed used ring index */
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH v2 2/6] hostmem: AddressSpace has its own map and maintained by RCU prepared style
  2013-05-03  2:45 [Qemu-devel] [PATCH v2 0/6] proposal to make hostmem listener RAM unplug safe Liu Ping Fan
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 1/6] hostmem: make hostmem single, not per Vring related Liu Ping Fan
@ 2013-05-03  2:45 ` Liu Ping Fan
  2013-05-03  9:10   ` Stefan Hajnoczi
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 3/6] memory: add ref/unref interface for MemroyRegionOps Liu Ping Fan
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 19+ messages in thread
From: Liu Ping Fan @ 2013-05-03  2:45 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, Vasilis Liaskovitis,
	Stefan Hajnoczi, Paolo Bonzini

From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

Each address space will have a map between its hwaddr and hva,
this is expressed as the struct AddrSpaceMem. Currently only
address space of system memory's map is used by virtio device,
and the map is stored in AddrSpaceMem *system_mem.

The map is maintained by RCU prepared style, cur_hostmem,
next_hostmem, cur_lock fields in AddrSpaceMem help to access
the aim. cur_hostmem is used to search, next_hostmem is used to
update and will substitue cur_hostmem when done.

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
 hw/virtio/dataplane/hostmem.c         |  133 +++++++++++++++++++++++----------
 include/hw/virtio/dataplane/hostmem.h |   20 +++--
 2 files changed, 103 insertions(+), 50 deletions(-)

diff --git a/hw/virtio/dataplane/hostmem.c b/hw/virtio/dataplane/hostmem.c
index 756b09f..1fd3e06 100644
--- a/hw/virtio/dataplane/hostmem.c
+++ b/hw/virtio/dataplane/hostmem.c
@@ -14,7 +14,7 @@
 #include "exec/address-spaces.h"
 #include "hw/virtio/dataplane/hostmem.h"
 
-HostMem *system_mem;
+static AddrSpaceMem *system_mem;
 
 static void hostmem_finalize(void);
 
@@ -32,17 +32,39 @@ static int hostmem_lookup_cmp(const void *phys_, const void *region_)
     }
 }
 
-/**
- * Map guest physical address to host pointer
- */
-void *hostmem_lookup(hwaddr phys, hwaddr len, bool is_write)
+static void hostmem_ref(HostMem *hostmem)
+{
+    int t;
+
+    t = __sync_add_and_fetch(&hostmem->ref, 1);
+    assert(t > 0);
+}
+
+static void hostmem_unref(HostMem *hostmem)
+{
+    int t;
+
+    t = __sync_sub_and_fetch(&hostmem->ref, 1);
+    assert(t >= 0);
+    if (!t) {
+        g_free(hostmem->current_regions);
+        g_free(hostmem);
+    }
+}
+
+static void *address_space_mem_lookup(AddrSpaceMem *as_mem, hwaddr phys,
+    hwaddr len, bool is_write)
 {
     HostMemRegion *region;
     void *host_addr = NULL;
     hwaddr offset_within_region;
-    HostMem *hostmem = system_mem;
+    HostMem *hostmem;
+
+    qemu_mutex_lock(&as_mem->cur_lock);
+    hostmem = as_mem->cur_hostmem;
+    hostmem_ref(hostmem);
+    qemu_mutex_unlock(&as_mem->cur_lock);
 
-    qemu_mutex_lock(&hostmem->current_regions_lock);
     region = bsearch(&phys, hostmem->current_regions,
                      hostmem->num_current_regions,
                      sizeof(hostmem->current_regions[0]),
@@ -57,28 +79,45 @@ void *hostmem_lookup(hwaddr phys, hwaddr len, bool is_write)
     if (len <= region->size - offset_within_region) {
         host_addr = region->host_addr + offset_within_region;
     }
-out:
-    qemu_mutex_unlock(&hostmem->current_regions_lock);
 
+out:
+    hostmem_unref(hostmem);
     return host_addr;
 }
 
 /**
- * Install new regions list
+ * Map guest physical address to host pointer
  */
-static void hostmem_listener_commit(MemoryListener *listener)
+void *hostmem_lookup(hwaddr phys, hwaddr len, bool is_write)
 {
-    HostMem *hostmem = container_of(listener, HostMem, listener);
+    return address_space_mem_lookup(system_mem, phys, len, is_write);
+}
 
-    qemu_mutex_lock(&hostmem->current_regions_lock);
-    g_free(hostmem->current_regions);
-    hostmem->current_regions = hostmem->new_regions;
-    hostmem->num_current_regions = hostmem->num_new_regions;
-    qemu_mutex_unlock(&hostmem->current_regions_lock);
+static void hostmem_listener_begin(MemoryListener *listener)
+{
+    AddrSpaceMem *as_mem = container_of(listener, AddrSpaceMem, listener);
+
+    as_mem->next_hostmem = g_new0(HostMem, 1);
+    as_mem->next_hostmem->ref = 1;
+}
 
-    /* Reset new regions list */
-    hostmem->new_regions = NULL;
-    hostmem->num_new_regions = 0;
+/**
+ * Install new regions list
+ */
+static void hostmem_listener_commit(MemoryListener *listener)
+{
+    HostMem *tmp;
+    AddrSpaceMem *as_mem = container_of(listener, AddrSpaceMem, listener);
+
+    /* writer of cur_hostmem &next_hostmem is serialed by biglock
+         * in hotplug path. So only take care of r/w on cur_hostmem
+         */
+    tmp = as_mem->cur_hostmem;
+    qemu_mutex_lock(&as_mem->cur_lock);
+    as_mem->cur_hostmem = as_mem->next_hostmem;
+    qemu_mutex_unlock(&as_mem->cur_lock);
+    as_mem->next_hostmem = NULL;
+    hostmem_unref(tmp);
 }
 
 /**
@@ -88,23 +127,23 @@ static void hostmem_append_new_region(HostMem *hostmem,
                                       MemoryRegionSection *section)
 {
     void *ram_ptr = memory_region_get_ram_ptr(section->mr);
-    size_t num = hostmem->num_new_regions;
-    size_t new_size = (num + 1) * sizeof(hostmem->new_regions[0]);
+    size_t num = hostmem->num_current_regions;
+    size_t new_size = (num + 1) * sizeof(hostmem->current_regions[0]);
 
-    hostmem->new_regions = g_realloc(hostmem->new_regions, new_size);
-    hostmem->new_regions[num] = (HostMemRegion){
+    hostmem->current_regions = g_realloc(hostmem->current_regions, new_size);
+    hostmem->current_regions[num] = (HostMemRegion){
         .host_addr = ram_ptr + section->offset_within_region,
         .guest_addr = section->offset_within_address_space,
         .size = section->size,
         .readonly = section->readonly,
     };
-    hostmem->num_new_regions++;
+    hostmem->num_current_regions++;
 }
 
 static void hostmem_listener_append_region(MemoryListener *listener,
                                            MemoryRegionSection *section)
 {
-    HostMem *hostmem = container_of(listener, HostMem, listener);
+    AddrSpaceMem *as_mem = container_of(listener, AddrSpaceMem, listener);
 
     /* Ignore non-RAM regions, we may not be able to map them */
     if (!memory_region_is_ram(section->mr)) {
@@ -116,7 +155,7 @@ static void hostmem_listener_append_region(MemoryListener *listener,
         return;
     }
 
-    hostmem_append_new_region(hostmem, section);
+    hostmem_append_new_region(as_mem->next_hostmem, section);
 }
 
 /* We don't implement most MemoryListener callbacks, use these nop stubs */
@@ -142,13 +181,13 @@ static void hostmem_listener_coalesced_mmio_dummy(MemoryListener *listener,
 {
 }
 
-void hostmem_init(void)
+static AddrSpaceMem *address_space_mem_create(AddressSpace *as)
 {
-    system_mem = g_new0(HostMem, 1);
-    qemu_mutex_init(&system_mem->current_regions_lock);
+    AddrSpaceMem *as_mem = g_new0(AddrSpaceMem, 1);
 
-    system_mem->listener = (MemoryListener) {
-        .begin = hostmem_listener_dummy,
+    qemu_mutex_init(&as_mem->cur_lock);
+    as_mem->listener = (MemoryListener) {
+        .begin = hostmem_listener_begin,
         .commit = hostmem_listener_commit,
         .region_add = hostmem_listener_append_region,
         .region_del = hostmem_listener_section_dummy,
@@ -164,18 +203,30 @@ void hostmem_init(void)
         .coalesced_mmio_del = hostmem_listener_coalesced_mmio_dummy,
         .priority = 10,
     };
+    as_mem->cur_hostmem = g_new0(HostMem, 1);
+    as_mem->cur_hostmem->ref = 1;
+    memory_listener_register(&as_mem->listener, as);
 
-    memory_listener_register(&system_mem->listener, &address_space_memory);
-    if (system_mem->num_new_regions > 0) {
-        hostmem_listener_commit(&system_mem->listener);
-    }
+    return as_mem;
+}
+
+static void address_space_mem_destroy(AddrSpaceMem *as_mem)
+{
+    memory_listener_unregister(&as_mem->listener);
+    qemu_mutex_destroy(&as_mem->cur_lock);
+    hostmem_unref(as_mem->cur_hostmem);
+    assert(!as_mem->next_hostmem);
+    g_free(as_mem);
+}
+
+void hostmem_init(void)
+{
+    system_mem = address_space_mem_create(&address_space_memory);
     atexit(hostmem_finalize);
 }
 
-static void hostmem_finalize(void)
+void hostmem_finalize(void)
 {
-    memory_listener_unregister(&system_mem->listener);
-    g_free(system_mem->new_regions);
-    g_free(system_mem->current_regions);
-    qemu_mutex_destroy(&system_mem->current_regions_lock);
+    address_space_mem_destroy(system_mem);
+    system_mem = NULL;
 }
diff --git a/include/hw/virtio/dataplane/hostmem.h b/include/hw/virtio/dataplane/hostmem.h
index 7f967a5..51afa8f 100644
--- a/include/hw/virtio/dataplane/hostmem.h
+++ b/include/hw/virtio/dataplane/hostmem.h
@@ -28,19 +28,21 @@ typedef struct {
     /* The listener is invoked when regions change and a new list of regions is
      * built up completely before they are installed.
      */
-    MemoryListener listener;
-    HostMemRegion *new_regions;
-    size_t num_new_regions;
-
-    /* Current regions are accessed from multiple threads either to lookup
-     * addresses or to install a new list of regions.  The lock protects the
-     * pointer and the regions.
-     */
-    QemuMutex current_regions_lock;
+    int ref;
     HostMemRegion *current_regions;
     size_t num_current_regions;
 } HostMem;
 
+/* Helper to map address space's hw_addr to hva */
+typedef struct {
+    /* lock to protect the r/w access to cur_hostmem */
+    QemuMutex cur_lock;
+    /* switch from cur_hostmem to next_hostmem to adopt RCU */
+    HostMem *cur_hostmem;
+    HostMem *next_hostmem;
+    MemoryListener listener;
+} AddrSpaceMem;
+
 /**
  * Map a guest physical address to a pointer
  *
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH v2 3/6] memory: add ref/unref interface for MemroyRegionOps
  2013-05-03  2:45 [Qemu-devel] [PATCH v2 0/6] proposal to make hostmem listener RAM unplug safe Liu Ping Fan
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 1/6] hostmem: make hostmem single, not per Vring related Liu Ping Fan
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 2/6] hostmem: AddressSpace has its own map and maintained by RCU prepared style Liu Ping Fan
@ 2013-05-03  2:45 ` Liu Ping Fan
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 4/6] hostmem: hostmem listener pin RAM-Device by refcnt Liu Ping Fan
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 19+ messages in thread
From: Liu Ping Fan @ 2013-05-03  2:45 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, Vasilis Liaskovitis,
	Stefan Hajnoczi, Paolo Bonzini

From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

This pair of interface are optinal, except for those device which is
used outside the biglock's protection for hot unplug. Currently,
HostMem used by virtio-blk dataplane is outside biglock, so the RAM
device should implement this.

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
 include/exec/memory.h |   10 ++++++++++
 memory.c              |   18 ++++++++++++++++++
 2 files changed, 28 insertions(+), 0 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 2761668..edeb480 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -54,6 +54,12 @@ struct MemoryRegionIORange {
  * Memory region callbacks
  */
 struct MemoryRegionOps {
+
+    /* ref/unref pair is optional;  ref.
+     * inc refcnt of object who store MemoryRegion
+     */
+    void (*ref)(void);
+    void (*unref)(void);
     /* Read from the memory region. @addr is relative to @mr; @size is
      * in bytes. */
     uint64_t (*read)(void *opaque,
@@ -223,6 +229,10 @@ struct MemoryListener {
     QTAILQ_ENTRY(MemoryListener) link;
 };
 
+/**/
+bool memory_region_ref(MemoryRegion *mr);
+bool memory_region_unref(MemoryRegion *mr);
+
 /**
  * memory_region_init: Initialize a memory region
  *
diff --git a/memory.c b/memory.c
index 75ca281..c29998d 100644
--- a/memory.c
+++ b/memory.c
@@ -786,6 +786,24 @@ static bool memory_region_wrong_endianness(MemoryRegion *mr)
 #endif
 }
 
+bool memory_region_ref(MemoryRegion *mr)
+{
+    if (mr->ops && mr->ops->ref) {
+        mr->ops->ref();
+        return true;
+    }
+    return false;
+}
+
+bool memory_region_unref(MemoryRegion *mr)
+{
+    if (mr->ops && mr->ops->unref) {
+        mr->ops->unref();
+        return true;
+    }
+    return false;
+}
+
 void memory_region_init(MemoryRegion *mr,
                         const char *name,
                         uint64_t size)
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH v2 4/6] hostmem: hostmem listener pin RAM-Device by refcnt
  2013-05-03  2:45 [Qemu-devel] [PATCH v2 0/6] proposal to make hostmem listener RAM unplug safe Liu Ping Fan
                   ` (2 preceding siblings ...)
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 3/6] memory: add ref/unref interface for MemroyRegionOps Liu Ping Fan
@ 2013-05-03  2:45 ` Liu Ping Fan
  2013-05-03  9:35   ` Stefan Hajnoczi
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 5/6] Vring: use hostmem's RAM safe api Liu Ping Fan
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 19+ messages in thread
From: Liu Ping Fan @ 2013-05-03  2:45 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, Vasilis Liaskovitis,
	Stefan Hajnoczi, Paolo Bonzini

From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

With ref()/unref() interface of MemoryRegion, we can pin RAM-Device
when using its memory, and release it when done.

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
 hw/virtio/dataplane/hostmem.c         |   44 +++++++++++++++++++++++++++-----
 hw/virtio/dataplane/vring.c           |    8 +++---
 include/hw/virtio/dataplane/hostmem.h |    4 ++-
 3 files changed, 44 insertions(+), 12 deletions(-)

diff --git a/hw/virtio/dataplane/hostmem.c b/hw/virtio/dataplane/hostmem.c
index 1fd3e06..0e28dfc 100644
--- a/hw/virtio/dataplane/hostmem.c
+++ b/hw/virtio/dataplane/hostmem.c
@@ -42,18 +42,28 @@ static void hostmem_ref(HostMem *hostmem)
 
 static void hostmem_unref(HostMem *hostmem)
 {
-    int t;
+    int i, t;
+    HostMemRegion *hmr;
 
     t = __sync_sub_and_fetch(&hostmem->ref, 1);
     assert(t >= 0);
     if (!t) {
+        for (i = 0; i < hostmem->num_current_regions; i++) {
+            hmr = &hostmem->current_regions[i];
+            /* Fix me, when memory hotunplug implement
+             * assert(hmr->mr_ops->unref)
+             */
+            if (hmr->mr->ops && hmr->mr->ops->unref) {
+                hmr->mr->ops->unref();
+            }
+        }
         g_free(hostmem->current_regions);
         g_free(hostmem);
     }
 }
 
 static void *address_space_mem_lookup(AddrSpaceMem *as_mem, hwaddr phys,
-    hwaddr len, bool is_write)
+    hwaddr len, MemoryRegion **mr, bool is_write)
 {
     HostMemRegion *region;
     void *host_addr = NULL;
@@ -65,6 +75,9 @@ static void *address_space_mem_lookup(AddrSpaceMem *as_mem, hwaddr phys,
     hostmem_ref(hostmem);
     qemu_mutex_unlock(&as_mem->cur_lock);
 
+    if (mr) {
+        *mr = NULL;
+    }
     region = bsearch(&phys, hostmem->current_regions,
                      hostmem->num_current_regions,
                      sizeof(hostmem->current_regions[0]),
@@ -79,7 +92,10 @@ static void *address_space_mem_lookup(AddrSpaceMem *as_mem, hwaddr phys,
     if (len <= region->size - offset_within_region) {
         host_addr = region->host_addr + offset_within_region;
     }
-
+    if (mr) {
+        *mr = region->mr;
+        memory_region_ref(*mr);
+    }
 out:
     hostmem_unref(hostmem);
     return host_addr;
@@ -88,9 +104,10 @@ out:
 /**
  * Map guest physical address to host pointer
  */
-void *hostmem_lookup(hwaddr phys, hwaddr len, bool is_write)
+void *hostmem_lookup(hwaddr phys, hwaddr len, MemoryRegion **mr,
+    bool is_write)
 {
-    return address_space_mem_lookup(system_mem, phys, len, is_write);
+    return address_space_mem_lookup(system_mem, phys, len, mr, is_write);
 }
 
 static void hostmem_listener_begin(MemoryListener *listener)
@@ -134,13 +151,14 @@ static void hostmem_append_new_region(HostMem *hostmem,
     hostmem->current_regions[num] = (HostMemRegion){
         .host_addr = ram_ptr + section->offset_within_region,
         .guest_addr = section->offset_within_address_space,
+        .mr = section->mr,
         .size = section->size,
         .readonly = section->readonly,
     };
     hostmem->num_current_regions++;
 }
 
-static void hostmem_listener_append_region(MemoryListener *listener,
+static void hostmem_listener_nop_region(MemoryListener *listener,
                                            MemoryRegionSection *section)
 {
     AddrSpaceMem *as_mem = container_of(listener, AddrSpaceMem, listener);
@@ -158,6 +176,18 @@ static void hostmem_listener_append_region(MemoryListener *listener,
     hostmem_append_new_region(as_mem->next_hostmem, section);
 }
 
+static void hostmem_listener_append_region(MemoryListener *listener,
+                                           MemoryRegionSection *section)
+{
+    hostmem_listener_nop_region(listener, section);
+    /* Fix me, when memory hotunplug implement
+     * assert(section->mr->ops->ref)
+     */
+    if (section->mr->ops && section->mr->ops->ref) {
+        section->mr->ops->ref();
+    }
+}
+
 /* We don't implement most MemoryListener callbacks, use these nop stubs */
 static void hostmem_listener_dummy(MemoryListener *listener)
 {
@@ -191,7 +221,7 @@ static AddrSpaceMem *address_space_mem_create(AddressSpace *as)
         .commit = hostmem_listener_commit,
         .region_add = hostmem_listener_append_region,
         .region_del = hostmem_listener_section_dummy,
-        .region_nop = hostmem_listener_append_region,
+        .region_nop = hostmem_listener_nop_region,
         .log_start = hostmem_listener_section_dummy,
         .log_stop = hostmem_listener_section_dummy,
         .log_sync = hostmem_listener_section_dummy,
diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c
index 4d6d735..e3c3afb 100644
--- a/hw/virtio/dataplane/vring.c
+++ b/hw/virtio/dataplane/vring.c
@@ -27,7 +27,7 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
 
     vring->broken = false;
 
-    vring_ptr = hostmem_lookup(vring_addr, vring_size, true);
+    vring_ptr = hostmem_lookup(vring_addr, vring_size, NULL, true);
     if (!vring_ptr) {
         error_report("Failed to map vring "
                      "addr %#" HWADDR_PRIx " size %" HWADDR_PRIu,
@@ -137,7 +137,7 @@ static int get_indirect(Vring *vring,
 
         /* Translate indirect descriptor */
         desc_ptr = hostmem_lookup(indirect->addr + found * sizeof(desc),
-                                  sizeof(desc), false);
+                                  sizeof(desc), NULL, false);
         if (!desc_ptr) {
             error_report("Failed to map indirect descriptor "
                          "addr %#" PRIx64 " len %zu",
@@ -169,7 +169,7 @@ static int get_indirect(Vring *vring,
             return -ENOBUFS;
         }
 
-        iov->iov_base = hostmem_lookup(desc.addr, desc.len,
+        iov->iov_base = hostmem_lookup(desc.addr, desc.len, NULL,
                                        desc.flags & VRING_DESC_F_WRITE);
         if (!iov->iov_base) {
             error_report("Failed to map indirect descriptor"
@@ -297,7 +297,7 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
         }
 
         /* TODO handle non-contiguous memory across region boundaries */
-        iov->iov_base = hostmem_lookup(desc.addr, desc.len,
+        iov->iov_base = hostmem_lookup(desc.addr, desc.len, NULL,
                                        desc.flags & VRING_DESC_F_WRITE);
         if (!iov->iov_base) {
             error_report("Failed to map vring desc addr %#" PRIx64 " len %u",
diff --git a/include/hw/virtio/dataplane/hostmem.h b/include/hw/virtio/dataplane/hostmem.h
index 51afa8f..20289c8 100644
--- a/include/hw/virtio/dataplane/hostmem.h
+++ b/include/hw/virtio/dataplane/hostmem.h
@@ -20,6 +20,7 @@
 typedef struct {
     void *host_addr;
     hwaddr guest_addr;
+    MemoryRegion *mr;
     uint64_t size;
     bool readonly;
 } HostMemRegion;
@@ -51,6 +52,7 @@ typedef struct {
  * can be done with other mechanisms like bdrv_drain_all() that quiesce
  * in-flight I/O.
  */
-void *hostmem_lookup(hwaddr phys, hwaddr len, bool is_write);
+void *hostmem_lookup(hwaddr phys, hwaddr len, MemoryRegion **mr,
+    bool is_write);
 
 #endif /* HOSTMEM_H */
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH v2 5/6] Vring: use hostmem's RAM safe api
  2013-05-03  2:45 [Qemu-devel] [PATCH v2 0/6] proposal to make hostmem listener RAM unplug safe Liu Ping Fan
                   ` (3 preceding siblings ...)
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 4/6] hostmem: hostmem listener pin RAM-Device by refcnt Liu Ping Fan
@ 2013-05-03  2:45 ` Liu Ping Fan
  2013-05-03 10:02   ` Stefan Hajnoczi
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 6/6] virtio-blk: release reference to RAM's memoryRegion Liu Ping Fan
  2013-05-04  9:53 ` [Qemu-devel] [PATCH v2 0/6] proposal to make hostmem listener RAM unplug safe Paolo Bonzini
  6 siblings, 1 reply; 19+ messages in thread
From: Liu Ping Fan @ 2013-05-03  2:45 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, Vasilis Liaskovitis,
	Stefan Hajnoczi, Paolo Bonzini

From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

Before mm-ops done, we should gurantee the validaion of regions which is
used by Vring self and the chunck pointed by vring desc. We acheive
this goal by inc refcnt of RAM-Device. When finished, we dec this cnt
through the interface of MemoryRegion.

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
 hw/block/dataplane/virtio-blk.c     |    3 +-
 hw/virtio/dataplane/vring.c         |   95 +++++++++++++++++++++++++++-------
 include/hw/virtio/dataplane/vring.h |    4 +-
 3 files changed, 80 insertions(+), 22 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 0356665..3bb57d1 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -305,7 +305,8 @@ static void handle_notify(EventNotifier *e)
         vring_disable_notification(s->vdev, &s->vring);
 
         for (;;) {
-            head = vring_pop(s->vdev, &s->vring, iov, end, &out_num, &in_num);
+            head = vring_pop(s->vdev, &s->vring, iov, end, &out_num,
+                        &in_num, NULL);
             if (head < 0) {
                 break; /* no more requests */
             }
diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c
index e3c3afb..440486d 100644
--- a/hw/virtio/dataplane/vring.c
+++ b/hw/virtio/dataplane/vring.c
@@ -27,7 +27,7 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
 
     vring->broken = false;
 
-    vring_ptr = hostmem_lookup(vring_addr, vring_size, NULL, true);
+    vring_ptr = hostmem_lookup(vring_addr, vring_size, &vring->vring_mr, true);
     if (!vring_ptr) {
         error_report("Failed to map vring "
                      "addr %#" HWADDR_PRIx " size %" HWADDR_PRIu,
@@ -50,6 +50,7 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
 
 void vring_teardown(Vring *vring)
 {
+    memory_region_unref(vring->vring_mr);
 }
 
 /* Disable guest->host notifies */
@@ -109,11 +110,14 @@ bool vring_should_notify(VirtIODevice *vdev, Vring *vring)
 static int get_indirect(Vring *vring,
                         struct iovec iov[], struct iovec *iov_end,
                         unsigned int *out_num, unsigned int *in_num,
-                        struct vring_desc *indirect)
+                        struct vring_desc *indirect,
+                        MemoryRegion ***mrs)
 {
     struct vring_desc desc;
     unsigned int i = 0, count, found = 0;
-
+    MemoryRegion **cur = *mrs;
+    int ret = 0;
+    MemoryRegion *tmp;
     /* Sanity check */
     if (unlikely(indirect->len % sizeof(desc))) {
         error_report("Invalid length in indirect descriptor: "
@@ -132,19 +136,23 @@ static int get_indirect(Vring *vring,
         return -EFAULT;
     }
 
+    *mrs[0] = NULL;
     do {
         struct vring_desc *desc_ptr;
 
         /* Translate indirect descriptor */
         desc_ptr = hostmem_lookup(indirect->addr + found * sizeof(desc),
-                                  sizeof(desc), NULL, false);
+                                  sizeof(desc),
+                                  &tmp,
+                                  false);
         if (!desc_ptr) {
             error_report("Failed to map indirect descriptor "
                          "addr %#" PRIx64 " len %zu",
                          (uint64_t)indirect->addr + found * sizeof(desc),
                          sizeof(desc));
             vring->broken = true;
-            return -EFAULT;
+            ret = -EFAULT;
+            goto fail;
         }
         desc = *desc_ptr;
 
@@ -155,31 +163,40 @@ static int get_indirect(Vring *vring,
             error_report("Loop detected: last one at %u "
                          "indirect size %u", i, count);
             vring->broken = true;
-            return -EFAULT;
+            memory_region_unref(tmp);
+            ret = -EFAULT;
+            goto fail;
         }
 
         if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
             error_report("Nested indirect descriptor");
             vring->broken = true;
-            return -EFAULT;
+            memory_region_unref(tmp);
+            ret = -EFAULT;
+            goto fail;
         }
 
         /* Stop for now if there are not enough iovecs available. */
         if (iov >= iov_end) {
-            return -ENOBUFS;
+            memory_region_unref(tmp);
+            ret = -ENOBUFS;
+            goto fail;
         }
 
-        iov->iov_base = hostmem_lookup(desc.addr, desc.len, NULL,
+        iov->iov_base = hostmem_lookup(desc.addr, desc.len, cur,
                                        desc.flags & VRING_DESC_F_WRITE);
         if (!iov->iov_base) {
             error_report("Failed to map indirect descriptor"
                          "addr %#" PRIx64 " len %u",
                          (uint64_t)desc.addr, desc.len);
             vring->broken = true;
-            return -EFAULT;
+            memory_region_unref(tmp);
+            ret = -EFAULT;
+            goto fail;
         }
         iov->iov_len = desc.len;
         iov++;
+        cur++;
 
         /* If this is an input descriptor, increment that count. */
         if (desc.flags & VRING_DESC_F_WRITE) {
@@ -191,13 +208,27 @@ static int get_indirect(Vring *vring,
                 error_report("Indirect descriptor "
                              "has out after in: idx %u", i);
                 vring->broken = true;
-                return -EFAULT;
+                memory_region_unref(tmp);
+                ret = -EFAULT;
+                goto fail;
             }
             *out_num += 1;
         }
         i = desc.next;
+        memory_region_unref(tmp);
     } while (desc.flags & VRING_DESC_F_NEXT);
+    *mrs = cur;
     return 0;
+
+fail:
+    for (; cur > *mrs; cur--) {
+        memory_region_unref(*cur);
+    }
+    if (*cur) {
+        memory_region_unref(*cur);
+    }
+
+    return ret;
 }
 
 /* This looks in the virtqueue and for the first available buffer, and converts
@@ -209,15 +240,20 @@ static int get_indirect(Vring *vring,
  * never a valid descriptor number) if none was found.  A negative code is
  * returned on error.
  *
+ * @mrs should be the same cnt as iov[]
+ *
  * Stolen from linux/drivers/vhost/vhost.c.
  */
 int vring_pop(VirtIODevice *vdev, Vring *vring,
               struct iovec iov[], struct iovec *iov_end,
-              unsigned int *out_num, unsigned int *in_num)
+              unsigned int *out_num, unsigned int *in_num,
+              MemoryRegion **mrs)
 {
     struct vring_desc desc;
     unsigned int i, head, found = 0, num = vring->vr.num;
     uint16_t avail_idx, last_avail_idx;
+    MemoryRegion **indirect, **cur = mrs;
+    int ret = 0;
 
     /* If there was a fatal error then refuse operation */
     if (vring->broken) {
@@ -263,17 +299,20 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
     *out_num = *in_num = 0;
 
     i = head;
+    mrs[0] = NULL;
     do {
         if (unlikely(i >= num)) {
             error_report("Desc index is %u > %u, head = %u", i, num, head);
             vring->broken = true;
-            return -EFAULT;
+            ret = -EFAULT;
+            goto fail;
         }
         if (unlikely(++found > num)) {
             error_report("Loop detected: last one at %u vq size %u head %u",
                          i, num, head);
             vring->broken = true;
-            return -EFAULT;
+            ret = -EFAULT;
+            goto fail;
         }
         desc = vring->vr.desc[i];
 
@@ -281,10 +320,13 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
         barrier();
 
         if (desc.flags & VRING_DESC_F_INDIRECT) {
-            int ret = get_indirect(vring, iov, iov_end, out_num, in_num, &desc);
+            indirect = cur;
+            int ret = get_indirect(vring, iov, iov_end, out_num, in_num, &desc,
+                        &indirect);
             if (ret < 0) {
-                return ret;
+               goto fail;
             }
+            cur = indirect;
             continue;
         }
 
@@ -293,20 +335,23 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
          * with the current set.
          */
         if (iov >= iov_end) {
-            return -ENOBUFS;
+            ret = -ENOBUFS;
+            goto fail;
         }
 
         /* TODO handle non-contiguous memory across region boundaries */
-        iov->iov_base = hostmem_lookup(desc.addr, desc.len, NULL,
+        iov->iov_base = hostmem_lookup(desc.addr, desc.len, cur,
                                        desc.flags & VRING_DESC_F_WRITE);
         if (!iov->iov_base) {
             error_report("Failed to map vring desc addr %#" PRIx64 " len %u",
                          (uint64_t)desc.addr, desc.len);
             vring->broken = true;
-            return -EFAULT;
+            ret = -EFAULT;
+            goto fail;
         }
         iov->iov_len  = desc.len;
         iov++;
+        cur++;
 
         if (desc.flags & VRING_DESC_F_WRITE) {
             /* If this is an input descriptor,
@@ -318,7 +363,8 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
             if (unlikely(*in_num)) {
                 error_report("Descriptor has out after in: idx %d", i);
                 vring->broken = true;
-                return -EFAULT;
+                ret = -EFAULT;
+                goto fail;
             }
             *out_num += 1;
         }
@@ -328,6 +374,15 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
     /* On success, increment avail index. */
     vring->last_avail_idx++;
     return head;
+
+fail:
+    for (; cur > mrs; cur--) {
+        memory_region_unref(*cur);
+    }
+    if (*cur) {
+        memory_region_unref(*cur);
+    }
+    return ret;
 }
 
 /* After we've used one of their buffers, we tell them about it.
diff --git a/include/hw/virtio/dataplane/vring.h b/include/hw/virtio/dataplane/vring.h
index 56acffb..1aee7cf 100644
--- a/include/hw/virtio/dataplane/vring.h
+++ b/include/hw/virtio/dataplane/vring.h
@@ -23,6 +23,7 @@
 #include "hw/virtio/virtio.h"
 
 typedef struct {
+    MemoryRegion *vring_mr;   /* RAM's memoryRegion on which this vring sits */
     struct vring vr;                /* virtqueue vring mapped to host memory */
     uint16_t last_avail_idx;        /* last processed avail ring index */
     uint16_t last_used_idx;         /* last processed used ring index */
@@ -55,7 +56,8 @@ bool vring_enable_notification(VirtIODevice *vdev, Vring *vring);
 bool vring_should_notify(VirtIODevice *vdev, Vring *vring);
 int vring_pop(VirtIODevice *vdev, Vring *vring,
               struct iovec iov[], struct iovec *iov_end,
-              unsigned int *out_num, unsigned int *in_num);
+              unsigned int *out_num, unsigned int *in_num,
+              MemoryRegion **mrs);
 void vring_push(Vring *vring, unsigned int head, int len);
 
 #endif /* VRING_H */
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH v2 6/6] virtio-blk: release reference to RAM's memoryRegion
  2013-05-03  2:45 [Qemu-devel] [PATCH v2 0/6] proposal to make hostmem listener RAM unplug safe Liu Ping Fan
                   ` (4 preceding siblings ...)
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 5/6] Vring: use hostmem's RAM safe api Liu Ping Fan
@ 2013-05-03  2:45 ` Liu Ping Fan
  2013-05-03 10:09   ` Stefan Hajnoczi
  2013-05-04  9:53 ` [Qemu-devel] [PATCH v2 0/6] proposal to make hostmem listener RAM unplug safe Paolo Bonzini
  6 siblings, 1 reply; 19+ messages in thread
From: Liu Ping Fan @ 2013-05-03  2:45 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, Vasilis Liaskovitis,
	Stefan Hajnoczi, Paolo Bonzini

From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

virtio-blk will unref RAM's memoryRegion when the io-req has been
done. So we can avoid to call bdrv_drain_all() when RAM hot unplug.

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
 hw/block/dataplane/virtio-blk.c |   51 +++++++++++++++++++++++++++++---------
 1 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 3bb57d1..047e1df 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -35,6 +35,8 @@ enum {
 
 typedef struct {
     struct iocb iocb;               /* Linux AIO control block */
+    MemoryRegion *mrs[VRING_MAX];
+    int mrs_cnt;
     QEMUIOVector *inhdr;            /* iovecs for virtio_blk_inhdr */
     unsigned int head;              /* vring descriptor index */
     struct iovec *bounce_iov;       /* used if guest buffers are unaligned */
@@ -121,6 +123,10 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
      * transferred plus the status bytes.
      */
     vring_push(&s->vring, req->head, len + sizeof(hdr));
+    while (--req->mrs_cnt >= 0) {
+        memory_region_unref(req->mrs[req->mrs_cnt]);
+    }
+
 
     s->num_reqs--;
 }
@@ -156,7 +162,8 @@ static void do_get_id_cmd(VirtIOBlockDataPlane *s,
 static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
                        struct iovec *iov, unsigned int iov_cnt,
                        long long offset, unsigned int head,
-                       QEMUIOVector *inhdr)
+                       QEMUIOVector *inhdr,
+                       MemoryRegion **mrs, int cnt)
 {
     struct iocb *iocb;
     QEMUIOVector qiov;
@@ -188,6 +195,8 @@ static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
 
     /* Fill in virtio block metadata needed for completion */
     VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
+    memcpy(req->mrs, mrs, cnt*sizeof(MemoryRegion *));
+    req->mrs_cnt = cnt;
     req->head = head;
     req->inhdr = inhdr;
     req->bounce_iov = bounce_iov;
@@ -197,19 +206,22 @@ static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
 
 static int process_request(IOQueue *ioq, struct iovec iov[],
                            unsigned int out_num, unsigned int in_num,
-                           unsigned int head)
+                           unsigned int head, MemoryRegion **mrs)
 {
     VirtIOBlockDataPlane *s = container_of(ioq, VirtIOBlockDataPlane, ioqueue);
     struct iovec *in_iov = &iov[out_num];
     struct virtio_blk_outhdr outhdr;
     QEMUIOVector *inhdr;
     size_t in_size;
+    unsigned int i, cnt = out_num+in_num;
+    int ret;
 
     /* Copy in outhdr */
     if (unlikely(iov_to_buf(iov, out_num, 0, &outhdr,
                             sizeof(outhdr)) != sizeof(outhdr))) {
         error_report("virtio-blk request outhdr too short");
-        return -EFAULT;
+        ret = -EFAULT;
+        goto free_mrs;
     }
     iov_discard_front(&iov, &out_num, sizeof(outhdr));
 
@@ -217,7 +229,8 @@ static int process_request(IOQueue *ioq, struct iovec iov[],
     in_size = iov_size(in_iov, in_num);
     if (in_size < sizeof(struct virtio_blk_inhdr)) {
         error_report("virtio_blk request inhdr too short");
-        return -EFAULT;
+        ret = -EFAULT;
+        goto free_mrs;
     }
     inhdr = g_slice_new(QEMUIOVector);
     qemu_iovec_init(inhdr, 1);
@@ -231,17 +244,20 @@ static int process_request(IOQueue *ioq, struct iovec iov[],
 
     switch (outhdr.type) {
     case VIRTIO_BLK_T_IN:
-        do_rdwr_cmd(s, true, in_iov, in_num, outhdr.sector * 512, head, inhdr);
+        do_rdwr_cmd(s, true, in_iov, in_num, outhdr.sector * 512, head, inhdr,
+            mrs, cnt);
         return 0;
 
     case VIRTIO_BLK_T_OUT:
-        do_rdwr_cmd(s, false, iov, out_num, outhdr.sector * 512, head, inhdr);
+        do_rdwr_cmd(s, false, iov, out_num, outhdr.sector * 512, head, inhdr,
+            mrs, cnt);
         return 0;
 
     case VIRTIO_BLK_T_SCSI_CMD:
         /* TODO support SCSI commands */
         complete_request_early(s, head, inhdr, VIRTIO_BLK_S_UNSUPP);
-        return 0;
+        ret = 0;
+        goto free_mrs;
 
     case VIRTIO_BLK_T_FLUSH:
         /* TODO fdsync not supported by Linux AIO, do it synchronously here! */
@@ -250,18 +266,27 @@ static int process_request(IOQueue *ioq, struct iovec iov[],
         } else {
             complete_request_early(s, head, inhdr, VIRTIO_BLK_S_OK);
         }
-        return 0;
+        ret = 0;
+        goto free_mrs;
 
     case VIRTIO_BLK_T_GET_ID:
         do_get_id_cmd(s, in_iov, in_num, head, inhdr);
-        return 0;
+        ret = 0;
+        goto free_mrs;
 
     default:
         error_report("virtio-blk unsupported request type %#x", outhdr.type);
         qemu_iovec_destroy(inhdr);
         g_slice_free(QEMUIOVector, inhdr);
-        return -EFAULT;
+        ret = -EFAULT;
+        goto free_mrs;
+    }
+
+free_mrs:
+    for (i = 0; i < cnt; i++) {
+        memory_region_unref(mrs[i]);
     }
+    return ret;
 }
 
 static int flush_true(EventNotifier *e)
@@ -287,6 +312,7 @@ static void handle_notify(EventNotifier *e)
     struct iovec iovec[VRING_MAX];
     struct iovec *end = &iovec[VRING_MAX];
     struct iovec *iov = iovec;
+    MemoryRegion *mrs[VRING_MAX];
 
     /* When a request is read from the vring, the index of the first descriptor
      * (aka head) is returned so that the completed request can be pushed onto
@@ -306,7 +332,7 @@ static void handle_notify(EventNotifier *e)
 
         for (;;) {
             head = vring_pop(s->vdev, &s->vring, iov, end, &out_num,
-                        &in_num, NULL);
+                        &in_num, mrs);
             if (head < 0) {
                 break; /* no more requests */
             }
@@ -314,7 +340,8 @@ static void handle_notify(EventNotifier *e)
             trace_virtio_blk_data_plane_process_request(s, out_num, in_num,
                                                         head);
 
-            if (process_request(&s->ioqueue, iov, out_num, in_num, head) < 0) {
+            if (process_request(&s->ioqueue, iov, out_num, in_num, head,
+                    mrs) < 0) {
                 vring_set_broken(&s->vring);
                 break;
             }
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v2 1/6] hostmem: make hostmem single, not per Vring related
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 1/6] hostmem: make hostmem single, not per Vring related Liu Ping Fan
@ 2013-05-03  8:54   ` Stefan Hajnoczi
  0 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2013-05-03  8:54 UTC (permalink / raw)
  To: Liu Ping Fan
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, qemu-devel,
	Vasilis Liaskovitis, Stefan Hajnoczi, Paolo Bonzini

On Fri, May 03, 2013 at 10:45:17AM +0800, Liu Ping Fan wrote:
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 9e88320..2761668 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -887,6 +887,7 @@ void *address_space_map(AddressSpace *as, hwaddr addr,
>  void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
>                           int is_write, hwaddr access_len);
>  
> +void hostmem_init(void);

This should go in hostmem.h.  exec.c must now include hostmem.h.

You need to take care of the ./configure dependency: hostmem.o is only
build when CONFIG_VIRTIO_BLK_DATA_PLANE is 'y'.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v2 2/6] hostmem: AddressSpace has its own map and maintained by RCU prepared style
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 2/6] hostmem: AddressSpace has its own map and maintained by RCU prepared style Liu Ping Fan
@ 2013-05-03  9:10   ` Stefan Hajnoczi
  2013-05-06  1:44     ` liu ping fan
  0 siblings, 1 reply; 19+ messages in thread
From: Stefan Hajnoczi @ 2013-05-03  9:10 UTC (permalink / raw)
  To: Liu Ping Fan
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, qemu-devel,
	Vasilis Liaskovitis, Stefan Hajnoczi, Paolo Bonzini

On Fri, May 03, 2013 at 10:45:18AM +0800, Liu Ping Fan wrote:
> +/**
> + * Install new regions list
> + */
> +static void hostmem_listener_commit(MemoryListener *listener)
> +{
> +    HostMem *tmp;
> +    AddrSpaceMem *as_mem = container_of(listener, AddrSpaceMem, listener);
> +
> +    /* writer of cur_hostmem &next_hostmem is serialed by biglock

s/serialed/serialized/

> +         * in hotplug path. So only take care of r/w on cur_hostmem
> +         */

Indentation.

> @@ -164,18 +203,30 @@ void hostmem_init(void)
>          .coalesced_mmio_del = hostmem_listener_coalesced_mmio_dummy,
>          .priority = 10,
>      };
> +    as_mem->cur_hostmem = g_new0(HostMem, 1);
> +    as_mem->cur_hostmem->ref = 1;
> +    memory_listener_register(&as_mem->listener, as);
>  
> -    memory_listener_register(&system_mem->listener, &address_space_memory);
> -    if (system_mem->num_new_regions > 0) {
> -        hostmem_listener_commit(&system_mem->listener);
> -    }

The point of this if statement was to make the newly added regions
visible.  I guess it is not necessary because exec.c is calling us
before any memory gets initialized now?

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v2 4/6] hostmem: hostmem listener pin RAM-Device by refcnt
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 4/6] hostmem: hostmem listener pin RAM-Device by refcnt Liu Ping Fan
@ 2013-05-03  9:35   ` Stefan Hajnoczi
  2013-05-06  1:45     ` liu ping fan
  0 siblings, 1 reply; 19+ messages in thread
From: Stefan Hajnoczi @ 2013-05-03  9:35 UTC (permalink / raw)
  To: Liu Ping Fan
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, qemu-devel,
	Vasilis Liaskovitis, Stefan Hajnoczi, Paolo Bonzini

On Fri, May 03, 2013 at 10:45:20AM +0800, Liu Ping Fan wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> 
> With ref()/unref() interface of MemoryRegion, we can pin RAM-Device
> when using its memory, and release it when done.
> 
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
>  hw/virtio/dataplane/hostmem.c         |   44 +++++++++++++++++++++++++++-----
>  hw/virtio/dataplane/vring.c           |    8 +++---
>  include/hw/virtio/dataplane/hostmem.h |    4 ++-
>  3 files changed, 44 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/virtio/dataplane/hostmem.c b/hw/virtio/dataplane/hostmem.c
> index 1fd3e06..0e28dfc 100644
> --- a/hw/virtio/dataplane/hostmem.c
> +++ b/hw/virtio/dataplane/hostmem.c
> @@ -42,18 +42,28 @@ static void hostmem_ref(HostMem *hostmem)
>  
>  static void hostmem_unref(HostMem *hostmem)
>  {
> -    int t;
> +    int i, t;
> +    HostMemRegion *hmr;
>  
>      t = __sync_sub_and_fetch(&hostmem->ref, 1);
>      assert(t >= 0);
>      if (!t) {
> +        for (i = 0; i < hostmem->num_current_regions; i++) {
> +            hmr = &hostmem->current_regions[i];
> +            /* Fix me, when memory hotunplug implement
> +             * assert(hmr->mr_ops->unref)
> +             */
> +            if (hmr->mr->ops && hmr->mr->ops->unref) {
> +                hmr->mr->ops->unref();
> +            }

This patch should use memory_region_ref()/unref() which you introduced
in the previous patch instead of open-coding this.

> @@ -158,6 +176,18 @@ static void hostmem_listener_append_region(MemoryListener *listener,
>      hostmem_append_new_region(as_mem->next_hostmem, section);
>  }
>  
> +static void hostmem_listener_append_region(MemoryListener *listener,
> +                                           MemoryRegionSection *section)
> +{
> +    hostmem_listener_nop_region(listener, section);
> +    /* Fix me, when memory hotunplug implement
> +     * assert(section->mr->ops->ref)
> +     */
> +    if (section->mr->ops && section->mr->ops->ref) {
> +        section->mr->ops->ref();
> +    }
> +}

Why does append increment the refcount while nop does not?

It seems that we always need to increment the memory region refcount
since we're building a completely new hostmem.  The refcount ownership
is not passed from the current hostmen to the next hostmem, all memory
regions are released in hostmem_unref().

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v2 5/6] Vring: use hostmem's RAM safe api
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 5/6] Vring: use hostmem's RAM safe api Liu Ping Fan
@ 2013-05-03 10:02   ` Stefan Hajnoczi
  2013-05-06  1:45     ` liu ping fan
  0 siblings, 1 reply; 19+ messages in thread
From: Stefan Hajnoczi @ 2013-05-03 10:02 UTC (permalink / raw)
  To: Liu Ping Fan
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, qemu-devel,
	Vasilis Liaskovitis, Stefan Hajnoczi, Paolo Bonzini

On Fri, May 03, 2013 at 10:45:21AM +0800, Liu Ping Fan wrote:
> @@ -109,11 +110,14 @@ bool vring_should_notify(VirtIODevice *vdev, Vring *vring)
>  static int get_indirect(Vring *vring,
>                          struct iovec iov[], struct iovec *iov_end,
>                          unsigned int *out_num, unsigned int *in_num,
> -                        struct vring_desc *indirect)
> +                        struct vring_desc *indirect,
> +                        MemoryRegion ***mrs)
>  {
>      struct vring_desc desc;
>      unsigned int i = 0, count, found = 0;
> -
> +    MemoryRegion **cur = *mrs;
> +    int ret = 0;
> +    MemoryRegion *tmp;
>      /* Sanity check */
>      if (unlikely(indirect->len % sizeof(desc))) {
>          error_report("Invalid length in indirect descriptor: "
> @@ -132,19 +136,23 @@ static int get_indirect(Vring *vring,
>          return -EFAULT;
>      }
>  
> +    *mrs[0] = NULL;

The goto fail case is broken when we fail with cur > *mrs before calling
hostmem_lookup(..., cur, ...) since *cur will be undefined.

>      do {
>          struct vring_desc *desc_ptr;
>  
>          /* Translate indirect descriptor */
>          desc_ptr = hostmem_lookup(indirect->addr + found * sizeof(desc),
> -                                  sizeof(desc), NULL, false);
> +                                  sizeof(desc),
> +                                  &tmp,

Please use a more descriptive name, like desc_mr.  This function is
fairly involved so the variable names should be descriptive.

> +                                  false);
>          if (!desc_ptr) {
>              error_report("Failed to map indirect descriptor "
>                           "addr %#" PRIx64 " len %zu",
>                           (uint64_t)indirect->addr + found * sizeof(desc),
>                           sizeof(desc));
>              vring->broken = true;
> -            return -EFAULT;
> +            ret = -EFAULT;
> +            goto fail;
>          }
>          desc = *desc_ptr;
>  
> @@ -155,31 +163,40 @@ static int get_indirect(Vring *vring,
>              error_report("Loop detected: last one at %u "
>                           "indirect size %u", i, count);
>              vring->broken = true;
> -            return -EFAULT;
> +            memory_region_unref(tmp);
> +            ret = -EFAULT;
> +            goto fail;
>          }

No need to hold onto tmp and handle all these error cases.  After the
barrier() desc_ptr is no longer used because we've loaded the descriptor
into a local struct.  Please unref tmp right after barrier().

> @@ -209,15 +240,20 @@ static int get_indirect(Vring *vring,
>   * never a valid descriptor number) if none was found.  A negative code is
>   * returned on error.
>   *
> + * @mrs should be the same cnt as iov[]
> + *
>   * Stolen from linux/drivers/vhost/vhost.c.
>   */
>  int vring_pop(VirtIODevice *vdev, Vring *vring,
>                struct iovec iov[], struct iovec *iov_end,
> -              unsigned int *out_num, unsigned int *in_num)
> +              unsigned int *out_num, unsigned int *in_num,
> +              MemoryRegion **mrs)
>  {
>      struct vring_desc desc;
>      unsigned int i, head, found = 0, num = vring->vr.num;
>      uint16_t avail_idx, last_avail_idx;
> +    MemoryRegion **indirect, **cur = mrs;
> +    int ret = 0;
>  
>      /* If there was a fatal error then refuse operation */
>      if (vring->broken) {
> @@ -263,17 +299,20 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
>      *out_num = *in_num = 0;
>  
>      i = head;
> +    mrs[0] = NULL;

Same goto fail issue here when cur > *mrs before
hostmem_lookup(..., cur, ...) has been called.

>      do {
>          if (unlikely(i >= num)) {
>              error_report("Desc index is %u > %u, head = %u", i, num, head);
>              vring->broken = true;
> -            return -EFAULT;
> +            ret = -EFAULT;
> +            goto fail;
>          }
>          if (unlikely(++found > num)) {
>              error_report("Loop detected: last one at %u vq size %u head %u",
>                           i, num, head);
>              vring->broken = true;
> -            return -EFAULT;
> +            ret = -EFAULT;
> +            goto fail;
>          }
>          desc = vring->vr.desc[i];
>  
> @@ -281,10 +320,13 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
>          barrier();
>  
>          if (desc.flags & VRING_DESC_F_INDIRECT) {
> -            int ret = get_indirect(vring, iov, iov_end, out_num, in_num, &desc);
> +            indirect = cur;
> +            int ret = get_indirect(vring, iov, iov_end, out_num, in_num, &desc,
> +                        &indirect);
>              if (ret < 0) {
> -                return ret;
> +               goto fail;

Indentation.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v2 6/6] virtio-blk: release reference to RAM's memoryRegion
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 6/6] virtio-blk: release reference to RAM's memoryRegion Liu Ping Fan
@ 2013-05-03 10:09   ` Stefan Hajnoczi
  2013-05-06  2:39     ` liu ping fan
  0 siblings, 1 reply; 19+ messages in thread
From: Stefan Hajnoczi @ 2013-05-03 10:09 UTC (permalink / raw)
  To: Liu Ping Fan
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, qemu-devel,
	Vasilis Liaskovitis, Stefan Hajnoczi, Paolo Bonzini

On Fri, May 03, 2013 at 10:45:22AM +0800, Liu Ping Fan wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> 
> virtio-blk will unref RAM's memoryRegion when the io-req has been
> done. So we can avoid to call bdrv_drain_all() when RAM hot unplug.
> 
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
>  hw/block/dataplane/virtio-blk.c |   51 +++++++++++++++++++++++++++++---------
>  1 files changed, 39 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
> index 3bb57d1..047e1df 100644
> --- a/hw/block/dataplane/virtio-blk.c
> +++ b/hw/block/dataplane/virtio-blk.c
> @@ -35,6 +35,8 @@ enum {
>  
>  typedef struct {
>      struct iocb iocb;               /* Linux AIO control block */
> +    MemoryRegion *mrs[VRING_MAX];
> +    int mrs_cnt;
>      QEMUIOVector *inhdr;            /* iovecs for virtio_blk_inhdr */
>      unsigned int head;              /* vring descriptor index */
>      struct iovec *bounce_iov;       /* used if guest buffers are unaligned */
> @@ -121,6 +123,10 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
>       * transferred plus the status bytes.
>       */
>      vring_push(&s->vring, req->head, len + sizeof(hdr));
> +    while (--req->mrs_cnt >= 0) {
> +        memory_region_unref(req->mrs[req->mrs_cnt]);
> +    }

The vring has the property that every virtqueue element popped will be
pushed back.

Therefore it might be nicer to hide the MemoryRegion unref inside
vring_push().  The user wouldn't have to know about MemoryRegion -
that's especially nice since there are other devices besides virtio-blk
(like virtio-net) that would need to be modified if we use the current
approach.

Stefan

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v2 0/6] proposal to make hostmem listener RAM unplug safe
  2013-05-03  2:45 [Qemu-devel] [PATCH v2 0/6] proposal to make hostmem listener RAM unplug safe Liu Ping Fan
                   ` (5 preceding siblings ...)
  2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 6/6] virtio-blk: release reference to RAM's memoryRegion Liu Ping Fan
@ 2013-05-04  9:53 ` Paolo Bonzini
  2013-05-06  1:42   ` liu ping fan
  6 siblings, 1 reply; 19+ messages in thread
From: Paolo Bonzini @ 2013-05-04  9:53 UTC (permalink / raw)
  To: Liu Ping Fan
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, qemu-devel,
	Vasilis Liaskovitis, Stefan Hajnoczi

Il 03/05/2013 04:45, Liu Ping Fan ha scritto:
> v1->v2:
>   1.split RCU prepared style update and monitor the RAM-Device refcnt into two patches (patch 2,4)
>   2.introduce AddrSpaceMem, which is similar to HostMem, but based on address space, while
>     the original HostMem only server system memory address space

This looks suspiciously similar to FlatView, doesn't it?

Perhaps the right thing to do is to add the appropriate locking and
RCU-style updating to address_space_update_topology and
memory_region_find.   (And replacing flatview_destroy with ref/unref
similar to HostMem in your patch 2).  Then just switch dataplane to use
memory_region_find...

Paolo

> Liu Ping Fan (6):
>   hostmem: make hostmem single, not per Vring related
>   hostmem: AddressSpace has its own map and maintained by RCU prepared
>     style
>   memory: add ref/unref interface for MemroyRegionOps
>   hostmem: hostmem listener pin RAM-Device by refcnt
>   Vring: use hostmem's RAM safe api
>   virtio-blk: release reference to RAM's memoryRegion
> 
>  exec.c                                |    1 +
>  hw/block/dataplane/virtio-blk.c       |   52 ++++++++---
>  hw/virtio/dataplane/hostmem.c         |  168 +++++++++++++++++++++++++--------
>  hw/virtio/dataplane/vring.c           |  100 +++++++++++++++-----
>  include/exec/memory.h                 |   11 ++
>  include/hw/virtio/dataplane/hostmem.h |   25 +++---
>  include/hw/virtio/dataplane/vring.h   |    5 +-
>  memory.c                              |   18 ++++
>  8 files changed, 289 insertions(+), 91 deletions(-)
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v2 0/6] proposal to make hostmem listener RAM unplug safe
  2013-05-04  9:53 ` [Qemu-devel] [PATCH v2 0/6] proposal to make hostmem listener RAM unplug safe Paolo Bonzini
@ 2013-05-06  1:42   ` liu ping fan
  2013-05-06  6:17     ` Paolo Bonzini
  0 siblings, 1 reply; 19+ messages in thread
From: liu ping fan @ 2013-05-06  1:42 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, qemu-devel,
	Vasilis Liaskovitis, Stefan Hajnoczi

On Sat, May 4, 2013 at 5:53 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 03/05/2013 04:45, Liu Ping Fan ha scritto:
>> v1->v2:
>>   1.split RCU prepared style update and monitor the RAM-Device refcnt into two patches (patch 2,4)
>>   2.introduce AddrSpaceMem, which is similar to HostMem, but based on address space, while
>>     the original HostMem only server system memory address space
>
> This looks suspiciously similar to FlatView, doesn't it?
>
FlatView is used for all the listeners, including for mmio dispatching,
which aims to mapping from hwaddr to DeviceState for dispatching service.
While here, we mapping from hwaddr to hva.

> Perhaps the right thing to do is to add the appropriate locking and
> RCU-style updating to address_space_update_topology and

RCU implementation is data struct related,  and each listener has its
local table, so I think it is more reasonable to implement them
separately.  And vhost has its own RCU implement in kernel already.

> memory_region_find.   (And replacing flatview_destroy with ref/unref
> similar to HostMem in your patch 2).  Then just switch dataplane to use
> memory_region_find...
>
In fact, I think, HostMem listener can be an substitute for
cpu_physical_memory_map(),  the main issue can be the migration
support.  But before getting big patches, I hope to have this smaller
and simpler one.

Regards,
Pingfan

> Paolo
>
>> Liu Ping Fan (6):
>>   hostmem: make hostmem single, not per Vring related
>>   hostmem: AddressSpace has its own map and maintained by RCU prepared
>>     style
>>   memory: add ref/unref interface for MemroyRegionOps
>>   hostmem: hostmem listener pin RAM-Device by refcnt
>>   Vring: use hostmem's RAM safe api
>>   virtio-blk: release reference to RAM's memoryRegion
>>
>>  exec.c                                |    1 +
>>  hw/block/dataplane/virtio-blk.c       |   52 ++++++++---
>>  hw/virtio/dataplane/hostmem.c         |  168 +++++++++++++++++++++++++--------
>>  hw/virtio/dataplane/vring.c           |  100 +++++++++++++++-----
>>  include/exec/memory.h                 |   11 ++
>>  include/hw/virtio/dataplane/hostmem.h |   25 +++---
>>  include/hw/virtio/dataplane/vring.h   |    5 +-
>>  memory.c                              |   18 ++++
>>  8 files changed, 289 insertions(+), 91 deletions(-)
>>
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v2 2/6] hostmem: AddressSpace has its own map and maintained by RCU prepared style
  2013-05-03  9:10   ` Stefan Hajnoczi
@ 2013-05-06  1:44     ` liu ping fan
  0 siblings, 0 replies; 19+ messages in thread
From: liu ping fan @ 2013-05-06  1:44 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, qemu-devel,
	Vasilis Liaskovitis, Stefan Hajnoczi, Paolo Bonzini

On Fri, May 3, 2013 at 5:10 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Fri, May 03, 2013 at 10:45:18AM +0800, Liu Ping Fan wrote:
>> +/**
>> + * Install new regions list
>> + */
>> +static void hostmem_listener_commit(MemoryListener *listener)
>> +{
>> +    HostMem *tmp;
>> +    AddrSpaceMem *as_mem = container_of(listener, AddrSpaceMem, listener);
>> +
>> +    /* writer of cur_hostmem &next_hostmem is serialed by biglock
>
> s/serialed/serialized/
>
Will fix,
>> +         * in hotplug path. So only take care of r/w on cur_hostmem
>> +         */
>
> Indentation.
>
Will fix,
>> @@ -164,18 +203,30 @@ void hostmem_init(void)
>>          .coalesced_mmio_del = hostmem_listener_coalesced_mmio_dummy,
>>          .priority = 10,
>>      };
>> +    as_mem->cur_hostmem = g_new0(HostMem, 1);
>> +    as_mem->cur_hostmem->ref = 1;
>> +    memory_listener_register(&as_mem->listener, as);
>>
>> -    memory_listener_register(&system_mem->listener, &address_space_memory);
>> -    if (system_mem->num_new_regions > 0) {
>> -        hostmem_listener_commit(&system_mem->listener);
>> -    }
>
> The point of this if statement was to make the newly added regions
> visible.  I guess it is not necessary because exec.c is calling us
> before any memory gets initialized now?

Yes, before any memory_region added into system.

Regards,
Pingfan

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v2 4/6] hostmem: hostmem listener pin RAM-Device by refcnt
  2013-05-03  9:35   ` Stefan Hajnoczi
@ 2013-05-06  1:45     ` liu ping fan
  0 siblings, 0 replies; 19+ messages in thread
From: liu ping fan @ 2013-05-06  1:45 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, qemu-devel,
	Vasilis Liaskovitis, Stefan Hajnoczi, Paolo Bonzini

On Fri, May 3, 2013 at 5:35 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Fri, May 03, 2013 at 10:45:20AM +0800, Liu Ping Fan wrote:
>> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>
>> With ref()/unref() interface of MemoryRegion, we can pin RAM-Device
>> when using its memory, and release it when done.
>>
>> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>> ---
>>  hw/virtio/dataplane/hostmem.c         |   44 +++++++++++++++++++++++++++-----
>>  hw/virtio/dataplane/vring.c           |    8 +++---
>>  include/hw/virtio/dataplane/hostmem.h |    4 ++-
>>  3 files changed, 44 insertions(+), 12 deletions(-)
>>
>> diff --git a/hw/virtio/dataplane/hostmem.c b/hw/virtio/dataplane/hostmem.c
>> index 1fd3e06..0e28dfc 100644
>> --- a/hw/virtio/dataplane/hostmem.c
>> +++ b/hw/virtio/dataplane/hostmem.c
>> @@ -42,18 +42,28 @@ static void hostmem_ref(HostMem *hostmem)
>>
>>  static void hostmem_unref(HostMem *hostmem)
>>  {
>> -    int t;
>> +    int i, t;
>> +    HostMemRegion *hmr;
>>
>>      t = __sync_sub_and_fetch(&hostmem->ref, 1);
>>      assert(t >= 0);
>>      if (!t) {
>> +        for (i = 0; i < hostmem->num_current_regions; i++) {
>> +            hmr = &hostmem->current_regions[i];
>> +            /* Fix me, when memory hotunplug implement
>> +             * assert(hmr->mr_ops->unref)
>> +             */
>> +            if (hmr->mr->ops && hmr->mr->ops->unref) {
>> +                hmr->mr->ops->unref();
>> +            }
>
> This patch should use memory_region_ref()/unref() which you introduced
> in the previous patch instead of open-coding this.
>
Yes, reasonable.
>> @@ -158,6 +176,18 @@ static void hostmem_listener_append_region(MemoryListener *listener,
>>      hostmem_append_new_region(as_mem->next_hostmem, section);
>>  }
>>
>> +static void hostmem_listener_append_region(MemoryListener *listener,
>> +                                           MemoryRegionSection *section)
>> +{
>> +    hostmem_listener_nop_region(listener, section);
>> +    /* Fix me, when memory hotunplug implement
>> +     * assert(section->mr->ops->ref)
>> +     */
>> +    if (section->mr->ops && section->mr->ops->ref) {
>> +        section->mr->ops->ref();
>> +    }
>> +}
>
> Why does append increment the refcount while nop does not?
>
> It seems that we always need to increment the memory region refcount
> since we're building a completely new hostmem.  The refcount ownership
> is not passed from the current hostmen to the next hostmem, all memory
> regions are released in hostmem_unref().

Yes, you are right.  Will fix it.

Thanks,
Pingfan

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v2 5/6] Vring: use hostmem's RAM safe api
  2013-05-03 10:02   ` Stefan Hajnoczi
@ 2013-05-06  1:45     ` liu ping fan
  0 siblings, 0 replies; 19+ messages in thread
From: liu ping fan @ 2013-05-06  1:45 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, qemu-devel,
	Vasilis Liaskovitis, Stefan Hajnoczi, Paolo Bonzini

On Fri, May 3, 2013 at 6:02 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Fri, May 03, 2013 at 10:45:21AM +0800, Liu Ping Fan wrote:
>> @@ -109,11 +110,14 @@ bool vring_should_notify(VirtIODevice *vdev, Vring *vring)
>>  static int get_indirect(Vring *vring,
>>                          struct iovec iov[], struct iovec *iov_end,
>>                          unsigned int *out_num, unsigned int *in_num,
>> -                        struct vring_desc *indirect)
>> +                        struct vring_desc *indirect,
>> +                        MemoryRegion ***mrs)
>>  {
>>      struct vring_desc desc;
>>      unsigned int i = 0, count, found = 0;
>> -
>> +    MemoryRegion **cur = *mrs;
>> +    int ret = 0;
>> +    MemoryRegion *tmp;
>>      /* Sanity check */
>>      if (unlikely(indirect->len % sizeof(desc))) {
>>          error_report("Invalid length in indirect descriptor: "
>> @@ -132,19 +136,23 @@ static int get_indirect(Vring *vring,
>>          return -EFAULT;
>>      }
>>
>> +    *mrs[0] = NULL;
>
> The goto fail case is broken when we fail with cur > *mrs before calling
> hostmem_lookup(..., cur, ...) since *cur will be undefined.
>
Will fix,
>>      do {
>>          struct vring_desc *desc_ptr;
>>
>>          /* Translate indirect descriptor */
>>          desc_ptr = hostmem_lookup(indirect->addr + found * sizeof(desc),
>> -                                  sizeof(desc), NULL, false);
>> +                                  sizeof(desc),
>> +                                  &tmp,
>
> Please use a more descriptive name, like desc_mr.  This function is
> fairly involved so the variable names should be descriptive.
>
Ok,
>> +                                  false);
>>          if (!desc_ptr) {
>>              error_report("Failed to map indirect descriptor "
>>                           "addr %#" PRIx64 " len %zu",
>>                           (uint64_t)indirect->addr + found * sizeof(desc),
>>                           sizeof(desc));
>>              vring->broken = true;
>> -            return -EFAULT;
>> +            ret = -EFAULT;
>> +            goto fail;
>>          }
>>          desc = *desc_ptr;
>>
>> @@ -155,31 +163,40 @@ static int get_indirect(Vring *vring,
>>              error_report("Loop detected: last one at %u "
>>                           "indirect size %u", i, count);
>>              vring->broken = true;
>> -            return -EFAULT;
>> +            memory_region_unref(tmp);
>> +            ret = -EFAULT;
>> +            goto fail;
>>          }
>
> No need to hold onto tmp and handle all these error cases.  After the
> barrier() desc_ptr is no longer used because we've loaded the descriptor
> into a local struct.  Please unref tmp right after barrier().
>
Ok,
>> @@ -209,15 +240,20 @@ static int get_indirect(Vring *vring,
>>   * never a valid descriptor number) if none was found.  A negative code is
>>   * returned on error.
>>   *
>> + * @mrs should be the same cnt as iov[]
>> + *
>>   * Stolen from linux/drivers/vhost/vhost.c.
>>   */
>>  int vring_pop(VirtIODevice *vdev, Vring *vring,
>>                struct iovec iov[], struct iovec *iov_end,
>> -              unsigned int *out_num, unsigned int *in_num)
>> +              unsigned int *out_num, unsigned int *in_num,
>> +              MemoryRegion **mrs)
>>  {
>>      struct vring_desc desc;
>>      unsigned int i, head, found = 0, num = vring->vr.num;
>>      uint16_t avail_idx, last_avail_idx;
>> +    MemoryRegion **indirect, **cur = mrs;
>> +    int ret = 0;
>>
>>      /* If there was a fatal error then refuse operation */
>>      if (vring->broken) {
>> @@ -263,17 +299,20 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
>>      *out_num = *in_num = 0;
>>
>>      i = head;
>> +    mrs[0] = NULL;
>
> Same goto fail issue here when cur > *mrs before
> hostmem_lookup(..., cur, ...) has been called.
>
Will fix
>>      do {
>>          if (unlikely(i >= num)) {
>>              error_report("Desc index is %u > %u, head = %u", i, num, head);
>>              vring->broken = true;
>> -            return -EFAULT;
>> +            ret = -EFAULT;
>> +            goto fail;
>>          }
>>          if (unlikely(++found > num)) {
>>              error_report("Loop detected: last one at %u vq size %u head %u",
>>                           i, num, head);
>>              vring->broken = true;
>> -            return -EFAULT;
>> +            ret = -EFAULT;
>> +            goto fail;
>>          }
>>          desc = vring->vr.desc[i];
>>
>> @@ -281,10 +320,13 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
>>          barrier();
>>
>>          if (desc.flags & VRING_DESC_F_INDIRECT) {
>> -            int ret = get_indirect(vring, iov, iov_end, out_num, in_num, &desc);
>> +            indirect = cur;
>> +            int ret = get_indirect(vring, iov, iov_end, out_num, in_num, &desc,
>> +                        &indirect);
>>              if (ret < 0) {
>> -                return ret;
>> +               goto fail;
>
> Indentation.
Will fix.

Thanks,
Pingfan

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v2 6/6] virtio-blk: release reference to RAM's memoryRegion
  2013-05-03 10:09   ` Stefan Hajnoczi
@ 2013-05-06  2:39     ` liu ping fan
  0 siblings, 0 replies; 19+ messages in thread
From: liu ping fan @ 2013-05-06  2:39 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, qemu-devel,
	Vasilis Liaskovitis, Stefan Hajnoczi, Paolo Bonzini

On Fri, May 3, 2013 at 6:09 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Fri, May 03, 2013 at 10:45:22AM +0800, Liu Ping Fan wrote:
>> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>
>> virtio-blk will unref RAM's memoryRegion when the io-req has been
>> done. So we can avoid to call bdrv_drain_all() when RAM hot unplug.
>>
>> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>> ---
>>  hw/block/dataplane/virtio-blk.c |   51 +++++++++++++++++++++++++++++---------
>>  1 files changed, 39 insertions(+), 12 deletions(-)
>>
>> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
>> index 3bb57d1..047e1df 100644
>> --- a/hw/block/dataplane/virtio-blk.c
>> +++ b/hw/block/dataplane/virtio-blk.c
>> @@ -35,6 +35,8 @@ enum {
>>
>>  typedef struct {
>>      struct iocb iocb;               /* Linux AIO control block */
>> +    MemoryRegion *mrs[VRING_MAX];
>> +    int mrs_cnt;
>>      QEMUIOVector *inhdr;            /* iovecs for virtio_blk_inhdr */
>>      unsigned int head;              /* vring descriptor index */
>>      struct iovec *bounce_iov;       /* used if guest buffers are unaligned */
>> @@ -121,6 +123,10 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
>>       * transferred plus the status bytes.
>>       */
>>      vring_push(&s->vring, req->head, len + sizeof(hdr));
>> +    while (--req->mrs_cnt >= 0) {
>> +        memory_region_unref(req->mrs[req->mrs_cnt]);
>> +    }
>
> The vring has the property that every virtqueue element popped will be
> pushed back.
>
> Therefore it might be nicer to hide the MemoryRegion unref inside
> vring_push().  The user wouldn't have to know about MemoryRegion -
> that's especially nice since there are other devices besides virtio-blk
> (like virtio-net) that would need to be modified if we use the current
> approach.
>
How about use "head" as the key to pair vring_push/vring_pop, and keep
tracing the refer to MemoryRegion in Vring (resort to linked-list to
dynamically maintain mrs[] used by vring_push).  So the reference of
mr will be kept in Vring, totally transparent to caller.

Regards,
Pingfan

> Stefan

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v2 0/6] proposal to make hostmem listener RAM unplug safe
  2013-05-06  1:42   ` liu ping fan
@ 2013-05-06  6:17     ` Paolo Bonzini
  0 siblings, 0 replies; 19+ messages in thread
From: Paolo Bonzini @ 2013-05-06  6:17 UTC (permalink / raw)
  To: liu ping fan
  Cc: Peter Maydell, Anthony Liguori, Jan Kiszka, qemu-devel,
	Vasilis Liaskovitis, Stefan Hajnoczi

Il 06/05/2013 03:42, liu ping fan ha scritto:
> On Sat, May 4, 2013 at 5:53 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> Il 03/05/2013 04:45, Liu Ping Fan ha scritto:
>>> v1->v2:
>>>   1.split RCU prepared style update and monitor the RAM-Device refcnt into two patches (patch 2,4)
>>>   2.introduce AddrSpaceMem, which is similar to HostMem, but based on address space, while
>>>     the original HostMem only server system memory address space
>>
>> This looks suspiciously similar to FlatView, doesn't it?
>>
> FlatView is used for all the listeners, including for mmio dispatching,
> which aims to mapping from hwaddr to DeviceState for dispatching service.
> While here, we mapping from hwaddr to hva.
> 
>> Perhaps the right thing to do is to add the appropriate locking and
>> RCU-style updating to address_space_update_topology and
> 
> RCU implementation is data struct related,  and each listener has its
> local table, so I think it is more reasonable to implement them
> separately.

I mentioned address_space_update_topology simply because it is where the
FlatView is replaced.

>> memory_region_find.   (And replacing flatview_destroy with ref/unref
>> similar to HostMem in your patch 2).  Then just switch dataplane to use
>> memory_region_find...
>>
> In fact, I think, HostMem listener can be an substitute for
> cpu_physical_memory_map(),  the main issue can be the migration
> support.  But before getting big patches, I hope to have this smaller
> and simpler one.

I think replacing HostMem with FlatView is a smaller patch than these
ones.  I'll try to make a prototype.

Paolo

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2013-05-06  6:18 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-05-03  2:45 [Qemu-devel] [PATCH v2 0/6] proposal to make hostmem listener RAM unplug safe Liu Ping Fan
2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 1/6] hostmem: make hostmem single, not per Vring related Liu Ping Fan
2013-05-03  8:54   ` Stefan Hajnoczi
2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 2/6] hostmem: AddressSpace has its own map and maintained by RCU prepared style Liu Ping Fan
2013-05-03  9:10   ` Stefan Hajnoczi
2013-05-06  1:44     ` liu ping fan
2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 3/6] memory: add ref/unref interface for MemroyRegionOps Liu Ping Fan
2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 4/6] hostmem: hostmem listener pin RAM-Device by refcnt Liu Ping Fan
2013-05-03  9:35   ` Stefan Hajnoczi
2013-05-06  1:45     ` liu ping fan
2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 5/6] Vring: use hostmem's RAM safe api Liu Ping Fan
2013-05-03 10:02   ` Stefan Hajnoczi
2013-05-06  1:45     ` liu ping fan
2013-05-03  2:45 ` [Qemu-devel] [PATCH v2 6/6] virtio-blk: release reference to RAM's memoryRegion Liu Ping Fan
2013-05-03 10:09   ` Stefan Hajnoczi
2013-05-06  2:39     ` liu ping fan
2013-05-04  9:53 ` [Qemu-devel] [PATCH v2 0/6] proposal to make hostmem listener RAM unplug safe Paolo Bonzini
2013-05-06  1:42   ` liu ping fan
2013-05-06  6:17     ` Paolo Bonzini

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).