qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [RFC post-2.5 PATCH 0/5] VFIO: capability chains
@ 2015-11-23 21:13 Alex Williamson
  2015-11-23 21:13 ` [Qemu-devel] [RFC post-2.5 PATCH 1/5] vfio: Wrap VFIO_DEVICE_GET_REGION_INFO Alex Williamson
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Alex Williamson @ 2015-11-23 21:13 UTC (permalink / raw)
  To: alex.williamson; +Cc: qemu-devel, kvm

This is the matching QEMU changes for the proposed kernel-side
capability chains.  Unfortunately there's also a lot of churn to get
to consistent interfaces involved in this series, which allow us to
generically handle multiple mmaps overlapping a region and making the
actual step of consuming the new kernel data trivial.  The last patch
shows the proof of concept for reallocating the info buffer when
necessary and finding and using capability data.  Apologies for the
patches being a little rough, but I hope the concept shows through.
As noted above, this is of course post-2.5 material and of course
kernel header updates would only be included when accepted upstream.
Thanks,

Alex

---

Alex Williamson (5):
      vfio: Wrap VFIO_DEVICE_GET_REGION_INFO
      vfio: Generalize region support
      vfio/pci: Convert all MemoryRegion to dynamic alloc and consistent functions
      linux-headers/vfio: Update for proposed capabilities list
      vfio: Enable sparse mmap capability


 hw/vfio/common.c              |  245 +++++++++++++++++++++++++++----
 hw/vfio/pci-quirks.c          |   62 ++++----
 hw/vfio/pci.c                 |  329 +++++++++++++++++++----------------------
 hw/vfio/pci.h                 |   10 +
 hw/vfio/platform.c            |   71 ++-------
 include/hw/vfio/vfio-common.h |   26 ++-
 linux-headers/linux/vfio.h    |   53 ++++++-
 trace-events                  |   12 +
 8 files changed, 502 insertions(+), 306 deletions(-)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Qemu-devel] [RFC post-2.5 PATCH 1/5] vfio: Wrap VFIO_DEVICE_GET_REGION_INFO
  2015-11-23 21:13 [Qemu-devel] [RFC post-2.5 PATCH 0/5] VFIO: capability chains Alex Williamson
@ 2015-11-23 21:13 ` Alex Williamson
  2015-11-23 21:13 ` [Qemu-devel] [RFC post-2.5 PATCH 2/5] vfio: Generalize region support Alex Williamson
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Alex Williamson @ 2015-11-23 21:13 UTC (permalink / raw)
  To: alex.williamson; +Cc: qemu-devel, kvm

In preparation for supporting capability chains on regions, wrap
ioctl(VFIO_DEVICE_GET_REGION_INFO) so we don't duplicate the code for
each caller.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 hw/vfio/common.c              |   18 +++++++++
 hw/vfio/pci.c                 |   81 +++++++++++++++++++++--------------------
 hw/vfio/platform.c            |   13 ++++---
 include/hw/vfio/vfio-common.h |    3 ++
 4 files changed, 69 insertions(+), 46 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 6797208..901a2b9 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -958,6 +958,24 @@ void vfio_put_base_device(VFIODevice *vbasedev)
     close(vbasedev->fd);
 }
 
+int vfio_get_region_info(VFIODevice *vbasedev, int index,
+                         struct vfio_region_info **info)
+{
+    size_t argsz = sizeof(struct vfio_region_info);
+
+    *info = g_malloc0(argsz);
+
+    (*info)->index = index;
+    (*info)->argsz = argsz;
+
+    if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
+        g_free(*info);
+        return -errno;
+    }
+
+    return 0;
+}
+
 static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid,
                                    int req, void *param)
 {
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 1fb868c..a4f3f1f 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -768,25 +768,25 @@ static void vfio_update_msi(VFIOPCIDevice *vdev)
 
 static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
 {
-    struct vfio_region_info reg_info = {
-        .argsz = sizeof(reg_info),
-        .index = VFIO_PCI_ROM_REGION_INDEX
-    };
+    struct vfio_region_info *reg_info;
     uint64_t size;
     off_t off = 0;
     ssize_t bytes;
 
-    if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
+    if (vfio_get_region_info(&vdev->vbasedev,
+                             VFIO_PCI_ROM_REGION_INDEX, &reg_info)) {
         error_report("vfio: Error getting ROM info: %m");
         return;
     }
 
-    trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info.size,
-                            (unsigned long)reg_info.offset,
-                            (unsigned long)reg_info.flags);
+    trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size,
+                            (unsigned long)reg_info->offset,
+                            (unsigned long)reg_info->flags);
+
+    vdev->rom_size = size = reg_info->size;
+    vdev->rom_offset = reg_info->offset;
 
-    vdev->rom_size = size = reg_info.size;
-    vdev->rom_offset = reg_info.offset;
+    g_free(reg_info);
 
     if (!vdev->rom_size) {
         vdev->rom_read_failed = true;
@@ -2010,7 +2010,7 @@ static VFIODeviceOps vfio_pci_ops = {
 static int vfio_populate_device(VFIOPCIDevice *vdev)
 {
     VFIODevice *vbasedev = &vdev->vbasedev;
-    struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
+    struct vfio_region_info *reg_info;
     struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
     int i, ret = -1;
 
@@ -2032,72 +2032,73 @@ static int vfio_populate_device(VFIOPCIDevice *vdev)
     }
 
     for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) {
-        reg_info.index = i;
-
-        ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
+        ret = vfio_get_region_info(vbasedev, i, &reg_info);
         if (ret) {
             error_report("vfio: Error getting region %d info: %m", i);
             goto error;
         }
 
         trace_vfio_populate_device_region(vbasedev->name, i,
-                                          (unsigned long)reg_info.size,
-                                          (unsigned long)reg_info.offset,
-                                          (unsigned long)reg_info.flags);
+                                          (unsigned long)reg_info->size,
+                                          (unsigned long)reg_info->offset,
+                                          (unsigned long)reg_info->flags);
 
         vdev->bars[i].region.vbasedev = vbasedev;
-        vdev->bars[i].region.flags = reg_info.flags;
-        vdev->bars[i].region.size = reg_info.size;
-        vdev->bars[i].region.fd_offset = reg_info.offset;
+        vdev->bars[i].region.flags = reg_info->flags;
+        vdev->bars[i].region.size = reg_info->size;
+        vdev->bars[i].region.fd_offset = reg_info->offset;
         vdev->bars[i].region.nr = i;
         QLIST_INIT(&vdev->bars[i].quirks);
-    }
 
-    reg_info.index = VFIO_PCI_CONFIG_REGION_INDEX;
+        g_free(reg_info);
+    }
 
-    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
+    ret = vfio_get_region_info(vbasedev,
+                               VFIO_PCI_CONFIG_REGION_INDEX, &reg_info);
     if (ret) {
         error_report("vfio: Error getting config info: %m");
         goto error;
     }
 
     trace_vfio_populate_device_config(vdev->vbasedev.name,
-                                      (unsigned long)reg_info.size,
-                                      (unsigned long)reg_info.offset,
-                                      (unsigned long)reg_info.flags);
+                                      (unsigned long)reg_info->size,
+                                      (unsigned long)reg_info->offset,
+                                      (unsigned long)reg_info->flags);
 
-    vdev->config_size = reg_info.size;
+    vdev->config_size = reg_info->size;
     if (vdev->config_size == PCI_CONFIG_SPACE_SIZE) {
         vdev->pdev.cap_present &= ~QEMU_PCI_CAP_EXPRESS;
     }
-    vdev->config_offset = reg_info.offset;
+    vdev->config_offset = reg_info->offset;
+
+    g_free(reg_info);
 
     if ((vdev->features & VFIO_FEATURE_ENABLE_VGA) &&
         vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) {
-        struct vfio_region_info vga_info = {
-            .argsz = sizeof(vga_info),
-            .index = VFIO_PCI_VGA_REGION_INDEX,
-         };
-
-        ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, &vga_info);
+        ret = vfio_get_region_info(vbasedev,
+                                   VFIO_PCI_VGA_REGION_INDEX, &reg_info);
         if (ret) {
             error_report(
                 "vfio: Device does not support requested feature x-vga");
             goto error;
         }
 
-        if (!(vga_info.flags & VFIO_REGION_INFO_FLAG_READ) ||
-            !(vga_info.flags & VFIO_REGION_INFO_FLAG_WRITE) ||
-            vga_info.size < 0xbffff + 1) {
+        if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) ||
+            !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) ||
+            reg_info->size < 0xbffff + 1) {
             error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx",
-                         (unsigned long)vga_info.flags,
-                         (unsigned long)vga_info.size);
+                         (unsigned long)reg_info->flags,
+                         (unsigned long)reg_info->size);
+            g_free(reg_info);
+            ret = -1;
             goto error;
         }
 
-        vdev->vga.fd_offset = vga_info.offset;
+        vdev->vga.fd_offset = reg_info->offset;
         vdev->vga.fd = vdev->vbasedev.fd;
 
+        g_free(reg_info);
+
         vdev->vga.region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE;
         vdev->vga.region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM;
         QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_MEM].quirks);
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index 289b498..a4642f1 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -475,23 +475,24 @@ static int vfio_populate_device(VFIODevice *vbasedev)
     vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions);
 
     for (i = 0; i < vbasedev->num_regions; i++) {
-        struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
+        struct vfio_region_info *reg_info;
         VFIORegion *ptr;
 
         vdev->regions[i] = g_new0(VFIORegion, 1);
         ptr = vdev->regions[i];
-        reg_info.index = i;
-        ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
+        ret = vfio_get_region_info(vbasedev, i, &reg_info);
         if (ret) {
             error_report("vfio: Error getting region %d info: %m", i);
             goto reg_error;
         }
-        ptr->flags = reg_info.flags;
-        ptr->size = reg_info.size;
-        ptr->fd_offset = reg_info.offset;
+        ptr->flags = reg_info->flags;
+        ptr->size = reg_info->size;
+        ptr->fd_offset = reg_info->offset;
         ptr->nr = i;
         ptr->vbasedev = vbasedev;
 
+        g_free(reg_info);
+
         trace_vfio_platform_populate_regions(ptr->nr,
                             (unsigned long)ptr->flags,
                             (unsigned long)ptr->size,
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index f037f3c..e7a0039 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -25,6 +25,7 @@
 #include "exec/memory.h"
 #include "qemu/queue.h"
 #include "qemu/notify.h"
+#include <linux/vfio.h>
 
 /*#define DEBUG_VFIO*/
 #ifdef DEBUG_VFIO
@@ -133,6 +134,8 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as);
 void vfio_put_group(VFIOGroup *group);
 int vfio_get_device(VFIOGroup *group, const char *name,
                     VFIODevice *vbasedev);
+int vfio_get_region_info(VFIODevice *vbasedev, int index,
+                         struct vfio_region_info **info);
 
 extern const MemoryRegionOps vfio_region_ops;
 extern QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list;

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Qemu-devel] [RFC post-2.5 PATCH 2/5] vfio: Generalize region support
  2015-11-23 21:13 [Qemu-devel] [RFC post-2.5 PATCH 0/5] VFIO: capability chains Alex Williamson
  2015-11-23 21:13 ` [Qemu-devel] [RFC post-2.5 PATCH 1/5] vfio: Wrap VFIO_DEVICE_GET_REGION_INFO Alex Williamson
@ 2015-11-23 21:13 ` Alex Williamson
  2015-11-23 21:14 ` [Qemu-devel] [RFC post-2.5 PATCH 3/5] vfio/pci: Convert all MemoryRegion to dynamic alloc and consistent functions Alex Williamson
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Alex Williamson @ 2015-11-23 21:13 UTC (permalink / raw)
  To: alex.williamson; +Cc: qemu-devel, kvm

Both platform and PCI vfio drivers create a "slow", I/O memory region
with one or more mmap memory regions overlayed when supported by the
device. Generalize this to a set of common helpers in the core that
pulls the region info from vfio, fills the region data, configures
slow mapping, and adds helpers for comleting the mmap, enable/disable,
and teardown.  This can be immediately used by the PCI MSI-X code,
which needs to mmap around the MSI-X vector table and will also be
used with the vfio sparse mmap capability.

This also changes VFIORegion.mem to be dynamically allocated because
otherwise we don't know how the caller has allocated VFIORegion and
therefore don't know whether to unreference it to destroy the
MemoryRegion or not.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 hw/vfio/common.c              |  169 ++++++++++++++++++++++++++++++++++-------
 hw/vfio/pci-quirks.c          |   24 +++---
 hw/vfio/pci.c                 |  164 ++++++++++++++++++++--------------------
 hw/vfio/platform.c            |   72 +++--------------
 include/hw/vfio/vfio-common.h |   23 ++++--
 trace-events                  |   10 ++
 6 files changed, 272 insertions(+), 190 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 901a2b9..a73c6ad 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -492,46 +492,159 @@ static void vfio_listener_release(VFIOContainer *container)
     memory_listener_unregister(&container->listener);
 }
 
-int vfio_mmap_region(Object *obj, VFIORegion *region,
-                     MemoryRegion *mem, MemoryRegion *submem,
-                     void **map, size_t size, off_t offset,
-                     const char *name)
+int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
+                      int index, const char *name)
 {
-    int ret = 0;
-    VFIODevice *vbasedev = region->vbasedev;
+    struct vfio_region_info *info;
+    int ret;
+
+    ret = vfio_get_region_info(vbasedev, index, &info);
+    if (ret) {
+        return ret;
+    }
+
+    region->vbasedev = vbasedev;
+    region->flags = info->flags;
+    region->size = info->size;
+    region->fd_offset = info->offset;
+    region->nr = index;
 
-    if (!vbasedev->no_mmap && size && region->flags &
-        VFIO_REGION_INFO_FLAG_MMAP) {
-        int prot = 0;
+    if (region->size) {
+        region->mem = g_new0(MemoryRegion, 1);
+        memory_region_init_io(region->mem, obj, &vfio_region_ops,
+                              region, name, region->size);
 
-        if (region->flags & VFIO_REGION_INFO_FLAG_READ) {
-            prot |= PROT_READ;
+        if (!vbasedev->no_mmap && region->flags & VFIO_REGION_INFO_FLAG_MMAP) {
+            region->nr_mmaps = 1;
+            region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
+
+            region->mmaps[0].offset = 0;
+            region->mmaps[0].size = region->size;
         }
+    }
 
-        if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) {
-            prot |= PROT_WRITE;
+    g_free(info);
+
+    trace_vfio_region_setup(vbasedev->name, index, name,
+                            region->flags, region->fd_offset, region->size);
+    return 0;
+}
+
+int vfio_region_mmap(VFIORegion *region)
+{
+    int i, prot = 0;
+    char *name;
+
+    if (!region->mem) {
+        return 0;
+    }
+
+    prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0;
+    prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;
+
+    for (i = 0; i < region->nr_mmaps; i++) {
+        region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
+                                     MAP_SHARED, region->vbasedev->fd,
+                                     region->fd_offset +
+                                     region->mmaps[i].offset);
+        if (region->mmaps[i].mmap == MAP_FAILED) {
+            int ret = -errno;
+
+            trace_vfio_region_mmap_fault(memory_region_name(region->mem), i,
+                                         region->fd_offset +
+                                         region->mmaps[i].offset,
+                                         region->fd_offset +
+                                         region->mmaps[i].offset +
+                                         region->mmaps[i].size - 1, ret);
+
+            region->mmaps[i].mmap = NULL;
+
+            for (i--; i >= 0; i--) {
+                memory_region_del_subregion(region->mem, &region->mmaps[i].mem);
+                munmap(region->mmaps[i].mmap, region->mmaps[i].size);
+                object_unparent(OBJECT(&region->mmaps[i].mem));
+                region->mmaps[i].mmap = NULL;
+            }
+
+            return ret;
         }
 
-        *map = mmap(NULL, size, prot, MAP_SHARED,
-                    vbasedev->fd,
-                    region->fd_offset + offset);
-        if (*map == MAP_FAILED) {
-            *map = NULL;
-            ret = -errno;
-            goto empty_region;
+	name = g_strdup_printf("%s mmaps[%d]",
+                               memory_region_name(region->mem), i);
+        memory_region_init_ram_ptr(&region->mmaps[i].mem,
+                                   memory_region_owner(region->mem),
+                                   name, region->mmaps[i].size,
+                                   region->mmaps[i].mmap);
+        g_free(name);
+        memory_region_set_skip_dump(&region->mmaps[i].mem);
+        memory_region_add_subregion(region->mem, region->mmaps[i].offset,
+                                    &region->mmaps[i].mem);
+
+        trace_vfio_region_mmap(memory_region_name(&region->mmaps[i].mem),
+                               region->mmaps[i].offset,
+                               region->mmaps[i].offset +
+                               region->mmaps[i].size - 1);
+    }
+
+    return 0;
+}
+
+void vfio_region_exit(VFIORegion *region)
+{
+    int i;
+
+    if (!region->mem) {
+        return;
+    }
+
+    for (i = 0; i < region->nr_mmaps; i++) {
+        if (region->mmaps[i].mmap) {
+            memory_region_del_subregion(region->mem, &region->mmaps[i].mem);
         }
+    }
 
-        memory_region_init_ram_ptr(submem, obj, name, size, *map);
-        memory_region_set_skip_dump(submem);
-    } else {
-empty_region:
-        /* Create a zero sized sub-region to make cleanup easy. */
-        memory_region_init(submem, obj, name, 0);
+    trace_vfio_region_exit(region->vbasedev->name, region->nr);
+}
+
+void vfio_region_finalize(VFIORegion *region)
+{
+    int i;
+
+    if (!region->mem) {
+        return;
+    }
+
+    for (i = 0; i < region->nr_mmaps; i++) {
+        if (region->mmaps[i].mmap) {
+            munmap(region->mmaps[i].mmap, region->mmaps[i].size);
+            object_unparent(OBJECT(&region->mmaps[i].mem));
+        }
     }
 
-    memory_region_add_subregion(mem, offset, submem);
+    object_unparent(OBJECT(region->mem));
 
-    return ret;
+    g_free(region->mem);
+    g_free(region->mmaps);
+
+    trace_vfio_region_finalize(region->vbasedev->name, region->nr);
+}
+
+void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled)
+{
+    int i;
+
+    if (!region->mem) {
+        return;
+    }
+
+    for (i = 0; i < region->nr_mmaps; i++) {
+        if (region->mmaps[i].mmap) {
+            memory_region_set_enabled(&region->mmaps[i].mem, enabled);
+        }
+    }
+
+    trace_vfio_region_mmaps_set_enabled(memory_region_name(region->mem),
+                                        enabled);
 }
 
 void vfio_reset_handler(void *opaque)
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index e117c41..92a2d9d 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -336,14 +336,14 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
     memory_region_init_io(window->addr_mem, OBJECT(vdev),
                           &vfio_generic_window_address_quirk, window,
                           "vfio-ati-bar4-window-address-quirk", 4);
-    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                         window->address_offset,
                                         window->addr_mem, 1);
 
     memory_region_init_io(window->data_mem, OBJECT(vdev),
                           &vfio_generic_window_data_quirk, window,
                           "vfio-ati-bar4-window-data-quirk", 4);
-    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                         window->data_offset,
                                         window->data_mem, 1);
 
@@ -377,7 +377,7 @@ static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr)
     memory_region_init_io(mirror->mem, OBJECT(vdev),
                           &vfio_generic_mirror_quirk, mirror,
                           "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE);
-    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                         mirror->offset, mirror->mem, 1);
 
     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
@@ -682,7 +682,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
     memory_region_init_io(window->addr_mem, OBJECT(vdev),
                           &vfio_generic_window_address_quirk, window,
                           "vfio-nvidia-bar5-window-address-quirk", 4);
-    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                         window->address_offset,
                                         window->addr_mem, 1);
     memory_region_set_enabled(window->addr_mem, false);
@@ -690,7 +690,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
     memory_region_init_io(window->data_mem, OBJECT(vdev),
                           &vfio_generic_window_data_quirk, window,
                           "vfio-nvidia-bar5-window-data-quirk", 4);
-    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                         window->data_offset,
                                         window->data_mem, 1);
     memory_region_set_enabled(window->data_mem, false);
@@ -698,13 +698,13 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
     memory_region_init_io(&quirk->mem[2], OBJECT(vdev),
                           &vfio_nvidia_bar5_quirk_master, bar5,
                           "vfio-nvidia-bar5-master-quirk", 4);
-    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                         0, &quirk->mem[2], 1);
 
     memory_region_init_io(&quirk->mem[3], OBJECT(vdev),
                           &vfio_nvidia_bar5_quirk_enable, bar5,
                           "vfio-nvidia-bar5-enable-quirk", 4);
-    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                         4, &quirk->mem[3], 1);
 
     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
@@ -766,7 +766,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
                           &vfio_nvidia_mirror_quirk, mirror,
                           "vfio-nvidia-bar0-88000-mirror-quirk",
                           vdev->config_size);
-    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                         mirror->offset, mirror->mem, 1);
 
     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
@@ -785,7 +785,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
                               &vfio_nvidia_mirror_quirk, mirror,
                               "vfio-nvidia-bar0-1800-mirror-quirk",
                               PCI_CONFIG_SPACE_SIZE);
-        memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+        memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                             mirror->offset, mirror->mem, 1);
 
         QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
@@ -946,13 +946,13 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
     memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
                           &vfio_rtl_address_quirk, rtl,
                           "vfio-rtl8168-window-address-quirk", 4);
-    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                         0x74, &quirk->mem[0], 1);
 
     memory_region_init_io(&quirk->mem[1], OBJECT(vdev),
                           &vfio_rtl_data_quirk, rtl,
                           "vfio-rtl8168-window-data-quirk", 4);
-    memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                         0x70, &quirk->mem[1], 1);
 
     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
@@ -1019,7 +1019,7 @@ void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
 
     QLIST_FOREACH(quirk, &bar->quirks, next) {
         for (i = 0; i < quirk->nr_mem; i++) {
-            memory_region_del_subregion(&bar->region.mem, &quirk->mem[i]);
+            memory_region_del_subregion(bar->region.mem, &quirk->mem[i]);
         }
     }
 }
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a4f3f1f..73e148d 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1170,6 +1170,68 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos)
     return 0;
 }
 
+static void vfio_pci_fixup_msix_region(VFIOPCIDevice *vdev)
+{
+    off_t start, end;
+    VFIORegion *region = &vdev->bars[vdev->msix->table_bar].region;
+
+    /*
+     * We expect to find a single mmap covering the whole BAR, anything else
+     * means it's either unsupported or already setup.
+     */
+    if (region->nr_mmaps != 1 || region->mmaps[0].offset ||
+        region->size != region->mmaps[0].size) {
+        return;
+    }
+
+    /* MSI-X table start and end aligned to host page size */
+    start = vdev->msix->table_offset & qemu_real_host_page_mask;
+    end = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
+                               (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));
+
+    /* Does the MSI-X table cover the beginning or the BAR, or the whole BAR? */
+    if (!start) {
+        if (end >= region->size) {
+            region->nr_mmaps = 0;
+            g_free(region->mmaps);
+            region->mmaps = NULL;
+            trace_vfio_msix_fixup(vdev->vbasedev.name,
+                                  vdev->msix->table_bar, 0, 0);
+        } else {
+            region->mmaps[0].offset = end;
+            region->mmaps[0].size = region->size - end;
+            trace_vfio_msix_fixup(vdev->vbasedev.name,
+                              vdev->msix->table_bar, region->mmaps[0].offset,
+                              region->mmaps[0].offset + region->mmaps[0].size);
+        }
+
+    /* Maybe it's aligned at the end of the BAR */
+    } else if (end >= region->size) {
+        region->mmaps[0].size = start;
+        trace_vfio_msix_fixup(vdev->vbasedev.name,
+                              vdev->msix->table_bar, region->mmaps[0].offset,
+                              region->mmaps[0].offset + region->mmaps[0].size);
+
+    /* Otherwise it must split the BAR */
+    } else {
+        region->nr_mmaps = 2;
+        region->mmaps = g_renew(VFIOMmap, region->mmaps, 2);
+
+        memcpy(&region->mmaps[1], &region->mmaps[0], sizeof(VFIOMmap));
+
+        region->mmaps[0].size = start;
+        trace_vfio_msix_fixup(vdev->vbasedev.name,
+                              vdev->msix->table_bar, region->mmaps[0].offset,
+                              region->mmaps[0].offset + region->mmaps[0].size);
+
+        region->mmaps[1].offset = end;
+        region->mmaps[1].size = region->size - end;
+        trace_vfio_msix_fixup(vdev->vbasedev.name,
+                              vdev->msix->table_bar, region->mmaps[1].offset,
+                              region->mmaps[1].offset + region->mmaps[1].size);
+    }
+}
+
 /*
  * We don't have any control over how pci_add_capability() inserts
  * capabilities into the chain.  In order to setup MSI-X we need a
@@ -1244,6 +1306,8 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev)
                                 msix->table_offset, msix->entries);
     vdev->msix = msix;
 
+    vfio_pci_fixup_msix_region(vdev);
+
     return 0;
 }
 
@@ -1252,9 +1316,9 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos)
     int ret;
 
     ret = msix_init(&vdev->pdev, vdev->msix->entries,
-                    &vdev->bars[vdev->msix->table_bar].region.mem,
+                    vdev->bars[vdev->msix->table_bar].region.mem,
                     vdev->msix->table_bar, vdev->msix->table_offset,
-                    &vdev->bars[vdev->msix->pba_bar].region.mem,
+                    vdev->bars[vdev->msix->pba_bar].region.mem,
                     vdev->msix->pba_bar, vdev->msix->pba_offset, pos);
     if (ret < 0) {
         if (ret == -ENOTSUP) {
@@ -1273,8 +1337,8 @@ static void vfio_teardown_msi(VFIOPCIDevice *vdev)
 
     if (vdev->msix) {
         msix_uninit(&vdev->pdev,
-                    &vdev->bars[vdev->msix->table_bar].region.mem,
-                    &vdev->bars[vdev->msix->pba_bar].region.mem);
+                    vdev->bars[vdev->msix->table_bar].region.mem,
+                    vdev->bars[vdev->msix->pba_bar].region.mem);
     }
 }
 
@@ -1286,16 +1350,7 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled)
     int i;
 
     for (i = 0; i < PCI_ROM_SLOT; i++) {
-        VFIOBAR *bar = &vdev->bars[i];
-
-        if (!bar->region.size) {
-            continue;
-        }
-
-        memory_region_set_enabled(&bar->region.mmap_mem, enabled);
-        if (vdev->msix && vdev->msix->table_bar == i) {
-            memory_region_set_enabled(&vdev->msix->mmap_mem, enabled);
-        }
+        vfio_region_mmaps_set_enabled(&vdev->bars[i].region, enabled);
     }
 }
 
@@ -1309,11 +1364,7 @@ static void vfio_unregister_bar(VFIOPCIDevice *vdev, int nr)
 
     vfio_bar_quirk_teardown(vdev, nr);
 
-    memory_region_del_subregion(&bar->region.mem, &bar->region.mmap_mem);
-
-    if (vdev->msix && vdev->msix->table_bar == nr) {
-        memory_region_del_subregion(&bar->region.mem, &vdev->msix->mmap_mem);
-    }
+    vfio_region_exit(&bar->region);
 }
 
 static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
@@ -1326,18 +1377,13 @@ static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
 
     vfio_bar_quirk_free(vdev, nr);
 
-    munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem));
-
-    if (vdev->msix && vdev->msix->table_bar == nr) {
-        munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem));
-    }
+    vfio_region_finalize(&bar->region);
 }
 
 static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
 {
     VFIOBAR *bar = &vdev->bars[nr];
     uint64_t size = bar->region.size;
-    char name[64];
     uint32_t pci_bar;
     uint8_t type;
     int ret;
@@ -1347,10 +1393,6 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
         return;
     }
 
-    snprintf(name, sizeof(name), "VFIO %04x:%02x:%02x.%x BAR %d",
-             vdev->host.domain, vdev->host.bus, vdev->host.slot,
-             vdev->host.function, nr);
-
     /* Determine what type of BAR this is for registration */
     ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar),
                 vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr));
@@ -1365,41 +1407,11 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
     type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK :
                                     ~PCI_BASE_ADDRESS_MEM_MASK);
 
-    /* A "slow" read/write mapping underlies all BARs */
-    memory_region_init_io(&bar->region.mem, OBJECT(vdev), &vfio_region_ops,
-                          bar, name, size);
-    pci_register_bar(&vdev->pdev, nr, type, &bar->region.mem);
-
-    /*
-     * We can't mmap areas overlapping the MSIX vector table, so we
-     * potentially insert a direct-mapped subregion before and after it.
-     */
-    if (vdev->msix && vdev->msix->table_bar == nr) {
-        size = vdev->msix->table_offset & qemu_real_host_page_mask;
-    }
-
-    strncat(name, " mmap", sizeof(name) - strlen(name) - 1);
-    if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem,
-                      &bar->region.mmap_mem, &bar->region.mmap,
-                      size, 0, name)) {
-        error_report("%s unsupported. Performance may be slow", name);
-    }
-
-    if (vdev->msix && vdev->msix->table_bar == nr) {
-        uint64_t start;
-
-        start = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
-                                     (vdev->msix->entries *
-                                      PCI_MSIX_ENTRY_SIZE));
+    pci_register_bar(&vdev->pdev, nr, type, bar->region.mem);
 
-        size = start < bar->region.size ? bar->region.size - start : 0;
-        strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1);
-        /* VFIOMSIXInfo contains another MemoryRegion for this mapping */
-        if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem,
-                          &vdev->msix->mmap_mem,
-                          &vdev->msix->mmap, size, start, name)) {
-            error_report("%s unsupported. Performance may be slow", name);
-        }
+    if (vfio_region_mmap(&bar->region)) {
+        error_report("Failed to mmap %s BAR %d. Performance may be slow",
+                     vdev->vbasedev.name, nr);
     }
 
     vfio_bar_quirk_setup(vdev, nr);
@@ -2032,25 +2044,18 @@ static int vfio_populate_device(VFIOPCIDevice *vdev)
     }
 
     for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) {
-        ret = vfio_get_region_info(vbasedev, i, &reg_info);
+        char *name = g_strdup_printf("%s BAR %d", vbasedev->name, i);
+
+        ret = vfio_region_setup(OBJECT(vdev), vbasedev,
+                                &vdev->bars[i].region, i, name);
+        g_free(name);
+
         if (ret) {
             error_report("vfio: Error getting region %d info: %m", i);
             goto error;
         }
 
-        trace_vfio_populate_device_region(vbasedev->name, i,
-                                          (unsigned long)reg_info->size,
-                                          (unsigned long)reg_info->offset,
-                                          (unsigned long)reg_info->flags);
-
-        vdev->bars[i].region.vbasedev = vbasedev;
-        vdev->bars[i].region.flags = reg_info->flags;
-        vdev->bars[i].region.size = reg_info->size;
-        vdev->bars[i].region.fd_offset = reg_info->offset;
-        vdev->bars[i].region.nr = i;
         QLIST_INIT(&vdev->bars[i].quirks);
-
-        g_free(reg_info);
     }
 
     ret = vfio_get_region_info(vbasedev,
@@ -2136,11 +2141,8 @@ error:
 static void vfio_put_device(VFIOPCIDevice *vdev)
 {
     g_free(vdev->vbasedev.name);
-    if (vdev->msix) {
-        object_unparent(OBJECT(&vdev->msix->mmap_mem));
-        g_free(vdev->msix);
-        vdev->msix = NULL;
-    }
+    g_free(vdev->msix);
+
     vfio_put_base_device(&vdev->vbasedev);
 }
 
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index a4642f1..e0fcd0b 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -142,12 +142,8 @@ static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled)
 {
     int i;
 
-    trace_vfio_platform_mmap_set_enabled(enabled);
-
     for (i = 0; i < vdev->vbasedev.num_regions; i++) {
-        VFIORegion *region = vdev->regions[i];
-
-        memory_region_set_enabled(&region->mmap_mem, enabled);
+        vfio_region_mmaps_set_enabled(vdev->regions[i], enabled);
     }
 }
 
@@ -475,29 +471,16 @@ static int vfio_populate_device(VFIODevice *vbasedev)
     vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions);
 
     for (i = 0; i < vbasedev->num_regions; i++) {
-        struct vfio_region_info *reg_info;
-        VFIORegion *ptr;
+        char *name = g_strdup_printf("VFIO %s region %d\n", vbasedev->name, i);
 
         vdev->regions[i] = g_new0(VFIORegion, 1);
-        ptr = vdev->regions[i];
-        ret = vfio_get_region_info(vbasedev, i, &reg_info);
+        ret = vfio_region_setup(OBJECT(vdev), vbasedev,
+                                vdev->regions[i], i, name);
+        g_free(name);
         if (ret) {
             error_report("vfio: Error getting region %d info: %m", i);
             goto reg_error;
         }
-        ptr->flags = reg_info->flags;
-        ptr->size = reg_info->size;
-        ptr->fd_offset = reg_info->offset;
-        ptr->nr = i;
-        ptr->vbasedev = vbasedev;
-
-        g_free(reg_info);
-
-        trace_vfio_platform_populate_regions(ptr->nr,
-                            (unsigned long)ptr->flags,
-                            (unsigned long)ptr->size,
-                            ptr->vbasedev->fd,
-                            (unsigned long)ptr->fd_offset);
     }
 
     vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
@@ -534,6 +517,9 @@ irq_err:
     }
 reg_error:
     for (i = 0; i < vbasedev->num_regions; i++) {
+        if (vdev->regions[i]) {
+            vfio_region_finalize(vdev->regions[i]);
+        }
         g_free(vdev->regions[i]);
     }
     g_free(vdev->regions);
@@ -629,41 +615,6 @@ static int vfio_base_device_init(VFIODevice *vbasedev)
 }
 
 /**
- * vfio_map_region - initialize the 2 memory regions for a given
- * MMIO region index
- * @vdev: the VFIO platform device handle
- * @nr: the index of the region
- *
- * Init the top memory region and the mmapped memory region beneath
- * VFIOPlatformDevice is used since VFIODevice is not a QOM Object
- * and could not be passed to memory region functions
-*/
-static void vfio_map_region(VFIOPlatformDevice *vdev, int nr)
-{
-    VFIORegion *region = vdev->regions[nr];
-    uint64_t size = region->size;
-    char name[64];
-
-    if (!size) {
-        return;
-    }
-
-    g_snprintf(name, sizeof(name), "VFIO %s region %d",
-               vdev->vbasedev.name, nr);
-
-    /* A "slow" read/write mapping underlies all regions */
-    memory_region_init_io(&region->mem, OBJECT(vdev), &vfio_region_ops,
-                          region, name, size);
-
-    g_strlcat(name, " mmap", sizeof(name));
-
-    if (vfio_mmap_region(OBJECT(vdev), region, &region->mem,
-                         &region->mmap_mem, &region->mmap, size, 0, name)) {
-        error_report("%s unsupported. Performance may be slow", name);
-    }
-}
-
-/**
  * vfio_platform_realize  - the device realize function
  * @dev: device state pointer
  * @errp: error
@@ -691,8 +642,11 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp)
     }
 
     for (i = 0; i < vbasedev->num_regions; i++) {
-        vfio_map_region(vdev, i);
-        sysbus_init_mmio(sbdev, &vdev->regions[i]->mem);
+        if (vfio_region_mmap(vdev->regions[i])) {
+            error_report("%s mmap unsupported. Performance may be slow",
+                         memory_region_name(vdev->regions[i]->mem));
+        }
+        sysbus_init_mmio(sbdev, vdev->regions[i]->mem);
     }
 }
 
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index e7a0039..f5e8594 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -41,14 +41,21 @@ enum {
     VFIO_DEVICE_TYPE_PLATFORM = 1,
 };
 
+typedef struct VFIOMmap {
+    MemoryRegion mem;
+    void *mmap;
+    off_t offset;
+    size_t size;
+} VFIOMmap;
+
 typedef struct VFIORegion {
     struct VFIODevice *vbasedev;
     off_t fd_offset; /* offset of region within device fd */
-    MemoryRegion mem; /* slow, read/write access */
-    MemoryRegion mmap_mem; /* direct mapped access */
-    void *mmap;
+    MemoryRegion *mem; /* slow, read/write access */
     size_t size;
     uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
+    uint32_t nr_mmaps;
+    VFIOMmap *mmaps;
     uint8_t nr; /* cache the region number for debug */
 } VFIORegion;
 
@@ -125,10 +132,12 @@ void vfio_region_write(void *opaque, hwaddr addr,
                            uint64_t data, unsigned size);
 uint64_t vfio_region_read(void *opaque,
                           hwaddr addr, unsigned size);
-int vfio_mmap_region(Object *vdev, VFIORegion *region,
-                     MemoryRegion *mem, MemoryRegion *submem,
-                     void **map, size_t size, off_t offset,
-                     const char *name);
+int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
+                      int index, const char *name);
+int vfio_region_mmap(VFIORegion *region);
+void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled);
+void vfio_region_exit(VFIORegion *region);
+void vfio_region_finalize(VFIORegion *region);
 void vfio_reset_handler(void *opaque);
 VFIOGroup *vfio_get_group(int groupid, AddressSpace *as);
 void vfio_put_group(VFIOGroup *group);
diff --git a/trace-events b/trace-events
index 0b0ff02..0128680 100644
--- a/trace-events
+++ b/trace-events
@@ -1628,6 +1628,7 @@ vfio_msix_vector_do_use(const char *name, int index) " (%s) vector %d used"
 vfio_msix_vector_release(const char *name, int index) " (%s) vector %d released"
 vfio_msix_enable(const char *name) " (%s)"
 vfio_msix_disable(const char *name) " (%s)"
+vfio_msix_fixup(const char *name, int bar, off_t offset, size_t size) " (%s) MSI-X region %d mmap fixup [0x%lx - 0x%lx]"
 vfio_msi_enable(const char *name, int nr_vectors) " (%s) Enabled %d MSI vectors"
 vfio_msi_disable(const char *name) " (%s)"
 vfio_pci_load_rom(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s ROM:\n  size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
@@ -1646,7 +1647,6 @@ vfio_pci_hot_reset(const char *name, const char *type) " (%s) %s"
 vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset dependent devices:"
 vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d"
 vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s"
-vfio_populate_device_region(const char *region_name, int index, unsigned long size, unsigned long offset, unsigned long flags) "Device %s region %d:\n  size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
 vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n  size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
 vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m"
 vfio_initfn(const char *name, int group_id) " (%s) group %d"
@@ -1702,13 +1702,17 @@ vfio_disconnect_container(int fd) "close container->fd=%d"
 vfio_put_group(int fd) "close group->fd=%d"
 vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u"
 vfio_put_base_device(int fd) "close vdev->fd=%d"
+vfio_region_setup(const char *dev, int index, const char *name, unsigned long flags, unsigned long offset, unsigned long size) "Device %s, region %d \"%s\", flags: %lx, offset: %lx, size: %lx"
+vfio_region_mmap_fault(const char *name, int index, unsigned long offset, unsigned long size, int fault) "Region %s mmaps[%d], [%lx - %lx], fault: %d"
+vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) "Region %s [%lx - %lx]"
+vfio_region_exit(const char *name, int index) "Device %s, region %d"
+vfio_region_finalize(const char *name, int index) "Device %s, region %d"
+vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s mmaps enabled: %d"
 
 # hw/vfio/platform.c
-vfio_platform_populate_regions(int region_index, unsigned long flag, unsigned long size, int fd, unsigned long offset) "- region %d flags = 0x%lx, size = 0x%lx, fd= %d, offset = 0x%lx"
 vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d"
 vfio_platform_realize(char *name, char *compat) "vfio device %s, compat = %s"
 vfio_platform_eoi(int pin, int fd) "EOI IRQ pin %d (fd=%d)"
-vfio_platform_mmap_set_enabled(bool enabled) "fast path = %d"
 vfio_platform_intp_mmap_enable(int pin) "IRQ #%d still active, stay in slow path"
 vfio_platform_intp_interrupt(int pin, int fd) "Inject IRQ #%d (fd = %d)"
 vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending IRQ #%d (fd = %d)"

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Qemu-devel] [RFC post-2.5 PATCH 3/5] vfio/pci: Convert all MemoryRegion to dynamic alloc and consistent functions
  2015-11-23 21:13 [Qemu-devel] [RFC post-2.5 PATCH 0/5] VFIO: capability chains Alex Williamson
  2015-11-23 21:13 ` [Qemu-devel] [RFC post-2.5 PATCH 1/5] vfio: Wrap VFIO_DEVICE_GET_REGION_INFO Alex Williamson
  2015-11-23 21:13 ` [Qemu-devel] [RFC post-2.5 PATCH 2/5] vfio: Generalize region support Alex Williamson
@ 2015-11-23 21:14 ` Alex Williamson
  2015-11-23 21:14 ` [Qemu-devel] [RFC post-2.5 PATCH 4/5] linux-headers/vfio: Update for proposed capabilities list Alex Williamson
  2015-11-23 21:14 ` [Qemu-devel] [RFC post-2.5 PATCH 5/5] vfio: Enable sparse mmap capability Alex Williamson
  4 siblings, 0 replies; 6+ messages in thread
From: Alex Williamson @ 2015-11-23 21:14 UTC (permalink / raw)
  To: alex.williamson; +Cc: qemu-devel, kvm

Match common vfio code with setup, exit, and finalize functions for
BAR, quirk, and VGA management.  VGA is also changed to dynamic
allocation to match the other MemoryRegions.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 hw/vfio/pci-quirks.c |   38 ++++++++---------
 hw/vfio/pci.c        |  114 +++++++++++++++++++++-----------------------------
 hw/vfio/pci.h        |   10 ++--
 3 files changed, 71 insertions(+), 91 deletions(-)

diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 92a2d9d..d4b076f 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -289,10 +289,10 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
 
     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev,
                           "vfio-ati-3c3-quirk", 1);
-    memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
+    memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
                                 3 /* offset 3 bytes from 0x3c0 */, quirk->mem);
 
-    QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
+    QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
                       quirk, next);
 
     trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name);
@@ -427,7 +427,7 @@ static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque,
 
     quirk->state = NONE;
 
-    return vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
+    return vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
                          addr + 0x14, size);
 }
 
@@ -464,7 +464,7 @@ static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr,
         break;
     }
 
-    vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
+    vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
                    addr + 0x14, data, size);
 }
 
@@ -480,7 +480,7 @@ static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
     VFIONvidia3d0Quirk *quirk = opaque;
     VFIOPCIDevice *vdev = quirk->vdev;
     VFIONvidia3d0State old_state = quirk->state;
-    uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
+    uint64_t data = vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
                                   addr + 0x10, size);
 
     quirk->state = NONE;
@@ -522,7 +522,7 @@ static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
         }
     }
 
-    vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
+    vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
                    addr + 0x10, data, size);
 }
 
@@ -550,15 +550,15 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
 
     memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk,
                           data, "vfio-nvidia-3d4-quirk", 2);
-    memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
+    memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
                                 0x14 /* 0x3c0 + 0x14 */, &quirk->mem[0]);
 
     memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk,
                           data, "vfio-nvidia-3d0-quirk", 2);
-    memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
+    memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
                                 0x10 /* 0x3c0 + 0x10 */, &quirk->mem[1]);
 
-    QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
+    QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
                       quirk, next);
 
     trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name);
@@ -969,28 +969,28 @@ void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
     vfio_vga_probe_nvidia_3d0_quirk(vdev);
 }
 
-void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev)
+void vfio_vga_quirk_exit(VFIOPCIDevice *vdev)
 {
     VFIOQuirk *quirk;
     int i, j;
 
-    for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
-        QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) {
+    for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
+        QLIST_FOREACH(quirk, &vdev->vga->region[i].quirks, next) {
             for (j = 0; j < quirk->nr_mem; j++) {
-                memory_region_del_subregion(&vdev->vga.region[i].mem,
+                memory_region_del_subregion(&vdev->vga->region[i].mem,
                                             &quirk->mem[j]);
             }
         }
     }
 }
 
-void vfio_vga_quirk_free(VFIOPCIDevice *vdev)
+void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev)
 {
     int i, j;
 
-    for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
-        while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) {
-            VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks);
+    for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
+        while (!QLIST_EMPTY(&vdev->vga->region[i].quirks)) {
+            VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga->region[i].quirks);
             QLIST_REMOVE(quirk, next);
             for (j = 0; j < quirk->nr_mem; j++) {
                 object_unparent(OBJECT(&quirk->mem[j]));
@@ -1011,7 +1011,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
     vfio_probe_rtl8168_bar2_quirk(vdev, nr);
 }
 
-void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
+void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
 {
     VFIOBAR *bar = &vdev->bars[nr];
     VFIOQuirk *quirk;
@@ -1024,7 +1024,7 @@ void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
     }
 }
 
-void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr)
+void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr)
 {
     VFIOBAR *bar = &vdev->bars[nr];
     int i;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 73e148d..37649f3 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1354,42 +1354,16 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled)
     }
 }
 
-static void vfio_unregister_bar(VFIOPCIDevice *vdev, int nr)
+static void vfio_bar_setup(VFIOPCIDevice *vdev, int nr)
 {
     VFIOBAR *bar = &vdev->bars[nr];
 
-    if (!bar->region.size) {
-        return;
-    }
-
-    vfio_bar_quirk_teardown(vdev, nr);
-
-    vfio_region_exit(&bar->region);
-}
-
-static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
-{
-    VFIOBAR *bar = &vdev->bars[nr];
-
-    if (!bar->region.size) {
-        return;
-    }
-
-    vfio_bar_quirk_free(vdev, nr);
-
-    vfio_region_finalize(&bar->region);
-}
-
-static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
-{
-    VFIOBAR *bar = &vdev->bars[nr];
-    uint64_t size = bar->region.size;
     uint32_t pci_bar;
     uint8_t type;
     int ret;
 
     /* Skip both unimplemented BARs and the upper half of 64bit BARS. */
-    if (!size) {
+    if (!bar->region.size) {
         return;
     }
 
@@ -1407,72 +1381,78 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
     type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK :
                                     ~PCI_BASE_ADDRESS_MEM_MASK);
 
-    pci_register_bar(&vdev->pdev, nr, type, bar->region.mem);
-
     if (vfio_region_mmap(&bar->region)) {
         error_report("Failed to mmap %s BAR %d. Performance may be slow",
                      vdev->vbasedev.name, nr);
     }
 
     vfio_bar_quirk_setup(vdev, nr);
+
+    pci_register_bar(&vdev->pdev, nr, type, bar->region.mem);
 }
 
-static void vfio_map_bars(VFIOPCIDevice *vdev)
+static void vfio_bars_setup(VFIOPCIDevice *vdev)
 {
     int i;
 
     for (i = 0; i < PCI_ROM_SLOT; i++) {
-        vfio_map_bar(vdev, i);
+        vfio_bar_setup(vdev, i);
     }
 
-    if (vdev->has_vga) {
-        memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_MEM].mem,
+    if (vdev->vga) {
+        memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
                               OBJECT(vdev), &vfio_vga_ops,
-                              &vdev->vga.region[QEMU_PCI_VGA_MEM],
+                              &vdev->vga->region[QEMU_PCI_VGA_MEM],
                               "vfio-vga-mmio@0xa0000",
                               QEMU_PCI_VGA_MEM_SIZE);
-        memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_IO_LO].mem,
+        memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
                               OBJECT(vdev), &vfio_vga_ops,
-                              &vdev->vga.region[QEMU_PCI_VGA_IO_LO],
+                              &vdev->vga->region[QEMU_PCI_VGA_IO_LO],
                               "vfio-vga-io@0x3b0",
                               QEMU_PCI_VGA_IO_LO_SIZE);
-        memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
+        memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
                               OBJECT(vdev), &vfio_vga_ops,
-                              &vdev->vga.region[QEMU_PCI_VGA_IO_HI],
+                              &vdev->vga->region[QEMU_PCI_VGA_IO_HI],
                               "vfio-vga-io@0x3c0",
                               QEMU_PCI_VGA_IO_HI_SIZE);
 
-        pci_register_vga(&vdev->pdev, &vdev->vga.region[QEMU_PCI_VGA_MEM].mem,
-                         &vdev->vga.region[QEMU_PCI_VGA_IO_LO].mem,
-                         &vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem);
+        pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
+                         &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
+                         &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem);
         vfio_vga_quirk_setup(vdev);
     }
 }
 
-static void vfio_unregister_bars(VFIOPCIDevice *vdev)
+static void vfio_bars_exit(VFIOPCIDevice *vdev)
 {
     int i;
 
     for (i = 0; i < PCI_ROM_SLOT; i++) {
-        vfio_unregister_bar(vdev, i);
+        vfio_bar_quirk_exit(vdev, i);
+        vfio_region_exit(&vdev->bars[i].region);
     }
 
-    if (vdev->has_vga) {
-        vfio_vga_quirk_teardown(vdev);
+    if (vdev->vga) {
         pci_unregister_vga(&vdev->pdev);
+        vfio_vga_quirk_exit(vdev);
     }
 }
 
-static void vfio_unmap_bars(VFIOPCIDevice *vdev)
+static void vfio_bars_finalize(VFIOPCIDevice *vdev)
 {
     int i;
 
     for (i = 0; i < PCI_ROM_SLOT; i++) {
-        vfio_unmap_bar(vdev, i);
+        vfio_bar_quirk_finalize(vdev, i);
+        vfio_region_finalize(&vdev->bars[i].region);
     }
 
-    if (vdev->has_vga) {
-        vfio_vga_quirk_free(vdev);
+    if (vdev->vga) {
+        vfio_vga_quirk_finalize(vdev);
+        for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
+            object_unparent(OBJECT(&vdev->vga->region[i].mem));
+        }
+        g_free(vdev->vga);
     }
 }
 
@@ -2099,24 +2079,24 @@ static int vfio_populate_device(VFIOPCIDevice *vdev)
             goto error;
         }
 
-        vdev->vga.fd_offset = reg_info->offset;
-        vdev->vga.fd = vdev->vbasedev.fd;
+        vdev->vga = g_new0(VFIOVGA, 1);
 
-        g_free(reg_info);
+        vdev->vga->fd_offset = reg_info->offset;
+        vdev->vga->fd = vdev->vbasedev.fd;
 
-        vdev->vga.region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE;
-        vdev->vga.region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM;
-        QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_MEM].quirks);
+        g_free(reg_info);
 
-        vdev->vga.region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE;
-        vdev->vga.region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO;
-        QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_IO_LO].quirks);
+        vdev->vga->region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE;
+        vdev->vga->region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM;
+        QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_MEM].quirks);
 
-        vdev->vga.region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE;
-        vdev->vga.region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI;
-        QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks);
+        vdev->vga->region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE;
+        vdev->vga->region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO;
+        QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].quirks);
 
-        vdev->has_vga = true;
+        vdev->vga->region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE;
+        vdev->vga->region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI;
+        QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks);
     }
 
     irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
@@ -2508,7 +2488,7 @@ static int vfio_initfn(PCIDevice *pdev)
         return ret;
     }
 
-    vfio_map_bars(vdev);
+    vfio_bars_setup(vdev);
 
     ret = vfio_add_capabilities(vdev);
     if (ret) {
@@ -2545,7 +2525,7 @@ static int vfio_initfn(PCIDevice *pdev)
 out_teardown:
     pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
     vfio_teardown_msi(vdev);
-    vfio_unregister_bars(vdev);
+    vfio_bars_exit(vdev);
     return ret;
 }
 
@@ -2555,7 +2535,7 @@ static void vfio_instance_finalize(Object *obj)
     VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pci_dev);
     VFIOGroup *group = vdev->vbasedev.group;
 
-    vfio_unmap_bars(vdev);
+    vfio_bars_finalize(vdev);
     g_free(vdev->emulated_config_bits);
     g_free(vdev->rom);
     vfio_put_device(vdev);
@@ -2574,7 +2554,7 @@ static void vfio_exitfn(PCIDevice *pdev)
         timer_free(vdev->intx.mmap_timer);
     }
     vfio_teardown_msi(vdev);
-    vfio_unregister_bars(vdev);
+    vfio_bars_exit(vdev);
 }
 
 static void vfio_pci_reset(DeviceState *dev)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index f004d52..aea8f91 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -113,7 +113,7 @@ typedef struct VFIOPCIDevice {
     int nr_vectors; /* Number of MSI/MSIX vectors currently in use */
     int interrupt; /* Current interrupt type */
     VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
-    VFIOVGA vga; /* 0xa0000, 0x3b0, 0x3c0 */
+    VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
     PCIHostDeviceAddress host;
     EventNotifier err_notifier;
     EventNotifier req_notifier;
@@ -149,11 +149,11 @@ void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size);
 
 bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev);
 void vfio_vga_quirk_setup(VFIOPCIDevice *vdev);
-void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev);
-void vfio_vga_quirk_free(VFIOPCIDevice *vdev);
+void vfio_vga_quirk_exit(VFIOPCIDevice *vdev);
+void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev);
 void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr);
-void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr);
-void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr);
+void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr);
+void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr);
 void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev);
 
 #endif /* HW_VFIO_VFIO_PCI_H */

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Qemu-devel] [RFC post-2.5 PATCH 4/5] linux-headers/vfio: Update for proposed capabilities list
  2015-11-23 21:13 [Qemu-devel] [RFC post-2.5 PATCH 0/5] VFIO: capability chains Alex Williamson
                   ` (2 preceding siblings ...)
  2015-11-23 21:14 ` [Qemu-devel] [RFC post-2.5 PATCH 3/5] vfio/pci: Convert all MemoryRegion to dynamic alloc and consistent functions Alex Williamson
@ 2015-11-23 21:14 ` Alex Williamson
  2015-11-23 21:14 ` [Qemu-devel] [RFC post-2.5 PATCH 5/5] vfio: Enable sparse mmap capability Alex Williamson
  4 siblings, 0 replies; 6+ messages in thread
From: Alex Williamson @ 2015-11-23 21:14 UTC (permalink / raw)
  To: alex.williamson; +Cc: qemu-devel, kvm

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 linux-headers/linux/vfio.h |   53 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)

diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index aa276bc..c3860f6 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -52,6 +52,33 @@
 #define VFIO_TYPE	(';')
 #define VFIO_BASE	100
 
+/*
+ * For extension of INFO ioctls, VFIO makes use of a capability chain
+ * designed after PCI/e capabilities.  A flag bit indicates whether
+ * this capability chain is supported and a field defined in the fixed
+ * structure defines the offset of the first capability in the chain.
+ * This field is only valid when the corresponding bit in the flags
+ * bitmap is set.  This offset field is relative to the start of the
+ * INFO buffer, as is the next field within each capability header.
+ * The id within the header is a shared address space per INFO ioctl,
+ * while the version field is specific to the capability id.  The
+ * contents following the header are specific to the capability id.
+ */
+struct vfio_info_cap_header {
+	__u16	id;		/* Identifies capability */
+	__u16	version;	/* Version specific to the capability ID */
+	__u32	next;		/* Offset of next capability */
+};
+
+/*
+ * Callers of INFO ioctls passing insufficiently sized buffers will see
+ * the capability chain flag bit set, a zero value for the first capability
+ * offset (if available within the provided argsz), and argsz will be
+ * updated to report the necessary buffer size.  For compatibility, the
+ * INFO ioctl will not report error in this case, but the capability chain
+ * will not be available.
+ */
+
 /* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */
 
 /**
@@ -187,13 +214,37 @@ struct vfio_region_info {
 #define VFIO_REGION_INFO_FLAG_READ	(1 << 0) /* Region supports read */
 #define VFIO_REGION_INFO_FLAG_WRITE	(1 << 1) /* Region supports write */
 #define VFIO_REGION_INFO_FLAG_MMAP	(1 << 2) /* Region supports mmap */
+#define VFIO_REGION_INFO_FLAG_CAPS	(1 << 3) /* Info supports caps */
 	__u32	index;		/* Region index */
-	__u32	resv;		/* Reserved for alignment */
+	__u32	cap_offset;	/* Offset within info struct of first cap */
 	__u64	size;		/* Region size (bytes) */
 	__u64	offset;		/* Region offset from start of device fd */
 };
 #define VFIO_DEVICE_GET_REGION_INFO	_IO(VFIO_TYPE, VFIO_BASE + 8)
 
+/*
+ * The sparse mmap capability allows finer granularity of specifying areas
+ * within a region with mmap support.  When specified, the user should only
+ * mmap the offset ranges specified by the areas array.  mmaps outside of the
+ * areas specified may fail (such as the range covering a PCI MSI-X table) or
+ * may result in improper device behavior.
+ *
+ * The structures below define version 1 of this capability.
+ */
+#define VFIO_REGION_INFO_CAP_SPARSE_MMAP	1
+
+struct vfio_region_sparse_mmap_area {
+	__u64	offset;	/* Offset of mmap'able area within region */
+	__u64	size;	/* Size of mmap'able area */
+};
+
+struct vfio_region_info_cap_sparse_mmap {
+	struct vfio_info_cap_header header;
+	__u32	nr_areas;
+	__u32	reserved;
+	struct vfio_region_sparse_mmap_area areas[];
+};
+
 /**
  * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
  *				    struct vfio_irq_info)

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Qemu-devel] [RFC post-2.5 PATCH 5/5] vfio: Enable sparse mmap capability
  2015-11-23 21:13 [Qemu-devel] [RFC post-2.5 PATCH 0/5] VFIO: capability chains Alex Williamson
                   ` (3 preceding siblings ...)
  2015-11-23 21:14 ` [Qemu-devel] [RFC post-2.5 PATCH 4/5] linux-headers/vfio: Update for proposed capabilities list Alex Williamson
@ 2015-11-23 21:14 ` Alex Williamson
  4 siblings, 0 replies; 6+ messages in thread
From: Alex Williamson @ 2015-11-23 21:14 UTC (permalink / raw)
  To: alex.williamson; +Cc: qemu-devel, kvm

The sparse mmap capability in a vfio region info allows vfio to tell
us which sub-areas of a region may be mmap'd.  Thus rather than
assuming a single mmap covers the entire region and later frobbing it
ourselves for things like the PCI MSI-X vector table, we can read that
directly from vfio.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 hw/vfio/common.c |   66 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 trace-events     |    2 ++
 2 files changed, 64 insertions(+), 4 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index a73c6ad..3b7cf23 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -492,6 +492,52 @@ static void vfio_listener_release(VFIOContainer *container)
     memory_listener_unregister(&container->listener);
 }
 
+static struct vfio_info_cap_header *
+vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
+{
+    struct vfio_info_cap_header *header;
+
+    if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) {
+        return NULL;
+    }
+
+    for (header = (void *)info + info->cap_offset;
+         (void *)header > (void *)info; header = (void *)info + header->next) {
+        if (header->id == id) {
+            return header;
+        }
+    }
+
+    return NULL;
+}
+
+static void vfio_setup_region_sparse_mmaps(VFIORegion *region,
+                                           struct vfio_region_info *info)
+{
+    struct vfio_region_info_cap_sparse_mmap *sparse;
+    int i;
+
+    sparse = (void *)vfio_get_region_info_cap(info,
+                                              VFIO_REGION_INFO_CAP_SPARSE_MMAP);
+    if (!sparse) {
+        return;
+    }
+
+    trace_vfio_region_sparse_mmap_header(region->vbasedev->name,
+                                         region->nr, sparse->nr_areas);
+
+    region->nr_mmaps = sparse->nr_areas;
+    region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
+
+    for (i = 0; i < region->nr_mmaps; i++) {
+        region->mmaps[i].offset = sparse->areas[i].offset;
+        region->mmaps[i].size = sparse->areas[i].size;
+        trace_vfio_region_sparse_mmap_entry(i, region->mmaps[i].offset,
+                                            region->mmaps[i].offset +
+                                            region->mmaps[i].size);
+    }
+}
+
 int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
                       int index, const char *name)
 {
@@ -515,11 +561,15 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
                               region, name, region->size);
 
         if (!vbasedev->no_mmap && region->flags & VFIO_REGION_INFO_FLAG_MMAP) {
-            region->nr_mmaps = 1;
-            region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
+            vfio_setup_region_sparse_mmaps(region, info);
+
+            if (!region->nr_mmaps) {
+                region->nr_mmaps = 1;
+                region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
 
-            region->mmaps[0].offset = 0;
-            region->mmaps[0].size = region->size;
+                region->mmaps[0].offset = 0;
+                region->mmaps[0].size = region->size;
+            }
         }
     }
 
@@ -1079,6 +1129,7 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index,
     *info = g_malloc0(argsz);
 
     (*info)->index = index;
+retry:
     (*info)->argsz = argsz;
 
     if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
@@ -1086,6 +1137,13 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index,
         return -errno;
     }
 
+    if ((*info)->argsz > argsz) {
+        argsz = (*info)->argsz;
+        *info = g_realloc(*info, argsz);
+
+        goto retry;
+    }
+
     return 0;
 }
 
diff --git a/trace-events b/trace-events
index 0128680..9c6c12b 100644
--- a/trace-events
+++ b/trace-events
@@ -1708,6 +1708,8 @@ vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) "Reg
 vfio_region_exit(const char *name, int index) "Device %s, region %d"
 vfio_region_finalize(const char *name, int index) "Device %s, region %d"
 vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s mmaps enabled: %d"
+vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries"
+vfio_region_sparse_mmap_entry(int i, off_t start, off_t end) "sparse entry %d [0x%lx - 0x%lx]"
 
 # hw/vfio/platform.c
 vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d"

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2015-11-23 21:14 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-11-23 21:13 [Qemu-devel] [RFC post-2.5 PATCH 0/5] VFIO: capability chains Alex Williamson
2015-11-23 21:13 ` [Qemu-devel] [RFC post-2.5 PATCH 1/5] vfio: Wrap VFIO_DEVICE_GET_REGION_INFO Alex Williamson
2015-11-23 21:13 ` [Qemu-devel] [RFC post-2.5 PATCH 2/5] vfio: Generalize region support Alex Williamson
2015-11-23 21:14 ` [Qemu-devel] [RFC post-2.5 PATCH 3/5] vfio/pci: Convert all MemoryRegion to dynamic alloc and consistent functions Alex Williamson
2015-11-23 21:14 ` [Qemu-devel] [RFC post-2.5 PATCH 4/5] linux-headers/vfio: Update for proposed capabilities list Alex Williamson
2015-11-23 21:14 ` [Qemu-devel] [RFC post-2.5 PATCH 5/5] vfio: Enable sparse mmap capability Alex Williamson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).