* [Qemu-devel] [PATCH 1/3] vfio: cleanup vfio_get_device error path, remove vfio_populate_device callback
2015-02-06 21:15 [Qemu-devel] [PATCH v3 0/3] vfio: free data and unmap BARs in instance_finalize Paolo Bonzini
@ 2015-02-06 21:15 ` Paolo Bonzini
2015-02-06 21:15 ` [Qemu-devel] [PATCH 2/3] vfio: free dynamically-allocated data in instance_finalize Paolo Bonzini
` (2 subsequent siblings)
3 siblings, 0 replies; 9+ messages in thread
From: Paolo Bonzini @ 2015-02-06 21:15 UTC (permalink / raw)
To: qemu-devel
Now that vfio_put_base_device is called unconditionally at instance_finalize
time, it can be called twice if vfio_populate_device fails. This works
but it is slightly harder to follow.
Change vfio_get_device to not touch the vbasedev struct until it will
definitely succeed, moving the vfio_populate_device call back to vfio-pci.
This way, vfio_put_base_device will only be called once.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/vfio/common.c | 30 ++++++++++++------------------
hw/vfio/pci.c | 11 +++++++----
include/hw/vfio/vfio-common.h | 1 -
3 files changed, 19 insertions(+), 23 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index cf483ff..a4c6fc6 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -867,27 +867,28 @@ int vfio_get_device(VFIOGroup *group, const char *name,
VFIODevice *vbasedev)
{
struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
- int ret;
+ int ret, fd;
- ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
- if (ret < 0) {
+ fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
+ if (fd < 0) {
error_report("vfio: error getting device %s from group %d: %m",
name, group->groupid);
error_printf("Verify all devices in group %d are bound to vfio-<bus> "
"or pci-stub and not already in use\n", group->groupid);
- return ret;
+ return fd;
}
- vbasedev->fd = ret;
- vbasedev->group = group;
- QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
-
- ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_INFO, &dev_info);
+ ret = ioctl(fd, VFIO_DEVICE_GET_INFO, &dev_info);
if (ret) {
error_report("vfio: error getting device info: %m");
- goto error;
+ close(fd);
+ return ret;
}
+ vbasedev->fd = fd;
+ vbasedev->group = group;
+ QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
+
vbasedev->num_irqs = dev_info.num_irqs;
vbasedev->num_regions = dev_info.num_regions;
vbasedev->flags = dev_info.flags;
@@ -896,14 +897,7 @@ int vfio_get_device(VFIOGroup *group, const char *name,
dev_info.num_irqs);
vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET);
-
- ret = vbasedev->ops->vfio_populate_device(vbasedev);
-
-error:
- if (ret) {
- vfio_put_base_device(vbasedev);
- }
- return ret;
+ return 0;
}
void vfio_put_base_device(VFIODevice *vbasedev)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 014a92c..1fd3dae 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -195,7 +195,6 @@ static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
uint32_t val, int len);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
-static int vfio_populate_device(VFIODevice *vbasedev);
/*
* Disabling BAR mmaping can be slow, but toggling it around INTx can
@@ -2934,12 +2933,11 @@ static VFIODeviceOps vfio_pci_ops = {
.vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
.vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
.vfio_eoi = vfio_eoi,
- .vfio_populate_device = vfio_populate_device,
};
-static int vfio_populate_device(VFIODevice *vbasedev)
+static int vfio_populate_device(VFIOPCIDevice *vdev)
{
- VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+ VFIODevice *vbasedev = &vdev->vbasedev;
struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
int i, ret = -1;
@@ -3247,6 +3245,11 @@ static int vfio_initfn(PCIDevice *pdev)
return ret;
}
+ ret = vfio_populate_device(vdev);
+ if (ret) {
+ goto out_put;
+ }
+
/* Get a copy of config space */
ret = pread(vdev->vbasedev.fd, vdev->pdev.config,
MIN(pci_config_size(&vdev->pdev), vdev->config_size),
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 1d5bfe8..5f3679b 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -112,7 +112,6 @@ struct VFIODeviceOps {
void (*vfio_compute_needs_reset)(VFIODevice *vdev);
int (*vfio_hot_reset_multi)(VFIODevice *vdev);
void (*vfio_eoi)(VFIODevice *vdev);
- int (*vfio_populate_device)(VFIODevice *vdev);
};
typedef struct VFIOGroup {
--
1.8.3.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [Qemu-devel] [PATCH 2/3] vfio: free dynamically-allocated data in instance_finalize
2015-02-06 21:15 [Qemu-devel] [PATCH v3 0/3] vfio: free data and unmap BARs in instance_finalize Paolo Bonzini
2015-02-06 21:15 ` [Qemu-devel] [PATCH 1/3] vfio: cleanup vfio_get_device error path, remove vfio_populate_device callback Paolo Bonzini
@ 2015-02-06 21:15 ` Paolo Bonzini
2015-02-06 21:15 ` [Qemu-devel] [PATCH 3/3] vfio: unmap and free BAR " Paolo Bonzini
2015-02-07 1:39 ` [Qemu-devel] [PATCH v3 0/3] vfio: free data and unmap BARs " Alex Williamson
3 siblings, 0 replies; 9+ messages in thread
From: Paolo Bonzini @ 2015-02-06 21:15 UTC (permalink / raw)
To: qemu-devel; +Cc: Alex Williamson
In order to enable out-of-BQL address space lookup, destruction of
devices needs to be split in two phases.
Unrealize is the first phase; once it complete no new accesses will
be started, but there may still be pending memory accesses can still
be completed.
The second part is freeing the device, which only happens once all memory
accesses are complete. At this point the reference count has dropped to
zero, an RCU grace period must have completed (because the RCU-protected
FlatViews hold a reference to the device via memory_region_ref). This is
when instance_finalize is called.
Freeing data belongs in an instance_finalize callback, because the
dynamically allocated memory can still be used after unrealize by the
pending memory accesses.
This starts the process by creating an instance_finalize callback and
freeing most of the dynamically-allocated data in instance_finalize.
Because instance_finalize is also called on error paths or also when
the device is actually not realized, the common code needs some changes
to be ready for this. The error path in vfio_initfn can be simplified too.
Cc: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/vfio/common.c | 5 ++++-
hw/vfio/pci.c | 24 ++++++++++++++----------
2 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index a4c6fc6..a587cd7 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -847,7 +847,7 @@ free_group_exit:
void vfio_put_group(VFIOGroup *group)
{
- if (!QLIST_EMPTY(&group->device_list)) {
+ if (!group || !QLIST_EMPTY(&group->device_list)) {
return;
}
@@ -902,6 +902,9 @@ int vfio_get_device(VFIOGroup *group, const char *name,
void vfio_put_base_device(VFIODevice *vbasedev)
{
+ if (!vbasedev->group) {
+ return;
+ }
QLIST_REMOVE(vbasedev, next);
vbasedev->group = NULL;
trace_vfio_put_base_device(vbasedev->fd);
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 1fd3dae..0e1d229 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3247,7 +3247,7 @@ static int vfio_initfn(PCIDevice *pdev)
ret = vfio_populate_device(vdev);
if (ret) {
- goto out_put;
+ return ret;
}
/* Get a copy of config space */
@@ -3257,7 +3257,7 @@ static int vfio_initfn(PCIDevice *pdev)
if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) {
ret = ret < 0 ? -errno : -EFAULT;
error_report("vfio: Failed to read device config space");
- goto out_put;
+ return ret;
}
/* vfio emulates a lot for us, but some bits need extra love */
@@ -3289,7 +3289,7 @@ static int vfio_initfn(PCIDevice *pdev)
ret = vfio_early_setup_msix(vdev);
if (ret) {
- goto out_put;
+ return ret;
}
vfio_map_bars(vdev);
@@ -3328,17 +3328,24 @@ out_teardown:
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
vfio_teardown_msi(vdev);
vfio_unmap_bars(vdev);
-out_put:
+ return ret;
+}
+
+static void vfio_instance_finalize(Object *obj)
+{
+ PCIDevice *pci_dev = PCI_DEVICE(obj);
+ VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pci_dev);
+ VFIOGroup *group = vdev->vbasedev.group;
+
g_free(vdev->emulated_config_bits);
+ g_free(vdev->rom);
vfio_put_device(vdev);
vfio_put_group(group);
- return ret;
}
static void vfio_exitfn(PCIDevice *pdev)
{
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
- VFIOGroup *group = vdev->vbasedev.group;
vfio_unregister_err_notifier(vdev);
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
@@ -3348,10 +3355,6 @@ static void vfio_exitfn(PCIDevice *pdev)
}
vfio_teardown_msi(vdev);
vfio_unmap_bars(vdev);
- g_free(vdev->emulated_config_bits);
- g_free(vdev->rom);
- vfio_put_device(vdev);
- vfio_put_group(group);
}
static void vfio_pci_reset(DeviceState *dev)
@@ -3439,6 +3442,7 @@ static const TypeInfo vfio_pci_dev_info = {
.instance_size = sizeof(VFIOPCIDevice),
.class_init = vfio_pci_dev_class_init,
.instance_init = vfio_instance_init,
+ .instance_finalize = vfio_instance_finalize,
};
static void register_vfio_pci_dev_type(void)
--
1.8.3.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [Qemu-devel] [PATCH 3/3] vfio: unmap and free BAR data in instance_finalize
2015-02-06 21:15 [Qemu-devel] [PATCH v3 0/3] vfio: free data and unmap BARs in instance_finalize Paolo Bonzini
2015-02-06 21:15 ` [Qemu-devel] [PATCH 1/3] vfio: cleanup vfio_get_device error path, remove vfio_populate_device callback Paolo Bonzini
2015-02-06 21:15 ` [Qemu-devel] [PATCH 2/3] vfio: free dynamically-allocated data in instance_finalize Paolo Bonzini
@ 2015-02-06 21:15 ` Paolo Bonzini
2015-02-07 1:39 ` [Qemu-devel] [PATCH v3 0/3] vfio: free data and unmap BARs " Alex Williamson
3 siblings, 0 replies; 9+ messages in thread
From: Paolo Bonzini @ 2015-02-06 21:15 UTC (permalink / raw)
To: qemu-devel; +Cc: Alex Williamson
In the case of VFIO, the unrealize callback is too early to munmap the
BARs. The munmap must be delayed until memory accesses are complete.
To do this, split vfio_unmap_bars in two. The removal step, now called
vfio_unregister_bars, remains in vfio_exitfn. The reclamation step
is vfio_unmap_bars and is moved to the instance_finalize callback.
Similarly, quirk MemoryRegions have to be removed during
vfio_unregister_bars, but freeing the data structure must be delayed
to vfio_unmap_bars.
Cc: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/vfio/pci.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 57 insertions(+), 8 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 0e1d229..6eb07ed 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1996,12 +1996,23 @@ static void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
static void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev)
{
+ VFIOQuirk *quirk;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
+ QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) {
+ memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem);
+ }
+ }
+}
+
+static void vfio_vga_quirk_free(VFIOPCIDevice *vdev)
+{
int i;
for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) {
VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks);
- memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem);
object_unparent(OBJECT(&quirk->mem));
QLIST_REMOVE(quirk, next);
g_free(quirk);
@@ -2022,10 +2033,19 @@ static void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
static void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
{
VFIOBAR *bar = &vdev->bars[nr];
+ VFIOQuirk *quirk;
+
+ QLIST_FOREACH(quirk, &bar->quirks, next) {
+ memory_region_del_subregion(&bar->region.mem, &quirk->mem);
+ }
+}
+
+static void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOBAR *bar = &vdev->bars[nr];
while (!QLIST_EMPTY(&bar->quirks)) {
VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
- memory_region_del_subregion(&bar->region.mem, &quirk->mem);
object_unparent(OBJECT(&quirk->mem));
QLIST_REMOVE(quirk, next);
g_free(quirk);
@@ -2281,7 +2301,7 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled)
}
}
-static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
+static void vfio_unregister_bar(VFIOPCIDevice *vdev, int nr)
{
VFIOBAR *bar = &vdev->bars[nr];
@@ -2292,10 +2312,25 @@ static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
vfio_bar_quirk_teardown(vdev, nr);
memory_region_del_subregion(&bar->region.mem, &bar->region.mmap_mem);
- munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem));
if (vdev->msix && vdev->msix->table_bar == nr) {
memory_region_del_subregion(&bar->region.mem, &vdev->msix->mmap_mem);
+ }
+}
+
+static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOBAR *bar = &vdev->bars[nr];
+
+ if (!bar->region.size) {
+ return;
+ }
+
+ vfio_bar_quirk_free(vdev, nr);
+
+ munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem));
+
+ if (vdev->msix && vdev->msix->table_bar == nr) {
munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem));
}
}
@@ -2403,12 +2438,12 @@ static void vfio_map_bars(VFIOPCIDevice *vdev)
}
}
-static void vfio_unmap_bars(VFIOPCIDevice *vdev)
+static void vfio_unregister_bars(VFIOPCIDevice *vdev)
{
int i;
for (i = 0; i < PCI_ROM_SLOT; i++) {
- vfio_unmap_bar(vdev, i);
+ vfio_unregister_bar(vdev, i);
}
if (vdev->has_vga) {
@@ -2417,6 +2452,19 @@ static void vfio_unmap_bars(VFIOPCIDevice *vdev)
}
}
+static void vfio_unmap_bars(VFIOPCIDevice *vdev)
+{
+ int i;
+
+ for (i = 0; i < PCI_ROM_SLOT; i++) {
+ vfio_unmap_bar(vdev, i);
+ }
+
+ if (vdev->has_vga) {
+ vfio_vga_quirk_free(vdev);
+ }
+}
+
/*
* General setup
*/
@@ -3327,7 +3375,7 @@ static int vfio_initfn(PCIDevice *pdev)
out_teardown:
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
vfio_teardown_msi(vdev);
- vfio_unmap_bars(vdev);
+ vfio_unregister_bars(vdev);
return ret;
}
@@ -3337,6 +3385,7 @@ static void vfio_instance_finalize(Object *obj)
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pci_dev);
VFIOGroup *group = vdev->vbasedev.group;
+ vfio_unmap_bars(vdev);
g_free(vdev->emulated_config_bits);
g_free(vdev->rom);
vfio_put_device(vdev);
@@ -3354,7 +3403,7 @@ static void vfio_exitfn(PCIDevice *pdev)
timer_free(vdev->intx.mmap_timer);
}
vfio_teardown_msi(vdev);
- vfio_unmap_bars(vdev);
+ vfio_unregister_bars(vdev);
}
static void vfio_pci_reset(DeviceState *dev)
--
1.8.3.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [PATCH v3 0/3] vfio: free data and unmap BARs in instance_finalize
2015-02-06 21:15 [Qemu-devel] [PATCH v3 0/3] vfio: free data and unmap BARs in instance_finalize Paolo Bonzini
` (2 preceding siblings ...)
2015-02-06 21:15 ` [Qemu-devel] [PATCH 3/3] vfio: unmap and free BAR " Paolo Bonzini
@ 2015-02-07 1:39 ` Alex Williamson
2015-02-07 20:00 ` Paolo Bonzini
3 siblings, 1 reply; 9+ messages in thread
From: Alex Williamson @ 2015-02-07 1:39 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On Fri, 2015-02-06 at 22:15 +0100, Paolo Bonzini wrote:
> Mostly the same as v2;
We've got something screwy going on with MemoryListeners, I did some
hotplug testing with this and I hit the following segfault:
Program received signal SIGSEGV, Segmentation fault.
0x00007ff7bd8f7416 in memory_listener_register (listener=0x7ff7bf935e50,
filter=0x7ff7bf935e08)
at /net/gimli/home/alwillia/Work/qemu.git/memory.c:1931
1931 QTAILQ_INSERT_BEFORE(other, listener, link);
Call path is:
#0 0x00007ff7bd8f7416 in memory_listener_register (listener=0x7ff7bf935e50, filter=0x7ff7bf935e08)
at memory.c:1931
#1 0x00007ff7bd8aa400 in address_space_init_dispatch (as=0x7ff7bf935e08)
at exec.c:2059
#2 0x00007ff7bd8f75d4 in address_space_init (as=0x7ff7bf935e08, root=0x7ff7bf935ef0, name=0x7ff7bf71aee0 "vfio-pci")
at memory.c:1954
#3 0x00007ff7bdae5d85 in do_pci_register_device (pci_dev=0x7ff7bf935c00, bus=0x7ff7bf77ce40, name=0x7ff7bf71aee0 "vfio-pci", devfn=64)
at hw/pci/pci.c:837
#4 0x00007ff7bdae7eba in pci_qdev_init (qdev=0x7ff7bf935c00)
at hw/pci/pci.c:1768
#5 0x00007ff7bda67fc6 in device_realize (dev=0x7ff7bf935c00,
errp=0x7fffcee75550) at hw/core/qdev.c:247
#6 0x00007ff7bda6a00a in device_set_realized (obj=0x7ff7bf935c00, value=true,
errp=0x7fffcee75700) at hw/core/qdev.c:1040
#7 0x00007ff7bdb823b7 in property_set_bool (obj=0x7ff7bf935c00, v=0x7ff7bf9375b0, opaque=0x7ff7bf98aee0, name=0x7ff7bdca0869 "realized",
errp=0x7fffcee75700) at qom/object.c:1514
#8 0x00007ff7bdb80cb1 in object_property_set (obj=0x7ff7bf935c00, v=0x7ff7bf9375b0, name=0x7ff7bdca0869 "realized", errp=0x7fffcee75700)
at qom/object.c:837
#9 0x00007ff7bdb82ccf in object_property_set_qobject (obj=0x7ff7bf935c00, value=0x7ff7bfa01680, name=0x7ff7bdca0869 "realized", errp=0x7fffcee75700)
at qom/qom-qobject.c:24
#10 0x00007ff7bdb80f20 in object_property_set_bool (obj=0x7ff7bf935c00, value=true, name=0x7ff7bdca0869 "realized", errp=0x7fffcee75700)
at qom/object.c:905
#11 0x00007ff7bd9d0914 in qdev_device_add (opts=0x7ff7bfa07910)
at qdev-monitor.c:574
#12 0x00007ff7bd9d0f2e in do_device_add (mon=0x7ff7bf7b8130,
Walking through the QTAIL list, we get to these last two entries:
(gdb) p *(MemoryListener *)0x7ff7bfa6a860
$18 = {begin = 0x7ff7bd8aa0ee <mem_begin>,
commit = 0x7ff7bd8aa275 <mem_commit>, region_add = 0x7ff7bd8a7bf6 <mem_add>,
region_del = 0x0, region_nop = 0x7ff7bd8a7bf6 <mem_add>, log_start = 0x0,
log_stop = 0x0, log_sync = 0x0, log_global_start = 0x0,
log_global_stop = 0x0, eventfd_add = 0x0, eventfd_del = 0x0,
coalesced_mmio_add = 0x0, coalesced_mmio_del = 0x0, priority = 0,
address_space_filter = 0x7ff7bfa6a818, link = {tqe_next = 0x7ff7bf937e90,
tqe_prev = 0x7ff7bfa4e380}}
(gdb) p *(MemoryListener *)0x7ff7bf937e90
$19 = {begin = 0x0, commit = 0x0, region_add = 0x0, region_del = 0x0,
region_nop = 0x0, log_start = 0x0, log_stop = 0x0, log_sync = 0x0,
log_global_start = 0x0, log_global_stop = 0x0, eventfd_add = 0x0,
eventfd_del = 0x0, coalesced_mmio_add = 0x0, coalesced_mmio_del = 0x0,
priority = 0, address_space_filter = 0x0, link = {tqe_next = 0x0,
tqe_prev = 0x0}}
So we've got a zero'd MemoryListener that's still on the
memory_listeners list and QTAILQ_INSERT_BEFORE isn't happy touching
*(0x0).
I'm not sure where it's coming from yet, but I did extensive testing for
my last pull request based on ec6f25e because if I updated to d5fbb4c
vfio hotplug broke immediately. I'll keep looking, but I thought I'd
share in case you have some ideas. Thanks,
Alex
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [PATCH v3 0/3] vfio: free data and unmap BARs in instance_finalize
2015-02-07 1:39 ` [Qemu-devel] [PATCH v3 0/3] vfio: free data and unmap BARs " Alex Williamson
@ 2015-02-07 20:00 ` Paolo Bonzini
2015-02-08 17:22 ` Alex Williamson
0 siblings, 1 reply; 9+ messages in thread
From: Paolo Bonzini @ 2015-02-07 20:00 UTC (permalink / raw)
To: Alex Williamson; +Cc: qemu-devel
On 07/02/2015 02:39, Alex Williamson wrote:
> I'm not sure where it's coming from yet, but I did extensive testing for
> my last pull request based on ec6f25e because if I updated to d5fbb4c
> vfio hotplug broke immediately. I'll keep looking, but I thought I'd
> share in case you have some ideas. Thanks,
I'm not sure I understand: d5fbb4c9ed52d97aebe5994d8a857c74c0d95a92 (RCU
merge) is an ancestor of ec6f25e788ef57ce1e9f734984ef8885172fd9e2 (s390
merge) and the only patches in the middle are for s390.
Paolo
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [PATCH v3 0/3] vfio: free data and unmap BARs in instance_finalize
2015-02-07 20:00 ` Paolo Bonzini
@ 2015-02-08 17:22 ` Alex Williamson
2015-02-08 18:55 ` Paolo Bonzini
0 siblings, 1 reply; 9+ messages in thread
From: Alex Williamson @ 2015-02-08 17:22 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On Sat, 2015-02-07 at 21:00 +0100, Paolo Bonzini wrote:
>
> On 07/02/2015 02:39, Alex Williamson wrote:
> > I'm not sure where it's coming from yet, but I did extensive testing for
> > my last pull request based on ec6f25e because if I updated to d5fbb4c
> > vfio hotplug broke immediately. I'll keep looking, but I thought I'd
> > share in case you have some ideas. Thanks,
>
> I'm not sure I understand: d5fbb4c9ed52d97aebe5994d8a857c74c0d95a92 (RCU
> merge) is an ancestor of ec6f25e788ef57ce1e9f734984ef8885172fd9e2 (s390
> merge) and the only patches in the middle are for s390.
Ok, I went back to 83761b9244ad, applied 3a4dbe6aa934 to get the
object_unparent() fix, then applied this series. Everything seems to
work ok. Then I manually applied and bisected the commits that came in
via d5fbb4c9ed52. I land on 374f2981d1f1 as introducing the segfault in
memory_listener_register(). I guess I was mis-remembering where I did
my testing for the last vfio pull request. My tag was based on ec6f25e,
but I remember that I had to test based on a commit before the RCU
merge.
My test is to simply do virsh detach-device, attach-device in a loop for
a vfio assigned VF NIC, 1s delay between ops. It typically fails within
100 to 150 iterations, I call 500 a pass. Thanks,
Alex
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [PATCH v3 0/3] vfio: free data and unmap BARs in instance_finalize
2015-02-08 17:22 ` Alex Williamson
@ 2015-02-08 18:55 ` Paolo Bonzini
0 siblings, 0 replies; 9+ messages in thread
From: Paolo Bonzini @ 2015-02-08 18:55 UTC (permalink / raw)
To: Alex Williamson; +Cc: qemu-devel
On 08/02/2015 18:22, Alex Williamson wrote:
> Ok, I went back to 83761b9244ad, applied 3a4dbe6aa934 to get the
> object_unparent() fix, then applied this series. Everything seems to
> work ok. Then I manually applied and bisected the commits that came in
> via d5fbb4c9ed52. I land on 374f2981d1f1 as introducing the segfault in
> memory_listener_register(). I guess I was mis-remembering where I did
> my testing for the last vfio pull request. My tag was based on ec6f25e,
> but I remember that I had to test based on a commit before the RCU
> merge.
Ouch. memory_listeners needs mutex protection.
Paolo
^ permalink raw reply [flat|nested] 9+ messages in thread
* [Qemu-devel] [PATCH 3/3] vfio: unmap and free BAR data in instance_finalize
2015-02-04 12:11 [Qemu-devel] [PATCH v2 " Paolo Bonzini
@ 2015-02-04 12:11 ` Paolo Bonzini
0 siblings, 0 replies; 9+ messages in thread
From: Paolo Bonzini @ 2015-02-04 12:11 UTC (permalink / raw)
To: qemu-devel; +Cc: Alex Williamson
In the case of VFIO, the unrealize callback is too early to munmap the
BARs. The munmap must be delayed until memory accesses are complete.
To do this, split vfio_unmap_bars in two. The removal step, now called
vfio_unregister_bars, remains in vfio_exitfn. The reclamation step
is vfio_unmap_bars and is moved to the instance_finalize callback.
Similarly, quirk MemoryRegions have to be removed during
vfio_unregister_bars, but freeing the data structure must be delayed
to vfio_unmap_bars.
Cc: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/vfio/pci.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 57 insertions(+), 8 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 0e1d229..6eb07ed 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1996,12 +1996,23 @@ static void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
static void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev)
{
+ VFIOQuirk *quirk;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
+ QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) {
+ memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem);
+ }
+ }
+}
+
+static void vfio_vga_quirk_free(VFIOPCIDevice *vdev)
+{
int i;
for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) {
VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks);
- memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem);
object_unparent(OBJECT(&quirk->mem));
QLIST_REMOVE(quirk, next);
g_free(quirk);
@@ -2022,10 +2033,19 @@ static void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
static void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
{
VFIOBAR *bar = &vdev->bars[nr];
+ VFIOQuirk *quirk;
+
+ QLIST_FOREACH(quirk, &bar->quirks, next) {
+ memory_region_del_subregion(&bar->region.mem, &quirk->mem);
+ }
+}
+
+static void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOBAR *bar = &vdev->bars[nr];
while (!QLIST_EMPTY(&bar->quirks)) {
VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
- memory_region_del_subregion(&bar->region.mem, &quirk->mem);
object_unparent(OBJECT(&quirk->mem));
QLIST_REMOVE(quirk, next);
g_free(quirk);
@@ -2281,7 +2301,7 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled)
}
}
-static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
+static void vfio_unregister_bar(VFIOPCIDevice *vdev, int nr)
{
VFIOBAR *bar = &vdev->bars[nr];
@@ -2292,10 +2312,25 @@ static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
vfio_bar_quirk_teardown(vdev, nr);
memory_region_del_subregion(&bar->region.mem, &bar->region.mmap_mem);
- munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem));
if (vdev->msix && vdev->msix->table_bar == nr) {
memory_region_del_subregion(&bar->region.mem, &vdev->msix->mmap_mem);
+ }
+}
+
+static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOBAR *bar = &vdev->bars[nr];
+
+ if (!bar->region.size) {
+ return;
+ }
+
+ vfio_bar_quirk_free(vdev, nr);
+
+ munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem));
+
+ if (vdev->msix && vdev->msix->table_bar == nr) {
munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem));
}
}
@@ -2403,12 +2438,12 @@ static void vfio_map_bars(VFIOPCIDevice *vdev)
}
}
-static void vfio_unmap_bars(VFIOPCIDevice *vdev)
+static void vfio_unregister_bars(VFIOPCIDevice *vdev)
{
int i;
for (i = 0; i < PCI_ROM_SLOT; i++) {
- vfio_unmap_bar(vdev, i);
+ vfio_unregister_bar(vdev, i);
}
if (vdev->has_vga) {
@@ -2417,6 +2452,19 @@ static void vfio_unmap_bars(VFIOPCIDevice *vdev)
}
}
+static void vfio_unmap_bars(VFIOPCIDevice *vdev)
+{
+ int i;
+
+ for (i = 0; i < PCI_ROM_SLOT; i++) {
+ vfio_unmap_bar(vdev, i);
+ }
+
+ if (vdev->has_vga) {
+ vfio_vga_quirk_free(vdev);
+ }
+}
+
/*
* General setup
*/
@@ -3327,7 +3375,7 @@ static int vfio_initfn(PCIDevice *pdev)
out_teardown:
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
vfio_teardown_msi(vdev);
- vfio_unmap_bars(vdev);
+ vfio_unregister_bars(vdev);
return ret;
}
@@ -3337,6 +3385,7 @@ static void vfio_instance_finalize(Object *obj)
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pci_dev);
VFIOGroup *group = vdev->vbasedev.group;
+ vfio_unmap_bars(vdev);
g_free(vdev->emulated_config_bits);
g_free(vdev->rom);
vfio_put_device(vdev);
@@ -3354,7 +3403,7 @@ static void vfio_exitfn(PCIDevice *pdev)
timer_free(vdev->intx.mmap_timer);
}
vfio_teardown_msi(vdev);
- vfio_unmap_bars(vdev);
+ vfio_unregister_bars(vdev);
}
static void vfio_pci_reset(DeviceState *dev)
--
1.8.3.1
^ permalink raw reply related [flat|nested] 9+ messages in thread