From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:40204) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YIcuG-0005sk-Ew for qemu-devel@nongnu.org; Tue, 03 Feb 2015 07:48:41 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1YIcuB-0006pc-D6 for qemu-devel@nongnu.org; Tue, 03 Feb 2015 07:48:40 -0500 Received: from mail-we0-x236.google.com ([2a00:1450:400c:c03::236]:54687) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YIcuB-0006pF-1m for qemu-devel@nongnu.org; Tue, 03 Feb 2015 07:48:35 -0500 Received: by mail-we0-f182.google.com with SMTP id l61so44678676wev.13 for ; Tue, 03 Feb 2015 04:48:34 -0800 (PST) Sender: Paolo Bonzini From: Paolo Bonzini Date: Tue, 3 Feb 2015 13:48:25 +0100 Message-Id: <1422967705-1374-1-git-send-email-pbonzini@redhat.com> Subject: [Qemu-devel] [PATCH] vfio: free dynamically-allocated data in instance_finalize List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: Alex Williamson In order to enable out-of-BQL address space lookup, destruction of devices needs to be split in two phases. Unrealize is the first phase; once it complete no new accesses will be started, but there may still be pending memory accesses can still be completed. The second part is freeing the device, which only happens once all memory accesses are complete. At this point the reference count has dropped to zero, an RCU grace period must have completed (because the RCU-protected FlatViews hold a reference to the device via memory_region_ref). This is when instance_finalize is called. Freeing data belongs in an instance_finalize callback, because the dynamically allocated memory can still be used after unrealize by the pending memory accesses. In the case of VFIO, the unrealize callback is too early to munmap the BARs. The munmap must be delayed until memory accesses are complete. To do this, split vfio_unmap_bars in two. The removal step, now called vfio_unregister_bars, remains in vfio_exitfn. The reclamation step is vfio_unmap_bars and is moved to the instance_finalize callback. Similarly, quirk MemoryRegions have to be removed during vfio_unregister_bars, but freeing the data structure must be delayed to vfio_unmap_bars. Cc: Alex Williamson Signed-off-by: Paolo Bonzini --- This patch is part of the third installment 3 of the RCU work. Sending it out separately for Alex to review it. hw/vfio/pci.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 10 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 014a92c..69d4a33 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1997,12 +1997,23 @@ static void vfio_vga_quirk_setup(VFIOPCIDevice *vdev) static void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev) { + VFIOQuirk *quirk; + int i; + + for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) { + QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) { + memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem); + } + } +} + +static void vfio_vga_quirk_free(VFIOPCIDevice *vdev) +{ int i; for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) { while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) { VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks); - memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem); object_unparent(OBJECT(&quirk->mem)); QLIST_REMOVE(quirk, next); g_free(quirk); @@ -2023,10 +2034,19 @@ static void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) static void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; + VFIOQuirk *quirk; + + QLIST_FOREACH(quirk, &bar->quirks, next) { + memory_region_del_subregion(&bar->region.mem, &quirk->mem); + } +} + +static void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr) +{ + VFIOBAR *bar = &vdev->bars[nr]; while (!QLIST_EMPTY(&bar->quirks)) { VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks); - memory_region_del_subregion(&bar->region.mem, &quirk->mem); object_unparent(OBJECT(&quirk->mem)); QLIST_REMOVE(quirk, next); g_free(quirk); @@ -2282,7 +2302,7 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled) } } -static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr) +static void vfio_unregister_bar(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; @@ -2293,10 +2313,25 @@ static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr) vfio_bar_quirk_teardown(vdev, nr); memory_region_del_subregion(&bar->region.mem, &bar->region.mmap_mem); - munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem)); if (vdev->msix && vdev->msix->table_bar == nr) { memory_region_del_subregion(&bar->region.mem, &vdev->msix->mmap_mem); + } +} + +static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr) +{ + VFIOBAR *bar = &vdev->bars[nr]; + + if (!bar->region.size) { + return; + } + + vfio_bar_quirk_free(vdev, nr); + + munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem)); + + if (vdev->msix && vdev->msix->table_bar == nr) { munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem)); } } @@ -2413,6 +2448,19 @@ static void vfio_unmap_bars(VFIOPCIDevice *vdev) } if (vdev->has_vga) { + vfio_vga_quirk_free(vdev); + } +} + +static void vfio_unregister_bars(VFIOPCIDevice *vdev) +{ + int i; + + for (i = 0; i < PCI_ROM_SLOT; i++) { + vfio_unregister_bar(vdev, i); + } + + if (vdev->has_vga) { vfio_vga_quirk_teardown(vdev); pci_unregister_vga(&vdev->pdev); } @@ -3324,6 +3372,7 @@ static int vfio_initfn(PCIDevice *pdev) out_teardown: pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); vfio_teardown_msi(vdev); + vfio_unregister_bars(vdev); vfio_unmap_bars(vdev); out_put: g_free(vdev->emulated_config_bits); @@ -3332,10 +3381,22 @@ out_put: return ret; } +static void vfio_instance_finalize(Object *obj) +{ + PCIDevice *pci_dev = PCI_DEVICE(obj); + VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pci_dev); + VFIOGroup *group = vdev->vbasedev.group; + + vfio_unmap_bars(vdev); + g_free(vdev->emulated_config_bits); + g_free(vdev->rom); + vfio_put_device(vdev); + vfio_put_group(group); +} + static void vfio_exitfn(PCIDevice *pdev) { VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); - VFIOGroup *group = vdev->vbasedev.group; vfio_unregister_err_notifier(vdev); pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); @@ -3344,11 +3405,7 @@ static void vfio_exitfn(PCIDevice *pdev) timer_free(vdev->intx.mmap_timer); } vfio_teardown_msi(vdev); - vfio_unmap_bars(vdev); - g_free(vdev->emulated_config_bits); - g_free(vdev->rom); - vfio_put_device(vdev); - vfio_put_group(group); + vfio_unregister_bars(vdev); } static void vfio_pci_reset(DeviceState *dev) @@ -3436,6 +3493,7 @@ static const TypeInfo vfio_pci_dev_info = { .instance_size = sizeof(VFIOPCIDevice), .class_init = vfio_pci_dev_class_init, .instance_init = vfio_instance_init, + .instance_finalize = vfio_instance_finalize, }; static void register_vfio_pci_dev_type(void) -- 1.8.3.1