From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Cc: Alex Williamson <alex.williamson@redhat.com>
Subject: [Qemu-devel] [PATCH] vfio: free dynamically-allocated data in instance_finalize
Date: Tue, 3 Feb 2015 13:48:25 +0100 [thread overview]
Message-ID: <1422967705-1374-1-git-send-email-pbonzini@redhat.com> (raw)
In order to enable out-of-BQL address space lookup, destruction of
devices needs to be split in two phases.
Unrealize is the first phase; once it complete no new accesses will
be started, but there may still be pending memory accesses can still
be completed.
The second part is freeing the device, which only happens once all memory
accesses are complete. At this point the reference count has dropped to
zero, an RCU grace period must have completed (because the RCU-protected
FlatViews hold a reference to the device via memory_region_ref). This is
when instance_finalize is called.
Freeing data belongs in an instance_finalize callback, because the
dynamically allocated memory can still be used after unrealize by the
pending memory accesses.
In the case of VFIO, the unrealize callback is too early to munmap the
BARs. The munmap must be delayed until memory accesses are complete.
To do this, split vfio_unmap_bars in two. The removal step, now called
vfio_unregister_bars, remains in vfio_exitfn. The reclamation step
is vfio_unmap_bars and is moved to the instance_finalize callback.
Similarly, quirk MemoryRegions have to be removed during
vfio_unregister_bars, but freeing the data structure must be delayed
to vfio_unmap_bars.
Cc: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
This patch is part of the third installment 3 of the RCU work.
Sending it out separately for Alex to review it.
hw/vfio/pci.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 68 insertions(+), 10 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 014a92c..69d4a33 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1997,12 +1997,23 @@ static void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
static void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev)
{
+ VFIOQuirk *quirk;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
+ QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) {
+ memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem);
+ }
+ }
+}
+
+static void vfio_vga_quirk_free(VFIOPCIDevice *vdev)
+{
int i;
for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) {
VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks);
- memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem);
object_unparent(OBJECT(&quirk->mem));
QLIST_REMOVE(quirk, next);
g_free(quirk);
@@ -2023,10 +2034,19 @@ static void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
static void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
{
VFIOBAR *bar = &vdev->bars[nr];
+ VFIOQuirk *quirk;
+
+ QLIST_FOREACH(quirk, &bar->quirks, next) {
+ memory_region_del_subregion(&bar->region.mem, &quirk->mem);
+ }
+}
+
+static void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOBAR *bar = &vdev->bars[nr];
while (!QLIST_EMPTY(&bar->quirks)) {
VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
- memory_region_del_subregion(&bar->region.mem, &quirk->mem);
object_unparent(OBJECT(&quirk->mem));
QLIST_REMOVE(quirk, next);
g_free(quirk);
@@ -2282,7 +2302,7 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled)
}
}
-static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
+static void vfio_unregister_bar(VFIOPCIDevice *vdev, int nr)
{
VFIOBAR *bar = &vdev->bars[nr];
@@ -2293,10 +2313,25 @@ static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
vfio_bar_quirk_teardown(vdev, nr);
memory_region_del_subregion(&bar->region.mem, &bar->region.mmap_mem);
- munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem));
if (vdev->msix && vdev->msix->table_bar == nr) {
memory_region_del_subregion(&bar->region.mem, &vdev->msix->mmap_mem);
+ }
+}
+
+static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOBAR *bar = &vdev->bars[nr];
+
+ if (!bar->region.size) {
+ return;
+ }
+
+ vfio_bar_quirk_free(vdev, nr);
+
+ munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem));
+
+ if (vdev->msix && vdev->msix->table_bar == nr) {
munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem));
}
}
@@ -2413,6 +2448,19 @@ static void vfio_unmap_bars(VFIOPCIDevice *vdev)
}
if (vdev->has_vga) {
+ vfio_vga_quirk_free(vdev);
+ }
+}
+
+static void vfio_unregister_bars(VFIOPCIDevice *vdev)
+{
+ int i;
+
+ for (i = 0; i < PCI_ROM_SLOT; i++) {
+ vfio_unregister_bar(vdev, i);
+ }
+
+ if (vdev->has_vga) {
vfio_vga_quirk_teardown(vdev);
pci_unregister_vga(&vdev->pdev);
}
@@ -3324,6 +3372,7 @@ static int vfio_initfn(PCIDevice *pdev)
out_teardown:
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
vfio_teardown_msi(vdev);
+ vfio_unregister_bars(vdev);
vfio_unmap_bars(vdev);
out_put:
g_free(vdev->emulated_config_bits);
@@ -3332,10 +3381,22 @@ out_put:
return ret;
}
+static void vfio_instance_finalize(Object *obj)
+{
+ PCIDevice *pci_dev = PCI_DEVICE(obj);
+ VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pci_dev);
+ VFIOGroup *group = vdev->vbasedev.group;
+
+ vfio_unmap_bars(vdev);
+ g_free(vdev->emulated_config_bits);
+ g_free(vdev->rom);
+ vfio_put_device(vdev);
+ vfio_put_group(group);
+}
+
static void vfio_exitfn(PCIDevice *pdev)
{
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
- VFIOGroup *group = vdev->vbasedev.group;
vfio_unregister_err_notifier(vdev);
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
@@ -3344,11 +3405,7 @@ static void vfio_exitfn(PCIDevice *pdev)
timer_free(vdev->intx.mmap_timer);
}
vfio_teardown_msi(vdev);
- vfio_unmap_bars(vdev);
- g_free(vdev->emulated_config_bits);
- g_free(vdev->rom);
- vfio_put_device(vdev);
- vfio_put_group(group);
+ vfio_unregister_bars(vdev);
}
static void vfio_pci_reset(DeviceState *dev)
@@ -3436,6 +3493,7 @@ static const TypeInfo vfio_pci_dev_info = {
.instance_size = sizeof(VFIOPCIDevice),
.class_init = vfio_pci_dev_class_init,
.instance_init = vfio_instance_init,
+ .instance_finalize = vfio_instance_finalize,
};
static void register_vfio_pci_dev_type(void)
--
1.8.3.1
next reply other threads:[~2015-02-03 12:48 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-02-03 12:48 Paolo Bonzini [this message]
2015-02-03 15:20 ` [Qemu-devel] [PATCH] vfio: free dynamically-allocated data in instance_finalize Alex Williamson
2015-02-03 16:25 ` Paolo Bonzini
2015-02-03 17:26 ` Alex Williamson
2015-02-03 17:35 ` Alex Williamson
2015-02-03 18:51 ` Paolo Bonzini
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1422967705-1374-1-git-send-email-pbonzini@redhat.com \
--to=pbonzini@redhat.com \
--cc=alex.williamson@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).