qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Steve Sistare <steven.sistare@oracle.com>
To: qemu-devel@nongnu.org
Cc: Alex Williamson <alex.williamson@redhat.com>,
	Cedric Le Goater <clg@redhat.com>, Yi Liu <yi.l.liu@intel.com>,
	Eric Auger <eric.auger@redhat.com>,
	Zhenzhong Duan <zhenzhong.duan@intel.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
	Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>,
	Steve Sistare <steven.sistare@oracle.com>
Subject: [PATCH V2 09/45] vfio/container: preserve descriptors
Date: Fri, 14 Feb 2025 06:13:51 -0800	[thread overview]
Message-ID: <1739542467-226739-10-git-send-email-steven.sistare@oracle.com> (raw)
In-Reply-To: <1739542467-226739-1-git-send-email-steven.sistare@oracle.com>

At vfio creation time, save the values of the vfio container, group, and
device descriptors in CPR state.  On QEMU restart, vfio_realize() finds and
uses the saved descriptors, and records the reused status for use by
subsequent patches.  The reused status is cleared when vmstate load
finishes.

During reuse, device and IOMMU state are already configured, so operations
in vfio_realize() that would modify the configuration, such as vfio ioctls,
are skipped.  The result is that vfio_realize() constructs QEMU data
structures that reflect the current state of the device.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 hw/vfio/container.c           | 57 +++++++++++++++++++++++++++++++++++++------
 hw/vfio/cpr-legacy.c          | 45 ++++++++++++++++++++++++++++++++++
 include/hw/vfio/vfio-common.h |  1 +
 include/hw/vfio/vfio-cpr.h    |  9 +++++++
 4 files changed, 104 insertions(+), 8 deletions(-)

diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index eca3362..21f2706 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -31,6 +31,8 @@
 #include "system/reset.h"
 #include "trace.h"
 #include "qapi/error.h"
+#include "migration/cpr.h"
+#include "migration/blocker.h"
 #include "pci.h"
 
 VFIOGroupList vfio_group_list =
@@ -413,7 +415,7 @@ static bool vfio_set_iommu(int container_fd, int group_fd,
 }
 
 static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group,
-                                            Error **errp)
+                                            bool cpr_reused, Error **errp)
 {
     int iommu_type;
     const char *vioc_name;
@@ -424,7 +426,11 @@ static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group,
         return NULL;
     }
 
-    if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
+    /*
+     * If container is reused, just set its type and skip the ioctls, as the
+     * container and group are already configured in the kernel.
+     */
+    if (!cpr_reused && !vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
         return NULL;
     }
 
@@ -432,6 +438,7 @@ static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group,
 
     container = VFIO_IOMMU_LEGACY(object_new(vioc_name));
     container->fd = fd;
+    container->cpr.reused = cpr_reused;
     container->iommu_type = iommu_type;
     return container;
 }
@@ -591,6 +598,7 @@ static bool vfio_container_group_add(VFIOContainer *container, VFIOGroup *group,
     group->container = container;
     QLIST_INSERT_HEAD(&container->group_list, group, container_next);
     vfio_kvm_device_add_group(group);
+    cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd);
     return true;
 }
 
@@ -600,6 +608,7 @@ static void vfio_container_group_del(VFIOContainer *container, VFIOGroup *group)
     group->container = NULL;
     vfio_kvm_device_del_group(group);
     vfio_ram_block_discard_disable(container, false);
+    cpr_delete_fd("vfio_container_for_group", group->groupid);
 }
 
 static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as,
@@ -612,17 +621,37 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as,
     VFIOIOMMUClass *vioc = NULL;
     bool new_container = false;
     bool group_was_added = false;
+    bool cpr_reused;
 
     space = vfio_get_address_space(as);
+    fd = cpr_find_fd("vfio_container_for_group", group->groupid);
+    cpr_reused = (fd > 0);
+
+    /*
+     * If the container is reused, then the group is already attached in the
+     * kernel.  If a container with matching fd is found, then update the
+     * userland group list and return.  If not, then after the loop, create
+     * the container struct and group list.
+     */
 
     QLIST_FOREACH(bcontainer, &space->containers, next) {
         container = container_of(bcontainer, VFIOContainer, bcontainer);
-        if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
-            return vfio_container_group_add(container, group, errp);
+
+        if (cpr_reused) {
+            if (!vfio_cpr_container_match(container, group, &fd)) {
+                continue;
+            }
+        } else if (ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
+            continue;
         }
+
+        return vfio_container_group_add(container, group, errp);
+    }
+
+    if (!cpr_reused) {
+        fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
     }
 
-    fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
     if (fd < 0) {
         goto fail;
     }
@@ -634,7 +663,7 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as,
         goto fail;
     }
 
-    container = vfio_create_container(fd, group, errp);
+    container = vfio_create_container(fd, group, cpr_reused, errp);
     if (!container) {
         goto fail;
     }
@@ -700,6 +729,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
 
     QLIST_REMOVE(group, container_next);
     group->container = NULL;
+    cpr_delete_fd("vfio_container_for_group", group->groupid);
 
     /*
      * Explicitly release the listener first before unset container,
@@ -753,7 +783,7 @@ static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp)
     group = g_malloc0(sizeof(*group));
 
     snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
-    group->fd = qemu_open(path, O_RDWR, errp);
+    group->fd = cpr_open_fd(path, O_RDWR, "vfio_group", groupid, NULL, errp);
     if (group->fd < 0) {
         goto free_group_exit;
     }
@@ -785,6 +815,7 @@ static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp)
     return group;
 
 close_fd_exit:
+    cpr_delete_fd("vfio_group", groupid);
     close(group->fd);
 
 free_group_exit:
@@ -806,6 +837,7 @@ static void vfio_put_group(VFIOGroup *group)
     vfio_disconnect_container(group);
     QLIST_REMOVE(group, next);
     trace_vfio_put_group(group->fd);
+    cpr_delete_fd("vfio_group", group->groupid);
     close(group->fd);
     g_free(group);
 }
@@ -815,8 +847,14 @@ static bool vfio_get_device(VFIOGroup *group, const char *name,
 {
     g_autofree struct vfio_device_info *info = NULL;
     int fd;
+    bool cpr_reused;
+
+    fd = cpr_find_fd(name, 0);
+    cpr_reused = (fd >= 0);
+    if (!cpr_reused) {
+        fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
+    }
 
-    fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
     if (fd < 0) {
         error_setg_errno(errp, errno, "error getting device from group %d",
                          group->groupid);
@@ -861,6 +899,8 @@ static bool vfio_get_device(VFIOGroup *group, const char *name,
     vbasedev->num_irqs = info->num_irqs;
     vbasedev->num_regions = info->num_regions;
     vbasedev->flags = info->flags;
+    vbasedev->cpr.reused = cpr_reused;
+    cpr_resave_fd(name, 0, fd);
 
     trace_vfio_get_device(name, info->flags, info->num_regions, info->num_irqs);
 
@@ -877,6 +917,7 @@ static void vfio_put_base_device(VFIODevice *vbasedev)
     QLIST_REMOVE(vbasedev, next);
     vbasedev->group = NULL;
     trace_vfio_put_base_device(vbasedev->fd);
+    cpr_delete_fd(vbasedev->name, 0);
     close(vbasedev->fd);
 }
 
diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c
index d0557af..cee0f4e 100644
--- a/hw/vfio/cpr-legacy.c
+++ b/hw/vfio/cpr-legacy.c
@@ -30,10 +30,27 @@ static bool vfio_cpr_supported(VFIOContainer *container, Error **errp)
     }
 }
 
+static int vfio_container_post_load(void *opaque, int version_id)
+{
+    VFIOContainer *container = opaque;
+    VFIOGroup *group;
+    VFIODevice *vbasedev;
+
+    container->cpr.reused = false;
+
+    QLIST_FOREACH(group, &container->group_list, container_next) {
+        QLIST_FOREACH(vbasedev, &group->device_list, next) {
+            vbasedev->cpr.reused = false;
+        }
+    }
+    return 0;
+}
+
 static const VMStateDescription vfio_container_vmstate = {
     .name = "vfio-container",
     .version_id = 0,
     .minimum_version_id = 0,
+    .post_load = vfio_container_post_load,
     .needed = cpr_needed_for_reuse,
     .fields = (VMStateField[]) {
         VMSTATE_END_OF_LIST()
@@ -67,3 +84,31 @@ void vfio_legacy_cpr_unregister_container(VFIOContainer *container)
     migrate_del_blocker(&container->cpr.blocker);
     vmstate_unregister(NULL, &vfio_container_vmstate, container);
 }
+
+static bool same_device(int fd1, int fd2)
+{
+    struct stat st1, st2;
+
+    return !fstat(fd1, &st1) && !fstat(fd2, &st2) && st1.st_dev == st2.st_dev;
+}
+
+bool vfio_cpr_container_match(VFIOContainer *container, VFIOGroup *group,
+                              int *pfd)
+{
+    if (container->fd == *pfd) {
+        return true;
+    }
+    if (!same_device(container->fd, *pfd)) {
+        return false;
+    }
+    /*
+     * Same device, different fd.  This occurs when the container fd is
+     * cpr_save'd multiple times, once for each groupid, so SCM_RIGHTS
+     * produces duplicates.  De-dup it.
+     */
+    cpr_delete_fd("vfio_container_for_group", group->groupid);
+    close(*pfd);
+    cpr_save_fd("vfio_container_for_group", group->groupid, container->fd);
+    *pfd = container->fd;
+    return true;
+}
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index c482364..780646e 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -152,6 +152,7 @@ typedef struct VFIODevice {
     IOMMUFDBackend *iommufd;
     VFIOIOASHwpt *hwpt;
     QLIST_ENTRY(VFIODevice) hwpt_next;
+    VFIODeviceCPR cpr;
 } VFIODevice;
 
 struct VFIODeviceOps {
diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
index d4f8346..1a3eee9 100644
--- a/include/hw/vfio/vfio-cpr.h
+++ b/include/hw/vfio/vfio-cpr.h
@@ -12,9 +12,15 @@
 
 typedef struct VFIOContainerCPR {
     Error *blocker;
+    bool reused;
 } VFIOContainerCPR;
 
+typedef struct VFIODeviceCPR {
+    bool reused;
+} VFIODeviceCPR;
+
 struct VFIOContainer;
+struct VFIOGroup;
 
 int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e,
                              Error **errp);
@@ -22,4 +28,7 @@ int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e,
 bool vfio_legacy_cpr_register_container(struct VFIOContainer *container,
                                         Error **errp);
 void vfio_legacy_cpr_unregister_container(struct VFIOContainer *container);
+
+bool vfio_cpr_container_match(struct VFIOContainer *container,
+                              struct VFIOGroup *group, int *fd);
 #endif
-- 
1.8.3.1



  parent reply	other threads:[~2025-02-14 14:15 UTC|newest]

Thread overview: 72+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-14 14:13 [PATCH V2 00/45] Live update: vfio and iommufd Steve Sistare
2025-02-14 14:13 ` [PATCH V2 01/45] MAINTAINERS: Add reviewer for CPR Steve Sistare
2025-02-14 14:53   ` Peter Xu
2025-02-14 20:14     ` Steven Sistare
2025-02-14 14:13 ` [PATCH V2 02/45] migration: cpr helpers Steve Sistare
2025-02-14 16:37   ` Peter Xu
2025-02-14 20:31     ` Steven Sistare
2025-02-18 16:26       ` Peter Xu
2025-02-24 16:51         ` Steven Sistare
2025-02-14 14:13 ` [PATCH V2 03/45] migration: lower handler priority Steve Sistare
2025-02-14 15:58   ` Peter Xu
2025-02-14 14:13 ` [PATCH V2 04/45] vfio: vfio_find_ram_discard_listener Steve Sistare
2025-02-14 14:13 ` [PATCH V2 05/45] vfio/container: ram discard disable helper Steve Sistare
2025-02-17 17:58   ` Cédric Le Goater
2025-02-14 14:13 ` [PATCH V2 06/45] vfio/container: reform vfio_connect_container cleanup Steve Sistare
2025-02-17 18:01   ` Cédric Le Goater
2025-02-14 14:13 ` [PATCH V2 07/45] vfio/container: vfio_container_group_add Steve Sistare
2025-02-17 18:02   ` Cédric Le Goater
2025-02-14 14:13 ` [PATCH V2 08/45] vfio/container: register container for cpr Steve Sistare
2025-02-14 14:13 ` Steve Sistare [this message]
2025-02-14 14:13 ` [PATCH V2 10/45] vfio/container: export vfio_legacy_dma_map Steve Sistare
2025-02-14 14:13 ` [PATCH V2 11/45] vfio/container: discard old DMA vaddr Steve Sistare
2025-02-14 14:13 ` [PATCH V2 12/45] vfio/container: restore " Steve Sistare
2025-02-14 14:13 ` [PATCH V2 13/45] vfio/container: mdev cpr blocker Steve Sistare
2025-02-14 14:13 ` [PATCH V2 14/45] vfio/container: recover from unmap-all-vaddr failure Steve Sistare
2025-02-14 14:13 ` [PATCH V2 15/45] pci: export msix_is_pending Steve Sistare
2025-02-14 14:45   ` Steven Sistare
2025-02-14 14:46     ` Steven Sistare
2025-02-14 14:13 ` [PATCH V2 16/45] pci: skip reset during cpr Steve Sistare
2025-02-14 14:13 ` [PATCH V2 17/45] vfio-pci: " Steve Sistare
2025-02-14 14:14 ` [PATCH V2 18/45] vfio/pci: vfio_vector_init Steve Sistare
2025-02-14 14:14 ` [PATCH V2 19/45] vfio/pci: vfio_notifier_init Steve Sistare
2025-02-14 14:14 ` [PATCH V2 20/45] vfio/pci: pass vector to virq functions Steve Sistare
2025-02-14 14:14 ` [PATCH V2 21/45] vfio/pci: vfio_notifier_init cpr parameters Steve Sistare
2025-02-14 14:14 ` [PATCH V2 22/45] vfio/pci: vfio_notifier_cleanup Steve Sistare
2025-02-14 14:14 ` [PATCH V2 23/45] vfio/pci: export MSI functions Steve Sistare
2025-02-14 14:14 ` [PATCH V2 24/45] vfio-pci: preserve MSI Steve Sistare
2025-02-14 14:14 ` [PATCH V2 25/45] vfio-pci: preserve INTx Steve Sistare
2025-02-14 14:14 ` [PATCH V2 26/45] migration: close kvm after cpr Steve Sistare
2025-02-14 15:51   ` Steven Sistare
2025-02-14 14:14 ` [PATCH V2 27/45] migration: cpr_get_fd_param helper Steve Sistare
2025-02-14 14:14 ` [PATCH V2 28/45] vfio: return mr from vfio_get_xlat_addr Steve Sistare
2025-02-14 14:38   ` Steven Sistare
2025-02-14 16:48   ` Peter Xu
2025-02-14 20:40     ` Steven Sistare
2025-02-14 22:42       ` Peter Xu
2025-02-24 16:50         ` Steven Sistare
2025-02-24 19:20           ` Peter Xu
2025-02-24 19:35             ` Steven Sistare
2025-02-14 14:14 ` [PATCH V2 29/45] vfio: pass ramblock to vfio_container_dma_map Steve Sistare
2025-02-14 14:14 ` [PATCH V2 30/45] backends/iommufd: iommufd_backend_map_file_dma Steve Sistare
2025-02-14 14:14 ` [PATCH V2 31/45] backends/iommufd: change process ioctl Steve Sistare
2025-02-14 14:14 ` [PATCH V2 32/45] physmem: qemu_ram_get_fd_offset Steve Sistare
2025-02-14 14:39   ` Steven Sistare
2025-02-14 16:49   ` Peter Xu
2025-02-14 14:14 ` [PATCH V2 33/45] vfio/iommufd: use IOMMU_IOAS_MAP_FILE Steve Sistare
2025-02-14 14:14 ` [PATCH V2 34/45] vfio/iommufd: export iommufd_cdev_get_info_iova_range Steve Sistare
2025-02-14 14:14 ` [PATCH V2 35/45] vfio/iommufd: define hwpt constructors Steve Sistare
2025-02-14 14:14 ` [PATCH V2 36/45] vfio/iommufd: invariant device name Steve Sistare
2025-02-14 14:14 ` [PATCH V2 37/45] vfio/iommufd: fix cpr register Steve Sistare
2025-02-14 14:14 ` [PATCH V2 38/45] vfio/iommufd: register container for cpr Steve Sistare
2025-02-14 14:14 ` [PATCH V2 39/45] vfio/iommufd: preserve descriptors Steve Sistare
2025-02-14 14:14 ` [PATCH V2 40/45] vfio/iommufd: reconstruct device Steve Sistare
2025-02-14 14:14 ` [PATCH V2 41/45] vfio/iommufd: reconstruct hw_caps Steve Sistare
2025-02-14 14:14 ` [PATCH V2 42/45] vfio/iommufd: reconstruct hwpt Steve Sistare
2025-02-14 14:14 ` [PATCH V2 43/45] vfio/iommufd: change process Steve Sistare
2025-02-14 14:14 ` [PATCH V2 44/45] iommufd: preserve DMA mappings Steve Sistare
2025-02-14 14:14 ` [PATCH V2 45/45] vfio/container: delete old cpr register Steve Sistare
2025-02-14 15:56 ` [PATCH V2 00/45] Live update: vfio and iommufd Steven Sistare
2025-02-14 16:06   ` Peter Xu
2025-02-14 16:20     ` Steven Sistare
2025-02-14 16:48       ` Cédric Le Goater

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1739542467-226739-10-git-send-email-steven.sistare@oracle.com \
    --to=steven.sistare@oracle.com \
    --cc=alex.williamson@redhat.com \
    --cc=clg@redhat.com \
    --cc=eric.auger@redhat.com \
    --cc=farosas@suse.de \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=mst@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=yi.l.liu@intel.com \
    --cc=zhenzhong.duan@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).