From: Steve Sistare <steven.sistare@oracle.com>
To: qemu-devel@nongnu.org
Cc: Alex Williamson <alex.williamson@redhat.com>,
Cedric Le Goater <clg@redhat.com>, Yi Liu <yi.l.liu@intel.com>,
Eric Auger <eric.auger@redhat.com>,
Zhenzhong Duan <zhenzhong.duan@intel.com>,
"Michael S. Tsirkin" <mst@redhat.com>,
Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>,
Steve Sistare <steven.sistare@oracle.com>
Subject: [PATCH V5 04/38] vfio/container: preserve descriptors
Date: Tue, 10 Jun 2025 08:39:17 -0700 [thread overview]
Message-ID: <1749569991-25171-5-git-send-email-steven.sistare@oracle.com> (raw)
In-Reply-To: <1749569991-25171-1-git-send-email-steven.sistare@oracle.com>
At vfio creation time, save the value of vfio container, group, and device
descriptors in CPR state. On qemu restart, vfio_realize() finds and uses
the saved descriptors.
During reuse, device and iommu state is already configured, so operations
in vfio_realize that would modify the configuration, such as vfio ioctl's,
are skipped. The result is that vfio_realize constructs qemu data
structures that reflect the current state of the device.
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
include/hw/vfio/vfio-cpr.h | 6 +++++
hw/vfio/container.c | 67 +++++++++++++++++++++++++++++++++++-----------
hw/vfio/cpr-legacy.c | 42 +++++++++++++++++++++++++++++
3 files changed, 100 insertions(+), 15 deletions(-)
diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
index d4e0bd5..5a2e5f6 100644
--- a/include/hw/vfio/vfio-cpr.h
+++ b/include/hw/vfio/vfio-cpr.h
@@ -13,6 +13,7 @@
struct VFIOContainer;
struct VFIOContainerBase;
+struct VFIOGroup;
typedef struct VFIOContainerCPR {
Error *blocker;
@@ -30,4 +31,9 @@ bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer,
Error **errp);
void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer);
+int vfio_cpr_group_get_device_fd(int d, const char *name);
+
+bool vfio_cpr_container_match(struct VFIOContainer *container,
+ struct VFIOGroup *group, int fd);
+
#endif /* HW_VFIO_VFIO_CPR_H */
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 93cdf80..5caae4c 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -31,6 +31,8 @@
#include "system/reset.h"
#include "trace.h"
#include "qapi/error.h"
+#include "migration/cpr.h"
+#include "migration/blocker.h"
#include "pci.h"
#include "hw/vfio/vfio-container.h"
#include "vfio-helpers.h"
@@ -425,7 +427,12 @@ static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group,
return NULL;
}
- if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
+ /*
+ * During CPR, just set the container type and skip the ioctls, as the
+ * container and group are already configured in the kernel.
+ */
+ if (!cpr_is_incoming() &&
+ !vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
return NULL;
}
@@ -592,6 +599,11 @@ static bool vfio_container_group_add(VFIOContainer *container, VFIOGroup *group,
group->container = container;
QLIST_INSERT_HEAD(&container->group_list, group, container_next);
vfio_group_add_kvm_device(group);
+ /*
+ * Remember the container fd for each group, so we can attach to the same
+ * container after CPR.
+ */
+ cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd);
return true;
}
@@ -601,6 +613,7 @@ static void vfio_container_group_del(VFIOContainer *container, VFIOGroup *group)
group->container = NULL;
vfio_group_del_kvm_device(group);
vfio_ram_block_discard_disable(container, false);
+ cpr_delete_fd("vfio_container_for_group", group->groupid);
}
static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
@@ -615,17 +628,34 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
bool group_was_added = false;
space = vfio_address_space_get(as);
+ fd = cpr_find_fd("vfio_container_for_group", group->groupid);
- QLIST_FOREACH(bcontainer, &space->containers, next) {
- container = container_of(bcontainer, VFIOContainer, bcontainer);
- if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
- return vfio_container_group_add(container, group, errp);
+ if (!cpr_is_incoming()) {
+ QLIST_FOREACH(bcontainer, &space->containers, next) {
+ container = container_of(bcontainer, VFIOContainer, bcontainer);
+ if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
+ return vfio_container_group_add(container, group, errp);
+ }
}
- }
- fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
- if (fd < 0) {
- goto fail;
+ fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
+ if (fd < 0) {
+ goto fail;
+ }
+ } else {
+ /*
+ * For incoming CPR, the group is already attached in the kernel.
+ * If a container with matching fd is found, then update the
+ * userland group list and return. If not, then after the loop,
+ * create the container struct and group list.
+ */
+ QLIST_FOREACH(bcontainer, &space->containers, next) {
+ container = container_of(bcontainer, VFIOContainer, bcontainer);
+
+ if (vfio_cpr_container_match(container, group, fd)) {
+ return vfio_container_group_add(container, group, errp);
+ }
+ }
}
ret = ioctl(fd, VFIO_GET_API_VERSION);
@@ -697,6 +727,7 @@ static void vfio_container_disconnect(VFIOGroup *group)
QLIST_REMOVE(group, container_next);
group->container = NULL;
+ cpr_delete_fd("vfio_container_for_group", group->groupid);
/*
* Explicitly release the listener first before unset container,
@@ -750,7 +781,7 @@ static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp)
group = g_malloc0(sizeof(*group));
snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
- group->fd = qemu_open(path, O_RDWR, errp);
+ group->fd = cpr_open_fd(path, O_RDWR, "vfio_group", groupid, errp);
if (group->fd < 0) {
goto free_group_exit;
}
@@ -782,6 +813,7 @@ static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp)
return group;
close_fd_exit:
+ cpr_delete_fd("vfio_group", groupid);
close(group->fd);
free_group_exit:
@@ -803,6 +835,7 @@ static void vfio_group_put(VFIOGroup *group)
vfio_container_disconnect(group);
QLIST_REMOVE(group, next);
trace_vfio_group_put(group->fd);
+ cpr_delete_fd("vfio_group", group->groupid);
close(group->fd);
g_free(group);
}
@@ -813,7 +846,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name,
g_autofree struct vfio_device_info *info = NULL;
int fd;
- fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
+ fd = vfio_cpr_group_get_device_fd(group->fd, name);
if (fd < 0) {
error_setg_errno(errp, errno, "error getting device from group %d",
group->groupid);
@@ -826,8 +859,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name,
info = vfio_get_device_info(fd);
if (!info) {
error_setg_errno(errp, errno, "error getting device info");
- close(fd);
- return false;
+ goto fail;
}
/*
@@ -841,8 +873,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name,
if (!QLIST_EMPTY(&group->device_list)) {
error_setg(errp, "Inconsistent setting of support for discarding "
"RAM (e.g., balloon) within group");
- close(fd);
- return false;
+ goto fail;
}
if (!group->ram_block_discard_allowed) {
@@ -860,6 +891,11 @@ static bool vfio_device_get(VFIOGroup *group, const char *name,
trace_vfio_device_get(name, info->flags, info->num_regions, info->num_irqs);
return true;
+
+fail:
+ close(fd);
+ cpr_delete_fd(name, 0);
+ return false;
}
static void vfio_device_put(VFIODevice *vbasedev)
@@ -870,6 +906,7 @@ static void vfio_device_put(VFIODevice *vbasedev)
QLIST_REMOVE(vbasedev, next);
vbasedev->group = NULL;
trace_vfio_device_put(vbasedev->fd);
+ cpr_delete_fd(vbasedev->name, 0);
close(vbasedev->fd);
}
diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c
index dd7ac84..ac4a9ab 100644
--- a/hw/vfio/cpr-legacy.c
+++ b/hw/vfio/cpr-legacy.c
@@ -8,6 +8,7 @@
#include <linux/vfio.h>
#include "qemu/osdep.h"
#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-device.h"
#include "migration/blocker.h"
#include "migration/cpr.h"
#include "migration/migration.h"
@@ -66,3 +67,44 @@ void vfio_legacy_cpr_unregister_container(VFIOContainer *container)
migrate_del_blocker(&container->cpr.blocker);
vmstate_unregister(NULL, &vfio_container_vmstate, container);
}
+
+int vfio_cpr_group_get_device_fd(int d, const char *name)
+{
+ const int id = 0;
+ int fd = cpr_find_fd(name, id);
+
+ if (fd < 0) {
+ fd = ioctl(d, VFIO_GROUP_GET_DEVICE_FD, name);
+ if (fd >= 0) {
+ cpr_save_fd(name, id, fd);
+ }
+ }
+ return fd;
+}
+
+static bool same_device(int fd1, int fd2)
+{
+ struct stat st1, st2;
+
+ return !fstat(fd1, &st1) && !fstat(fd2, &st2) && st1.st_dev == st2.st_dev;
+}
+
+bool vfio_cpr_container_match(VFIOContainer *container, VFIOGroup *group,
+ int fd)
+{
+ if (container->fd == fd) {
+ return true;
+ }
+ if (!same_device(container->fd, fd)) {
+ return false;
+ }
+ /*
+ * Same device, different fd. This occurs when the container fd is
+ * cpr_save'd multiple times, once for each groupid, so SCM_RIGHTS
+ * produces duplicates. De-dup it.
+ */
+ cpr_delete_fd("vfio_container_for_group", group->groupid);
+ close(fd);
+ cpr_save_fd("vfio_container_for_group", group->groupid, container->fd);
+ return true;
+}
--
1.8.3.1
next prev parent reply other threads:[~2025-06-10 16:57 UTC|newest]
Thread overview: 101+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-10 15:39 [PATCH V5 00/38] Live update: vfio and iommufd Steve Sistare
2025-06-10 15:39 ` [PATCH V5 01/38] migration: cpr helpers Steve Sistare
2025-06-10 15:39 ` [PATCH V5 02/38] migration: lower handler priority Steve Sistare
2025-06-10 15:39 ` [PATCH V5 03/38] vfio/container: register container for cpr Steve Sistare
2025-06-10 15:39 ` Steve Sistare [this message]
2025-06-23 9:07 ` [PATCH V5 04/38] vfio/container: preserve descriptors Duan, Zhenzhong
2025-07-01 14:25 ` Steven Sistare
2025-07-02 14:23 ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 05/38] vfio/container: discard old DMA vaddr Steve Sistare
2025-06-10 15:39 ` [PATCH V5 06/38] vfio/container: restore " Steve Sistare
2025-06-10 15:39 ` [PATCH V5 07/38] vfio/container: mdev cpr blocker Steve Sistare
2025-06-10 15:39 ` [PATCH V5 08/38] vfio/container: recover from unmap-all-vaddr failure Steve Sistare
2025-08-13 12:54 ` Cédric Le Goater
2025-08-13 14:18 ` Steven Sistare
2025-06-10 15:39 ` [PATCH V5 09/38] pci: export msix_is_pending Steve Sistare
2025-06-10 15:39 ` [PATCH V5 10/38] pci: skip reset during cpr Steve Sistare
2025-06-10 15:39 ` [PATCH V5 11/38] vfio-pci: " Steve Sistare
2025-06-10 15:39 ` [PATCH V5 12/38] vfio/pci: vfio_pci_vector_init Steve Sistare
2025-06-10 15:39 ` [PATCH V5 13/38] vfio/pci: vfio_notifier_init Steve Sistare
2025-06-10 15:39 ` [PATCH V5 14/38] vfio/pci: pass vector to virq functions Steve Sistare
2025-06-10 15:39 ` [PATCH V5 15/38] vfio/pci: vfio_notifier_init cpr parameters Steve Sistare
2025-06-10 15:39 ` [PATCH V5 16/38] vfio/pci: vfio_notifier_cleanup Steve Sistare
2025-06-10 15:39 ` [PATCH V5 17/38] vfio/pci: export MSI functions Steve Sistare
2025-06-10 15:39 ` [PATCH V5 18/38] vfio-pci: preserve MSI Steve Sistare
2025-07-01 16:12 ` Steven Sistare
2025-07-02 7:17 ` Cédric Le Goater
2025-07-02 12:03 ` Steven Sistare
2025-07-02 15:35 ` Cédric Le Goater
2025-07-02 16:40 ` Steven Sistare
2025-06-10 15:39 ` [PATCH V5 19/38] vfio-pci: preserve INTx Steve Sistare
2025-07-02 15:23 ` Cédric Le Goater
2025-07-02 17:54 ` Steven Sistare
2025-06-10 15:39 ` [PATCH V5 20/38] migration: close kvm after cpr Steve Sistare
2025-07-01 15:25 ` Steven Sistare
2025-07-02 16:02 ` Peter Xu
2025-07-02 19:41 ` Steven Sistare
2025-07-03 19:45 ` Peter Xu
2025-07-03 21:21 ` Cédric Le Goater
2025-07-03 21:58 ` Peter Xu
2025-07-07 13:13 ` Steven Sistare
2025-07-01 17:49 ` Fabiano Rosas
2025-06-10 15:39 ` [PATCH V5 21/38] migration: cpr_get_fd_param helper Steve Sistare
2025-06-10 15:39 ` [PATCH V5 22/38] backends/iommufd: iommufd_backend_map_file_dma Steve Sistare
2025-06-10 15:39 ` [PATCH V5 23/38] backends/iommufd: change process ioctl Steve Sistare
2025-06-11 12:38 ` Cédric Le Goater
2025-06-23 8:20 ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 24/38] physmem: qemu_ram_get_fd_offset Steve Sistare
2025-06-10 15:39 ` [PATCH V5 25/38] vfio/iommufd: use IOMMU_IOAS_MAP_FILE Steve Sistare
2025-06-10 15:39 ` [PATCH V5 26/38] vfio/iommufd: invariant device name Steve Sistare
2025-06-23 8:25 ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 27/38] vfio/iommufd: add vfio_device_free_name Steve Sistare
2025-06-11 12:38 ` Cédric Le Goater
2025-06-23 8:27 ` Duan, Zhenzhong
2025-06-23 13:50 ` Eric Farman
2025-07-01 14:26 ` Steven Sistare
2025-06-10 15:39 ` [PATCH V5 28/38] vfio/iommufd: device name blocker Steve Sistare
2025-06-23 10:29 ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 29/38] vfio/iommufd: register container for cpr Steve Sistare
2025-07-01 14:25 ` Steven Sistare
2025-07-02 14:17 ` Duan, Zhenzhong
2025-07-02 14:52 ` Steven Sistare
2025-06-10 15:39 ` [PATCH V5 30/38] migration: vfio cpr state hook Steve Sistare
2025-06-24 11:24 ` Duan, Zhenzhong
2025-07-01 14:26 ` Steven Sistare
2025-07-02 13:39 ` Duan, Zhenzhong
2025-07-02 15:07 ` Steven Sistare
2025-06-10 15:39 ` [PATCH V5 31/38] vfio/iommufd: cpr state Steve Sistare
2025-06-23 10:45 ` Duan, Zhenzhong
2025-07-01 14:26 ` Steven Sistare
2025-07-02 13:44 ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 32/38] vfio/iommufd: preserve descriptors Steve Sistare
2025-06-25 11:40 ` Duan, Zhenzhong
2025-07-01 14:26 ` Steven Sistare
2025-07-02 14:08 ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 33/38] vfio/iommufd: reconstruct device Steve Sistare
2025-06-25 11:40 ` Duan, Zhenzhong
2025-07-01 14:26 ` Steven Sistare
2025-07-02 14:14 ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 34/38] vfio/iommufd: reconstruct hwpt Steve Sistare
2025-06-25 11:40 ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 35/38] vfio/iommufd: change process Steve Sistare
2025-06-25 11:40 ` Duan, Zhenzhong
2025-07-01 14:26 ` Steven Sistare
2025-07-02 13:46 ` Duan, Zhenzhong
2025-07-02 20:57 ` Steven Sistare
2025-06-10 15:39 ` [PATCH V5 36/38] iommufd: preserve DMA mappings Steve Sistare
2025-06-25 11:40 ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 37/38] vfio/container: delete old cpr register Steve Sistare
2025-06-25 11:40 ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 38/38] vfio: doc changes for cpr Steve Sistare
2025-07-02 14:03 ` Steven Sistare
2025-07-02 14:49 ` Cédric Le Goater
2025-07-02 17:52 ` Fabiano Rosas
2025-06-10 17:18 ` [PATCH V5 00/38] Live update: vfio and iommufd Cédric Le Goater
2025-06-10 17:39 ` Cédric Le Goater
2025-06-11 14:25 ` Cédric Le Goater
2025-06-11 14:39 ` Steven Sistare
2025-06-12 7:23 ` Cédric Le Goater
2025-06-19 12:03 ` Cédric Le Goater
2025-06-20 5:46 ` Duan, Zhenzhong
2025-06-11 14:49 ` Peter Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1749569991-25171-5-git-send-email-steven.sistare@oracle.com \
--to=steven.sistare@oracle.com \
--cc=alex.williamson@redhat.com \
--cc=clg@redhat.com \
--cc=eric.auger@redhat.com \
--cc=farosas@suse.de \
--cc=marcel.apfelbaum@gmail.com \
--cc=mst@redhat.com \
--cc=peterx@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=yi.l.liu@intel.com \
--cc=zhenzhong.duan@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).