qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Steve Sistare <steven.sistare@oracle.com>
To: qemu-devel@nongnu.org
Cc: Yi Liu <yi.l.liu@intel.com>, Eric Auger <eric.auger@redhat.com>,
	Zhenzhong Duan <zhenzhong.duan@intel.com>,
	Alex Williamson <alex.williamson@redhat.com>,
	Cedric Le Goater <clg@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>,
	Philippe Mathieu-Daude <philmd@linaro.org>,
	David Hildenbrand <david@redhat.com>,
	Steve Sistare <steven.sistare@oracle.com>
Subject: [RFC V1 09/12] vfio/iommufd: rebuild device
Date: Sat, 20 Jul 2024 12:15:34 -0700	[thread overview]
Message-ID: <1721502937-87102-10-git-send-email-steven.sistare@oracle.com> (raw)
In-Reply-To: <1721502937-87102-1-git-send-email-steven.sistare@oracle.com>

Rebuild userland device state after CPR.  During vfio_realize, skip all
ioctls that configure the device, as it was already configured in old
QEMU, and we preserved the device descriptor.

Preserve the ioas_id in vmstate.  Because we skip ioctls, it is not needed
at realize time.  However, we do need to gather range info, so defer the
call to iommufd_cdev_get_info_iova_range to a post_load handler, at which
time the ioas_id is known.

Registering the vfio_memory_listener causes spurious calls to map and
unmap DMA, as devices are created and the address space is built.  This
memory was already mapped by the device, so suppress map and unmap
during CPR -- i.e., if the reused flag is set.  Clear the reused flag in the
post_load handler.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 backends/iommufd.c    |  8 ++++++++
 hw/vfio/cpr-iommufd.c | 24 ++++++++++++++++++++++++
 hw/vfio/iommufd.c     | 14 +++++++++++++-
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/backends/iommufd.c b/backends/iommufd.c
index 243178e..86fd9db 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -172,6 +172,10 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
         .length = size,
     };
 
+    if (be->reused) {
+        return 0;
+    }
+
     if (!readonly) {
         map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
     }
@@ -203,6 +207,10 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
         .length = size,
     };
 
+    if (be->reused) {
+        return 0;
+    }
+
     ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap);
     /*
      * IOMMUFD takes mapping as some kind of object, unmapping
diff --git a/hw/vfio/cpr-iommufd.c b/hw/vfio/cpr-iommufd.c
index f2e34f4..c38485a 100644
--- a/hw/vfio/cpr-iommufd.c
+++ b/hw/vfio/cpr-iommufd.c
@@ -27,12 +27,36 @@ static bool vfio_can_cpr_exec(VFIOIOMMUFDContainer *container, Error **errp)
     return true;
 }
 
+static int vfio_container_post_load(void *opaque, int version_id)
+{
+    VFIOIOMMUFDContainer *container = opaque;
+    VFIOContainerBase *bcontainer = &container->bcontainer;
+    VFIODevice *vbasedev;
+    Error *err = NULL;
+    uint32_t ioas_id = container->ioas_id;
+
+    if (!iommufd_cdev_get_info_iova_range(container, ioas_id, &err)) {
+        error_report_err(err);
+        return -1;
+    }
+
+    bcontainer->reused = false;
+    QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
+        vbasedev->reused = false;
+    }
+    container->be->reused = false;
+
+    return 0;
+}
+
 static const VMStateDescription vfio_container_vmstate = {
     .name = "vfio-iommufd-container",
     .version_id = 0,
     .minimum_version_id = 0,
+    .post_load = vfio_container_post_load,
     .needed = cpr_needed_for_reuse,
     .fields = (VMStateField[]) {
+        VMSTATE_UINT32(ioas_id, VFIOIOMMUFDContainer),
         VMSTATE_END_OF_LIST()
     }
 };
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 585bf09..186edc7 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -357,6 +357,11 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
         }
     }
 
+    if (vbasedev->reused) {
+        ioas_id = -1;           /* ioas_id will be sent in vmstate */
+        goto skip_ioas_alloc;
+    }
+
     /* Need to allocate a new dedicated container */
     if (!iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp)) {
         goto err_alloc_ioas;
@@ -364,6 +369,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
 
     trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id);
 
+skip_ioas_alloc:
     container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
     container->be = vbasedev->iommufd;
     container->ioas_id = ioas_id;
@@ -371,7 +377,8 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
     bcontainer = &container->bcontainer;
     vfio_address_space_insert(space, bcontainer);
 
-    if (!iommufd_cdev_attach_container(vbasedev, container, errp)) {
+    if (!vbasedev->reused &&
+        !iommufd_cdev_attach_container(vbasedev, container, errp)) {
         goto err_attach_container;
     }
 
@@ -380,6 +387,10 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
         goto err_discard_disable;
     }
 
+    if (vbasedev->reused) {
+        goto skip_info;
+    }
+
     if (!iommufd_cdev_get_info_iova_range(container, ioas_id, &err)) {
         error_append_hint(&err,
                    "Fallback to default 64bit IOVA range and 4K page size\n");
@@ -388,6 +399,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
         bcontainer->pgsizes = qemu_real_host_page_size();
     }
 
+skip_info:
     bcontainer->listener = vfio_memory_listener;
     memory_listener_register(&bcontainer->listener, bcontainer->space->as);
 
-- 
1.8.3.1



  parent reply	other threads:[~2024-07-20 19:17 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-20 19:15 [RFC V1 00/12] Live update: iommufd Steve Sistare
2024-07-20 19:15 ` [RFC V1 01/12] vfio: move cpr_exec_notifier Steve Sistare
2024-07-20 19:15 ` [RFC V1 02/12] iommufd: no DMA to BARs Steve Sistare
2024-08-12 22:05   ` Alex Williamson
2024-08-13  1:39   ` Yi Liu
2024-08-13 14:53     ` Steven Sistare
2024-07-20 19:15 ` [RFC V1 03/12] iommufd: pass name to connect Steve Sistare
2024-07-20 19:15 ` [RFC V1 04/12] migration: cpr_find_fd_any Steve Sistare
2024-07-20 19:15 ` [RFC V1 05/12] iommufd: preserve device fd Steve Sistare
2024-07-20 19:15 ` [RFC V1 06/12] iommufd: export iommufd_cdev_get_info_iova_range Steve Sistare
2024-07-20 19:15 ` [RFC V1 07/12] iommufd: change_process kernel interface Steve Sistare
2024-07-20 19:15 ` [RFC V1 08/12] vfio/iommufd: register container for cpr Steve Sistare
2024-07-20 19:15 ` Steve Sistare [this message]
2024-07-20 19:15 ` [RFC V1 10/12] migration/ram: old host address Steve Sistare
2024-08-16 17:57   ` Fabiano Rosas
2024-08-16 18:13     ` Steven Sistare
2024-07-20 19:15 ` [RFC V1 11/12] iommufd: update DMA virtual addresses Steve Sistare
2024-07-20 19:15 ` [RFC V1 12/12] vfio: mdev blocker Steve Sistare

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1721502937-87102-10-git-send-email-steven.sistare@oracle.com \
    --to=steven.sistare@oracle.com \
    --cc=alex.williamson@redhat.com \
    --cc=clg@redhat.com \
    --cc=david@redhat.com \
    --cc=eric.auger@redhat.com \
    --cc=farosas@suse.de \
    --cc=mst@redhat.com \
    --cc=peterx@redhat.com \
    --cc=philmd@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=yi.l.liu@intel.com \
    --cc=zhenzhong.duan@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).