qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Steven Sistare <steven.sistare@oracle.com>
To: "Duan, Zhenzhong" <zhenzhong.duan@intel.com>,
	"qemu-devel@nongnu.org" <qemu-devel@nongnu.org>
Cc: Alex Williamson <alex.williamson@redhat.com>,
	Cedric Le Goater <clg@redhat.com>,
	"Liu, Yi L" <yi.l.liu@intel.com>,
	Eric Auger <eric.auger@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
	Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>
Subject: Re: [PATCH V5 33/38] vfio/iommufd: reconstruct device
Date: Tue, 1 Jul 2025 10:26:12 -0400	[thread overview]
Message-ID: <622bf043-49d6-48b0-af93-73bb7f3dc415@oracle.com> (raw)
In-Reply-To: <IA3PR11MB9136ED60B695DC51D2C4C3E1927BA@IA3PR11MB9136.namprd11.prod.outlook.com>

On 6/25/2025 7:40 AM, Duan, Zhenzhong wrote:
>> -----Original Message-----
>> From: Steve Sistare <steven.sistare@oracle.com>
>> Subject: [PATCH V5 33/38] vfio/iommufd: reconstruct device
>>
>> Reconstruct userland device state after CPR.  During vfio_realize, skip all
>> ioctls that configure the device, as it was already configured in old QEMU.
>>
>> Skip bind, and use the devid from CPR state.
>>
>> Skip allocation of, and attachment to, ioas_id.  Recover ioas_id from CPR
>> state, and use it to find a matching container, if any, before creating a
>> new one.
>>
>> This reconstruction is not complete.  hwpt_id is handled in a subsequent
>> patch.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>> ---
>> hw/vfio/iommufd.c | 30 ++++++++++++++++++++++++++++--
>> 1 file changed, 28 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index f0d57ea..a650517 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -25,6 +25,7 @@
>> #include "system/reset.h"
>> #include "qemu/cutils.h"
>> #include "qemu/chardev_open.h"
>> +#include "migration/cpr.h"
>> #include "pci.h"
>> #include "vfio-iommufd.h"
>> #include "vfio-helpers.h"
>> @@ -121,6 +122,10 @@ static bool
>> iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
>>          goto err_kvm_device_add;
>>      }
>>
>> +    if (cpr_is_incoming()) {
>> +        goto skip_bind;
>> +    }
>> +
>>      /* Bind device to iommufd */
>>      bind.iommufd = iommufd->fd;
>>      if (ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind)) {
>> @@ -132,6 +137,8 @@ static bool
>> iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
>>      vbasedev->devid = bind.out_devid;
>>      trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name,
>>                                          vbasedev->fd, vbasedev->devid);
>> +
>> +skip_bind:
> 
> I'm not sure if we should take above trace for CPR..

My thinking is: on cpr, we do not connect or bind, so we should not log it.
iommufd_backend_connect() is called, but it just reuses a cpr fd, and we
can observe that fd reuse with the cpr traces.

>>      return true;
>> err_bind:
>>      iommufd_cdev_kvm_device_del(vbasedev);
>> @@ -421,7 +428,9 @@ static bool iommufd_cdev_attach_container(VFIODevice
>> *vbasedev,
>>          return iommufd_cdev_autodomains_get(vbasedev, container, errp);
>>      }
>>
>> -    return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
>> +    /* If CPR, we are already attached to ioas_id. */
>> +    return cpr_is_incoming() ||
>> +           !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
>> }
>>
>> static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
>> @@ -510,6 +519,7 @@ static bool iommufd_cdev_attach(const char *name,
>> VFIODevice *vbasedev,
>>      VFIOAddressSpace *space;
>>      struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
>>      int ret, devfd;
>> +    bool res;
>>      uint32_t ioas_id;
>>      Error *err = NULL;
>>      const VFIOIOMMUClass *iommufd_vioc =
>> @@ -540,7 +550,16 @@ static bool iommufd_cdev_attach(const char *name,
>> VFIODevice *vbasedev,
>>              vbasedev->iommufd != container->be) {
>>              continue;
>>          }
>> -        if (!iommufd_cdev_attach_container(vbasedev, container, &err)) {
>> +
>> +        if (!cpr_is_incoming()) {
>> +            res = iommufd_cdev_attach_container(vbasedev, container, &err);
>> +        } else if (vbasedev->cpr.ioas_id == container->ioas_id) {
>> +            res = true;
>> +        } else {
>> +            continue;
>> +        }
>> +
>> +        if (!res) {
>>              const char *msg = error_get_pretty(err);
>>
>>              trace_iommufd_cdev_fail_attach_existing_container(msg);
>> @@ -557,6 +576,11 @@ static bool iommufd_cdev_attach(const char *name,
>> VFIODevice *vbasedev,
>>          }
>>      }
>>
>> +    if (cpr_is_incoming()) {
>> +        ioas_id = vbasedev->cpr.ioas_id;
>> +        goto skip_ioas_alloc;
>> +    }
>> +
>>      /* Need to allocate a new dedicated container */
>>      if (!iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp)) {
>>          goto err_alloc_ioas;
>> @@ -564,10 +588,12 @@ static bool iommufd_cdev_attach(const char *name,
>> VFIODevice *vbasedev,
>>
>>      trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id);
>>
>> +skip_ioas_alloc:
> 
> Same here, others look good.

During cpr, we do not allocate a new ioas; we use the one from cpr state.
I think it would be confusing to print a trace that suggests we allocated
a new ioas.

Perhaps I should add a trace in vfio_cpr_find_device:

     trace_vfio_cpr_find_device(elem->ioas_id, elem->dev_id, elem->hwpt_id)

- Steve

>>      container =
>> VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
>>      container->be = vbasedev->iommufd;
>>      container->ioas_id = ioas_id;
>>      QLIST_INIT(&container->hwpt_list);
>> +    vbasedev->cpr.ioas_id = ioas_id;
>>
>>      bcontainer = &container->bcontainer;
>>      vfio_address_space_insert(space, bcontainer);
>> --
>> 1.8.3.1
> 



  reply	other threads:[~2025-07-01 14:27 UTC|newest]

Thread overview: 101+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-10 15:39 [PATCH V5 00/38] Live update: vfio and iommufd Steve Sistare
2025-06-10 15:39 ` [PATCH V5 01/38] migration: cpr helpers Steve Sistare
2025-06-10 15:39 ` [PATCH V5 02/38] migration: lower handler priority Steve Sistare
2025-06-10 15:39 ` [PATCH V5 03/38] vfio/container: register container for cpr Steve Sistare
2025-06-10 15:39 ` [PATCH V5 04/38] vfio/container: preserve descriptors Steve Sistare
2025-06-23  9:07   ` Duan, Zhenzhong
2025-07-01 14:25     ` Steven Sistare
2025-07-02 14:23       ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 05/38] vfio/container: discard old DMA vaddr Steve Sistare
2025-06-10 15:39 ` [PATCH V5 06/38] vfio/container: restore " Steve Sistare
2025-06-10 15:39 ` [PATCH V5 07/38] vfio/container: mdev cpr blocker Steve Sistare
2025-06-10 15:39 ` [PATCH V5 08/38] vfio/container: recover from unmap-all-vaddr failure Steve Sistare
2025-08-13 12:54   ` Cédric Le Goater
2025-08-13 14:18     ` Steven Sistare
2025-06-10 15:39 ` [PATCH V5 09/38] pci: export msix_is_pending Steve Sistare
2025-06-10 15:39 ` [PATCH V5 10/38] pci: skip reset during cpr Steve Sistare
2025-06-10 15:39 ` [PATCH V5 11/38] vfio-pci: " Steve Sistare
2025-06-10 15:39 ` [PATCH V5 12/38] vfio/pci: vfio_pci_vector_init Steve Sistare
2025-06-10 15:39 ` [PATCH V5 13/38] vfio/pci: vfio_notifier_init Steve Sistare
2025-06-10 15:39 ` [PATCH V5 14/38] vfio/pci: pass vector to virq functions Steve Sistare
2025-06-10 15:39 ` [PATCH V5 15/38] vfio/pci: vfio_notifier_init cpr parameters Steve Sistare
2025-06-10 15:39 ` [PATCH V5 16/38] vfio/pci: vfio_notifier_cleanup Steve Sistare
2025-06-10 15:39 ` [PATCH V5 17/38] vfio/pci: export MSI functions Steve Sistare
2025-06-10 15:39 ` [PATCH V5 18/38] vfio-pci: preserve MSI Steve Sistare
2025-07-01 16:12   ` Steven Sistare
2025-07-02  7:17     ` Cédric Le Goater
2025-07-02 12:03       ` Steven Sistare
2025-07-02 15:35   ` Cédric Le Goater
2025-07-02 16:40     ` Steven Sistare
2025-06-10 15:39 ` [PATCH V5 19/38] vfio-pci: preserve INTx Steve Sistare
2025-07-02 15:23   ` Cédric Le Goater
2025-07-02 17:54     ` Steven Sistare
2025-06-10 15:39 ` [PATCH V5 20/38] migration: close kvm after cpr Steve Sistare
2025-07-01 15:25   ` Steven Sistare
2025-07-02 16:02     ` Peter Xu
2025-07-02 19:41       ` Steven Sistare
2025-07-03 19:45         ` Peter Xu
2025-07-03 21:21           ` Cédric Le Goater
2025-07-03 21:58             ` Peter Xu
2025-07-07 13:13               ` Steven Sistare
2025-07-01 17:49   ` Fabiano Rosas
2025-06-10 15:39 ` [PATCH V5 21/38] migration: cpr_get_fd_param helper Steve Sistare
2025-06-10 15:39 ` [PATCH V5 22/38] backends/iommufd: iommufd_backend_map_file_dma Steve Sistare
2025-06-10 15:39 ` [PATCH V5 23/38] backends/iommufd: change process ioctl Steve Sistare
2025-06-11 12:38   ` Cédric Le Goater
2025-06-23  8:20   ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 24/38] physmem: qemu_ram_get_fd_offset Steve Sistare
2025-06-10 15:39 ` [PATCH V5 25/38] vfio/iommufd: use IOMMU_IOAS_MAP_FILE Steve Sistare
2025-06-10 15:39 ` [PATCH V5 26/38] vfio/iommufd: invariant device name Steve Sistare
2025-06-23  8:25   ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 27/38] vfio/iommufd: add vfio_device_free_name Steve Sistare
2025-06-11 12:38   ` Cédric Le Goater
2025-06-23  8:27   ` Duan, Zhenzhong
2025-06-23 13:50   ` Eric Farman
2025-07-01 14:26     ` Steven Sistare
2025-06-10 15:39 ` [PATCH V5 28/38] vfio/iommufd: device name blocker Steve Sistare
2025-06-23 10:29   ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 29/38] vfio/iommufd: register container for cpr Steve Sistare
2025-07-01 14:25   ` Steven Sistare
2025-07-02 14:17   ` Duan, Zhenzhong
2025-07-02 14:52     ` Steven Sistare
2025-06-10 15:39 ` [PATCH V5 30/38] migration: vfio cpr state hook Steve Sistare
2025-06-24 11:24   ` Duan, Zhenzhong
2025-07-01 14:26     ` Steven Sistare
2025-07-02 13:39       ` Duan, Zhenzhong
2025-07-02 15:07         ` Steven Sistare
2025-06-10 15:39 ` [PATCH V5 31/38] vfio/iommufd: cpr state Steve Sistare
2025-06-23 10:45   ` Duan, Zhenzhong
2025-07-01 14:26     ` Steven Sistare
2025-07-02 13:44       ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 32/38] vfio/iommufd: preserve descriptors Steve Sistare
2025-06-25 11:40   ` Duan, Zhenzhong
2025-07-01 14:26     ` Steven Sistare
2025-07-02 14:08       ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 33/38] vfio/iommufd: reconstruct device Steve Sistare
2025-06-25 11:40   ` Duan, Zhenzhong
2025-07-01 14:26     ` Steven Sistare [this message]
2025-07-02 14:14       ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 34/38] vfio/iommufd: reconstruct hwpt Steve Sistare
2025-06-25 11:40   ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 35/38] vfio/iommufd: change process Steve Sistare
2025-06-25 11:40   ` Duan, Zhenzhong
2025-07-01 14:26     ` Steven Sistare
2025-07-02 13:46       ` Duan, Zhenzhong
2025-07-02 20:57         ` Steven Sistare
2025-06-10 15:39 ` [PATCH V5 36/38] iommufd: preserve DMA mappings Steve Sistare
2025-06-25 11:40   ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 37/38] vfio/container: delete old cpr register Steve Sistare
2025-06-25 11:40   ` Duan, Zhenzhong
2025-06-10 15:39 ` [PATCH V5 38/38] vfio: doc changes for cpr Steve Sistare
2025-07-02 14:03   ` Steven Sistare
2025-07-02 14:49   ` Cédric Le Goater
2025-07-02 17:52   ` Fabiano Rosas
2025-06-10 17:18 ` [PATCH V5 00/38] Live update: vfio and iommufd Cédric Le Goater
2025-06-10 17:39   ` Cédric Le Goater
2025-06-11 14:25     ` Cédric Le Goater
2025-06-11 14:39       ` Steven Sistare
2025-06-12  7:23         ` Cédric Le Goater
2025-06-19 12:03           ` Cédric Le Goater
2025-06-20  5:46             ` Duan, Zhenzhong
2025-06-11 14:49       ` Peter Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=622bf043-49d6-48b0-af93-73bb7f3dc415@oracle.com \
    --to=steven.sistare@oracle.com \
    --cc=alex.williamson@redhat.com \
    --cc=clg@redhat.com \
    --cc=eric.auger@redhat.com \
    --cc=farosas@suse.de \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=mst@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=yi.l.liu@intel.com \
    --cc=zhenzhong.duan@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).