qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Steven Sistare <steven.sistare@oracle.com>
To: "Cédric Le Goater" <clg@redhat.com>, qemu-devel@nongnu.org
Cc: Alex Williamson <alex.williamson@redhat.com>,
	Yi Liu <yi.l.liu@intel.com>, Eric Auger <eric.auger@redhat.com>,
	Zhenzhong Duan <zhenzhong.duan@intel.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
	Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>
Subject: Re: [PATCH V1 13/26] vfio-pci: preserve INTx
Date: Thu, 6 Feb 2025 09:43:45 -0500	[thread overview]
Message-ID: <1a970ff3-f2fa-411c-8970-ef0dd1229c76@oracle.com> (raw)
In-Reply-To: <219faed4-284c-430c-8410-d2af398f588d@redhat.com>

On 2/5/2025 12:13 PM, Cédric Le Goater wrote:
> On 1/29/25 15:43, Steve Sistare wrote:
>> Preserve vfio INTx state across cpr-transfer.  Preserve VFIOINTx fields as
>> follows:
>>    pin : Recover this from the vfio config in kernel space
>>    interrupt : Preserve its eventfd descriptor across exec.
>>    unmask : Ditto
>>    route.irq : This could perhaps be recovered in vfio_pci_post_load by
>>      calling pci_device_route_intx_to_irq(pin), whose implementation reads
>>      config space for a bridge device such as ich9.  However, there is no
>>      guarantee that the bridge vmstate is read before vfio vmstate.  Rather
>>      than fiddling with MigrationPriority for vmstate handlers, explicitly
>>      save route.irq in vfio vmstate.
>>    pending : save in vfio vmstate.
>>    mmap_timeout, mmap_timer : Re-initialize
>>    bool kvm_accel : Re-initialize
>>
>> In vfio_realize, defer calling vfio_intx_enable until the vmstate
>> is available, in vfio_pci_post_load.  Modify vfio_intx_enable and
>> vfio_intx_kvm_enable to skip vfio initialization, but still perform
>> kvm initialization.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>> ---
>>   hw/vfio/pci.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++----
>>   1 file changed, 47 insertions(+), 4 deletions(-)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index df6e298..c50dbef 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -184,12 +184,17 @@ static bool vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp)
>>           return true;
>>       }
>> +    if (vdev->vbasedev.reused) {
> 
> 1 x vdev->vbasedev.reused
> 
>> +        goto skip_state;
>> +    }
>> +
>>       /* Get to a known interrupt state */
>>       qemu_set_fd_handler(irq_fd, NULL, NULL, vdev);
>>       vfio_mask_single_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
>>       vdev->intx.pending = false;
>>       pci_irq_deassert(&vdev->pdev);
>> +skip_state:
> 
> 
> hmm, this skip_state label and  ...
> 
>>       /* Get an eventfd for resample/unmask */
>>       if (vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0)) {
>>           error_setg(errp, "vfio_notifier_init intx-unmask failed");
>> @@ -204,6 +209,10 @@ static bool vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp)
>>           goto fail_irqfd;
>>       }
>> +    if (vdev->vbasedev.reused) {
> 
> 2 x vdev->vbasedev.reused
> 
>> +        goto skip_irq;
>> +    }
>> +
>>       if (!vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
>>                                   VFIO_IRQ_SET_ACTION_UNMASK,
>>                                   event_notifier_get_fd(&vdev->intx.unmask),
>> @@ -214,6 +223,7 @@ static bool vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp)
>>       /* Let'em rip */
>>       vfio_unmask_single_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
>> +skip_irq:
> 
> ... this skip_irq label are one "very quick" way to get things done :)

I chose to use gotos and skip labels for your benefit as a reviewer, not as a
quick way to get this done: they reduce the diff, so you can see that the
non-cpr code is not changed.  But if you prefer, I can use conditional blocks
instead of gotos, and let indentation create additional diffs:

         if (reused)
             goto skip;
         non-cpr code;
     skip:

vs

         if (!reused) {
             non-cpr code;
         }

>>       vdev->intx.kvm_accel = true;
>>       trace_vfio_intx_enable_kvm(vdev->vbasedev.name);
>> @@ -329,7 +339,13 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
>>           return true;
>>       }
>> -    vfio_disable_interrupts(vdev);
>> +    /*
>> +     * Do not alter interrupt state during vfio_realize and cpr load.  The
>> +     * reused flag is cleared thereafter.
>> +     */
>> +    if (!vdev->vbasedev.reused) {
> 
> 3 x vdev->vbasedev.reused
> 
>> +        vfio_disable_interrupts(vdev);
>> +    }
>>       vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */
>>       pci_config_set_interrupt_pin(vdev->pdev.config, pin);
>> @@ -351,7 +367,8 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
>>       fd = event_notifier_get_fd(&vdev->intx.interrupt);
>>       qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev);
>> -    if (!vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
>> +    if (!vdev->vbasedev.reused &&
> 
> 4 x vdev->vbasedev.reused
> 
>> +        !vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
>>                                   VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) {
>>           qemu_set_fd_handler(fd, NULL, NULL, vdev);
>>           vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt", 0);
> 
>> @@ -3256,7 +3273,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>>                                                vfio_intx_routing_notifier);
>>           vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
>>           kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
>> -        if (!vfio_intx_enable(vdev, errp)) {
>> +        /* Wait until cpr load reads intx routing data to enable */
>> +        if (!vdev->vbasedev.reused && !vfio_intx_enable(vdev, errp)) {
> 
> 5 x vdev->vbasedev.reused
> 
> This patch already adds a test on vdev->vbasedev.reused at the top of
> vfio_intx_enable(). This one seems redundant.

This test is necessary.  I will expand the comment to be more explicit:

          /*
           * During CPR, do not call vfio_intx_enable at this time.  Instead,
           * call it from vfio_pci_post_load after the intx routing data has
           * been loaded from vmstate.
           */
          if (!vdev->vbasedev.reused && !vfio_intx_enable(vdev, errp)) {

> Please duplicate the whole vfio_intx_enable() routine and move it
> under a cpr file.

Do you just mean vfio_intx_enable?  Or also vfio_intx_enable_kvm?  The
occurrences of vdev->vbasedev.reused that you flag occur in both.

I coded with reused conditionals and "skip" labels for a good reason.  By
keeping the common logic inline with the cpr conditionals, I minimize the
chance that changes in the common logic will break cpr.  Conversely,
outlining cpr specific versions of these functions and duplicating common
code creates the very real possibility that changes in vfio core code will
not be made in the cpr copies, and break cpr.

>>               goto out_deregister;
>>           }
>>       }
>> @@ -3578,12 +3596,36 @@ static int vfio_pci_post_load(void *opaque, int version_id)
>>           vfio_claim_vectors(vdev, nr_vectors, false);
>>       } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) {
>> -        g_assert_not_reached();      /* completed in a subsequent patch */
>> +        Error *err = NULL;
>> +        if (!vfio_intx_enable(vdev, &err)) {
>> +            error_report_err(err);
>> +            return -1;
>> +        }
>>       }
>>       return 0;
>>   }
>> +static const VMStateDescription vfio_intx_vmstate = {
>> +    .name = "vfio-intx",
>> +    .version_id = 0,
>> +    .minimum_version_id = 0,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_BOOL(pending, VFIOINTx),
>> +        VMSTATE_UINT32(route.mode, VFIOINTx),
>> +        VMSTATE_INT32(route.irq, VFIOINTx),
>> +        VMSTATE_END_OF_LIST()
>> +    }
>> +};
>> +
>> +#define VMSTATE_VFIO_INTX(_field, _state) {                         \
>> +    .name       = (stringify(_field)),                              \
>> +    .size       = sizeof(VFIOINTx),                                 \
>> +    .vmsd       = &vfio_intx_vmstate,                               \
>> +    .flags      = VMS_STRUCT,                                       \
>> +    .offset     = vmstate_offset_value(_state, _field, VFIOINTx),   \
>> +}
>> +
> 
> move these to cpr file please.

OK.

- Steve

>>   static const VMStateDescription vfio_pci_vmstate = {
>>       .name = "vfio-pci",
>>       .version_id = 0,
>> @@ -3594,6 +3636,7 @@ static const VMStateDescription vfio_pci_vmstate = {
>>       .fields = (VMStateField[]) {
>>           VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice),
>>           VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, vfio_msix_present),
>> +        VMSTATE_VFIO_INTX(intx, VFIOPCIDevice),
>>           VMSTATE_END_OF_LIST()
>>       }
>>   };
> 



  reply	other threads:[~2025-02-06 14:44 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-29 14:42 [PATCH V1 00/26] Live update: vfio and iommufd Steve Sistare
2025-01-29 14:42 ` [PATCH V1 01/26] migration: cpr helpers Steve Sistare
2025-01-29 14:42 ` [PATCH V1 02/26] migration: lower handler priority Steve Sistare
2025-02-03 16:21   ` Fabiano Rosas
2025-02-03 16:58   ` Peter Xu
2025-02-06 13:39     ` Steven Sistare
2025-01-29 14:42 ` [PATCH V1 03/26] vfio: vfio_find_ram_discard_listener Steve Sistare
2025-02-03 16:57   ` Cédric Le Goater
2025-01-29 14:43 ` [PATCH V1 04/26] vfio/container: register container for cpr Steve Sistare
2025-02-03 17:01   ` Cédric Le Goater
2025-02-03 22:26     ` Steven Sistare
2025-01-29 14:43 ` [PATCH V1 05/26] vfio/container: preserve descriptors Steve Sistare
2025-02-03 17:48   ` Cédric Le Goater
2025-02-03 22:26     ` Steven Sistare
2025-01-29 14:43 ` [PATCH V1 06/26] vfio/container: preserve DMA mappings Steve Sistare
2025-02-03 18:25   ` Cédric Le Goater
2025-02-03 22:27     ` Steven Sistare
2025-01-29 14:43 ` [PATCH V1 07/26] vfio/container: recover from unmap-all-vaddr failure Steve Sistare
2025-02-04 14:10   ` Cédric Le Goater
2025-02-04 16:13     ` Steven Sistare
2025-01-29 14:43 ` [PATCH V1 08/26] pci: skip reset during cpr Steve Sistare
2025-02-04 14:14   ` Cédric Le Goater
2025-02-04 16:13     ` Steven Sistare
2025-01-29 14:43 ` [PATCH V1 09/26] pci: export msix_is_pending Steve Sistare
2025-01-29 14:43 ` [PATCH V1 10/26] vfio-pci: refactor for cpr Steve Sistare
2025-02-04 14:39   ` Cédric Le Goater
2025-02-04 16:14     ` Steven Sistare
2025-01-29 14:43 ` [PATCH V1 11/26] vfio-pci: skip reset during cpr Steve Sistare
2025-02-04 14:56   ` Cédric Le Goater
2025-02-04 16:15     ` Steven Sistare
2025-01-29 14:43 ` [PATCH V1 12/26] vfio-pci: preserve MSI Steve Sistare
2025-02-05 16:48   ` Cédric Le Goater
2025-02-06 14:41     ` Steven Sistare
2025-01-29 14:43 ` [PATCH V1 13/26] vfio-pci: preserve INTx Steve Sistare
2025-02-05 17:13   ` Cédric Le Goater
2025-02-06 14:43     ` Steven Sistare [this message]
2025-01-29 14:43 ` [PATCH V1 14/26] migration: close kvm after cpr Steve Sistare
2025-01-29 14:43 ` [PATCH V1 15/26] migration: cpr_get_fd_param helper Steve Sistare
2025-01-29 14:43 ` [PATCH V1 16/26] vfio: return mr from vfio_get_xlat_addr Steve Sistare
2025-02-04 15:47   ` Cédric Le Goater
2025-02-04 17:42     ` Steven Sistare
2025-02-16 23:19       ` John Levon
2025-01-29 14:43 ` [PATCH V1 17/26] vfio: pass ramblock to vfio_container_dma_map Steve Sistare
2025-01-29 14:43 ` [PATCH V1 18/26] vfio/iommufd: define iommufd_cdev_make_hwpt Steve Sistare
2025-02-04 16:22   ` Cédric Le Goater
2025-02-04 17:42     ` Steven Sistare
2025-01-29 14:43 ` [PATCH V1 19/26] vfio/iommufd: use IOMMU_IOAS_MAP_FILE Steve Sistare
2025-02-05 17:23   ` Cédric Le Goater
2025-02-05 22:01     ` Steven Sistare
2025-01-29 14:43 ` [PATCH V1 20/26] vfio/iommufd: export iommufd_cdev_get_info_iova_range Steve Sistare
2025-02-05 17:33   ` Cédric Le Goater
2025-02-05 22:01     ` Steven Sistare
2025-01-29 14:43 ` [PATCH V1 21/26] iommufd: change process ioctl Steve Sistare
2025-02-05 17:34   ` Cédric Le Goater
2025-02-05 22:02     ` Steven Sistare
2025-01-29 14:43 ` [PATCH V1 22/26] vfio/iommufd: invariant device name Steve Sistare
2025-02-05 17:42   ` Cédric Le Goater
2025-02-05 22:02     ` Steven Sistare
2025-01-29 14:43 ` [PATCH V1 23/26] vfio/iommufd: register container for cpr Steve Sistare
2025-02-05 17:45   ` Cédric Le Goater
2025-02-05 22:03     ` Steven Sistare
2025-01-29 14:43 ` [PATCH V1 24/26] vfio/iommufd: preserve descriptors Steve Sistare
2025-01-29 14:43 ` [PATCH V1 25/26] vfio/iommufd: reconstruct device Steve Sistare
2025-01-29 14:43 ` [PATCH V1 26/26] iommufd: preserve DMA mappings Steve Sistare

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1a970ff3-f2fa-411c-8970-ef0dd1229c76@oracle.com \
    --to=steven.sistare@oracle.com \
    --cc=alex.williamson@redhat.com \
    --cc=clg@redhat.com \
    --cc=eric.auger@redhat.com \
    --cc=farosas@suse.de \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=mst@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=yi.l.liu@intel.com \
    --cc=zhenzhong.duan@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).