qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: si-wei liu <si-wei.liu@oracle.com>
To: "Michael S. Tsirkin" <mst@redhat.com>,
	Venu Busireddy <venu.busireddy@oracle.com>
Cc: Marcel Apfelbaum <marcel@redhat.com>,
	qemu-devel@nongnu.org, virtio-dev@lists.oasis-open.org
Subject: Re: [Qemu-devel] [PATCH 3/3] vfio-pci: Add FAILOVER_PRIMARY_CHANGED event to shorten downtime during failover
Date: Mon, 10 Dec 2018 12:23:42 -0800	[thread overview]
Message-ID: <7b2570e0-b5b0-fba5-ff90-942298201c7e@oracle.com> (raw)
In-Reply-To: <20181210122311-mutt-send-email-mst@kernel.org>



On 12/10/2018 9:31 AM, Michael S. Tsirkin wrote:
> On Mon, Dec 10, 2018 at 11:15:48AM -0500, Venu Busireddy wrote:
>> From: Si-Wei Liu <si-wei.liu@oracle.com>
>>
>> When a VF is hotplugged into the guest, datapath switching will be
>> performed immediately, which is sub-optimal in terms of timing, and
>> could end up with substantial network downtime. One of ways to shorten
>> this downtime is to switch the datapath only after the VF is seen to get
>> enabled by guest, indicated by the bus master bit in VF's PCI config
>> space getting enabled. The FAILOVER_PRIMARY_CHANGED event is emitted
>> at that time to indicate this condition. Then management stack can kick
>> off datapath switching upon receiving the event.
>>
>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
>> Signed-off-by: Venu Busireddy <venu.busireddy@oracle.com>
> As management stacks can lose events, it's necessary
> to also have a query command to check device status.

Hmm, makes sense. Will do.

-Siwei
>
>> ---
>>   hw/vfio/pci.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   qapi/net.json | 26 ++++++++++++++++++++++++++
>>   2 files changed, 83 insertions(+)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index ce1f33c..ea24ca2 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -34,6 +34,7 @@
>>   #include "pci.h"
>>   #include "trace.h"
>>   #include "qapi/error.h"
>> +#include "qapi/qapi-events-net.h"
>>   
>>   #define MSIX_CAP_LENGTH 12
>>   
>> @@ -42,6 +43,7 @@
>>   
>>   static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
>>   static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
>> +static void vfio_failover_notify(VFIOPCIDevice *vdev, bool status);
>>   
>>   /*
>>    * Disabling BAR mmaping can be slow, but toggling it around INTx can
>> @@ -1170,6 +1172,8 @@ void vfio_pci_write_config(PCIDevice *pdev,
>>   {
>>       VFIOPCIDevice *vdev = PCI_VFIO(pdev);
>>       uint32_t val_le = cpu_to_le32(val);
>> +    bool may_notify = false;
>> +    bool master_was = false;
>>   
>>       trace_vfio_pci_write_config(vdev->vbasedev.name, addr, val, len);
>>   
>> @@ -1180,6 +1184,14 @@ void vfio_pci_write_config(PCIDevice *pdev,
>>                        __func__, vdev->vbasedev.name, addr, val, len);
>>       }
>>   
>> +    /* Bus Master Enabling/Disabling */
>> +    if (pdev->failover_primary && current_cpu &&
>> +        range_covers_byte(addr, len, PCI_COMMAND)) {
>> +        master_was = !!(pci_get_word(pdev->config + PCI_COMMAND) &
>> +                        PCI_COMMAND_MASTER);
>> +        may_notify = true;
>> +    }
>> +
>>       /* MSI/MSI-X Enabling/Disabling */
>>       if (pdev->cap_present & QEMU_PCI_CAP_MSI &&
>>           ranges_overlap(addr, len, pdev->msi_cap, vdev->msi_cap_size)) {
>> @@ -1235,6 +1247,14 @@ void vfio_pci_write_config(PCIDevice *pdev,
>>           /* Write everything to QEMU to keep emulated bits correct */
>>           pci_default_write_config(pdev, addr, val, len);
>>       }
>> +
>> +    if (may_notify) {
>> +        bool master_now = !!(pci_get_word(pdev->config + PCI_COMMAND) &
>> +                             PCI_COMMAND_MASTER);
>> +        if (master_was != master_now) {
>> +            vfio_failover_notify(vdev, master_now);
>> +        }
>> +    }
>>   }
>>   
>>   /*
> It's very easy to have guest trigger a high load of events by playing
> with the bus master enable bits.  How about instead sending an event
> that just says "something changed" without the current status and have
> management issue a query command to check the status. QEMU then does not
> need to re-send an event until management issues a query command.
>
>
>> @@ -2801,6 +2821,17 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>>       vdev->req_enabled = false;
>>   }
>>   
>> +static void vfio_failover_notify(VFIOPCIDevice *vdev, bool status)
>> +{
>> +    PCIDevice *pdev = &vdev->pdev;
>> +    const char *n;
>> +    gchar *path;
>> +
>> +    n = pdev->qdev.id ? pdev->qdev.id : vdev->vbasedev.name;
>> +    path = object_get_canonical_path(OBJECT(vdev));
>> +    qapi_event_send_failover_primary_changed(!!n, n, path, status);
>> +}
>> +
>>   static void vfio_realize(PCIDevice *pdev, Error **errp)
>>   {
>>       VFIOPCIDevice *vdev = PCI_VFIO(pdev);
>> @@ -3109,10 +3140,36 @@ static void vfio_instance_finalize(Object *obj)
>>       vfio_put_group(group);
>>   }
>>   
>> +static void vfio_exit_failover_notify(VFIOPCIDevice *vdev)
>> +{
>> +    PCIDevice *pdev = &vdev->pdev;
>> +
>> +    /*
>> +     * Guest driver may not get the chance to disable bus mastering
>> +     * before the device object gets to be unrealized. In that event,
>> +     * send out a "disabled" notification on behalf of guest driver.
>> +     */
>> +    if (pdev->failover_primary &&
>> +        pdev->bus_master_enable_region.enabled) {
>> +        vfio_failover_notify(vdev, false);
>> +    }
>> +}
>> +
> With the idea above this won't be necessary anymore.
>
>
>>   static void vfio_exitfn(PCIDevice *pdev)
>>   {
>>       VFIOPCIDevice *vdev = PCI_VFIO(pdev);
>>   
>> +    /*
>> +     * During the guest reboot sequence, it is sometimes possible that
>> +     * the guest may not get sufficient time to complete the entire driver
>> +     * removal sequence, near the end of which a PCI config space write to
>> +     * disable bus mastering can be intercepted by device. In such cases,
>> +     * the FAILOVER_PRIMARY_CHANGED "disable" event will not be emitted. It
>> +     * is imperative to generate the event on the guest's behalf if the
>> +     * guest fails to make it.
>> +     */
>> +    vfio_exit_failover_notify(vdev);
>> +
>>       vfio_unregister_req_notifier(vdev);
>>       vfio_unregister_err_notifier(vdev);
>>       pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
>> diff --git a/qapi/net.json b/qapi/net.json
>> index 04a9de9..e5992c8 100644
>> --- a/qapi/net.json
>> +++ b/qapi/net.json
>> @@ -727,3 +727,29 @@
>>   ##
>>   { 'event': 'FAILOVER_UNPLUG_PRIMARY',
>>     'data': {'*device': 'str', 'path': 'str'} }
>> +
>> +##
>> +# @FAILOVER_PRIMARY_CHANGED:
>> +#
>> +# Emitted whenever the driver of failover primary is loaded or unloaded
>> +# by the guest.
>> +#
>> +# @device: device name
>> +#
>> +# @path: device path
>> +#
>> +# @enabled: true if driver is loaded thus device is enabled in guest
>> +#
>> +# Since: 3.0
>> +#
>> +# Example:
>> +#
>> +# <- { "event": "FAILOVER_PRIMARY_CHANGED",
>
> This event doesn't seem to be failover specific.
> How about a more generic name?
>
>
>
>> +#      "data": { "device": "vfio-0",
>> +#                "path": "/machine/peripheral/vfio-0" },
>> +#                "enabled": "true" },
>> +#      "timestamp": { "seconds": 1539935213, "microseconds": 753529 } }
>> +#
>> +##
>> +{ 'event': 'FAILOVER_PRIMARY_CHANGED',
>> +  'data': { '*device': 'str', 'path': 'str', 'enabled': 'bool' } }

  reply	other threads:[~2018-12-10 20:23 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-10 16:15 [Qemu-devel] [PATCH 0/3] Support for datapath switching during live migration Venu Busireddy
2018-12-10 16:15 ` [Qemu-devel] [PATCH 1/3] virtio_net: Add VIRTIO_NET_F_STANDBY feature bit Venu Busireddy
2018-12-10 16:15 ` [Qemu-devel] [PATCH 2/3] virtio_net: Add support for "Data Path Switching" during Live Migration Venu Busireddy
2018-12-10 17:41   ` Michael S. Tsirkin
2018-12-10 20:22     ` si-wei liu
2018-12-10 23:07       ` Michael S. Tsirkin
2018-12-10 19:28   ` Eric Blake
2019-01-07 17:58     ` Venu Busireddy
2018-12-10 16:15 ` [Qemu-devel] [PATCH 3/3] vfio-pci: Add FAILOVER_PRIMARY_CHANGED event to shorten downtime during failover Venu Busireddy
2018-12-10 17:31   ` Michael S. Tsirkin
2018-12-10 20:23     ` si-wei liu [this message]
2019-01-07 18:01     ` [Qemu-devel] [virtio-dev] " Venu Busireddy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7b2570e0-b5b0-fba5-ff90-942298201c7e@oracle.com \
    --to=si-wei.liu@oracle.com \
    --cc=marcel@redhat.com \
    --cc=mst@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=venu.busireddy@oracle.com \
    --cc=virtio-dev@lists.oasis-open.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).