qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Venu Busireddy <venu.busireddy@oracle.com>
To: venu.busireddy@oracle.com, "Michael S. Tsirkin" <mst@redhat.com>,
	Marcel Apfelbaum <marcel@redhat.com>
Cc: Si-Wei Liu <si-wei.liu@oracle.com>,
	virtio-dev@lists.oasis-open.org, qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH v3 4/5] vfio-pci: Add FAILOVER_PRIMARY_CHANGED event to shorten downtime during failover
Date: Mon,  7 Jan 2019 17:29:43 -0500	[thread overview]
Message-ID: <1546900184-27403-5-git-send-email-venu.busireddy@oracle.com> (raw)
In-Reply-To: <1546900184-27403-1-git-send-email-venu.busireddy@oracle.com>

From: Si-Wei Liu <si-wei.liu@oracle.com>

When a VF is hotplugged into the guest, datapath switching will be
performed immediately, which is sub-optimal in terms of timing, and
could end up with substantial network downtime. One of ways to shorten
this downtime is to switch the datapath only after the VF is seen to get
enabled by guest, indicated by the bus master bit in VF's PCI config
space getting enabled. The FAILOVER_PRIMARY_CHANGED event is emitted
at that time to indicate this condition. Then management stack can kick
off datapath switching upon receiving the event.

Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
Signed-off-by: Venu Busireddy <venu.busireddy@oracle.com>
---
 hw/vfio/pci.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 qapi/net.json | 26 ++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index bd83b58..adcc95a 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -34,6 +34,7 @@
 #include "pci.h"
 #include "trace.h"
 #include "qapi/error.h"
+#include "qapi/qapi-events-net.h"
 
 #define MSIX_CAP_LENGTH 12
 
@@ -42,6 +43,7 @@
 
 static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
 static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
+static void vfio_failover_notify(VFIOPCIDevice *vdev, bool status);
 
 /*
  * Disabling BAR mmaping can be slow, but toggling it around INTx can
@@ -1170,6 +1172,8 @@ void vfio_pci_write_config(PCIDevice *pdev,
 {
     VFIOPCIDevice *vdev = PCI_VFIO(pdev);
     uint32_t val_le = cpu_to_le32(val);
+    bool may_notify = false;
+    bool master_was = false;
 
     trace_vfio_pci_write_config(vdev->vbasedev.name, addr, val, len);
 
@@ -1180,6 +1184,14 @@ void vfio_pci_write_config(PCIDevice *pdev,
                      __func__, vdev->vbasedev.name, addr, val, len);
     }
 
+    /* Bus Master Enabling/Disabling */
+    if (pdev->failover_primary && current_cpu &&
+        range_covers_byte(addr, len, PCI_COMMAND)) {
+        master_was = !!(pci_get_word(pdev->config + PCI_COMMAND) &
+                        PCI_COMMAND_MASTER);
+        may_notify = true;
+    }
+
     /* MSI/MSI-X Enabling/Disabling */
     if (pdev->cap_present & QEMU_PCI_CAP_MSI &&
         ranges_overlap(addr, len, pdev->msi_cap, vdev->msi_cap_size)) {
@@ -1235,6 +1247,14 @@ void vfio_pci_write_config(PCIDevice *pdev,
         /* Write everything to QEMU to keep emulated bits correct */
         pci_default_write_config(pdev, addr, val, len);
     }
+
+    if (may_notify) {
+        bool master_now = !!(pci_get_word(pdev->config + PCI_COMMAND) &
+                             PCI_COMMAND_MASTER);
+        if (master_was != master_now) {
+            vfio_failover_notify(vdev, master_now);
+        }
+    }
 }
 
 /*
@@ -2801,6 +2821,17 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
     vdev->req_enabled = false;
 }
 
+static void vfio_failover_notify(VFIOPCIDevice *vdev, bool status)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    const char *n;
+    gchar *path;
+
+    n = pdev->qdev.id ? pdev->qdev.id : vdev->vbasedev.name;
+    path = object_get_canonical_path(OBJECT(vdev));
+    qapi_event_send_failover_primary_changed(!!n, n, path, status);
+}
+
 static void vfio_realize(PCIDevice *pdev, Error **errp)
 {
     VFIOPCIDevice *vdev = PCI_VFIO(pdev);
@@ -3109,10 +3140,36 @@ static void vfio_instance_finalize(Object *obj)
     vfio_put_group(group);
 }
 
+static void vfio_exit_failover_notify(VFIOPCIDevice *vdev)
+{
+    PCIDevice *pdev = &vdev->pdev;
+
+    /*
+     * Guest driver may not get the chance to disable bus mastering
+     * before the device object gets to be unrealized. In that event,
+     * send out a "disabled" notification on behalf of guest driver.
+     */
+    if (pdev->failover_primary &&
+        pdev->bus_master_enable_region.enabled) {
+        vfio_failover_notify(vdev, false);
+    }
+}
+
 static void vfio_exitfn(PCIDevice *pdev)
 {
     VFIOPCIDevice *vdev = PCI_VFIO(pdev);
 
+    /*
+     * During the guest reboot sequence, it is sometimes possible that
+     * the guest may not get sufficient time to complete the entire driver
+     * removal sequence, near the end of which a PCI config space write to
+     * disable bus mastering can be intercepted by device. In such cases,
+     * the FAILOVER_PRIMARY_CHANGED "disable" event will not be emitted. It
+     * is imperative to generate the event on the guest's behalf if the
+     * guest fails to make it.
+     */
+    vfio_exit_failover_notify(vdev);
+
     vfio_unregister_req_notifier(vdev);
     vfio_unregister_err_notifier(vdev);
     pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
diff --git a/qapi/net.json b/qapi/net.json
index 633ac87..a5b8d70 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -757,3 +757,29 @@
 ##
 { 'command': 'query-standby-status', 'data': { '*device': 'str' },
   'returns': ['StandbyStatusInfo'] }
+
+##
+# @FAILOVER_PRIMARY_CHANGED:
+#
+# Emitted whenever the driver of failover primary is loaded or unloaded
+# by the guest.
+#
+# @device: device name
+#
+# @path: device path
+#
+# @enabled: true if driver is loaded thus device is enabled in guest
+#
+# Since: 3.0
+#
+# Example:
+#
+# <- { "event": "FAILOVER_PRIMARY_CHANGED",
+#      "data": { "device": "vfio-0",
+#                "path": "/machine/peripheral/vfio-0" },
+#                "enabled": "true" },
+#      "timestamp": { "seconds": 1539935213, "microseconds": 753529 } }
+#
+##
+{ 'event': 'FAILOVER_PRIMARY_CHANGED',
+  'data': { '*device': 'str', 'path': 'str', 'enabled': 'bool' } }

  parent reply	other threads:[~2019-01-07 22:30 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-07 22:29 [Qemu-devel] [PATCH v3 0/5] Support for datapath switching during live migration Venu Busireddy
2019-01-07 22:29 ` [Qemu-devel] [PATCH v3 1/5] virtio_net: Add VIRTIO_NET_F_STANDBY feature bit Venu Busireddy
2019-01-08 16:56   ` Dongli Zhang
2019-01-08 17:25     ` Venu Busireddy
2019-01-09  0:14       ` Dongli Zhang
2019-01-09  0:18         ` Samudrala, Sridhar
2019-01-09  0:39           ` Dongli Zhang
2019-01-09  4:17             ` Michael S. Tsirkin
2019-01-07 22:29 ` [Qemu-devel] [PATCH v3 2/5] virtio_net: Add support for "Data Path Switching" during Live Migration Venu Busireddy
2019-01-09 13:39   ` [Qemu-devel] [virtio-dev] " Cornelia Huck
2019-01-09 15:56   ` [Qemu-devel] " Michael S. Tsirkin
2019-01-11  2:09     ` si-wei liu
2019-01-11  3:20       ` Michael S. Tsirkin
2019-01-11  7:09         ` [Qemu-devel] [virtio-dev] " si-wei liu
2019-01-14 11:10           ` Roman Kagan
2019-01-07 22:29 ` [Qemu-devel] [PATCH v3 3/5] virtio_net: Add a query command for FAILOVER_STANDBY_CHANGED event Venu Busireddy
2019-01-08  0:10   ` Michael S. Tsirkin
2019-01-07 22:29 ` Venu Busireddy [this message]
2019-01-07 23:17   ` [Qemu-devel] [PATCH v3 4/5] vfio-pci: Add FAILOVER_PRIMARY_CHANGED event to shorten downtime during failover Alex Williamson
2019-01-07 23:22     ` Michael S. Tsirkin
2019-01-07 23:41       ` Alex Williamson
2019-01-08  0:12         ` Michael S. Tsirkin
2019-01-08  0:24           ` Alex Williamson
2019-01-08  0:43             ` Michael S. Tsirkin
2019-01-08  1:13         ` si-wei liu
2019-01-07 22:29 ` [Qemu-devel] [PATCH v3 5/5] pci: query command extension to check the bus master enabling status of the failover-primary device Venu Busireddy
2019-01-07 23:32 ` [Qemu-devel] [PATCH v3 0/5] Support for datapath switching during live migration Michael S. Tsirkin
2019-01-08  1:45   ` [Qemu-devel] [virtio-dev] " si-wei liu
2019-01-08  2:25     ` Michael S. Tsirkin
2019-01-09  4:55       ` si-wei liu
2019-01-09 13:39         ` Michael S. Tsirkin
2019-01-11  6:57           ` si-wei liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1546900184-27403-5-git-send-email-venu.busireddy@oracle.com \
    --to=venu.busireddy@oracle.com \
    --cc=marcel@redhat.com \
    --cc=mst@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=si-wei.liu@oracle.com \
    --cc=virtio-dev@lists.oasis-open.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).