qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Alex Williamson <alex.williamson@redhat.com>
To: qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, kvm@vger.kernel.org
Subject: [Qemu-devel] [RFC PATCH 3/5] vfio/quirks: Automatic ioeventfd enabling for NVIDIA BAR0 quirks
Date: Tue, 06 Feb 2018 17:26:32 -0700	[thread overview]
Message-ID: <20180207002632.1156.53770.stgit@gimli.home> (raw)
In-Reply-To: <20180207001615.1156.10547.stgit@gimli.home>

Record data writes that come through the NVIDIA BAR0 quirk, if we get
enough in a row that we're only passing through, automatically enable
an ioeventfd for it.  The primary target for this is the MSI-ACK
that NVIDIA uses to allow the MSI interrupt to re-trigger, which is a
4-byte write, data value 0x0 to offset 0x704 into the quirk, 0x88704
into BAR0 MMIO space.  For an interrupt latency sensitive micro-
benchmark, this takes us from 83% of performance versus disabling the
quirk entirely (which GeForce cannot do), to to almost 90%.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 hw/vfio/pci-quirks.c |   89 +++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/vfio/pci.h        |    2 +
 2 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index e4cf4ea2dd9c..e739efe601b1 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -203,6 +203,7 @@ typedef struct VFIOConfigMirrorQuirk {
     uint32_t offset;
     uint8_t bar;
     MemoryRegion *mem;
+    uint8_t data[];
 } VFIOConfigMirrorQuirk;
 
 static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
@@ -297,6 +298,50 @@ static void vfio_ioeventfd_exit(VFIOIOEventFD *ioeventfd)
     g_free(ioeventfd);
 }
 
+static void vfio_ioeventfd_handler(void *opaque)
+{
+    VFIOIOEventFD *ioeventfd = opaque;
+
+    if (event_notifier_test_and_clear(&ioeventfd->e)) {
+        vfio_region_write(ioeventfd->region, ioeventfd->region_addr,
+                          ioeventfd->data, ioeventfd->size);
+    }
+}
+
+static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
+                                          MemoryRegion *mr, hwaddr addr,
+                                          unsigned size, uint64_t data,
+                                          VFIORegion *region,
+                                          hwaddr region_addr)
+{
+    VFIOIOEventFD *ioeventfd = g_malloc0(sizeof(*ioeventfd));
+
+    if (event_notifier_init(&ioeventfd->e, 0)) {
+        g_free(ioeventfd);
+        return NULL;
+    }
+
+    ioeventfd->mr = mr;
+    ioeventfd->addr = addr;
+    ioeventfd->size = size;
+    ioeventfd->match_data = true;
+    ioeventfd->data = data;
+    ioeventfd->region = region;
+    ioeventfd->region_addr = region_addr;
+
+    qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
+                        vfio_ioeventfd_handler, NULL, ioeventfd);
+    memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr,
+                              ioeventfd->size, ioeventfd->match_data,
+                              ioeventfd->data, &ioeventfd->e);
+
+    info_report("Enabled automatic ioeventfd acceleration for %s region %d, "
+                "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u",
+                vdev->vbasedev.name, region->nr, region_addr, data, size);
+
+    return ioeventfd;
+}
+
 static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
 {
     VFIOQuirk *quirk;
@@ -732,6 +777,13 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
     trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name);
 }
 
+typedef struct LastDataSet {
+    hwaddr addr;
+    uint64_t data;
+    unsigned size;
+    int count;
+} LastDataSet;
+
 /*
  * Finally, BAR0 itself.  We want to redirect any accesses to either
  * 0x1800 or 0x88000 through the PCI config space access functions.
@@ -742,6 +794,7 @@ static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
     VFIOConfigMirrorQuirk *mirror = opaque;
     VFIOPCIDevice *vdev = mirror->vdev;
     PCIDevice *pdev = &vdev->pdev;
+    LastDataSet *last = (LastDataSet *)&mirror->data;
 
     vfio_generic_quirk_mirror_write(opaque, addr, data, size);
 
@@ -756,6 +809,38 @@ static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
                           addr + mirror->offset, data, size);
         trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name);
     }
+
+    /*
+     * Automatically add an ioeventfd to handle any repeated write with the
+     * same data and size above the standard PCI config space header.  This is
+     * primarily expected to accelerate the MSI-ACK behavior, such as noted
+     * above.  Current hardware/drivers should trigger an ioeventfd at config
+     * offset 0x704 (region offset 0x88704), with data 0x0, size 4.
+     */
+    if (addr > PCI_STD_HEADER_SIZEOF) {
+        if (addr != last->addr || data != last->data || size != last->size) {
+            last->addr = addr;
+            last->data = data;
+            last->size = size;
+            last->count = 1;
+        } else if (++last->count > 10) {
+            VFIOIOEventFD *ioeventfd;
+
+            ioeventfd = vfio_ioeventfd_init(vdev, mirror->mem, addr, size, data,
+                                            &vdev->bars[mirror->bar].region,
+                                            mirror->offset + addr);
+            if (ioeventfd) {
+                VFIOQuirk *quirk;
+
+                QLIST_FOREACH(quirk, &vdev->bars[mirror->bar].quirks, next) {
+                    if (quirk->data == mirror) {
+                        QLIST_INSERT_HEAD(&quirk->ioeventfds, ioeventfd, next);
+                        break;
+                    }
+                }
+            }
+        }
+    }
 }
 
 static const MemoryRegionOps vfio_nvidia_mirror_quirk = {
@@ -776,7 +861,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
     }
 
     quirk = vfio_quirk_alloc(1);
-    mirror = quirk->data = g_malloc0(sizeof(*mirror));
+    mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
     mirror->mem = quirk->mem;
     mirror->vdev = vdev;
     mirror->offset = 0x88000;
@@ -794,7 +879,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
     /* The 0x1800 offset mirror only seems to get used by legacy VGA */
     if (vdev->vga) {
         quirk = vfio_quirk_alloc(1);
-        mirror = quirk->data = g_malloc0(sizeof(*mirror));
+        mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
         mirror->mem = quirk->mem;
         mirror->vdev = vdev;
         mirror->offset = 0x1800;
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 146065c2f715..ec53b9935725 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -32,6 +32,8 @@ typedef struct VFIOIOEventFD {
     bool match_data;
     uint64_t data;
     EventNotifier e;
+    VFIORegion *region;
+    hwaddr region_addr;
 } VFIOIOEventFD;
 
 typedef struct VFIOQuirk {

  parent reply	other threads:[~2018-02-07  0:26 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-02-07  0:26 [Qemu-devel] [RFC PATCH 0/5] vfio: ioeventfd support Alex Williamson
2018-02-07  0:26 ` [Qemu-devel] [RFC PATCH 1/5] vfio/quirks: Add common quirk alloc helper Alex Williamson
2018-02-08 11:10   ` Auger Eric
2018-02-08 18:28     ` Alex Williamson
2018-02-07  0:26 ` [Qemu-devel] [RFC PATCH 2/5] vfio/quirks: Add generic support for ioveventfds Alex Williamson
2018-02-08 11:11   ` Auger Eric
2018-02-08 18:33     ` Alex Williamson
2018-02-08 20:37       ` Auger Eric
2018-02-07  0:26 ` Alex Williamson [this message]
2018-02-08 11:10   ` [Qemu-devel] [RFC PATCH 3/5] vfio/quirks: Automatic ioeventfd enabling for NVIDIA BAR0 quirks Auger Eric
2018-02-08 11:33     ` Auger Eric
2018-02-08 18:24     ` Alex Williamson
2018-02-08 20:52       ` Auger Eric
2018-02-07  0:26 ` [Qemu-devel] [RFC PATCH 4/5] vfio: Update linux header Alex Williamson
2018-02-07  0:26 ` [Qemu-devel] [RFC PATCH 5/5] vfio/quirks: Enable ioeventfd quirks to be handled by vfio directly Alex Williamson
2018-02-08 11:42   ` Auger Eric
2018-02-08 18:41     ` Alex Williamson
2018-02-09  7:11   ` Peter Xu
2018-02-09 22:09     ` Alex Williamson
2018-02-11  2:38       ` Peter Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180207002632.1156.53770.stgit@gimli.home \
    --to=alex.williamson@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).