From: Alex Williamson <alex.williamson@redhat.com>
To: qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com
Subject: [Qemu-devel] [PULL 2/4] vfio-pci: Fix MSI-X masking performance
Date: Mon, 30 Jun 2014 11:27:58 -0600 [thread overview]
Message-ID: <20140630172758.24172.89340.stgit@bling.home> (raw)
In-Reply-To: <20140630172611.24172.93339.stgit@bling.home>
There are still old guests out there that over-exercise MSI-X masking.
The current code completely sets-up and tears-down an MSI-X vector on
the "use" and "release" callbacks. While this is functional, it can
slow an old guest to a crawl. We can easily skip the KVM parts of
this so that we keep the MSI route and irqfd setup. We do however
need to switch VFIO to trigger a different eventfd while masked.
Actually, we have the option of continuing to use -1 to disable the
trigger, but by using another EventNotifier we can allow the MSI-X
core to emulate pending bits and re-fire the vector once unmasked.
MSI code gets updated as well to use the same setup and teardown
structures and functions.
Prior to this change, an igbvf assigned to a RHEL5 guest gets about
20Mbps and 50 transactions/s with netperf (remote or VF->PF). With
this change, we get line rate and 3k transactions/s remote or 2Gbps
and 6k+ transactions/s to the PF. No significant change is expected
for newer guests with more well behaved MSI-X support.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
hw/misc/vfio.c | 233 +++++++++++++++++++++++++++++++-------------------------
1 file changed, 131 insertions(+), 102 deletions(-)
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 4975ccf..7312ce2 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -121,6 +121,7 @@ typedef struct VFIOINTx {
typedef struct VFIOMSIVector {
EventNotifier interrupt; /* eventfd triggered on interrupt */
+ EventNotifier kvm_interrupt; /* eventfd triggered for KVM irqfd bypass */
struct VFIODevice *vdev; /* back pointer to device */
MSIMessage msg; /* cache the MSI message so we know when it changes */
int virq; /* KVM irqchip route for QEMU bypass */
@@ -682,10 +683,11 @@ static int vfio_enable_vectors(VFIODevice *vdev, bool msix)
for (i = 0; i < vdev->nr_vectors; i++) {
if (!vdev->msi_vectors[i].use) {
fds[i] = -1;
- continue;
+ } else if (vdev->msi_vectors[i].virq >= 0) {
+ fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].kvm_interrupt);
+ } else {
+ fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt);
}
-
- fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt);
}
ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
@@ -695,6 +697,52 @@ static int vfio_enable_vectors(VFIODevice *vdev, bool msix)
return ret;
}
+static void vfio_add_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage *msg,
+ bool msix)
+{
+ int virq;
+
+ if ((msix && !VFIO_ALLOW_KVM_MSIX) ||
+ (!msix && !VFIO_ALLOW_KVM_MSI) || !msg) {
+ return;
+ }
+
+ if (event_notifier_init(&vector->kvm_interrupt, 0)) {
+ return;
+ }
+
+ virq = kvm_irqchip_add_msi_route(kvm_state, *msg);
+ if (virq < 0) {
+ event_notifier_cleanup(&vector->kvm_interrupt);
+ return;
+ }
+
+ if (kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->kvm_interrupt,
+ NULL, virq) < 0) {
+ kvm_irqchip_release_virq(kvm_state, virq);
+ event_notifier_cleanup(&vector->kvm_interrupt);
+ return;
+ }
+
+ vector->msg = *msg;
+ vector->virq = virq;
+}
+
+static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector)
+{
+ kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->kvm_interrupt,
+ vector->virq);
+ kvm_irqchip_release_virq(kvm_state, vector->virq);
+ vector->virq = -1;
+ event_notifier_cleanup(&vector->kvm_interrupt);
+}
+
+static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg)
+{
+ kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg);
+ vector->msg = msg;
+}
+
static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
MSIMessage *msg, IOHandler *handler)
{
@@ -707,30 +755,32 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
vdev->host.function, nr);
vector = &vdev->msi_vectors[nr];
- vector->vdev = vdev;
- vector->use = true;
-
- msix_vector_use(pdev, nr);
- if (event_notifier_init(&vector->interrupt, 0)) {
- error_report("vfio: Error: event_notifier_init failed");
+ if (!vector->use) {
+ vector->vdev = vdev;
+ vector->virq = -1;
+ if (event_notifier_init(&vector->interrupt, 0)) {
+ error_report("vfio: Error: event_notifier_init failed");
+ }
+ vector->use = true;
+ msix_vector_use(pdev, nr);
}
+ qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+ handler, NULL, vector);
+
/*
* Attempt to enable route through KVM irqchip,
* default to userspace handling if unavailable.
*/
- vector->virq = msg && VFIO_ALLOW_KVM_MSIX ?
- kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
- if (vector->virq < 0 ||
- kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
- NULL, vector->virq) < 0) {
- if (vector->virq >= 0) {
- kvm_irqchip_release_virq(kvm_state, vector->virq);
- vector->virq = -1;
+ if (vector->virq >= 0) {
+ if (!msg) {
+ vfio_remove_kvm_msi_virq(vector);
+ } else {
+ vfio_update_kvm_msi_virq(vector, *msg);
}
- qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
- handler, NULL, vector);
+ } else {
+ vfio_add_kvm_msi_virq(vector, msg, true);
}
/*
@@ -761,7 +811,11 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
irq_set->count = 1;
pfd = (int32_t *)&irq_set->data;
- *pfd = event_notifier_get_fd(&vector->interrupt);
+ if (vector->virq >= 0) {
+ *pfd = event_notifier_get_fd(&vector->kvm_interrupt);
+ } else {
+ *pfd = event_notifier_get_fd(&vector->interrupt);
+ }
ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
g_free(irq_set);
@@ -783,50 +837,41 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
{
VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
VFIOMSIVector *vector = &vdev->msi_vectors[nr];
- int argsz;
- struct vfio_irq_set *irq_set;
- int32_t *pfd;
DPRINTF("%s(%04x:%02x:%02x.%x) vector %d released\n", __func__,
vdev->host.domain, vdev->host.bus, vdev->host.slot,
vdev->host.function, nr);
/*
- * XXX What's the right thing to do here? This turns off the interrupt
- * completely, but do we really just want to switch the interrupt to
- * bouncing through userspace and let msix.c drop it? Not sure.
+ * There are still old guests that mask and unmask vectors on every
+ * interrupt. If we're using QEMU bypass with a KVM irqfd, leave all of
+ * the KVM setup in place, simply switch VFIO to use the non-bypass
+ * eventfd. We'll then fire the interrupt through QEMU and the MSI-X
+ * core will mask the interrupt and set pending bits, allowing it to
+ * be re-asserted on unmask. Nothing to do if already using QEMU mode.
*/
- msix_vector_unuse(pdev, nr);
-
- argsz = sizeof(*irq_set) + sizeof(*pfd);
+ if (vector->virq >= 0) {
+ int argsz;
+ struct vfio_irq_set *irq_set;
+ int32_t *pfd;
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
- VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
- irq_set->start = nr;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
+ argsz = sizeof(*irq_set) + sizeof(*pfd);
- *pfd = -1;
+ irq_set = g_malloc0(argsz);
+ irq_set->argsz = argsz;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+ VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+ irq_set->start = nr;
+ irq_set->count = 1;
+ pfd = (int32_t *)&irq_set->data;
- ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+ *pfd = event_notifier_get_fd(&vector->interrupt);
- g_free(irq_set);
+ ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
- if (vector->virq < 0) {
- qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
- NULL, NULL, NULL);
- } else {
- kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt,
- vector->virq);
- kvm_irqchip_release_virq(kvm_state, vector->virq);
- vector->virq = -1;
+ g_free(irq_set);
}
-
- event_notifier_cleanup(&vector->interrupt);
- vector->use = false;
}
static void vfio_enable_msix(VFIODevice *vdev)
@@ -876,28 +921,28 @@ retry:
VFIOMSIVector *vector = &vdev->msi_vectors[i];
vector->vdev = vdev;
+ vector->virq = -1;
vector->use = true;
if (event_notifier_init(&vector->interrupt, 0)) {
error_report("vfio: Error: event_notifier_init failed");
}
+ qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+ vfio_msi_interrupt, NULL, vector);
+
vector->msg = msi_get_message(&vdev->pdev, i);
/*
* Attempt to enable route through KVM irqchip,
* default to userspace handling if unavailable.
*/
- vector->virq = VFIO_ALLOW_KVM_MSI ?
- kvm_irqchip_add_msi_route(kvm_state, vector->msg) : -1;
- if (vector->virq < 0 ||
- kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
- NULL, vector->virq) < 0) {
- qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
- vfio_msi_interrupt, NULL, vector);
- }
+ vfio_add_kvm_msi_virq(vector, &vector->msg, false);
}
+ /* Set interrupt type prior to possible interrupts */
+ vdev->interrupt = VFIO_INT_MSI;
+
ret = vfio_enable_vectors(vdev, false);
if (ret) {
if (ret < 0) {
@@ -910,14 +955,10 @@ retry:
for (i = 0; i < vdev->nr_vectors; i++) {
VFIOMSIVector *vector = &vdev->msi_vectors[i];
if (vector->virq >= 0) {
- kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt,
- vector->virq);
- kvm_irqchip_release_virq(kvm_state, vector->virq);
- vector->virq = -1;
- } else {
- qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
- NULL, NULL, NULL);
+ vfio_remove_kvm_msi_virq(vector);
}
+ qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+ NULL, NULL, NULL);
event_notifier_cleanup(&vector->interrupt);
}
@@ -929,11 +970,17 @@ retry:
}
vdev->nr_vectors = 0;
+ /*
+ * Failing to setup MSI doesn't really fall within any specification.
+ * Let's try leaving interrupts disabled and hope the guest figures
+ * out to fall back to INTx for this device.
+ */
+ error_report("vfio: Error: Failed to enable MSI");
+ vdev->interrupt = VFIO_INT_NONE;
+
return;
}
- vdev->interrupt = VFIO_INT_MSI;
-
DPRINTF("%s(%04x:%02x:%02x.%x) Enabled %d MSI vectors\n", __func__,
vdev->host.domain, vdev->host.bus, vdev->host.slot,
vdev->host.function, vdev->nr_vectors);
@@ -941,6 +988,20 @@ retry:
static void vfio_disable_msi_common(VFIODevice *vdev)
{
+ int i;
+
+ for (i = 0; i < vdev->nr_vectors; i++) {
+ VFIOMSIVector *vector = &vdev->msi_vectors[i];
+ if (vdev->msi_vectors[i].use) {
+ if (vector->virq >= 0) {
+ vfio_remove_kvm_msi_virq(vector);
+ }
+ qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+ NULL, NULL, NULL);
+ event_notifier_cleanup(&vector->interrupt);
+ }
+ }
+
g_free(vdev->msi_vectors);
vdev->msi_vectors = NULL;
vdev->nr_vectors = 0;
@@ -962,6 +1023,7 @@ static void vfio_disable_msix(VFIODevice *vdev)
for (i = 0; i < vdev->nr_vectors; i++) {
if (vdev->msi_vectors[i].use) {
vfio_msix_vector_release(&vdev->pdev, i);
+ msix_vector_unuse(&vdev->pdev, i);
}
}
@@ -977,30 +1039,7 @@ static void vfio_disable_msix(VFIODevice *vdev)
static void vfio_disable_msi(VFIODevice *vdev)
{
- int i;
-
vfio_disable_irqindex(vdev, VFIO_PCI_MSI_IRQ_INDEX);
-
- for (i = 0; i < vdev->nr_vectors; i++) {
- VFIOMSIVector *vector = &vdev->msi_vectors[i];
-
- if (!vector->use) {
- continue;
- }
-
- if (vector->virq >= 0) {
- kvm_irqchip_remove_irqfd_notifier(kvm_state,
- &vector->interrupt, vector->virq);
- kvm_irqchip_release_virq(kvm_state, vector->virq);
- vector->virq = -1;
- } else {
- qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
- NULL, NULL, NULL);
- }
-
- event_notifier_cleanup(&vector->interrupt);
- }
-
vfio_disable_msi_common(vdev);
DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
@@ -1020,17 +1059,7 @@ static void vfio_update_msi(VFIODevice *vdev)
}
msg = msi_get_message(&vdev->pdev, i);
-
- if (msg.address != vector->msg.address ||
- msg.data != vector->msg.data) {
-
- DPRINTF("%s(%04x:%02x:%02x.%x) MSI vector %d changed\n",
- __func__, vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function, i);
-
- kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg);
- vector->msg = msg;
- }
+ vfio_update_kvm_msi_virq(vector, msg);
}
}
next prev parent reply other threads:[~2014-06-30 17:28 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-06-30 17:27 [Qemu-devel] [PULL 0/4] VFIO fixes for QEMU 2.1 Alex Williamson
2014-06-30 17:27 ` [Qemu-devel] [PULL 1/4] vfio-pci: Fix MSI/X debug code Alex Williamson
2014-06-30 17:27 ` Alex Williamson [this message]
2014-06-30 17:28 ` [Qemu-devel] [PULL 3/4] vfio: Make BARs native endian Alex Williamson
2014-09-08 12:32 ` Alexander Graf
2014-06-30 17:28 ` [Qemu-devel] [PULL 4/4] vfio: use correct runstate Alex Williamson
2014-06-30 17:47 ` [Qemu-devel] [PULL 0/4] VFIO fixes for QEMU 2.1 Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140630172758.24172.89340.stgit@bling.home \
--to=alex.williamson@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.