* [Qemu-devel] [PATCH 1/3] linux-headers: Update to 3.7-rc5
2012-11-13 19:48 [Qemu-devel] [PULL 0/3] vfio-pci for 1.3-rc0 Alex Williamson
@ 2012-11-13 19:49 ` Alex Williamson
2012-11-13 19:49 ` [Qemu-devel] [PATCH 2/3] vfio-pci: Add KVM INTx acceleration Alex Williamson
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Alex Williamson @ 2012-11-13 19:49 UTC (permalink / raw)
To: aliguori; +Cc: qemu-devel, kvm
update-linux-headers.sh script run against Linux tag v3.7-rc5
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
| 6 +++---
| 8 +-------
| 17 +++++++++++++++++
| 25 +++++++++++++++++++++----
| 6 +++---
| 6 +++---
| 6 +++---
| 6 +++---
8 files changed, 54 insertions(+), 26 deletions(-)
--git a/linux-headers/asm-powerpc/kvm_para.h b/linux-headers/asm-powerpc/kvm_para.h
index c047a84..5e04383 100644
--- a/linux-headers/asm-powerpc/kvm_para.h
+++ b/linux-headers/asm-powerpc/kvm_para.h
@@ -17,8 +17,8 @@
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
-#ifndef __POWERPC_KVM_PARA_H__
-#define __POWERPC_KVM_PARA_H__
+#ifndef _UAPI__POWERPC_KVM_PARA_H__
+#define _UAPI__POWERPC_KVM_PARA_H__
#include <linux/types.h>
@@ -87,4 +87,4 @@ struct kvm_vcpu_arch_shared {
#define KVM_MAGIC_FEAT_MAS0_TO_SPRG7 (1 << 1)
-#endif /* __POWERPC_KVM_PARA_H__ */
+#endif /* _UAPI__POWERPC_KVM_PARA_H__ */
--git a/linux-headers/asm-s390/kvm_para.h b/linux-headers/asm-s390/kvm_para.h
index 870051f..ff1f4e7 100644
--- a/linux-headers/asm-s390/kvm_para.h
+++ b/linux-headers/asm-s390/kvm_para.h
@@ -1,5 +1,5 @@
/*
- * definition for paravirtual devices on s390
+ * User API definitions for paravirtual devices on s390
*
* Copyright IBM Corp. 2008
*
@@ -9,9 +9,3 @@
*
* Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
*/
-
-#ifndef __S390_KVM_PARA_H
-#define __S390_KVM_PARA_H
-
-
-#endif /* __S390_KVM_PARA_H */
--git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index 246617e..a65ec29 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -9,6 +9,22 @@
#include <linux/types.h>
#include <linux/ioctl.h>
+#define DE_VECTOR 0
+#define DB_VECTOR 1
+#define BP_VECTOR 3
+#define OF_VECTOR 4
+#define BR_VECTOR 5
+#define UD_VECTOR 6
+#define NM_VECTOR 7
+#define DF_VECTOR 8
+#define TS_VECTOR 10
+#define NP_VECTOR 11
+#define SS_VECTOR 12
+#define GP_VECTOR 13
+#define PF_VECTOR 14
+#define MF_VECTOR 16
+#define MC_VECTOR 18
+
/* Select x86 specific features in <linux/kvm.h> */
#define __KVM_HAVE_PIT
#define __KVM_HAVE_IOAPIC
@@ -25,6 +41,7 @@
#define __KVM_HAVE_DEBUGREGS
#define __KVM_HAVE_XSAVE
#define __KVM_HAVE_XCRS
+#define __KVM_HAVE_READONLY_MEM
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
--git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 4b9e575..81d2feb 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -101,9 +101,13 @@ struct kvm_userspace_memory_region {
__u64 userspace_addr; /* start of the userspace allocated memory */
};
-/* for kvm_memory_region::flags */
-#define KVM_MEM_LOG_DIRTY_PAGES 1UL
-#define KVM_MEMSLOT_INVALID (1UL << 1)
+/*
+ * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace,
+ * other bits are reserved for kvm internal use which are defined in
+ * include/linux/kvm_host.h.
+ */
+#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
+#define KVM_MEM_READONLY (1UL << 1)
/* for KVM_IRQ_LINE */
struct kvm_irq_level {
@@ -618,6 +622,10 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_PPC_GET_SMMU_INFO 78
#define KVM_CAP_S390_COW 79
#define KVM_CAP_PPC_ALLOC_HTAB 80
+#ifdef __KVM_HAVE_READONLY_MEM
+#define KVM_CAP_READONLY_MEM 81
+#endif
+#define KVM_CAP_IRQFD_RESAMPLE 82
#ifdef KVM_CAP_IRQ_ROUTING
@@ -683,12 +691,21 @@ struct kvm_xen_hvm_config {
#endif
#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
+/*
+ * Available with KVM_CAP_IRQFD_RESAMPLE
+ *
+ * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
+ * the irqfd to operate in resampling mode for level triggered interrupt
+ * emlation. See Documentation/virtual/kvm/api.txt.
+ */
+#define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
struct kvm_irqfd {
__u32 fd;
__u32 gsi;
__u32 flags;
- __u8 pad[20];
+ __u32 resamplefd;
+ __u8 pad[16];
};
struct kvm_clock_data {
--git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h
index 7bdcf93..cea2c5c 100644
--- a/linux-headers/linux/kvm_para.h
+++ b/linux-headers/linux/kvm_para.h
@@ -1,5 +1,5 @@
-#ifndef __LINUX_KVM_PARA_H
-#define __LINUX_KVM_PARA_H
+#ifndef _UAPI__LINUX_KVM_PARA_H
+#define _UAPI__LINUX_KVM_PARA_H
/*
* This header file provides a method for making a hypercall to the host
@@ -25,4 +25,4 @@
*/
#include <asm/kvm_para.h>
-#endif /* __LINUX_KVM_PARA_H */
+#endif /* _UAPI__LINUX_KVM_PARA_H */
--git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index f787b72..4758d1b 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -8,8 +8,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
-#ifndef VFIO_H
-#define VFIO_H
+#ifndef _UAPIVFIO_H
+#define _UAPIVFIO_H
#include <linux/types.h>
#include <linux/ioctl.h>
@@ -365,4 +365,4 @@ struct vfio_iommu_type1_dma_unmap {
#define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
-#endif /* VFIO_H */
+#endif /* _UAPIVFIO_H */
--git a/linux-headers/linux/virtio_config.h b/linux-headers/linux/virtio_config.h
index 4f51d8f..b7cda39 100644
--- a/linux-headers/linux/virtio_config.h
+++ b/linux-headers/linux/virtio_config.h
@@ -1,5 +1,5 @@
-#ifndef _LINUX_VIRTIO_CONFIG_H
-#define _LINUX_VIRTIO_CONFIG_H
+#ifndef _UAPI_LINUX_VIRTIO_CONFIG_H
+#define _UAPI_LINUX_VIRTIO_CONFIG_H
/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
* anyone can use the definitions to implement compatible drivers/servers.
*
@@ -51,4 +51,4 @@
* suppressed them? */
#define VIRTIO_F_NOTIFY_ON_EMPTY 24
-#endif /* _LINUX_VIRTIO_CONFIG_H */
+#endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */
--git a/linux-headers/linux/virtio_ring.h b/linux-headers/linux/virtio_ring.h
index 1b333e2..921694a 100644
--- a/linux-headers/linux/virtio_ring.h
+++ b/linux-headers/linux/virtio_ring.h
@@ -1,5 +1,5 @@
-#ifndef _LINUX_VIRTIO_RING_H
-#define _LINUX_VIRTIO_RING_H
+#ifndef _UAPI_LINUX_VIRTIO_RING_H
+#define _UAPI_LINUX_VIRTIO_RING_H
/* An interface for efficient virtio implementation, currently for use by KVM
* and lguest, but hopefully others soon. Do NOT change this since it will
* break existing servers and clients.
@@ -160,4 +160,4 @@ static __inline__ int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old
return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old);
}
-#endif /* _LINUX_VIRTIO_RING_H */
+#endif /* _UAPI_LINUX_VIRTIO_RING_H */
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [Qemu-devel] [PATCH 2/3] vfio-pci: Add KVM INTx acceleration
2012-11-13 19:48 [Qemu-devel] [PULL 0/3] vfio-pci for 1.3-rc0 Alex Williamson
2012-11-13 19:49 ` [Qemu-devel] [PATCH 1/3] linux-headers: Update to 3.7-rc5 Alex Williamson
@ 2012-11-13 19:49 ` Alex Williamson
2012-11-13 19:49 ` [Qemu-devel] [PATCH 3/3] vfio-pci: Use common msi_get_message Alex Williamson
2012-11-14 16:27 ` [Qemu-devel] [PULL 0/3] vfio-pci for 1.3-rc0 Anthony Liguori
3 siblings, 0 replies; 5+ messages in thread
From: Alex Williamson @ 2012-11-13 19:49 UTC (permalink / raw)
To: aliguori; +Cc: qemu-devel, kvm
This makes use of the new level irqfd support enabling bypass of qemu
userspace both on INTx injection and unmask. This significantly
boosts the performance of devices making use of legacy interrupts (ex.
~60% better netperf TCP_RR scores for an e1000e assigned to a Linux
guest and booted with pci=nomsi). This also avoids flipping mmaps on
and off to simulate EOIs, so greatly improves performance of device
access in addition to interrupt latency.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
hw/vfio_pci.c | 186 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 186 insertions(+)
diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index 0473ae8..4e9c2dd 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -185,6 +185,21 @@ static void vfio_unmask_intx(VFIODevice *vdev)
ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}
+#ifdef CONFIG_KVM /* Unused outside of CONFIG_KVM code */
+static void vfio_mask_intx(VFIODevice *vdev)
+{
+ struct vfio_irq_set irq_set = {
+ .argsz = sizeof(irq_set),
+ .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK,
+ .index = VFIO_PCI_INTX_IRQ_INDEX,
+ .start = 0,
+ .count = 1,
+ };
+
+ ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
+}
+#endif
+
/*
* Disabling BAR mmaping can be slow, but toggling it around INTx can
* also be a huge overhead. We try to get the best of both worlds by
@@ -248,6 +263,161 @@ static void vfio_eoi(VFIODevice *vdev)
vfio_unmask_intx(vdev);
}
+static void vfio_enable_intx_kvm(VFIODevice *vdev)
+{
+#ifdef CONFIG_KVM
+ struct kvm_irqfd irqfd = {
+ .fd = event_notifier_get_fd(&vdev->intx.interrupt),
+ .gsi = vdev->intx.route.irq,
+ .flags = KVM_IRQFD_FLAG_RESAMPLE,
+ };
+ struct vfio_irq_set *irq_set;
+ int ret, argsz;
+ int32_t *pfd;
+
+ if (!kvm_irqchip_in_kernel() ||
+ vdev->intx.route.mode != PCI_INTX_ENABLED ||
+ !kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) {
+ return;
+ }
+
+ /* Get to a known interrupt state */
+ qemu_set_fd_handler(irqfd.fd, NULL, NULL, vdev);
+ vfio_mask_intx(vdev);
+ vdev->intx.pending = false;
+ qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);
+
+ /* Get an eventfd for resample/unmask */
+ if (event_notifier_init(&vdev->intx.unmask, 0)) {
+ error_report("vfio: Error: event_notifier_init failed eoi\n");
+ goto fail;
+ }
+
+ /* KVM triggers it, VFIO listens for it */
+ irqfd.resamplefd = event_notifier_get_fd(&vdev->intx.unmask);
+
+ if (kvm_vm_ioctl(kvm_state, KVM_IRQFD, &irqfd)) {
+ error_report("vfio: Error: Failed to setup resample irqfd: %m\n");
+ goto fail_irqfd;
+ }
+
+ argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+ irq_set = g_malloc0(argsz);
+ irq_set->argsz = argsz;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
+ irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
+ irq_set->start = 0;
+ irq_set->count = 1;
+ pfd = (int32_t *)&irq_set->data;
+
+ *pfd = irqfd.resamplefd;
+
+ ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+ g_free(irq_set);
+ if (ret) {
+ error_report("vfio: Error: Failed to setup INTx unmask fd: %m\n");
+ goto fail_vfio;
+ }
+
+ /* Let'em rip */
+ vfio_unmask_intx(vdev);
+
+ vdev->intx.kvm_accel = true;
+
+ DPRINTF("%s(%04x:%02x:%02x.%x) KVM INTx accel enabled\n",
+ __func__, vdev->host.domain, vdev->host.bus,
+ vdev->host.slot, vdev->host.function);
+
+ return;
+
+fail_vfio:
+ irqfd.flags = KVM_IRQFD_FLAG_DEASSIGN;
+ kvm_vm_ioctl(kvm_state, KVM_IRQFD, &irqfd);
+fail_irqfd:
+ event_notifier_cleanup(&vdev->intx.unmask);
+fail:
+ qemu_set_fd_handler(irqfd.fd, vfio_intx_interrupt, NULL, vdev);
+ vfio_unmask_intx(vdev);
+#endif
+}
+
+static void vfio_disable_intx_kvm(VFIODevice *vdev)
+{
+#ifdef CONFIG_KVM
+ struct kvm_irqfd irqfd = {
+ .fd = event_notifier_get_fd(&vdev->intx.interrupt),
+ .gsi = vdev->intx.route.irq,
+ .flags = KVM_IRQFD_FLAG_DEASSIGN,
+ };
+
+ if (!vdev->intx.kvm_accel) {
+ return;
+ }
+
+ /*
+ * Get to a known state, hardware masked, QEMU ready to accept new
+ * interrupts, QEMU IRQ de-asserted.
+ */
+ vfio_mask_intx(vdev);
+ vdev->intx.pending = false;
+ qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);
+
+ /* Tell KVM to stop listening for an INTx irqfd */
+ if (kvm_vm_ioctl(kvm_state, KVM_IRQFD, &irqfd)) {
+ error_report("vfio: Error: Failed to disable INTx irqfd: %m\n");
+ }
+
+ /* We only need to close the eventfd for VFIO to cleanup the kernel side */
+ event_notifier_cleanup(&vdev->intx.unmask);
+
+ /* QEMU starts listening for interrupt events. */
+ qemu_set_fd_handler(irqfd.fd, vfio_intx_interrupt, NULL, vdev);
+
+ vdev->intx.kvm_accel = false;
+
+ /* If we've missed an event, let it re-fire through QEMU */
+ vfio_unmask_intx(vdev);
+
+ DPRINTF("%s(%04x:%02x:%02x.%x) KVM INTx accel disabled\n",
+ __func__, vdev->host.domain, vdev->host.bus,
+ vdev->host.slot, vdev->host.function);
+#endif
+}
+
+static void vfio_update_irq(PCIDevice *pdev)
+{
+ VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+ PCIINTxRoute route;
+
+ if (vdev->interrupt != VFIO_INT_INTx) {
+ return;
+ }
+
+ route = pci_device_route_intx_to_irq(&vdev->pdev, vdev->intx.pin);
+
+ if (!pci_intx_route_changed(&vdev->intx.route, &route)) {
+ return; /* Nothing changed */
+ }
+
+ DPRINTF("%s(%04x:%02x:%02x.%x) IRQ moved %d -> %d\n", __func__,
+ vdev->host.domain, vdev->host.bus, vdev->host.slot,
+ vdev->host.function, vdev->intx.route.irq, route.irq);
+
+ vfio_disable_intx_kvm(vdev);
+
+ vdev->intx.route = route;
+
+ if (route.mode != PCI_INTX_ENABLED) {
+ return;
+ }
+
+ vfio_enable_intx_kvm(vdev);
+
+ /* Re-enable the interrupt in cased we missed an EOI */
+ vfio_eoi(vdev);
+}
+
static int vfio_enable_intx(VFIODevice *vdev)
{
uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
@@ -262,6 +432,18 @@ static int vfio_enable_intx(VFIODevice *vdev)
vfio_disable_interrupts(vdev);
vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */
+
+#ifdef CONFIG_KVM
+ /*
+ * Only conditional to avoid generating error messages on platforms
+ * where we won't actually use the result anyway.
+ */
+ if (kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) {
+ vdev->intx.route = pci_device_route_intx_to_irq(&vdev->pdev,
+ vdev->intx.pin);
+ }
+#endif
+
ret = event_notifier_init(&vdev->intx.interrupt, 0);
if (ret) {
error_report("vfio: Error: event_notifier_init failed\n");
@@ -290,6 +472,8 @@ static int vfio_enable_intx(VFIODevice *vdev)
return -errno;
}
+ vfio_enable_intx_kvm(vdev);
+
vdev->interrupt = VFIO_INT_INTx;
DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
@@ -303,6 +487,7 @@ static void vfio_disable_intx(VFIODevice *vdev)
int fd;
qemu_del_timer(vdev->intx.mmap_timer);
+ vfio_disable_intx_kvm(vdev);
vfio_disable_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX);
vdev->intx.pending = false;
qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);
@@ -1839,6 +2024,7 @@ static int vfio_initfn(PCIDevice *pdev)
if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
vdev->intx.mmap_timer = qemu_new_timer_ms(vm_clock,
vfio_intx_mmap_enable, vdev);
+ pci_device_set_intx_routing_notifier(&vdev->pdev, vfio_update_irq);
ret = vfio_enable_intx(vdev);
if (ret) {
goto out_teardown;
^ permalink raw reply related [flat|nested] 5+ messages in thread