* [PATCHv6 1/4] qemu-kvm: add vhost.h header
2010-03-17 13:04 [PATCHv6 0/4] qemu-kvm: vhost net port Michael S. Tsirkin
@ 2010-03-17 13:04 ` Michael S. Tsirkin
2010-03-17 13:04 ` [PATCHv6 2/4] kvm: irqfd support Michael S. Tsirkin
` (3 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: Michael S. Tsirkin @ 2010-03-17 13:04 UTC (permalink / raw)
To: kvm
This makes it possible to build vhost support
on systems which do not have this header.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
kvm/include/linux/vhost.h | 130 +++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 130 insertions(+), 0 deletions(-)
create mode 100644 kvm/include/linux/vhost.h
diff --git a/kvm/include/linux/vhost.h b/kvm/include/linux/vhost.h
new file mode 100644
index 0000000..165a484
--- /dev/null
+++ b/kvm/include/linux/vhost.h
@@ -0,0 +1,130 @@
+#ifndef _LINUX_VHOST_H
+#define _LINUX_VHOST_H
+/* Userspace interface for in-kernel virtio accelerators. */
+
+/* vhost is used to reduce the number of system calls involved in virtio.
+ *
+ * Existing virtio net code is used in the guest without modification.
+ *
+ * This header includes interface used by userspace hypervisor for
+ * device configuration.
+ */
+
+#include <linux/types.h>
+
+#include <linux/ioctl.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+
+struct vhost_vring_state {
+ unsigned int index;
+ unsigned int num;
+};
+
+struct vhost_vring_file {
+ unsigned int index;
+ int fd; /* Pass -1 to unbind from file. */
+
+};
+
+struct vhost_vring_addr {
+ unsigned int index;
+ /* Option flags. */
+ unsigned int flags;
+ /* Flag values: */
+ /* Whether log address is valid. If set enables logging. */
+#define VHOST_VRING_F_LOG 0
+
+ /* Start of array of descriptors (virtually contiguous) */
+ __u64 desc_user_addr;
+ /* Used structure address. Must be 32 bit aligned */
+ __u64 used_user_addr;
+ /* Available structure address. Must be 16 bit aligned */
+ __u64 avail_user_addr;
+ /* Logging support. */
+ /* Log writes to used structure, at offset calculated from specified
+ * address. Address must be 32 bit aligned. */
+ __u64 log_guest_addr;
+};
+
+struct vhost_memory_region {
+ __u64 guest_phys_addr;
+ __u64 memory_size; /* bytes */
+ __u64 userspace_addr;
+ __u64 flags_padding; /* No flags are currently specified. */
+};
+
+/* All region addresses and sizes must be 4K aligned. */
+#define VHOST_PAGE_SIZE 0x1000
+
+struct vhost_memory {
+ __u32 nregions;
+ __u32 padding;
+ struct vhost_memory_region regions[0];
+};
+
+/* ioctls */
+
+#define VHOST_VIRTIO 0xAF
+
+/* Features bitmask for forward compatibility. Transport bits are used for
+ * vhost specific features. */
+#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64)
+#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64)
+
+/* Set current process as the (exclusive) owner of this file descriptor. This
+ * must be called before any other vhost command. Further calls to
+ * VHOST_SET_OWNER fail until VHOST_RESET_OWNER is called. */
+#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01)
+/* Give up ownership, and reset the device to default values.
+ * Allows subsequent call to VHOST_SET_OWNER to succeed. */
+#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02)
+
+/* Set up/modify memory layout */
+#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory)
+
+/* Write logging setup. */
+/* Memory writes can optionally be logged by setting bit at an offset
+ * (calculated from the physical address) from specified log base.
+ * The bit is set using an atomic 32 bit operation. */
+/* Set base address for logging. */
+#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
+/* Specify an eventfd file descriptor to signal on log write. */
+#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
+
+/* Ring setup. */
+/* Set number of descriptors in ring. This parameter can not
+ * be modified while ring is running (bound to a device). */
+#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
+/* Set addresses for the ring. */
+#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
+/* Base value where queue looks for available descriptors */
+#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
+/* Get accessor: reads index, writes value in num */
+#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
+
+/* The following ioctls use eventfd file descriptors to signal and poll
+ * for events. */
+
+/* Set eventfd to poll for added buffers */
+#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
+/* Set eventfd to signal when buffers have been used */
+#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
+/* Set eventfd to signal an error */
+#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
+
+/* VHOST_NET specific defines */
+
+/* Attach virtio net ring to a raw socket, or tap device.
+ * The socket must be already bound to an ethernet device, this device will be
+ * used for transmit. Pass fd -1 to unbind from the socket and the transmit
+ * device. This can be used to stop the ring (e.g. for migration). */
+#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
+
+/* Feature bits */
+/* Log all write descriptors. Can be changed while device is active. */
+#define VHOST_F_LOG_ALL 26
+/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */
+#define VHOST_NET_F_VIRTIO_NET_HDR 27
+
+#endif
--
1.7.0.18.g0d53a5
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCHv6 2/4] kvm: irqfd support
2010-03-17 13:04 [PATCHv6 0/4] qemu-kvm: vhost net port Michael S. Tsirkin
2010-03-17 13:04 ` [PATCHv6 1/4] qemu-kvm: add vhost.h header Michael S. Tsirkin
@ 2010-03-17 13:04 ` Michael S. Tsirkin
2010-03-17 13:04 ` [PATCHv6 3/4] msix: add mask/unmask notifiers Michael S. Tsirkin
` (2 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: Michael S. Tsirkin @ 2010-03-17 13:04 UTC (permalink / raw)
To: kvm
Add API to assign/deassign irqfd to kvm.
Add stub so that users do not have to use
ifdefs.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
kvm-all.c | 19 +++++++++++++++++++
kvm.h | 10 ++++++++++
2 files changed, 29 insertions(+), 0 deletions(-)
diff --git a/kvm-all.c b/kvm-all.c
index 7b05462..1a15662 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1200,5 +1200,24 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
}
#endif
+#if defined(KVM_IRQFD)
+int kvm_set_irqfd(int gsi, int fd, bool assigned)
+{
+ struct kvm_irqfd irqfd = {
+ .fd = fd,
+ .gsi = gsi,
+ .flags = assigned ? 0 : KVM_IRQFD_FLAG_DEASSIGN,
+ };
+ int r;
+ if (!kvm_enabled() || !kvm_irqchip_in_kernel())
+ return -ENOSYS;
+
+ r = kvm_vm_ioctl(kvm_state, KVM_IRQFD, &irqfd);
+ if (r < 0)
+ return r;
+ return 0;
+}
+#endif
+
#undef PAGE_SIZE
#include "qemu-kvm.c"
diff --git a/kvm.h b/kvm.h
index 0951380..72dcaca 100644
--- a/kvm.h
+++ b/kvm.h
@@ -180,4 +180,14 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, uint16_t val, bool assign)
}
#endif
+#if defined(KVM_IRQFD) && defined(CONFIG_KVM)
+int kvm_set_irqfd(int gsi, int fd, bool assigned);
+#else
+static inline
+int kvm_set_irqfd(int gsi, int fd, bool assigned)
+{
+ return -ENOSYS;
+}
+#endif
+
#endif
--
1.7.0.18.g0d53a5
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCHv6 3/4] msix: add mask/unmask notifiers
2010-03-17 13:04 [PATCHv6 0/4] qemu-kvm: vhost net port Michael S. Tsirkin
2010-03-17 13:04 ` [PATCHv6 1/4] qemu-kvm: add vhost.h header Michael S. Tsirkin
2010-03-17 13:04 ` [PATCHv6 2/4] kvm: irqfd support Michael S. Tsirkin
@ 2010-03-17 13:04 ` Michael S. Tsirkin
2010-03-17 13:04 ` [PATCHv6 4/4] virtio-pci: irqfd support Michael S. Tsirkin
2010-03-24 12:38 ` [PATCHv6 0/4] qemu-kvm: vhost net port Avi Kivity
4 siblings, 0 replies; 9+ messages in thread
From: Michael S. Tsirkin @ 2010-03-17 13:04 UTC (permalink / raw)
To: kvm
Support per-vector callbacks for msix mask/unmask.
Will be used for vhost net.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/msix.c | 36 +++++++++++++++++++++++++++++++++++-
hw/msix.h | 1 +
hw/pci.h | 6 ++++++
3 files changed, 42 insertions(+), 1 deletions(-)
diff --git a/hw/msix.c b/hw/msix.c
index faee0b2..3ec8805 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -317,6 +317,13 @@ static void msix_mmio_writel(void *opaque, target_phys_addr_t addr,
if (kvm_enabled() && kvm_irqchip_in_kernel()) {
kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, vector));
}
+ if (was_masked != msix_is_masked(dev, vector) &&
+ dev->msix_mask_notifier && dev->msix_mask_notifier_opaque[vector]) {
+ int r = dev->msix_mask_notifier(dev, vector,
+ dev->msix_mask_notifier_opaque[vector],
+ msix_is_masked(dev, vector));
+ assert(r >= 0);
+ }
msix_handle_mask_update(dev, vector);
}
@@ -355,10 +362,18 @@ void msix_mmio_map(PCIDevice *d, int region_num,
static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
{
- int vector;
+ int vector, r;
for (vector = 0; vector < nentries; ++vector) {
unsigned offset = vector * MSIX_ENTRY_SIZE + MSIX_VECTOR_CTRL;
+ int was_masked = msix_is_masked(dev, vector);
dev->msix_table_page[offset] |= MSIX_VECTOR_MASK;
+ if (was_masked != msix_is_masked(dev, vector) &&
+ dev->msix_mask_notifier && dev->msix_mask_notifier_opaque[vector]) {
+ r = dev->msix_mask_notifier(dev, vector,
+ dev->msix_mask_notifier_opaque[vector],
+ msix_is_masked(dev, vector));
+ assert(r >= 0);
+ }
}
}
@@ -381,6 +396,9 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
sizeof *dev->msix_irq_entries);
}
#endif
+ dev->msix_mask_notifier_opaque =
+ qemu_mallocz(nentries * sizeof *dev->msix_mask_notifier_opaque);
+ dev->msix_mask_notifier = NULL;
dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
sizeof *dev->msix_entry_used);
@@ -443,6 +461,8 @@ int msix_uninit(PCIDevice *dev)
dev->msix_entry_used = NULL;
qemu_free(dev->msix_irq_entries);
dev->msix_irq_entries = NULL;
+ qemu_free(dev->msix_mask_notifier_opaque);
+ dev->msix_mask_notifier_opaque = NULL;
dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
return 0;
}
@@ -586,3 +606,17 @@ void msix_unuse_all_vectors(PCIDevice *dev)
return;
msix_free_irq_entries(dev);
}
+
+int msix_set_mask_notifier(PCIDevice *dev, unsigned vector, void *opaque)
+{
+ int r = 0;
+ if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
+ return 0;
+
+ if (dev->msix_mask_notifier)
+ r = dev->msix_mask_notifier(dev, vector, opaque,
+ msix_is_masked(dev, vector));
+ if (r >= 0)
+ dev->msix_mask_notifier_opaque[vector] = opaque;
+ return r;
+}
diff --git a/hw/msix.h b/hw/msix.h
index a9f7993..f167231 100644
--- a/hw/msix.h
+++ b/hw/msix.h
@@ -33,4 +33,5 @@ void msix_reset(PCIDevice *dev);
extern int msix_supported;
+int msix_set_mask_notifier(PCIDevice *dev, unsigned vector, void *opaque);
#endif
diff --git a/hw/pci.h b/hw/pci.h
index 1eab8f2..100104c 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -136,6 +136,9 @@ enum {
#define PCI_CAPABILITY_CONFIG_MSI_LENGTH 0x10
#define PCI_CAPABILITY_CONFIG_MSIX_LENGTH 0x10
+typedef int (*msix_mask_notifier_func)(PCIDevice *, unsigned vector,
+ void *opaque, int masked);
+
struct PCIDevice {
DeviceState qdev;
/* PCI config space */
@@ -201,6 +204,9 @@ struct PCIDevice {
struct kvm_irq_routing_entry *msix_irq_entries;
+ void **msix_mask_notifier_opaque;
+ msix_mask_notifier_func msix_mask_notifier;
+
/* Device capability configuration space */
struct {
int supported;
--
1.7.0.18.g0d53a5
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCHv6 4/4] virtio-pci: irqfd support
2010-03-17 13:04 [PATCHv6 0/4] qemu-kvm: vhost net port Michael S. Tsirkin
` (2 preceding siblings ...)
2010-03-17 13:04 ` [PATCHv6 3/4] msix: add mask/unmask notifiers Michael S. Tsirkin
@ 2010-03-17 13:04 ` Michael S. Tsirkin
2010-03-24 12:38 ` [PATCHv6 0/4] qemu-kvm: vhost net port Avi Kivity
4 siblings, 0 replies; 9+ messages in thread
From: Michael S. Tsirkin @ 2010-03-17 13:04 UTC (permalink / raw)
To: kvm
Use irqfd when supported by kernel.
This uses msix mask notifiers: when vector is masked, we poll it from
userspace. When it is unmasked, we poll it from kernel.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/virtio-pci.c | 27 +++++++++++++++++++++++++++
1 files changed, 27 insertions(+), 0 deletions(-)
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 4255d98..f8d8022 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -402,6 +402,27 @@ static void virtio_pci_guest_notifier_read(void *opaque)
}
}
+static int virtio_pci_mask_notifier(PCIDevice *dev, unsigned vector,
+ void *opaque, int masked)
+{
+ VirtQueue *vq = opaque;
+ EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
+ int r = kvm_set_irqfd(dev->msix_irq_entries[vector].gsi,
+ event_notifier_get_fd(notifier),
+ !masked);
+ if (r < 0) {
+ return (r == -ENOSYS) ? 0 : r;
+ }
+ if (masked) {
+ qemu_set_fd_handler(event_notifier_get_fd(notifier),
+ virtio_pci_guest_notifier_read, NULL, vq);
+ } else {
+ qemu_set_fd_handler(event_notifier_get_fd(notifier),
+ NULL, NULL, NULL);
+ }
+ return 0;
+}
+
static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
{
VirtIOPCIProxy *proxy = opaque;
@@ -415,7 +436,11 @@ static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
}
qemu_set_fd_handler(event_notifier_get_fd(notifier),
virtio_pci_guest_notifier_read, NULL, vq);
+ msix_set_mask_notifier(&proxy->pci_dev,
+ virtio_queue_vector(proxy->vdev, n), vq);
} else {
+ msix_set_mask_notifier(&proxy->pci_dev,
+ virtio_queue_vector(proxy->vdev, n), NULL);
qemu_set_fd_handler(event_notifier_get_fd(notifier),
NULL, NULL, NULL);
event_notifier_cleanup(notifier);
@@ -500,6 +525,8 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
proxy->pci_dev.config_write = virtio_write_config;
+ proxy->pci_dev.msix_mask_notifier = virtio_pci_mask_notifier;
+
size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev) + vdev->config_len;
if (size & (size-1))
size = 1 << qemu_fls(size);
--
1.7.0.18.g0d53a5
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCHv6 0/4] qemu-kvm: vhost net port
2010-03-17 13:04 [PATCHv6 0/4] qemu-kvm: vhost net port Michael S. Tsirkin
` (3 preceding siblings ...)
2010-03-17 13:04 ` [PATCHv6 4/4] virtio-pci: irqfd support Michael S. Tsirkin
@ 2010-03-24 12:38 ` Avi Kivity
2010-04-04 11:46 ` Michael S. Tsirkin
4 siblings, 1 reply; 9+ messages in thread
From: Avi Kivity @ 2010-03-24 12:38 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: kvm
On 03/17/2010 03:04 PM, Michael S. Tsirkin wrote:
> This is port of vhost v6 patch set I posted previously to qemu-kvm, for
> those that want to get good performance out of it :) This patchset needs
> to be applied when qemu.git one gets merged, this includes irqchip
> support.
>
>
Ping me when this happens please.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCHv6 0/4] qemu-kvm: vhost net port
2010-03-24 12:38 ` [PATCHv6 0/4] qemu-kvm: vhost net port Avi Kivity
@ 2010-04-04 11:46 ` Michael S. Tsirkin
2010-04-04 16:30 ` Avi Kivity
0 siblings, 1 reply; 9+ messages in thread
From: Michael S. Tsirkin @ 2010-04-04 11:46 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm
On Wed, Mar 24, 2010 at 02:38:57PM +0200, Avi Kivity wrote:
> On 03/17/2010 03:04 PM, Michael S. Tsirkin wrote:
>> This is port of vhost v6 patch set I posted previously to qemu-kvm, for
>> those that want to get good performance out of it :) This patchset needs
>> to be applied when qemu.git one gets merged, this includes irqchip
>> support.
>>
>>
>
> Ping me when this happens please.
Ping
> --
> error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCHv6 0/4] qemu-kvm: vhost net port
2010-04-04 11:46 ` Michael S. Tsirkin
@ 2010-04-04 16:30 ` Avi Kivity
2010-04-06 19:05 ` Marcelo Tosatti
0 siblings, 1 reply; 9+ messages in thread
From: Avi Kivity @ 2010-04-04 16:30 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Michael S. Tsirkin, kvm
On 04/04/2010 02:46 PM, Michael S. Tsirkin wrote:
> On Wed, Mar 24, 2010 at 02:38:57PM +0200, Avi Kivity wrote:
>
>> On 03/17/2010 03:04 PM, Michael S. Tsirkin wrote:
>>
>>> This is port of vhost v6 patch set I posted previously to qemu-kvm, for
>>> those that want to get good performance out of it :) This patchset needs
>>> to be applied when qemu.git one gets merged, this includes irqchip
>>> support.
>>>
>>>
>>>
>> Ping me when this happens please.
>>
> Ping
>
Bounce.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCHv6 0/4] qemu-kvm: vhost net port
2010-04-04 16:30 ` Avi Kivity
@ 2010-04-06 19:05 ` Marcelo Tosatti
0 siblings, 0 replies; 9+ messages in thread
From: Marcelo Tosatti @ 2010-04-06 19:05 UTC (permalink / raw)
To: Avi Kivity; +Cc: Michael S. Tsirkin, kvm
On Sun, Apr 04, 2010 at 07:30:20PM +0300, Avi Kivity wrote:
> On 04/04/2010 02:46 PM, Michael S. Tsirkin wrote:
> >On Wed, Mar 24, 2010 at 02:38:57PM +0200, Avi Kivity wrote:
> >>On 03/17/2010 03:04 PM, Michael S. Tsirkin wrote:
> >>>This is port of vhost v6 patch set I posted previously to qemu-kvm, for
> >>>those that want to get good performance out of it :) This patchset needs
> >>>to be applied when qemu.git one gets merged, this includes irqchip
> >>>support.
> >>>
> >>>
> >>Ping me when this happens please.
> >Ping
>
> Bounce.
Applied, thanks.
^ permalink raw reply [flat|nested] 9+ messages in thread