From: Anthony Liguori
Subject: Re: [PATCH 5/6] kvm: qemu: virtio-net: handle all tx in I/O thread without timer
Date: Thu, 30 Oct 2008 14:24:31 -0500
Message-ID: <490A09EF.2030006@codemonkey.ws>
In-Reply-To: <1225389113-28332-6-git-send-email-markmc@redhat.com>
References: <1225389113-28332-1-git-send-email-markmc@redhat.com>
 <1225389113-28332-2-git-send-email-markmc@redhat.com>
 <1225389113-28332-3-git-send-email-markmc@redhat.com>
 <1225389113-28332-4-git-send-email-markmc@redhat.com>
 <1225389113-28332-5-git-send-email-markmc@redhat.com>
 <1225389113-28332-6-git-send-email-markmc@redhat.com>
To: Mark McLoughlin
Cc: Avi Kivity, kvm@vger.kernel.org

Mark McLoughlin wrote:
> By removing the tx timer altogether and doing all the copies in the
> I/O thread, we can keep the I/O churning away in parallel with the
> guest generating more I/O.
>
> In my tests, this significantly increases guest->host throughput,
> causes a minor increase in host->guest throughput, reduces CPU
> utilization somewhat and greatly reduces roundtrip times.
>
> Even aside from the benchmark results, removing the arbitrary 150us
> timer is a nicer option than coming up with a heuristic to make it
> vary according to load. Finally, on kernels which don't have a
> suitably low posix timer latency, we won't be scuppered by effectively
> having e.g. a 1ms timer.
>
> Note, this highlights that the I/O thread may become a scalability
> concern and we might want to consider e.g. an I/O thread per device.
>
> Note also that when tuning for a specific workload, which CPU
> the I/O thread is pinned to is important.

Instead of using an eventfd, perhaps you could just schedule a bottom half? Bottom halves already run in the I/O thread, so you'd get the same deferral without the extra descriptor pair, the non-blocking fcntl() setup, or the cleanup in virtio_net_uninit(). I think that would be a whole lot cleaner.
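Something like this, perhaps (untested sketch, just to illustrate; I'm assuming the stock qemu_bh_new()/qemu_bh_schedule() helpers and a new "QEMUBH *tx_bh" field in VirtIONet in place of the tx_eventfds pair):

static void virtio_net_tx_bh(void *opaque)
{
    VirtIONet *n = opaque;

    /* Mirrors virtio_net_tx_event(): if the queue was empty, re-enable
     * guest notifications; otherwise stay in polling mode and run the
     * bottom half again from the I/O thread. */
    if (!virtio_net_flush_tx(n, n->tx_vq))
        n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
    else
        qemu_bh_schedule(n->tx_bh);
}

static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* Suppress further kicks and defer the flush to the bottom half. */
    vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
    qemu_bh_schedule(n->tx_bh);
}

with virtio_net_init() just doing:

    n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);

Scheduling an already-scheduled bottom half is a no-op, so repeated kicks coalesce for free, and there's no fd to make non-blocking, register a handler for, or close on uninit.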
Regards,

Anthony Liguori

> Signed-off-by: Mark McLoughlin
> ---
>  qemu/hw/virtio-net.c |   79 ++++++++++++++++++++++++++++---------------------
>  1 files changed, 45 insertions(+), 34 deletions(-)
>
> diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
> index bc2ede6..0612f5f 100644
> --- a/qemu/hw/virtio-net.c
> +++ b/qemu/hw/virtio-net.c
> @@ -15,6 +15,8 @@
>  #include "net.h"
>  #include "qemu-timer.h"
>  #include "qemu-kvm.h"
> +#include "qemu-char.h"
> +#include "compatfd.h"
>
>  /* from Linux's virtio_net.h */
>
> @@ -35,8 +37,6 @@
>  #define VIRTIO_NET_F_HOST_ECN  13 /* Host can handle TSO[6] w/ ECN in. */
>  #define VIRTIO_NET_F_HOST_UFO  14 /* Host can handle UFO in. */
>
> -#define TX_TIMER_INTERVAL 150000 /* 150 us */
> -
>  /* The config defining mac address (6 bytes) */
>  struct virtio_net_config
>  {
> @@ -68,8 +68,7 @@ typedef struct VirtIONet
>      VirtQueue *rx_vq;
>      VirtQueue *tx_vq;
>      VLANClientState *vc;
> -    QEMUTimer *tx_timer;
> -    int tx_timer_active;
> +    int tx_eventfds[2];
>  } VirtIONet;
>
>  /* TODO
> @@ -227,13 +226,14 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
>  }
>
>  /* TX */
> -static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
> +static int virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
>  {
>      VirtQueueElement elem;
>      int has_vnet_hdr = tap_has_vnet_hdr(n->vc->vlan->first_client);
> +    int num_packets = 0;
>
>      if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
> -        return;
> +        return num_packets;
>
>      while (virtqueue_pop(vq, &elem)) {
>          ssize_t len = 0;
>
> @@ -256,38 +256,31 @@ static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
>
>          virtqueue_push(vq, &elem, len);
>          virtio_notify(&n->vdev, vq);
> +
> +        num_packets++;
>      }
> +
> +    return num_packets;
>  }
>
>  static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
>  {
>      VirtIONet *n = to_virtio_net(vdev);
>
> -    if (n->tx_timer_active) {
> -        vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
> -        qemu_del_timer(n->tx_timer);
> -        n->tx_timer_active = 0;
> -        virtio_net_flush_tx(n, vq);
> -    } else {
> -        qemu_mod_timer(n->tx_timer,
> -                       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
> -        n->tx_timer_active = 1;
> -        vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
> -    }
> +    vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
> +    qemu_eventfd_write(n->tx_eventfds[1], 1);
>  }
>
> -static void virtio_net_tx_timer(void *opaque)
> +static void virtio_net_tx_event(void *opaque)
>  {
>      VirtIONet *n = opaque;
>
> -    n->tx_timer_active = 0;
> +    qemu_eventfd_read(n->tx_eventfds[0]);
>
> -    /* Just in case the driver is not ready on more */
> -    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
> -        return;
> -
> -    n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
> -    virtio_net_flush_tx(n, n->tx_vq);
> +    if (!virtio_net_flush_tx(n, n->tx_vq))
> +        n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
> +    else
> +        qemu_eventfd_write(n->tx_eventfds[1], 1);
>  }
>
>  static void virtio_net_save(QEMUFile *f, void *opaque)
> @@ -297,7 +290,6 @@ static void virtio_net_save(QEMUFile *f, void *opaque)
>      virtio_save(&n->vdev, f);
>
>      qemu_put_buffer(f, n->mac, 6);
> -    qemu_put_be32(f, n->tx_timer_active);
>  }
>
>  static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
> @@ -310,12 +302,16 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
>      virtio_load(&n->vdev, f);
>
>      qemu_get_buffer(f, n->mac, 6);
> -    n->tx_timer_active = qemu_get_be32(f);
>
> -    if (n->tx_timer_active) {
> -        qemu_mod_timer(n->tx_timer,
> -                       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
> -    }
> +    return 0;
> +}
> +
> +static int virtio_net_uninit(PCIDevice *dev)
> +{
> +    VirtIONet *n = (VirtIONet *)dev;
> +
> +    close(n->tx_eventfds[0]);
> +    close(n->tx_eventfds[1]);
>
>      return 0;
>  }
> @@ -324,13 +320,23 @@ PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
>  {
>      VirtIONet *n;
>      static int virtio_net_id;
> +    int eventfds[2];
> +
> +    if (qemu_eventfd(eventfds) == -1) {
> +        fprintf(stderr, "Failed to create eventfds : %s\n",
> +                strerror(errno));
> +        return NULL;
> +    }
>
>      n = (VirtIONet *)virtio_init_pci(bus, "virtio-net", 6900, 0x1000,
>                                       0, VIRTIO_ID_NET,
>                                       0x02, 0x00, 0x00,
>                                       6, sizeof(VirtIONet));
> -    if (!n)
> +    if (!n) {
> +        close(eventfds[0]);
> +        close(eventfds[1]);
>          return NULL;
> +    }
>
>      n->vdev.get_config = virtio_net_update_config;
>      n->vdev.get_features = virtio_net_get_features;
> @@ -341,8 +347,13 @@ PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
>      n->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive,
>                                   virtio_net_can_receive, n);
>
> -    n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
> -    n->tx_timer_active = 0;
> +    fcntl(eventfds[0], F_SETFL, O_NONBLOCK);
> +    n->tx_eventfds[0] = eventfds[0];
> +    fcntl(eventfds[1], F_SETFL, O_NONBLOCK);
> +    n->tx_eventfds[1] = eventfds[1];
> +
> +    n->vdev.pci_dev.unregister = virtio_net_uninit;
> +    qemu_set_fd_handler2(n->tx_eventfds[0], NULL, virtio_net_tx_event, NULL, n);
>
>      register_savevm("virtio-net", virtio_net_id++, 1,
>                      virtio_net_save, virtio_net_load, n);
>