From mboxrd@z Thu Jan 1 00:00:00 1970 From: Mark McLoughlin Subject: Re: [PATCH 5/6] kvm: qemu: virtio-net: handle all tx in I/O thread without timer Date: Mon, 03 Nov 2008 15:07:55 +0000 Message-ID: <1225724875.5904.66.camel@blaa> References: <> <1225389113-28332-1-git-send-email-markmc@redhat.com> <1225389113-28332-2-git-send-email-markmc@redhat.com> <1225389113-28332-3-git-send-email-markmc@redhat.com> <1225389113-28332-4-git-send-email-markmc@redhat.com> <1225389113-28332-5-git-send-email-markmc@redhat.com> <1225389113-28332-6-git-send-email-markmc@redhat.com> <490A09EF.2030006@codemonkey.ws> <1225444619.3758.21.camel@blaa> Reply-To: Mark McLoughlin Mime-Version: 1.0 Content-Type: text/plain Content-Transfer-Encoding: 7bit Cc: Avi Kivity , kvm@vger.kernel.org To: Anthony Liguori Return-path: Received: from mx2.redhat.com ([66.187.237.31]:33516 "EHLO mx2.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754343AbYKCPKz (ORCPT ); Mon, 3 Nov 2008 10:10:55 -0500 In-Reply-To: <1225444619.3758.21.camel@blaa> Sender: kvm-owner@vger.kernel.org List-ID: On Fri, 2008-10-31 at 09:16 +0000, Mark McLoughlin wrote: > +static void virtio_net_tx_bh(void *opaque) > { > VirtIONet *n = opaque; > > - n->tx_timer_active = 0; > - > - /* Just in case the driver is not ready on more */ > - if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) > - return; > - > - n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; > - virtio_net_flush_tx(n, n->tx_vq); > + if (!virtio_net_flush_tx(n, n->tx_vq)) > + n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; > + else > + qemu_bh_schedule(n->tx_bh); > } Ouch, race condition here between our finding the ring empty and re-enabling the notification. We need to check the ring again. Cheers, Mark. From: Mark McLoughlin Subject: [PATCH] kvm: qemu: virtio-net: handle all tx in I/O thread without timer By removing the tx timer altogether and doing all the copies in the I/O thread, we can keep the I/O churning away in parallel with the guest generating more I/O. In my tests, this significantly increases guest->host throughput, causes a minor increase in host->guest throughput, reduces CPU utilization somewhat and greatly reduces roundtrip times. Even aside from the benchmark results, removing the arbitrary 150us timer is a nicer option than coming up with a heuristic to make it vary according to load. Finally, on kernels which don't have a suitably low posix timer latency, we won't be scuppered by effectively having e.g. a 1ms timer. Note, this highlights that the I/O thread may become a scalability concern and we might want to consider e.g. an I/O thread per device. Note also that when tuning for a specific workload, which CPU the I/O thread is pinned to is important. Signed-off-by: Mark McLoughlin --- qemu/hw/virtio-net.c | 65 +++++++++++++++++++++++++------------------------ 1 files changed, 33 insertions(+), 32 deletions(-) diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c index bc2ede6..43a06da 100644 --- a/qemu/hw/virtio-net.c +++ b/qemu/hw/virtio-net.c @@ -35,8 +35,6 @@ #define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */ #define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */ -#define TX_TIMER_INTERVAL 150000 /* 150 us */ - /* The config defining mac address (6 bytes) */ struct virtio_net_config { @@ -68,8 +66,7 @@ typedef struct VirtIONet VirtQueue *rx_vq; VirtQueue *tx_vq; VLANClientState *vc; - QEMUTimer *tx_timer; - int tx_timer_active; + QEMUBH *tx_bh; } VirtIONet; /* TODO @@ -227,13 +224,14 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size) } /* TX */ -static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq) +static int virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq) { VirtQueueElement elem; int has_vnet_hdr = tap_has_vnet_hdr(n->vc->vlan->first_client); + int num_packets = 0; if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) - return; + return num_packets; while (virtqueue_pop(vq, &elem)) { ssize_t len = 0; @@ -256,38 +254,38 @@ static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq) virtqueue_push(vq, &elem, len); virtio_notify(&n->vdev, vq); + + num_packets++; } + + return num_packets; } static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq) { VirtIONet *n = to_virtio_net(vdev); - if (n->tx_timer_active) { - vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; - qemu_del_timer(n->tx_timer); - n->tx_timer_active = 0; - virtio_net_flush_tx(n, vq); - } else { - qemu_mod_timer(n->tx_timer, - qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL); - n->tx_timer_active = 1; - vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; - } + vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; + qemu_bh_schedule(n->tx_bh); } -static void virtio_net_tx_timer(void *opaque) +static void virtio_net_tx_bh(void *opaque) { VirtIONet *n = opaque; + VirtQueue *vq = n->tx_vq; - n->tx_timer_active = 0; + /* Re-enable notifications if empty */ + if (!virtio_net_flush_tx(n, vq)) { + vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; - /* Just in case the driver is not ready on more */ - if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) - return; + /* Re-check to be sure we didn't miss something */ + if (!virtio_net_flush_tx(n, vq)) + return; + } - n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; - virtio_net_flush_tx(n, n->tx_vq); + /* We flushed some packets; let's try again soon */ + vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; + qemu_bh_schedule(n->tx_bh); } static void virtio_net_save(QEMUFile *f, void *opaque) @@ -297,7 +295,6 @@ static void virtio_net_save(QEMUFile *f, void *opaque) virtio_save(&n->vdev, f); qemu_put_buffer(f, n->mac, 6); - qemu_put_be32(f, n->tx_timer_active); } static int virtio_net_load(QEMUFile *f, void *opaque, int version_id) @@ -310,12 +307,15 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id) virtio_load(&n->vdev, f); qemu_get_buffer(f, n->mac, 6); - n->tx_timer_active = qemu_get_be32(f); - if (n->tx_timer_active) { - qemu_mod_timer(n->tx_timer, - qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL); - } + return 0; +} + +static int virtio_net_uninit(PCIDevice *dev) +{ + VirtIONet *n = (VirtIONet *)dev; + + qemu_bh_delete(n->tx_bh); return 0; } @@ -341,8 +341,9 @@ PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn) n->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive, virtio_net_can_receive, n); - n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n); - n->tx_timer_active = 0; + n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n); + + n->vdev.pci_dev.unregister = virtio_net_uninit; register_savevm("virtio-net", virtio_net_id++, 1, virtio_net_save, virtio_net_load, n); -- 1.6.0.1