All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mark McLoughlin <markmc@redhat.com>
To: Anthony Liguori <anthony@codemonkey.ws>
Cc: Avi Kivity <avi@c13-ss-2-lb.cnet.com>, kvm@vger.kernel.org
Subject: Re: [PATCH 5/6] kvm: qemu: virtio-net: handle all tx in I/O thread without timer
Date: Mon, 03 Nov 2008 15:07:55 +0000	[thread overview]
Message-ID: <1225724875.5904.66.camel@blaa> (raw)
In-Reply-To: <1225444619.3758.21.camel@blaa>

On Fri, 2008-10-31 at 09:16 +0000, Mark McLoughlin wrote:
 
> +static void virtio_net_tx_bh(void *opaque)
>  {
>      VirtIONet *n = opaque;
>  
> -    n->tx_timer_active = 0;
> -
> -    /* Just in case the driver is not ready on more */
> -    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
> -        return;
> -
> -    n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
> -    virtio_net_flush_tx(n, n->tx_vq);
> +    if (!virtio_net_flush_tx(n, n->tx_vq))
> +        n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
> +    else
> +        qemu_bh_schedule(n->tx_bh);
>  }

Ouch, race condition here between our finding the ring empty and
re-enabling the notification. We need to check the ring again.

Cheers,
Mark.

From: Mark McLoughlin <markmc@redhat.com>
Subject: [PATCH] kvm: qemu: virtio-net: handle all tx in I/O thread without timer

By removing the tx timer altogether and doing all the copies in the
I/O thread, we can keep the I/O churning away in parallel with the
guest generating more I/O.

In my tests, this significantly increases guest->host throughput,
causes a minor increase in host->guest throughput, reduces CPU
utilization somewhat and greatly reduces roundtrip times.

Even aside from the benchmark results, removing the arbitrary 150us
timer is a nicer option than coming up with a heuristic to make it
vary according to load. Finally, on kernels which don't have a
suitably low posix timer latency, we won't be scuppered by effectively
having e.g. a 1ms timer.

Note, this highlights that the I/O thread may become a scalability
concern and we might want to consider e.g. an I/O thread per device.

Note also that when tuning for a specific workload, which CPU
the I/O thread is pinned to is important.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
 qemu/hw/virtio-net.c |   65 +++++++++++++++++++++++++------------------------
 1 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index bc2ede6..43a06da 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -35,8 +35,6 @@
 #define VIRTIO_NET_F_HOST_ECN	13	/* Host can handle TSO[6] w/ ECN in. */
 #define VIRTIO_NET_F_HOST_UFO	14	/* Host can handle UFO in. */
 
-#define TX_TIMER_INTERVAL 150000 /* 150 us */
-
 /* The config defining mac address (6 bytes) */
 struct virtio_net_config
 {
@@ -68,8 +66,7 @@ typedef struct VirtIONet
     VirtQueue *rx_vq;
     VirtQueue *tx_vq;
     VLANClientState *vc;
-    QEMUTimer *tx_timer;
-    int tx_timer_active;
+    QEMUBH *tx_bh;
 } VirtIONet;
 
 /* TODO
@@ -227,13 +224,14 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
 }
 
 /* TX */
-static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
+static int virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
 {
     VirtQueueElement elem;
     int has_vnet_hdr = tap_has_vnet_hdr(n->vc->vlan->first_client);
+    int num_packets = 0;
 
     if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
-        return;
+        return num_packets;
 
     while (virtqueue_pop(vq, &elem)) {
 	ssize_t len = 0;
@@ -256,38 +254,38 @@ static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
 
 	virtqueue_push(vq, &elem, len);
 	virtio_notify(&n->vdev, vq);
+
+	num_packets++;
     }
+
+    return num_packets;
 }
 
 static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIONet *n = to_virtio_net(vdev);
 
-    if (n->tx_timer_active) {
-	vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
-	qemu_del_timer(n->tx_timer);
-	n->tx_timer_active = 0;
-	virtio_net_flush_tx(n, vq);
-    } else {
-	qemu_mod_timer(n->tx_timer,
-		       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
-	n->tx_timer_active = 1;
-	vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
-    }
+    vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
+    qemu_bh_schedule(n->tx_bh);
 }
 
-static void virtio_net_tx_timer(void *opaque)
+static void virtio_net_tx_bh(void *opaque)
 {
     VirtIONet *n = opaque;
+    VirtQueue *vq = n->tx_vq;
 
-    n->tx_timer_active = 0;
+    /* Re-enable notifications if empty */
+    if (!virtio_net_flush_tx(n, vq)) {
+        vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
 
-    /* Just in case the driver is not ready on more */
-    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
-        return;
+        /* Re-check to be sure we didn't miss something */
+        if (!virtio_net_flush_tx(n, vq))
+            return;
+    }
 
-    n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
-    virtio_net_flush_tx(n, n->tx_vq);
+    /* We flushed some packets; let's try again soon */
+    vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
+    qemu_bh_schedule(n->tx_bh);
 }
 
 static void virtio_net_save(QEMUFile *f, void *opaque)
@@ -297,7 +295,6 @@ static void virtio_net_save(QEMUFile *f, void *opaque)
     virtio_save(&n->vdev, f);
 
     qemu_put_buffer(f, n->mac, 6);
-    qemu_put_be32(f, n->tx_timer_active);
 }
 
 static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
@@ -310,12 +307,15 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
     virtio_load(&n->vdev, f);
 
     qemu_get_buffer(f, n->mac, 6);
-    n->tx_timer_active = qemu_get_be32(f);
 
-    if (n->tx_timer_active) {
-	qemu_mod_timer(n->tx_timer,
-		       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
-    }
+    return 0;
+}
+
+static int virtio_net_uninit(PCIDevice *dev)
+{
+    VirtIONet *n = (VirtIONet *)dev;
+
+    qemu_bh_delete(n->tx_bh);
 
     return 0;
 }
@@ -341,8 +341,9 @@ PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
     n->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive,
                                  virtio_net_can_receive, n);
 
-    n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
-    n->tx_timer_active = 0;
+    n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
+
+    n->vdev.pci_dev.unregister = virtio_net_uninit;
 
     register_savevm("virtio-net", virtio_net_id++, 1,
 		    virtio_net_save, virtio_net_load, n);
-- 
1.6.0.1


  reply	other threads:[~2008-11-03 15:10 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-10-30 17:51 [PATCH 0/6] Kill off the virtio_net tx mitigation timer Mark McLoughlin
2008-10-30 17:51 ` [PATCH 1/6] kvm: qemu: virtio: remove unused variable Mark McLoughlin
2008-10-30 17:51   ` [PATCH 2/6] kvm: qemu: dup the qemu_eventfd() return Mark McLoughlin
2008-10-30 17:51     ` [PATCH 3/6] kvm: qemu: add qemu_eventfd_write() and qemu_eventfd_read() Mark McLoughlin
2008-10-30 17:51       ` [PATCH 4/6] kvm: qemu: aggregate reads from eventfd Mark McLoughlin
2008-10-30 17:51         ` [PATCH 5/6] kvm: qemu: virtio-net: handle all tx in I/O thread without timer Mark McLoughlin
2008-10-30 17:51           ` [PATCH 6/6] kvm: qemu: virtio-net: drop mutex during tx tapfd write Mark McLoughlin
2008-11-04 11:43             ` Avi Kivity
2008-10-30 19:24           ` [PATCH 5/6] kvm: qemu: virtio-net: handle all tx in I/O thread without timer Anthony Liguori
2008-10-31  9:16             ` Mark McLoughlin
2008-11-03 15:07               ` Mark McLoughlin [this message]
2008-11-02  9:56           ` Avi Kivity
2008-11-04 15:23           ` David S. Ahern
2008-11-06 17:02             ` Mark McLoughlin
2008-11-06 17:13               ` David S. Ahern
2008-11-06 17:43               ` Avi Kivity
2008-10-30 19:20 ` [PATCH 0/6] Kill off the virtio_net tx mitigation timer Anthony Liguori
2008-11-02  9:48 ` Avi Kivity
2008-11-03 12:23   ` Mark McLoughlin
2008-11-03 12:40     ` Avi Kivity
2008-11-03 15:04       ` Mark McLoughlin
2008-11-03 15:19         ` Avi Kivity
2008-11-06 16:46           ` Mark McLoughlin
2008-11-06 17:38             ` Avi Kivity
2008-11-06 17:45       ` Mark McLoughlin
2008-11-09 11:29         ` Avi Kivity
2008-11-02  9:57 ` Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1225724875.5904.66.camel@blaa \
    --to=markmc@redhat.com \
    --cc=anthony@codemonkey.ws \
    --cc=avi@c13-ss-2-lb.cnet.com \
    --cc=kvm@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.