From: Mark McLoughlin <markmc@redhat.com>
To: Anthony Liguori <anthony@codemonkey.ws>
Cc: Avi Kivity <avi@qumranet.com>, kvm@vger.kernel.org
Subject: Re: [PATCH 5/6] kvm: qemu: virtio-net: handle all tx in I/O thread without timer
Date: Fri, 31 Oct 2008 09:16:59 +0000 [thread overview]
Message-ID: <1225444619.3758.21.camel@blaa> (raw)
In-Reply-To: <490A09EF.2030006@codemonkey.ws>
On Thu, 2008-10-30 at 14:24 -0500, Anthony Liguori wrote:
> Instead of using an event fd, perhaps you could just schedule a bottom
> half? I think that would be a whole lot cleaner.
Nice, I hadn't noticed the bottom halves. Much cleaner, indeed.
Results are a little better too:
| guest->host tput | host->guest tput
netperf, 10x20s runs (Gb/s) | min/ mean/ max/stddev | min/ mean/ max/stddev
------------------------------+----------------------------+---------------------------
eventfd, 1k | 4.260/ 6.739/ 8.020/ 1.075 | 3.640/ 3.777/ 4.020/ 0.110
eventfd, 16k | 8.530/ 8.867/ 9.110/ 0.146 | 7.220/ 7.276/ 7.360/ 0.040
eventfd, 65k | 9.200/ 9.282/ 9.870/ 0.198 | 7.850/ 7.924/ 8.000/ 0.045
bottom half, bh, 1k | 4.890/ 6.712/ 7.800/ 0.800 | 4.900/ 5.002/ 5.130/ 0.067
bottom half, bh, 16k | 8.640/ 8.827/ 9.290/ 0.186 | 7.920/ 7.963/ 8.020/ 0.034
bottom half, bh, 65k | 9.310/ 9.323/ 9.350/ 0.010 | 8.740/ 8.769/ 8.800/ 0.016
| guest->host %idle | host->guest %idle
netperf, 10x20s runs (Gb/s) | avg/cpu#0/cpu#1/cpu#2/cpu#3 | avg/cpu#0/cpu#1/cpu#2/cpu#3
------------------------------+--------------------------------+-----------------------------
eventfd, 1k | 57.68/99.81/99.96/18.76/21.91 | 65.49/99.84/99.97/15.20/52.46
eventfd, 16k | 61.66/99.85/99.98/13.01/40.83 | 61.43/99.79/99.93/0.97/47.61
eventfd, 65k | 62.57/99.99/99.95/13.13/39.28 | 60.15/99.81/99.97/1.27/43.10
bottom half, 1k | 57.29/99.83/99.81/22.30/15.93 | 63.36/99.98/99.95/2.92/51.26
bottom half, 16k | 62.21/99.81/99.96/15.74/42.40 | 60.21/99.81/99.97/0.92/43.78
bottom half, 65k | 62.25/99.99/99.96/12.67/37.45 | 58.63/99.83/99.96/0.92/36.71
(With this approach, patches 2-4 aren't needed anymore, but they're
still worthwhile cleanups IMHO)
Cheers,
Mark.
From: Mark McLoughlin <markmc@redhat.com>
Subject: kvm: qemu: virtio-net: handle all tx in I/O thread without timer
By removing the tx timer altogether and doing all the copies in the
I/O thread, we can keep the I/O churning away in parallel with the
guest generating more I/O.
In my tests, this significantly increases guest->host throughput,
causes a minor increase in host->guest throughput, reduces CPU
utilization somewhat and greatly reduces roundtrip times.
Even aside from the benchmark results, removing the arbitrary 150us
timer is a nicer option than coming up with a heuristic to make it
vary according to load. Finally, on kernels which don't have a
suitably low posix timer latency, we won't be scuppered by effectively
having e.g. a 1ms timer.
Note, this highlights that the I/O thread may become a scalability
concern and we might want to consider e.g. an I/O thread per device.
Note also that when tuning for a specific workload, which CPU
the I/O thread is pinned to is important.
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
qemu/hw/virtio-net.c | 60 +++++++++++++++++++++----------------------------
1 files changed, 26 insertions(+), 34 deletions(-)
diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index bc2ede6..bc198ef 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -35,8 +35,6 @@
#define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */
#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */
-#define TX_TIMER_INTERVAL 150000 /* 150 us */
-
/* The config defining mac address (6 bytes) */
struct virtio_net_config
{
@@ -68,8 +66,7 @@ typedef struct VirtIONet
VirtQueue *rx_vq;
VirtQueue *tx_vq;
VLANClientState *vc;
- QEMUTimer *tx_timer;
- int tx_timer_active;
+ QEMUBH *tx_bh;
} VirtIONet;
/* TODO
@@ -227,13 +224,14 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
}
/* TX */
-static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
+static int virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
{
VirtQueueElement elem;
int has_vnet_hdr = tap_has_vnet_hdr(n->vc->vlan->first_client);
+ int num_packets = 0;
if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
- return;
+ return num_packets;
while (virtqueue_pop(vq, &elem)) {
ssize_t len = 0;
@@ -256,38 +254,29 @@ static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
virtqueue_push(vq, &elem, len);
virtio_notify(&n->vdev, vq);
+
+ num_packets++;
}
+
+ return num_packets;
}
static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
{
VirtIONet *n = to_virtio_net(vdev);
- if (n->tx_timer_active) {
- vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
- qemu_del_timer(n->tx_timer);
- n->tx_timer_active = 0;
- virtio_net_flush_tx(n, vq);
- } else {
- qemu_mod_timer(n->tx_timer,
- qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
- n->tx_timer_active = 1;
- vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
- }
+ vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
+ qemu_bh_schedule(n->tx_bh);
}
-static void virtio_net_tx_timer(void *opaque)
+static void virtio_net_tx_bh(void *opaque)
{
VirtIONet *n = opaque;
- n->tx_timer_active = 0;
-
- /* Just in case the driver is not ready on more */
- if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
- return;
-
- n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
- virtio_net_flush_tx(n, n->tx_vq);
+ if (!virtio_net_flush_tx(n, n->tx_vq))
+ n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
+ else
+ qemu_bh_schedule(n->tx_bh);
}
static void virtio_net_save(QEMUFile *f, void *opaque)
@@ -297,7 +286,6 @@ static void virtio_net_save(QEMUFile *f, void *opaque)
virtio_save(&n->vdev, f);
qemu_put_buffer(f, n->mac, 6);
- qemu_put_be32(f, n->tx_timer_active);
}
static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
@@ -310,12 +298,15 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
virtio_load(&n->vdev, f);
qemu_get_buffer(f, n->mac, 6);
- n->tx_timer_active = qemu_get_be32(f);
- if (n->tx_timer_active) {
- qemu_mod_timer(n->tx_timer,
- qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
- }
+ return 0;
+}
+
+static int virtio_net_uninit(PCIDevice *dev)
+{
+ VirtIONet *n = (VirtIONet *)dev;
+
+ qemu_bh_delete(n->tx_bh);
return 0;
}
@@ -341,8 +332,9 @@ PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
n->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive,
virtio_net_can_receive, n);
- n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
- n->tx_timer_active = 0;
+ n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
+
+ n->vdev.pci_dev.unregister = virtio_net_uninit;
register_savevm("virtio-net", virtio_net_id++, 1,
virtio_net_save, virtio_net_load, n);
--
1.5.4.3
next prev parent reply other threads:[~2008-10-31 9:18 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-10-30 17:51 [PATCH 0/6] Kill off the virtio_net tx mitigation timer Mark McLoughlin
2008-10-30 17:51 ` [PATCH 1/6] kvm: qemu: virtio: remove unused variable Mark McLoughlin
2008-10-30 17:51 ` [PATCH 2/6] kvm: qemu: dup the qemu_eventfd() return Mark McLoughlin
2008-10-30 17:51 ` [PATCH 3/6] kvm: qemu: add qemu_eventfd_write() and qemu_eventfd_read() Mark McLoughlin
2008-10-30 17:51 ` [PATCH 4/6] kvm: qemu: aggregate reads from eventfd Mark McLoughlin
2008-10-30 17:51 ` [PATCH 5/6] kvm: qemu: virtio-net: handle all tx in I/O thread without timer Mark McLoughlin
2008-10-30 17:51 ` [PATCH 6/6] kvm: qemu: virtio-net: drop mutex during tx tapfd write Mark McLoughlin
2008-11-04 11:43 ` Avi Kivity
2008-10-30 19:24 ` [PATCH 5/6] kvm: qemu: virtio-net: handle all tx in I/O thread without timer Anthony Liguori
2008-10-31 9:16 ` Mark McLoughlin [this message]
2008-11-03 15:07 ` Mark McLoughlin
2008-11-02 9:56 ` Avi Kivity
2008-11-04 15:23 ` David S. Ahern
2008-11-06 17:02 ` Mark McLoughlin
2008-11-06 17:13 ` David S. Ahern
2008-11-06 17:43 ` Avi Kivity
2008-10-30 19:20 ` [PATCH 0/6] Kill off the virtio_net tx mitigation timer Anthony Liguori
2008-11-02 9:48 ` Avi Kivity
2008-11-03 12:23 ` Mark McLoughlin
2008-11-03 12:40 ` Avi Kivity
2008-11-03 15:04 ` Mark McLoughlin
2008-11-03 15:19 ` Avi Kivity
2008-11-06 16:46 ` Mark McLoughlin
2008-11-06 17:38 ` Avi Kivity
2008-11-06 17:45 ` Mark McLoughlin
2008-11-09 11:29 ` Avi Kivity
2008-11-02 9:57 ` Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1225444619.3758.21.camel@blaa \
--to=markmc@redhat.com \
--cc=anthony@codemonkey.ws \
--cc=avi@qumranet.com \
--cc=kvm@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.