From: Mark McLoughlin <markmc@redhat.com>
To: Avi Kivity <avi@redhat.com>
Cc: kvm@vger.kernel.org, Rusty Russell <rusty@rustcorp.com.au>,
Herbert Xu <herbert.xu@redhat.com>,
Mark McLoughlin <markmc@redhat.com>
Subject: [PATCH 5/5] kvm: qemu: Improve virtio_net recv buffer allocation scheme
Date: Wed, 8 Oct 2008 20:35:13 +0100
Message-ID: <1223494513-18826-5-git-send-email-markmc@redhat.com>
In-Reply-To: <1223494513-18826-4-git-send-email-markmc@redhat.com>
From: Herbert Xu <herbert.xu@redhat.com>
Currently, in order to receive large packets, the guest must allocate
max-sized packet buffers and pass them to the host. Each of these
max-sized packets occupies 20 ring entries, so with a 256-entry ring
we can transfer at most 12 such packets in a single batch
(256 / 20 rounds down to 12).
When receiving packets from external networks, we only receive
MTU-sized packets, and so the observed throughput is throttled by the
number of packets the ring can hold.
Implement the VIRTIO_NET_F_MRG_RXBUF feature to let guests know that
we can merge smaller buffers together in order to handle large packets.
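For reference (not part of this diff), the guest-visible header that
carries the merged-buffer count looks roughly like the layout Linux
exports as struct virtio_net_hdr_mrg_rxbuf:
    struct virtio_net_hdr_mrg_rxbuf {
        struct virtio_net_hdr hdr;
        __u16 num_buffers;  /* how many receive buffers this packet
                               was spread across */
    };
A guest that negotiates VIRTIO_NET_F_MRG_RXBUF reads num_buffers and
collects that many entries from the used ring to reassemble the packet.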
This scheme allows us to be efficient in our use of ring entries
while still supporting large packets. Benchmarking using netperf from
an external machine to a guest over a 10Gb/s network shows a 100%
improvement from ~1Gb/s to ~2Gb/s. In a local host->guest benchmark
with GSO disabled on the host side, throughput increased from 700Mb/s
to 1.7Gb/s.
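To put the ring-entry savings in rough numbers (assuming 4KiB pages):
with mergeable buffers each posted receive buffer is at least a page,
an MTU-sized (~1500 byte) packet consumes just one of them, and a 64KiB
GSO packet is spread across roughly 17, so the same 256-entry ring can
now hold a couple of hundred MTU-sized packets per batch instead of 12.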
Based on a patch from Herbert, with the feature renamed from
"datahead" and some refactoring for readability.
Signed-off-by: Herbert Xu <herbert.xu@redhat.com>
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
qemu/hw/virtio-net.c | 67 +++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 61 insertions(+), 6 deletions(-)
diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index 403247b..afa5fe5 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -34,9 +34,13 @@
#define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */
#define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */
#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */
+#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. */
#define TX_TIMER_INTERVAL 150000 /* 150 us */
+/* Should be the largest MAX_SKB_FRAGS supported. */
+#define VIRTIO_NET_MAX_FRAGS 18
+
/* The config defining mac address (6 bytes) */
struct virtio_net_config
{
@@ -70,6 +74,7 @@ typedef struct VirtIONet
VLANClientState *vc;
QEMUTimer *tx_timer;
int tx_timer_active;
+ int mergeable_rx_bufs;
} VirtIONet;
/* TODO
@@ -106,6 +111,7 @@ static uint32_t virtio_net_get_features(VirtIODevice *vdev)
features |= (1 << VIRTIO_NET_F_HOST_TSO4);
features |= (1 << VIRTIO_NET_F_HOST_TSO6);
features |= (1 << VIRTIO_NET_F_HOST_ECN);
+ features |= (1 << VIRTIO_NET_F_MRG_RXBUF);
/* Kernel can't actually handle UFO in software currently. */
}
@@ -117,6 +123,8 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
VirtIONet *n = to_virtio_net(vdev);
VLANClientState *host = n->vc->vlan->first_client;
+ n->mergeable_rx_bufs = !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF));
+
if (!tap_has_vnet_hdr(host) || !host->set_offload)
return;
@@ -141,12 +149,15 @@ static int virtio_net_can_receive(void *opaque)
{
VirtIONet *n = opaque;
VirtQueue *vq = n->rx_vq;
+ int min_bufs;
if (vq->vring.avail == NULL ||
!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
return 0;
- if (vq->vring.avail->idx == vq->last_avail_idx) {
+ min_bufs = n->mergeable_rx_bufs ? VIRTIO_NET_MAX_FRAGS : 1;
+
+ if ((uint16_t)(vq->vring.avail->idx - vq->last_avail_idx) < min_bufs) {
vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
return 0;
}
@@ -209,7 +220,12 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
if (virtqueue_pop(n->rx_vq, &elem) == 0)
return;
- if (elem.in_num < 1 || elem.in_sg[0].iov_len != sizeof(*hdr)) {
+ if (n->mergeable_rx_bufs) {
+ if (elem.in_num < 1 || elem.in_sg[0].iov_len < TARGET_PAGE_SIZE) {
+ fprintf(stderr, "virtio-net IOV is irregular\n");
+ exit(1);
+ }
+ } else if (elem.in_num < 1 || elem.in_sg[0].iov_len != sizeof(*hdr)) {
fprintf(stderr, "virtio-net header not in first element\n");
exit(1);
}
@@ -229,11 +245,49 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
}
/* copy in packet. ugh */
- iov_fill(&elem.in_sg[1], elem.in_num - 1,
- buf + offset, size - offset);
- /* signal other side */
- virtqueue_push(n->rx_vq, &elem, total);
+ if (n->mergeable_rx_bufs) {
+ int i = 0;
+
+ elem.in_sg[0].iov_base += sizeof(*hdr);
+ elem.in_sg[0].iov_len -= sizeof(*hdr);
+
+ offset += iov_fill(&elem.in_sg[0], elem.in_num,
+ buf + offset, size - offset);
+
+ /* signal other side */
+ virtqueue_fill(n->rx_vq, &elem, total, i++);
+
+ while (offset < size) {
+ int len;
+
+ if (virtqueue_pop(n->rx_vq, &elem) == 0) {
+ fprintf(stderr, "virtio-net truncating packet\n");
+ exit(1);
+ }
+
+ if (elem.in_num < 1 || elem.in_sg[0].iov_len < TARGET_PAGE_SIZE) {
+ fprintf(stderr, "virtio-net IOV is irregular\n");
+ exit(1);
+ }
+
+ len = iov_fill(&elem.in_sg[0], elem.in_num,
+ buf + offset, size - offset);
+
+ virtqueue_fill(n->rx_vq, &elem, len, i++);
+
+ offset += len;
+ }
+
+ virtqueue_flush(n->rx_vq, i);
+ } else {
+ iov_fill(&elem.in_sg[1], elem.in_num - 1,
+ buf + offset, size - offset);
+
+ /* signal other side */
+ virtqueue_push(n->rx_vq, &elem, total);
+ }
+
virtio_notify(&n->vdev, n->rx_vq);
}
@@ -354,6 +408,7 @@ PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
n->tx_timer_active = 0;
+ n->mergeable_rx_bufs = 0;
register_savevm("virtio-net", virtio_net_id++, 1,
virtio_net_save, virtio_net_load, n);
--
1.5.4.3