All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mark McLoughlin <markmc@redhat.com>
To: kvm@vger.kernel.org
Cc: Herbert Xu <herbert@gondor.apana.org.au>,
	Rusty Russell <rusty@rustcorp.com.au>,
	Mark McLoughlin <markmc@redhat.com>
Subject: [PATCH 9/9] kvm: qemu: Eliminate extra virtio_net copy
Date: Thu, 24 Jul 2008 12:46:19 +0100	[thread overview]
Message-ID: <1216899979-32532-10-git-send-email-markmc@redhat.com> (raw)
In-Reply-To: <1216899979-32532-9-git-send-email-markmc@redhat.com>

This is Anthony's net-tap-zero-copy.patch, which eliminates
a copy on the host->guest data path with virtio_net.
---
 qemu/hw/virtio-net.c |   76 ++++++++++++++++++++++++++++++++++++-------------
 qemu/net.h           |    3 ++
 qemu/vl.c            |   50 +++++++++++++++++++++++++++++++++
 3 files changed, 109 insertions(+), 20 deletions(-)

diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index a681a7e..5e71afe 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -70,6 +70,8 @@ typedef struct VirtIONet
     VLANClientState *vc;
     QEMUTimer *tx_timer;
     int tx_timer_active;
+    int last_elem_valid;
+    VirtQueueElement last_elem;
 } VirtIONet;
 
 /* TODO
@@ -153,47 +155,80 @@ static int virtio_net_can_receive(void *opaque)
     return 1;
 }
 
-static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
+static void virtio_net_receive_zc(void *opaque, IOZeroCopyHandler *zc, void *data)
 {
     VirtIONet *n = opaque;
-    VirtQueueElement elem;
+    VirtQueueElement *elem = &n->last_elem;
     struct virtio_net_hdr *hdr;
-    int offset, i;
-    int total;
+    ssize_t err;
+    int idx;
 
-    if (virtqueue_pop(n->rx_vq, &elem) == 0)
+    if (!n->last_elem_valid && virtqueue_pop(n->rx_vq, elem) == 0)
 	return;
 
-    if (elem.in_num < 1 || elem.in_sg[0].iov_len != sizeof(*hdr)) {
+    if (elem->in_num < 1 || elem->in_sg[0].iov_len != sizeof(*hdr)) {
 	fprintf(stderr, "virtio-net header not in first element\n");
 	exit(1);
     }
 
-    hdr = (void *)elem.in_sg[0].iov_base;
+    n->last_elem_valid = 1;
+
+    hdr = (void *)elem->in_sg[0].iov_base;
     hdr->flags = 0;
     hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
 
-    offset = 0;
-    total = sizeof(*hdr);
+    idx = tap_has_offload(n->vc->vlan->first_client) ? 0 : 1;
+
+    do {
+        err = zc(data, &elem->in_sg[idx], elem->in_num - idx);
+    } while (err == -1 && errno == EINTR);
+
+    if (err == -1 && errno == EAGAIN)
+        return;
 
-    if (tap_has_offload(n->vc->vlan->first_client)) {
-	memcpy(hdr, buf, sizeof(*hdr));
-	offset += total;
+    if (err < 0) {
+        fprintf(stderr, "virtio_net: error during IO\n");
+        return;
     }
 
+    /* signal other side */
+    n->last_elem_valid = 0;
+    virtqueue_push(n->rx_vq, elem, sizeof(*hdr) + err);
+    virtio_notify(&n->vdev, n->rx_vq);
+}
+
+struct compat_data
+{
+    const uint8_t *buf;
+    int size;
+};
+
+static ssize_t compat_copy(void *opaque, struct iovec *iov, int iovcnt)
+{
+    struct compat_data *compat = opaque;
+    int offset, i;
+
     /* copy in packet.  ugh */
-    i = 1;
-    while (offset < size && i < elem.in_num) {
-	int len = MIN(elem.in_sg[i].iov_len, size - offset);
-	memcpy(elem.in_sg[i].iov_base, buf + offset, len);
+    offset = 0;
+    i = 0;
+    while (offset < compat->size && i < iovcnt) {
+	int len = MIN(iov[i].iov_len, compat->size - offset);
+	memcpy(iov[i].iov_base, compat->buf + offset, len);
 	offset += len;
-	total += len;
 	i++;
     }
 
-    /* signal other side */
-    virtqueue_push(n->rx_vq, &elem, total);
-    virtio_notify(&n->vdev, n->rx_vq);
+    return offset;
+}
+
+static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
+{
+    struct compat_data compat;
+
+    compat.buf = buf;
+    compat.size = size;
+
+    virtio_net_receive_zc(opaque, compat_copy, &compat);
 }
 
 /* TX */
@@ -310,6 +345,7 @@ PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
     memcpy(n->mac, nd->macaddr, 6);
     n->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive,
                                  virtio_net_can_receive, n);
+    n->vc->fd_read_zc = virtio_net_receive_zc;
 
     n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
     n->tx_timer_active = 0;
diff --git a/qemu/net.h b/qemu/net.h
index 6cfd8ce..aca50e9 100644
--- a/qemu/net.h
+++ b/qemu/net.h
@@ -6,6 +6,8 @@
 /* VLANs support */
 
 typedef ssize_t (IOReadvHandler)(void *, const struct iovec *, int);
+typedef ssize_t (IOZeroCopyHandler)(void *, struct iovec *, int);
+typedef void (IOReadZCHandler)(void *, IOZeroCopyHandler *, void *);
 
 typedef struct VLANClientState VLANClientState;
 
@@ -14,6 +16,7 @@ typedef void (SetOffload)(VLANClientState *, int, int, int, int);
 struct VLANClientState {
     IOReadHandler *fd_read;
     IOReadvHandler *fd_readv;
+    IOReadZCHandler *fd_read_zc;
     /* Packets may still be sent if this returns zero.  It's used to
        rate-limit the slirp code.  */
     IOCanRWHandler *fd_can_read;
diff --git a/qemu/vl.c b/qemu/vl.c
index de92848..bc5b151 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -4204,6 +4204,7 @@ typedef struct TAPState {
     char buf[TAP_BUFSIZE];
     int size;
     int offload;
+    int received_eagain;
 } TAPState;
 
 static void tap_receive(void *opaque, const uint8_t *buf, int size)
@@ -4232,6 +4233,48 @@ static ssize_t tap_readv(void *opaque, const struct iovec *iov,
     return len;
 }
 
+static VLANClientState *tap_can_zero_copy(TAPState *s)
+{
+    VLANClientState *vc, *vc1 = NULL;
+    int vc_count = 0;
+
+    for (vc = s->vc->vlan->first_client; vc; vc = vc->next) {
+        if (vc == s->vc)
+            continue;
+
+        if (!vc->fd_read_zc || vc_count)
+            return NULL;
+
+        vc_count++;
+        vc1 = vc;
+    }
+
+    return vc1;
+}
+
+static ssize_t tap_sendv(void *opaque, struct iovec *iov, int iovcnt)
+{
+    TAPState *s = opaque;
+    ssize_t ret;
+
+    kvm_sleep_begin();
+    ret = readv(s->fd, iov, iovcnt);
+    kvm_sleep_end();
+    if (ret == -1 && errno == EAGAIN)
+        s->received_eagain = 1;
+
+    return ret;
+}
+
+static void tap_send_zero_copy(TAPState *s, VLANClientState *vc)
+{
+    s->received_eagain = 0;
+    while (s->received_eagain == 0 &&
+           (!vc->fd_can_read || vc->fd_can_read(vc->opaque))) {
+        vc->fd_read_zc(vc->opaque, tap_sendv, s);
+    }
+}
+
 static int tap_can_send(void *opaque)
 {
     TAPState *s = opaque;
@@ -4261,6 +4304,13 @@ static int tap_can_send(void *opaque)
 static void tap_send(void *opaque)
 {
     TAPState *s = opaque;
+    VLANClientState *zc;
+
+    zc = tap_can_zero_copy(s);
+    if (zc) {
+        tap_send_zero_copy(s, zc);
+        return;
+    }
 
     /* First try to send any buffered packet */
     if (s->size > 0) {
-- 
1.5.4.1


  reply	other threads:[~2008-07-24 11:46 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-07-24 11:46 [PATCH 0/9][RFC] KVM virtio_net performance Mark McLoughlin
2008-07-24 11:46 ` [PATCH 1/9] kvm: qemu: Set MIN_TIMER_REARM_US to 150us Mark McLoughlin
2008-07-24 11:46   ` [PATCH 2/9] kvm: qemu: Fix virtio_net tx timer Mark McLoughlin
2008-07-24 11:46     ` [PATCH 3/9] kvm: qemu: Remove virtio_net tx ring-full heuristic Mark McLoughlin
2008-07-24 11:46       ` [PATCH 4/9] kvm: qemu: Add VIRTIO_F_NOTIFY_ON_EMPTY Mark McLoughlin
2008-07-24 11:46         ` [PATCH 5/9] kvm: qemu: Disable recv notifications until avail buffers exhausted Mark McLoughlin
2008-07-24 11:46           ` [PATCH 6/9] kvm: qemu: Add support for partial csums and GSO Mark McLoughlin
2008-07-24 11:46             ` [PATCH 7/9] kvm: qemu: Increase size of virtio_net rings Mark McLoughlin
2008-07-24 11:46               ` [PATCH 8/9] kvm: qemu: Drop the mutex while reading from tapfd Mark McLoughlin
2008-07-24 11:46                 ` Mark McLoughlin [this message]
2008-07-24 23:33                 ` Dor Laor
2008-07-25 17:25                   ` Mark McLoughlin
2008-07-24 23:22       ` [PATCH 3/9] kvm: qemu: Remove virtio_net tx ring-full heuristic Dor Laor
2008-07-25  0:30         ` Rusty Russell
2008-07-25 17:30           ` Mark McLoughlin
2008-07-25 17:23         ` Mark McLoughlin
2008-07-24 23:56       ` Dor Laor
2008-07-26  9:48     ` [PATCH 2/9] kvm: qemu: Fix virtio_net tx timer Avi Kivity
2008-07-26 12:08       ` Mark McLoughlin
2008-07-24 11:55 ` [PATCH 0/9][RFC] KVM virtio_net performance Herbert Xu
2008-07-24 16:53 ` Mark McLoughlin
2008-07-24 18:29   ` Anthony Liguori
2008-07-25 16:36     ` Mark McLoughlin
2008-07-24 20:56 ` Anthony Liguori
2008-07-25 17:17   ` Mark McLoughlin
2008-07-25 21:29     ` Dor Laor
2008-07-26 19:09   ` Bill Davidsen
2008-07-27  7:52     ` Avi Kivity
2008-07-27 12:52       ` Bill Davidsen
2008-07-27 13:17       ` Bill Davidsen
2008-07-28  6:42         ` Mark McLoughlin
2008-08-11  7:44   ` Rusty Russell
2008-08-11  9:51     ` Herbert Xu
2008-08-11 13:50       ` csum offload and af_packet Rusty Russell
2008-08-12  0:32         ` Herbert Xu
2008-08-12  0:51           ` David Miller
2008-08-12  0:58             ` Herbert Xu
2008-08-12 16:17               ` Ingo Oeser
2008-08-12 23:37                 ` Herbert Xu
2008-08-13  0:55                   ` David Miller
2008-08-13  1:09                     ` Herbert Xu
2008-08-13  1:17                       ` David Miller
2008-08-13  1:21                         ` Herbert Xu
2008-08-13  1:25                           ` David Miller
2008-08-13  1:37                             ` Herbert Xu
2008-08-13 11:26                       ` Patrick McHardy
2008-08-17 23:08                         ` David Miller
2008-08-18  1:10                           ` Herbert Xu
2008-08-18  1:12                             ` David Miller
     [not found]                           ` <48A8CCBF.3020408@trash.net>
2008-08-18  1:15                             ` David Miller
2008-08-18  2:12                             ` David Miller
2008-08-18 11:17                               ` Patrick McHardy
2008-08-12  2:27           ` Rusty Russell
2008-07-26  9:45 ` [PATCH 0/9][RFC] KVM virtio_net performance Avi Kivity
2008-07-27  6:48   ` Rusty Russell
2008-07-27  6:48   ` Rusty Russell
2008-08-11 19:56   ` Mark McLoughlin
2008-08-12 13:35     ` Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1216899979-32532-10-git-send-email-markmc@redhat.com \
    --to=markmc@redhat.com \
    --cc=herbert@gondor.apana.org.au \
    --cc=kvm@vger.kernel.org \
    --cc=rusty@rustcorp.com.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.