netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC][PATCH 2/3] vhost-net: handle vnet_hdr processing for MRG_RX_BUF
@ 2010-03-03  0:20 David Stevens
  2010-03-07 16:12 ` Michael S. Tsirkin
  0 siblings, 1 reply; 4+ messages in thread
From: David Stevens @ 2010-03-03  0:20 UTC (permalink / raw)
  To: mst, rusty; +Cc: netdev, kvm, virtualization

[-- Attachment #1: Type: text/plain, Size: 13122 bytes --]

This patch adds vnet_hdr processing for mergeable buffer
support to vhost-net.

Signed-off-by: David L Stevens <dlstevens@us.ibm.com>

diff -ruN net-next-p1/drivers/vhost/net.c net-next-p2/drivers/vhost/net.c
--- net-next-p1/drivers/vhost/net.c     2010-03-01 11:44:22.000000000 
-0800
+++ net-next-p2/drivers/vhost/net.c     2010-03-02 13:01:34.000000000 
-0800
@@ -109,7 +109,6 @@
        };
        size_t len, total_len = 0;
        int err, wmem;
-       size_t hdr_size;
        struct socket *sock = rcu_dereference(vq->private_data);
        if (!sock)
                return;
@@ -124,7 +123,6 @@
 
        if (wmem < sock->sk->sk_sndbuf * 2)
                tx_poll_stop(net);
-       hdr_size = vq->hdr_size;
 
        for (;;) {
                head.iov_base = (void *)vhost_get_vq_desc(&net->dev, vq,
@@ -148,25 +146,45 @@
                               "out %d, int %d\n", out, in);
                        break;
                }
+               if (vq->guest_hlen > vq->sock_hlen) {
+                       if (msg.msg_iov[0].iov_len == vq->guest_hlen)
+                               msg.msg_iov[0].iov_len = vq->sock_hlen;
+                       else if (out == ARRAY_SIZE(vq->iov))
+                               vq_err(vq, "handle_tx iov overflow!");
+                       else {
+                               int i;
+
+                               /* give header its own iov */
+                               for (i=out; i>0; ++i)
+                                       msg.msg_iov[i+1] = msg.msg_iov[i];
+                               msg.msg_iov[0].iov_len = vq->sock_hlen;
+                               msg.msg_iov[1].iov_base += vq->guest_hlen;
+                               msg.msg_iov[1].iov_len -= vq->guest_hlen;
+                               out++;
+                       }
+               }
                /* Skip header. TODO: support TSO. */
-               s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out);
                msg.msg_iovlen = out;
                head.iov_len = len = iov_length(vq->iov, out);
                /* Sanity check */
                if (!len) {
                        vq_err(vq, "Unexpected header len for TX: "
                               "%zd expected %zd\n",
-                              iov_length(vq->hdr, s), hdr_size);
+                              len, vq->guest_hlen);
                        break;
                }
                /* TODO: Check specific error and bomb out unless ENOBUFS? */
                err = sock->ops->sendmsg(NULL, sock, &msg, len);
                if (unlikely(err < 0)) {
-                       vhost_discard(vq, 1);
-                       tx_poll_start(net, sock);
+                       if (err == -EAGAIN) {
+                               tx_poll_start(net, sock);
+                       } else {
+                               vq_err(vq, "sendmsg: errno %d\n", -err);
+                               /* drop packet; do not discard/resend */
+                               vhost_add_used_and_signal(&net->dev,vq,&head,1);
+                       }
                        break;
-               }
-               if (err != len)
+               } else if (err != len)
                        pr_err("Truncated TX packet: "
                               " len %d != %zd\n", err, len);
                vhost_add_used_and_signal(&net->dev, vq, &head, 1);
@@ -207,14 +225,8 @@
                .msg_flags = MSG_DONTWAIT,
        };
 
-       struct virtio_net_hdr hdr = {
-               .flags = 0,
-               .gso_type = VIRTIO_NET_HDR_GSO_NONE
-       };
-
        size_t len, total_len = 0;
        int err, headcount, datalen;
-       size_t hdr_size;
        struct socket *sock = rcu_dereference(vq->private_data);
 
        if (!sock || !skb_head_len(&sock->sk->sk_receive_queue))
@@ -223,7 +235,6 @@
        use_mm(net->dev.mm);
        mutex_lock(&vq->mutex);
        vhost_disable_notify(vq);
-       hdr_size = vq->hdr_size;
 
        vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
                vq->log : NULL;
@@ -232,25 +243,18 @@
                headcount = vhost_get_heads(vq, datalen, &in, vq_log, &log);
                /* OK, now we need to know about added descriptors. */
                if (!headcount) {
-                       if (unlikely(vhost_enable_notify(vq))) {
-                               /* They have slipped one in as we were
-                                * doing that: check again. */
-                               vhost_disable_notify(vq);
-                               continue;
-                       }
-                       /* Nothing new?  Wait for eventfd to tell us
-                        * they refilled. */
+                       vhost_enable_notify(vq);
                        break;
                }
                /* Skip header. TODO: support TSO/mergeable rx buffers. */
-               s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
                msg.msg_iovlen = in;
                len = iov_length(vq->iov, in);
+
                /* Sanity check */
                if (!len) {
                        vq_err(vq, "Unexpected header len for RX: "
                               "%zd expected %zd\n",
-                              iov_length(vq->hdr, s), hdr_size);
+                              len, vq->guest_hlen);
                        break;
                }
                err = sock->ops->recvmsg(NULL, sock, &msg,
@@ -268,13 +272,7 @@
                        continue;
                }
                len = err;
-               err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
-               if (err) {
-                       vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
-                              vq->iov->iov_base, err);
-                       break;
-               }
-               len += hdr_size;
+               len += vq->guest_hlen - vq->sock_hlen;
                vhost_add_used_and_signal(&net->dev, vq, vq->heads, 
headcount);
                if (unlikely(vq_log))
                        vhost_log_write(vq, vq_log, log, len);
@@ -483,6 +481,13 @@
        return ERR_PTR(-ENOTSOCK);
 }
 
+static int vhost_sock_is_raw(struct socket *sock)
+{
+       if (!sock || !sock->sk)
+               return 0;
+       return sock->sk->sk_type == SOCK_RAW;
+}
+
 static long vhost_net_set_backend(struct vhost_net *n, unsigned index, 
int fd)
 {
        struct socket *sock, *oldsock;
@@ -519,6 +524,20 @@
 
        vhost_net_disable_vq(n, vq);
        rcu_assign_pointer(vq->private_data, sock);
+
+       if (sock && sock->sk) {
+               if (!vhost_sock_is_raw(sock) ||
+                   vhost_has_feature(&n->dev, 
VHOST_NET_F_VIRTIO_NET_HDR)) {
+                       vq->sock_hlen = sizeof(struct virtio_net_hdr);
+                       if (vhost_has_feature(&n->dev, 
VIRTIO_NET_F_MRG_RXBUF))
+                               vq->guest_hlen =
+                                       sizeof(struct 
virtio_net_hdr_mrg_rxbuf);
+                       else
+                               vq->guest_hlen = sizeof(struct 
virtio_net_hdr);
+               } else
+                       vq->guest_hlen = vq->sock_hlen = 0;
+       } else
+               vq_err(vq, "vhost_net_set_backend: sock->sk is NULL");
        vhost_net_enable_vq(n, vq);
        mutex_unlock(&vq->mutex);
 done:
@@ -566,8 +585,17 @@
        n->dev.acked_features = features;
        smp_wmb();
        for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
-               mutex_lock(&n->vqs[i].mutex);
-               n->vqs[i].hdr_size = hdr_size;
+               struct vhost_virtqueue *vq = n->vqs + i;
+               struct socket *sock = vq->private_data;
+
+               mutex_lock(&vq->mutex);
+               if (features & (1 << VIRTIO_NET_F_MRG_RXBUF))
+                       vq->sock_hlen = sizeof(struct 
virtio_net_hdr_mrg_rxbuf);
+               else if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR) ||
+                        !vhost_sock_is_raw(sock))
+                       vq->sock_hlen = sizeof(struct virtio_net_hdr);
+               else
+                       vq->sock_hlen = 0;
                mutex_unlock(&n->vqs[i].mutex);
        }
        vhost_net_flush(n);
diff -ruN net-next-p1/drivers/vhost/vhost.c 
net-next-p2/drivers/vhost/vhost.c
--- net-next-p1/drivers/vhost/vhost.c   2010-03-01 11:44:06.000000000 
-0800
+++ net-next-p2/drivers/vhost/vhost.c   2010-03-02 12:53:02.000000000 
-0800
@@ -113,7 +113,8 @@
        vq->used_flags = 0;
        vq->log_used = false;
        vq->log_addr = -1ull;
-       vq->hdr_size = 0;
+       vq->guest_hlen = 0;
+       vq->sock_hlen = 0;
        vq->private_data = NULL;
        vq->log_base = NULL;
        vq->error_ctx = NULL;
@@ -848,20 +849,85 @@
        return 0;
 }
 
+static int
+vhost_get_hdr(struct vhost_virtqueue *vq, int *in, struct vhost_log *log,
+       int *log_num)
+{
+       struct iovec *heads = vq->heads;
+       struct iovec *iov = vq->iov;
+       int out;
+
+       *in = 0;
+       iov[0].iov_len = 0;
+
+       /* get buffer, starting from iov[1] */
+       heads[0].iov_base = (void *)vhost_get_vq_desc(vq->dev, vq,
+               vq->iov+1, ARRAY_SIZE(vq->iov)-1, &out, in, log, log_num);
+       if (out || *in <= 0) {
+               vq_err(vq, "unexpected descriptor format for RX: out %d, "
+                       "in %d\n", out, *in);
+               return 0;
+       }
+       if (heads[0].iov_base == (void *)vq->num)
+               return 0;
+
+       /* make iov[0] the header */
+       if (!vq->guest_hlen) {
+               if (vq->sock_hlen) {
+                       static struct virtio_net_hdr junk; /* bit bucket 
*/
+
+                       iov[0].iov_base = &junk;
+                       iov[0].iov_len = sizeof(junk);
+               } else
+                       iov[0].iov_len = 0;
+       }
+       if (vq->sock_hlen < vq->guest_hlen) {
+               iov[0].iov_base = iov[1].iov_base;
+               iov[0].iov_len = vq->sock_hlen;
+
+               if (iov[1].iov_len < vq->sock_hlen) {
+                       vq_err(vq, "can't fit header in one buffer!");
+                       vhost_discard(vq, 1);
+                       return 0;
+               }
+               if (!vq->sock_hlen) {
+                       static const struct virtio_net_hdr_mrg_rxbuf hdr = 
{
+                               .hdr.flags = 0,
+                               .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
+                       };
+                       memcpy(iov[0].iov_base, &hdr, vq->guest_hlen);
+               }
+               iov[1].iov_base += vq->guest_hlen;
+               iov[1].iov_len -= vq->guest_hlen;
+       }
+       return 1;
+}
+
 unsigned vhost_get_heads(struct vhost_virtqueue *vq, int datalen, int 
*iovcount,
        struct vhost_log *log, unsigned int *log_num)
 {
        struct iovec *heads = vq->heads;
-       int out, in;
+       int out, in = 0;
+       int seg = 0;
        int hc = 0;
 
+       if (vq->guest_hlen != vq->sock_hlen) {
+               seg = vhost_get_hdr(vq, &in, log, log_num);
+               if (!seg)
+                       return 0;
+               hc++;
+               datalen -= iov_length(vq->iov+seg, in);
+               seg += in;
+       }
+
        while (datalen > 0) {
                if (hc >= VHOST_NET_MAX_SG) {
                        vhost_discard(vq, hc);
                        return 0;
                }
                heads[hc].iov_base = (void *)vhost_get_vq_desc(vq->dev, 
vq,
-                       vq->iov, ARRAY_SIZE(vq->iov), &out, &in, log, 
log_num);
+                       vq->iov+seg, ARRAY_SIZE(vq->iov)-seg, &out, &in,
+                       log, log_num);
                if (heads[hc].iov_base == (void *)vq->num) {
                        vhost_discard(vq, hc);
                        return 0;
@@ -872,11 +938,12 @@
                        vhost_discard(vq, hc);
                        return 0;
                }
-               heads[hc].iov_len = iov_length(vq->iov, in);
-               hc++;
+               heads[hc].iov_len = iov_length(vq->iov+seg, in);
                datalen -= heads[hc].iov_len;
+               hc++;
+               seg += in;
        }
-       *iovcount = in;
+       *iovcount = seg;
        return hc;
 }
 
diff -ruN net-next-p1/drivers/vhost/vhost.h 
net-next-p2/drivers/vhost/vhost.h
--- net-next-p1/drivers/vhost/vhost.h   2010-03-01 11:42:18.000000000 
-0800
+++ net-next-p2/drivers/vhost/vhost.h   2010-03-02 13:02:03.000000000 
-0800
@@ -82,10 +82,9 @@
        u64 log_addr;
 
        struct iovec indirect[VHOST_NET_MAX_SG];
-       struct iovec iov[VHOST_NET_MAX_SG];
-       struct iovec hdr[VHOST_NET_MAX_SG];
+       struct iovec iov[VHOST_NET_MAX_SG+1]; /* an extra for vnet hdr */
        struct iovec heads[VHOST_NET_MAX_SG];
-       size_t hdr_size;
+       size_t guest_hlen, sock_hlen;
        /* We use a kind of RCU to access private pointer.
         * All readers access it from workqueue, which makes it possible 
to
         * flush the workqueue instead of synchronize_rcu. Therefore 
readers do


[-- Attachment #2: MRXB2.patch --]
[-- Type: application/octet-stream, Size: 9374 bytes --]

diff -ruN net-next-p1/drivers/vhost/net.c net-next-p2/drivers/vhost/net.c
--- net-next-p1/drivers/vhost/net.c	2010-03-01 11:44:22.000000000 -0800
+++ net-next-p2/drivers/vhost/net.c	2010-03-02 13:01:34.000000000 -0800
@@ -109,7 +109,6 @@
 	};
 	size_t len, total_len = 0;
 	int err, wmem;
-	size_t hdr_size;
 	struct socket *sock = rcu_dereference(vq->private_data);
 	if (!sock)
 		return;
@@ -124,7 +123,6 @@
 
 	if (wmem < sock->sk->sk_sndbuf * 2)
 		tx_poll_stop(net);
-	hdr_size = vq->hdr_size;
 
 	for (;;) {
 		head.iov_base = (void *)vhost_get_vq_desc(&net->dev, vq,
@@ -148,25 +146,45 @@
 			       "out %d, int %d\n", out, in);
 			break;
 		}
+		if (vq->guest_hlen > vq->sock_hlen) {
+			if (msg.msg_iov[0].iov_len == vq->guest_hlen)
+				msg.msg_iov[0].iov_len = vq->sock_hlen;
+			else if (out == ARRAY_SIZE(vq->iov))
+				vq_err(vq, "handle_tx iov overflow!");
+			else {
+				int i;
+
+				/* give header its own iov */
+				for (i=out; i>0; ++i)
+					msg.msg_iov[i+1] = msg.msg_iov[i];
+				msg.msg_iov[0].iov_len = vq->sock_hlen;
+				msg.msg_iov[1].iov_base += vq->guest_hlen;
+				msg.msg_iov[1].iov_len -= vq->guest_hlen;
+				out++;
+			}
+		}
 		/* Skip header. TODO: support TSO. */
-		s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out);
 		msg.msg_iovlen = out;
 		head.iov_len = len = iov_length(vq->iov, out);
 		/* Sanity check */
 		if (!len) {
 			vq_err(vq, "Unexpected header len for TX: "
 			       "%zd expected %zd\n",
-			       iov_length(vq->hdr, s), hdr_size);
+			       len, vq->guest_hlen);
 			break;
 		}
 		/* TODO: Check specific error and bomb out unless ENOBUFS? */
 		err = sock->ops->sendmsg(NULL, sock, &msg, len);
 		if (unlikely(err < 0)) {
-			vhost_discard(vq, 1);
-			tx_poll_start(net, sock);
+			if (err == -EAGAIN) {
+				tx_poll_start(net, sock);
+			} else {
+				vq_err(vq, "sendmsg: errno %d\n", -err);
+				/* drop packet; do not discard/resend */
+				vhost_add_used_and_signal(&net->dev,vq,&head,1);
+			}
 			break;
-		}
-		if (err != len)
+		} else if (err != len)
 			pr_err("Truncated TX packet: "
 			       " len %d != %zd\n", err, len);
 		vhost_add_used_and_signal(&net->dev, vq, &head, 1);
@@ -207,14 +225,8 @@
 		.msg_flags = MSG_DONTWAIT,
 	};
 
-	struct virtio_net_hdr hdr = {
-		.flags = 0,
-		.gso_type = VIRTIO_NET_HDR_GSO_NONE
-	};
-
 	size_t len, total_len = 0;
 	int err, headcount, datalen;
-	size_t hdr_size;
 	struct socket *sock = rcu_dereference(vq->private_data);
 
 	if (!sock || !skb_head_len(&sock->sk->sk_receive_queue))
@@ -223,7 +235,6 @@
 	use_mm(net->dev.mm);
 	mutex_lock(&vq->mutex);
 	vhost_disable_notify(vq);
-	hdr_size = vq->hdr_size;
 
 	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
 		vq->log : NULL;
@@ -232,25 +243,18 @@
 		headcount = vhost_get_heads(vq, datalen, &in, vq_log, &log);
 		/* OK, now we need to know about added descriptors. */
 		if (!headcount) {
-			if (unlikely(vhost_enable_notify(vq))) {
-				/* They have slipped one in as we were
-				 * doing that: check again. */
-				vhost_disable_notify(vq);
-				continue;
-			}
-			/* Nothing new?  Wait for eventfd to tell us
-			 * they refilled. */
+			vhost_enable_notify(vq);
 			break;
 		}
 		/* Skip header. TODO: support TSO/mergeable rx buffers. */
-		s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
 		msg.msg_iovlen = in;
 		len = iov_length(vq->iov, in);
+
 		/* Sanity check */
 		if (!len) {
 			vq_err(vq, "Unexpected header len for RX: "
 			       "%zd expected %zd\n",
-			       iov_length(vq->hdr, s), hdr_size);
+			       len, vq->guest_hlen);
 			break;
 		}
 		err = sock->ops->recvmsg(NULL, sock, &msg,
@@ -268,13 +272,7 @@
 			continue;
 		}
 		len = err;
-		err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
-		if (err) {
-			vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
-			       vq->iov->iov_base, err);
-			break;
-		}
-		len += hdr_size;
+		len += vq->guest_hlen - vq->sock_hlen;
 		vhost_add_used_and_signal(&net->dev, vq, vq->heads, headcount);
 		if (unlikely(vq_log))
 			vhost_log_write(vq, vq_log, log, len);
@@ -483,6 +481,13 @@
 	return ERR_PTR(-ENOTSOCK);
 }
 
+static int vhost_sock_is_raw(struct socket *sock)
+{
+	if (!sock || !sock->sk)
+		return 0;
+	return sock->sk->sk_type == SOCK_RAW;
+}
+
 static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 {
 	struct socket *sock, *oldsock;
@@ -519,6 +524,20 @@
 
 	vhost_net_disable_vq(n, vq);
 	rcu_assign_pointer(vq->private_data, sock);
+
+	if (sock && sock->sk) {
+		if (!vhost_sock_is_raw(sock) ||
+		    vhost_has_feature(&n->dev, VHOST_NET_F_VIRTIO_NET_HDR)) {
+			vq->sock_hlen = sizeof(struct virtio_net_hdr);
+			if (vhost_has_feature(&n->dev, VIRTIO_NET_F_MRG_RXBUF))
+				vq->guest_hlen =
+					sizeof(struct virtio_net_hdr_mrg_rxbuf);
+			else
+				vq->guest_hlen = sizeof(struct virtio_net_hdr);
+		} else
+			vq->guest_hlen = vq->sock_hlen = 0;
+	} else
+		vq_err(vq, "vhost_net_set_backend: sock->sk is NULL");
 	vhost_net_enable_vq(n, vq);
 	mutex_unlock(&vq->mutex);
 done:
@@ -566,8 +585,17 @@
 	n->dev.acked_features = features;
 	smp_wmb();
 	for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
-		mutex_lock(&n->vqs[i].mutex);
-		n->vqs[i].hdr_size = hdr_size;
+		struct vhost_virtqueue *vq = n->vqs + i;
+		struct socket *sock = vq->private_data;
+
+		mutex_lock(&vq->mutex);
+		if (features & (1 << VIRTIO_NET_F_MRG_RXBUF))
+			vq->sock_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+		else if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR) ||
+			 !vhost_sock_is_raw(sock))
+			vq->sock_hlen = sizeof(struct virtio_net_hdr);
+		else
+			vq->sock_hlen = 0;
 		mutex_unlock(&n->vqs[i].mutex);
 	}
 	vhost_net_flush(n);
diff -ruN net-next-p1/drivers/vhost/vhost.c net-next-p2/drivers/vhost/vhost.c
--- net-next-p1/drivers/vhost/vhost.c	2010-03-01 11:44:06.000000000 -0800
+++ net-next-p2/drivers/vhost/vhost.c	2010-03-02 12:53:02.000000000 -0800
@@ -113,7 +113,8 @@
 	vq->used_flags = 0;
 	vq->log_used = false;
 	vq->log_addr = -1ull;
-	vq->hdr_size = 0;
+	vq->guest_hlen = 0;
+	vq->sock_hlen = 0;
 	vq->private_data = NULL;
 	vq->log_base = NULL;
 	vq->error_ctx = NULL;
@@ -848,20 +849,85 @@
 	return 0;
 }
 
+static int
+vhost_get_hdr(struct vhost_virtqueue *vq, int *in, struct vhost_log *log,
+	int *log_num)
+{
+	struct iovec *heads = vq->heads;
+	struct iovec *iov = vq->iov;
+	int out;
+
+	*in = 0;
+	iov[0].iov_len = 0;
+
+	/* get buffer, starting from iov[1] */
+	heads[0].iov_base = (void *)vhost_get_vq_desc(vq->dev, vq,
+		vq->iov+1, ARRAY_SIZE(vq->iov)-1, &out, in, log, log_num);
+	if (out || *in <= 0) {
+		vq_err(vq, "unexpected descriptor format for RX: out %d, "
+			"in %d\n", out, *in);
+		return 0;
+	}
+	if (heads[0].iov_base == (void *)vq->num)
+		return 0;
+
+	/* make iov[0] the header */
+	if (!vq->guest_hlen) {
+		if (vq->sock_hlen) {
+			static struct virtio_net_hdr junk; /* bit bucket */
+
+			iov[0].iov_base = &junk;
+			iov[0].iov_len = sizeof(junk);
+		} else
+			iov[0].iov_len = 0;
+	}
+	if (vq->sock_hlen < vq->guest_hlen) {
+		iov[0].iov_base = iov[1].iov_base;
+		iov[0].iov_len = vq->sock_hlen;
+
+		if (iov[1].iov_len < vq->sock_hlen) {
+			vq_err(vq, "can't fit header in one buffer!");
+			vhost_discard(vq, 1);
+			return 0;
+		}
+		if (!vq->sock_hlen) {
+			static const struct virtio_net_hdr_mrg_rxbuf hdr = {
+				.hdr.flags = 0,
+				.hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
+			};
+			memcpy(iov[0].iov_base, &hdr, vq->guest_hlen);
+		}
+		iov[1].iov_base += vq->guest_hlen;
+		iov[1].iov_len -= vq->guest_hlen;
+	}
+	return 1;
+}
+
 unsigned vhost_get_heads(struct vhost_virtqueue *vq, int datalen, int *iovcount,
 	struct vhost_log *log, unsigned int *log_num)
 {
 	struct iovec *heads = vq->heads;
-	int out, in;
+	int out, in = 0;
+	int seg = 0;
 	int hc = 0;
 
+	if (vq->guest_hlen != vq->sock_hlen) {
+		seg = vhost_get_hdr(vq, &in, log, log_num);
+		if (!seg)
+			return 0;
+		hc++;
+		datalen -= iov_length(vq->iov+seg, in);
+		seg += in;
+	}
+
 	while (datalen > 0) {
 		if (hc >= VHOST_NET_MAX_SG) {
 			vhost_discard(vq, hc);
 			return 0;
 		}
 		heads[hc].iov_base = (void *)vhost_get_vq_desc(vq->dev, vq,
-			vq->iov, ARRAY_SIZE(vq->iov), &out, &in, log, log_num);
+			vq->iov+seg, ARRAY_SIZE(vq->iov)-seg, &out, &in,
+			log, log_num);
 		if (heads[hc].iov_base == (void *)vq->num) {
 			vhost_discard(vq, hc);
 			return 0;
@@ -872,11 +938,12 @@
 			vhost_discard(vq, hc);
 			return 0;
 		}
-		heads[hc].iov_len = iov_length(vq->iov, in);
-		hc++;
+		heads[hc].iov_len = iov_length(vq->iov+seg, in);
 		datalen -= heads[hc].iov_len;
+		hc++;
+		seg += in;
 	}
-	*iovcount = in;
+	*iovcount = seg;
 	return hc;
 }
 
diff -ruN net-next-p1/drivers/vhost/vhost.h net-next-p2/drivers/vhost/vhost.h
--- net-next-p1/drivers/vhost/vhost.h	2010-03-01 11:42:18.000000000 -0800
+++ net-next-p2/drivers/vhost/vhost.h	2010-03-02 13:02:03.000000000 -0800
@@ -82,10 +82,9 @@
 	u64 log_addr;
 
 	struct iovec indirect[VHOST_NET_MAX_SG];
-	struct iovec iov[VHOST_NET_MAX_SG];
-	struct iovec hdr[VHOST_NET_MAX_SG];
+	struct iovec iov[VHOST_NET_MAX_SG+1]; /* an extra for vnet hdr */
 	struct iovec heads[VHOST_NET_MAX_SG];
-	size_t hdr_size;
+	size_t guest_hlen, sock_hlen;
 	/* We use a kind of RCU to access private pointer.
 	 * All readers access it from workqueue, which makes it possible to
 	 * flush the workqueue instead of synchronize_rcu. Therefore readers do

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2010-03-08  7:58 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-03-03  0:20 [RFC][PATCH 2/3] vhost-net: handle vnet_hdr processing for MRG_RX_BUF David Stevens
2010-03-07 16:12 ` Michael S. Tsirkin
2010-03-08  1:28   ` David Stevens
2010-03-08  7:54     ` Michael S. Tsirkin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).