netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/4] tun: Interface to query tun/tap features.
@ 2008-06-25 14:28 Rusty Russell
  2008-06-25 14:29 ` [PATCH 2/4] tun: TUNSETFEATURES to set gso features Rusty Russell
  2008-07-02  4:59 ` [PATCH 1/4] tun: Interface to query tun/tap features Max Krasnyansky
  0 siblings, 2 replies; 17+ messages in thread
From: Rusty Russell @ 2008-06-25 14:28 UTC (permalink / raw)
  To: Max Krasnyansky; +Cc: Herbert Xu, netdev, virtualization

The problem with introducing checksum offload and gso to tun is they
need to set dev->features to enable GSO and/or checksumming, which is
supposed to be done before register_netdevice(), ie. as part of
TUNSETIFF.

Unfortunately, TUNSETIFF has always just ignored flags it doesn't
understand, so there's no good way of detecting whether the kernel
supports new IFF_ flags.

This patch implements a TUNGETFEATURES ioctl which returns all the valid IFF
flags.  It could be extended later to include other features.

Here's an example program which uses it:

#include <linux/if_tun.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <err.h>
#include <stdio.h>

/* Table pairing each IFF_ flag bit this program recognises with the label
 * main() prints for it. */
static struct {
	unsigned int flag;	/* IFF_* bit tested against the feature mask */
	const char *name;	/* text printed when that bit is set */
} known_flags[] = {
	{ .flag = IFF_TUN,       .name = "TUN" },
	{ .flag = IFF_TAP,       .name = "TAP" },
	{ .flag = IFF_NO_PI,     .name = "NO_PI" },
	{ .flag = IFF_ONE_QUEUE, .name = "ONE_QUEUE" },
};

/*
 * Ask the tun driver which IFF_ flags it accepts and print them by name.
 *
 * Opens /dev/net/tun and issues TUNGETFEATURES on it.  If the ioctl fails,
 * a fixed set of four flags is assumed instead.  Every bit that matches an
 * entry in known_flags[] is printed by name; any bits left over are printed
 * in hex as UNKNOWN.
 *
 * Exits with status 1 via err() if the device cannot be opened; otherwise
 * returns 0.
 */
int main(void)
{
	unsigned int features, i;

	int netfd = open("/dev/net/tun", O_RDWR);
	if (netfd < 0)
		err(1, "Opening /dev/net/tun");

	if (ioctl(netfd, TUNGETFEATURES, &features) != 0) {
		/* Any ioctl failure is treated as "no TUNGETFEATURES".  The
		 * fallback is spelled out rather than derived from
		 * known_flags[] so that it stays fixed if the table grows. */
		printf("Kernel does not support TUNGETFEATURES, guessing\n");
		features = (IFF_TUN|IFF_TAP|IFF_NO_PI|IFF_ONE_QUEUE);
	}
	printf("Available features are: ");
	for (i = 0; i < sizeof(known_flags)/sizeof(known_flags[0]); i++) {
		if (features & known_flags[i].flag) {
			/* Clear each bit as it is reported so that only
			 * unrecognised bits remain afterwards. */
			features &= ~known_flags[i].flag;
			printf("%s ", known_flags[i].name);
		}
	}
	if (features)
		printf("(UNKNOWN %#x)", features);
	printf("\n");
	return 0;
}

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/net/tun.c      |    8 ++++++++
 include/linux/if_tun.h |    1 +
 2 files changed, 9 insertions(+)

diff -r 8414a579e106 drivers/net/tun.c
--- a/drivers/net/tun.c	Tue Apr 22 07:36:45 2008 +1000
+++ b/drivers/net/tun.c	Tue Apr 22 07:37:33 2008 +1000
@@ -625,6 +625,14 @@ static int tun_chr_ioctl(struct inode *i
 		return 0;
 	}
 
+	if (cmd == TUNGETFEATURES) {
+		/* Currently this just means: "what IFF flags are valid?".
+		 * This is needed because we never checked for invalid flags on
+		 * TUNSETIFF. */
+		return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE,
+				(unsigned int __user*)argp);
+	}
+
 	if (!tun)
 		return -EBADFD;
 
diff -r 8414a579e106 include/linux/if_tun.h
--- a/include/linux/if_tun.h	Tue Apr 22 07:36:45 2008 +1000
+++ b/include/linux/if_tun.h	Tue Apr 22 07:37:33 2008 +1000
@@ -42,6 +42,7 @@
 #define TUNSETOWNER   _IOW('T', 204, int)
 #define TUNSETLINK    _IOW('T', 205, int)
 #define TUNSETGROUP   _IOW('T', 206, int)
+#define TUNGETFEATURES _IOR('T', 207, unsigned int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 2/4] tun: TUNSETFEATURES to set gso features.
  2008-06-25 14:28 [PATCH 1/4] tun: Interface to query tun/tap features Rusty Russell
@ 2008-06-25 14:29 ` Rusty Russell
  2008-06-25 14:30   ` [PATCH 3/4] tun: Allow GSO using virtio_net_hdr Rusty Russell
  2008-07-02  5:02   ` [PATCH 2/4] tun: TUNSETFEATURES to set gso features Max Krasnyansky
  2008-07-02  4:59 ` [PATCH 1/4] tun: Interface to query tun/tap features Max Krasnyansky
  1 sibling, 2 replies; 17+ messages in thread
From: Rusty Russell @ 2008-06-25 14:29 UTC (permalink / raw)
  To: Max Krasnyansky; +Cc: Herbert Xu, netdev, virtualization, markmc

ethtool is useful for setting (some) device fields, but it's
root-only.  Finer feature control is available through a tun-specific
ioctl.

(Includes Mark McLoughlin <markmc@redhat.com>'s fix to hold rtnl sem).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/net/tun.c      |   43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/if_tun.h |    7 +++++++
 2 files changed, 50 insertions(+)

diff -r e08e6c5130ff drivers/net/tun.c
--- a/drivers/net/tun.c	Thu Jun 26 00:17:07 2008 +1000
+++ b/drivers/net/tun.c	Thu Jun 26 00:21:11 2008 +1000
@@ -596,6 +596,46 @@ static int tun_set_iff(struct net *net, 
 	return err;
 }
 
+/* This is like a cut-down ethtool ops, except done via tun fd so no
+ * privs required. */
+static int set_offload(struct net_device *dev, unsigned long arg)
+{
+	unsigned int old_features, features;
+
+	old_features = dev->features;
+	/* Unset features, set them as we chew on the arg. */
+	features = (old_features & ~(NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST
+				    |NETIF_F_TSO_ECN|NETIF_F_TSO|NETIF_F_TSO6));
+
+	if (arg & TUN_F_CSUM) {
+		features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
+		arg &= ~TUN_F_CSUM;
+
+		if (arg & (TUN_F_TSO4|TUN_F_TSO6)) {
+			if (arg & TUN_F_TSO_ECN) {
+				features |= NETIF_F_TSO_ECN;
+				arg &= ~TUN_F_TSO_ECN;
+			}
+			if (arg & TUN_F_TSO4)
+				features |= NETIF_F_TSO;
+			if (arg & TUN_F_TSO6)
+				features |= NETIF_F_TSO6;
+			arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
+		}
+	}
+
+	/* This gives the user a way to test for new features in future by
+	 * trying to set them. */
+	if (arg)
+		return -EINVAL;
+
+	dev->features = features;
+	if (old_features != dev->features)
+		netdev_features_change(dev);
+
+	return 0;
+}
+
 static int tun_chr_ioctl(struct inode *inode, struct file *file,
 			 unsigned int cmd, unsigned long arg)
 {
@@ -699,6 +739,15 @@ static int tun_chr_ioctl(struct inode *i
 		tun->debug = arg;
 		break;
 #endif
+
+	case TUNSETOFFLOAD:
+	{
+		int ret;
+		rtnl_lock();
+		ret = set_offload(tun->dev, arg);
+		rtnl_unlock();
+		return ret;
+	}
 
 	case SIOCGIFFLAGS:
 		ifr.ifr_flags = tun->if_flags;
diff -r e08e6c5130ff include/linux/if_tun.h
--- a/include/linux/if_tun.h	Thu Jun 26 00:17:07 2008 +1000
+++ b/include/linux/if_tun.h	Thu Jun 26 00:21:11 2008 +1000
@@ -43,12 +43,19 @@
 #define TUNSETLINK    _IOW('T', 205, int)
 #define TUNSETGROUP   _IOW('T', 206, int)
 #define TUNGETFEATURES _IOR('T', 207, unsigned int)
+#define TUNSETOFFLOAD _IOW('T', 208, unsigned int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
 #define IFF_TAP		0x0002
 #define IFF_NO_PI	0x1000
 #define IFF_ONE_QUEUE	0x2000
+
+/* Features for GSO (TUNSETOFFLOAD). */
+#define TUN_F_CSUM	0x01	/* You can hand me unchecksummed packets. */
+#define TUN_F_TSO4	0x02	/* I can handle TSO for IPv4 packets */
+#define TUN_F_TSO6	0x04	/* I can handle TSO for IPv6 packets */
+#define TUN_F_TSO_ECN	0x08	/* I can handle TSO with ECN bits. */
 
 struct tun_pi {
 	unsigned short flags;

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 3/4] tun: Allow GSO using virtio_net_hdr
  2008-06-25 14:29 ` [PATCH 2/4] tun: TUNSETFEATURES to set gso features Rusty Russell
@ 2008-06-25 14:30   ` Rusty Russell
  2008-06-25 14:32     ` [PATCH 4/4] lguest: Use GSO/IFF_VNET_HDR extensions on tun/tap Rusty Russell
                       ` (3 more replies)
  2008-07-02  5:02   ` [PATCH 2/4] tun: TUNSETFEATURES to set gso features Max Krasnyansky
  1 sibling, 4 replies; 17+ messages in thread
From: Rusty Russell @ 2008-06-25 14:30 UTC (permalink / raw)
  To: Max Krasnyansky; +Cc: Herbert Xu, netdev, virtualization, markmc

Add an IFF_VNET_HDR flag.  This uses the same ABI as virtio_net (ie. prepending
struct virtio_net_hdr to packets) to indicate GSO and checksum information.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/net/tun.c      |   90 ++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/if_tun.h |    2 +
 2 files changed, 91 insertions(+), 1 deletion(-)

diff -r d94590c1550a drivers/net/tun.c
--- a/drivers/net/tun.c	Thu Jun 26 00:21:11 2008 +1000
+++ b/drivers/net/tun.c	Thu Jun 26 00:21:59 2008 +1000
@@ -63,6 +63,7 @@
 #include <linux/if_tun.h>
 #include <linux/crc32.h>
 #include <linux/nsproxy.h>
+#include <linux/virtio_net.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
@@ -283,12 +284,24 @@ static __inline__ ssize_t tun_get_user(s
 	struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) };
 	struct sk_buff *skb;
 	size_t len = count, align = 0;
+	struct virtio_net_hdr gso = { 0 };
 
 	if (!(tun->flags & TUN_NO_PI)) {
 		if ((len -= sizeof(pi)) > count)
 			return -EINVAL;
 
 		if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi)))
+			return -EFAULT;
+	}
+
+	if (tun->flags & TUN_VNET_HDR) {
+		if ((len -= sizeof(gso)) > count)
+			return -EINVAL;
+
+		if (gso.hdr_len > len)
+			return -EINVAL;
+
+		if (memcpy_fromiovec((void *)&gso, iv, sizeof(gso)))
 			return -EFAULT;
 	}
 
@@ -322,8 +335,45 @@ static __inline__ ssize_t tun_get_user(s
 		break;
 	};
 
-	if (tun->flags & TUN_NOCHECKSUM)
+	if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+		if (!skb_partial_csum_set(skb, gso.csum_start,
+					  gso.csum_offset)) {
+			tun->dev->stats.rx_dropped++;
+			kfree_skb(skb);
+			return -EINVAL;
+		}
+	} else if (tun->flags & TUN_NOCHECKSUM)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+		pr_debug("GSO!\n");
+		switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+		case VIRTIO_NET_HDR_GSO_TCPV4:
+			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+			break;
+		case VIRTIO_NET_HDR_GSO_TCPV6:
+			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+			break;
+		default:
+			tun->dev->stats.rx_dropped++;
+			kfree_skb(skb);
+			return -EINVAL;
+		}
+
+		if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN)
+			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+
+		skb_shinfo(skb)->gso_size = gso.gso_size;
+		if (skb_shinfo(skb)->gso_size == 0) {
+			tun->dev->stats.rx_dropped++;
+			kfree_skb(skb);
+			return -EINVAL;
+		}
+
+		/* Header must be checked, and gso_segs computed. */
+		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+		skb_shinfo(skb)->gso_segs = 0;
+	}
 
 	netif_rx_ni(skb);
 	tun->dev->last_rx = jiffies;
@@ -367,6 +417,39 @@ static __inline__ ssize_t tun_put_user(s
 		if (memcpy_toiovec(iv, (void *) &pi, sizeof(pi)))
 			return -EFAULT;
 		total += sizeof(pi);
+	}
+
+	if (tun->flags & TUN_VNET_HDR) {
+		struct virtio_net_hdr gso = { 0 }; /* no info leak */
+		if ((len -= sizeof(gso)) < 0)
+			return -EINVAL;
+
+		if (skb_is_gso(skb)) {
+			struct skb_shared_info *sinfo = skb_shinfo(skb);
+
+			/* This is a hint as to how much should be linear. */
+			gso.hdr_len = skb_headlen(skb);
+			gso.gso_size = sinfo->gso_size;
+			if (sinfo->gso_type & SKB_GSO_TCPV4)
+				gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+			else if (sinfo->gso_type & SKB_GSO_TCPV6)
+				gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+			else
+				BUG();
+			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
+				gso.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
+		} else
+			gso.gso_type = VIRTIO_NET_HDR_GSO_NONE;
+
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+			gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+			gso.csum_start = skb->csum_start - skb_headroom(skb);
+			gso.csum_offset = skb->csum_offset;
+		} /* else everything is zero */
+
+		if (unlikely(memcpy_toiovec(iv, (void *)&gso, sizeof(gso))))
+			return -EFAULT;
+		total += sizeof(gso);
 	}
 
 	len = min_t(int, skb->len, len);
@@ -583,6 +666,11 @@ static int tun_set_iff(struct net *net, 
 	else
 		tun->flags &= ~TUN_ONE_QUEUE;
 
+	if (ifr->ifr_flags & IFF_VNET_HDR)
+		tun->flags |= TUN_VNET_HDR;
+	else
+		tun->flags &= ~TUN_VNET_HDR;
+
 	file->private_data = tun;
 	tun->attached = 1;
 	get_net(dev_net(tun->dev));
@@ -669,7 +757,8 @@ static int tun_chr_ioctl(struct inode *i
 		/* Currently this just means: "what IFF flags are valid?".
 		 * This is needed because we never checked for invalid flags on
 		 * TUNSETIFF. */
-		return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE,
+		return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
+				IFF_VNET_HDR,
 				(unsigned int __user*)argp);
 	}
 
diff -r d94590c1550a include/linux/if_tun.h
--- a/include/linux/if_tun.h	Thu Jun 26 00:21:11 2008 +1000
+++ b/include/linux/if_tun.h	Thu Jun 26 00:21:59 2008 +1000
@@ -33,6 +33,7 @@
 #define TUN_NO_PI	0x0040
 #define TUN_ONE_QUEUE	0x0080
 #define TUN_PERSIST 	0x0100	
+#define TUN_VNET_HDR 	0x0200
 
 /* Ioctl defines */
 #define TUNSETNOCSUM  _IOW('T', 200, int) 
@@ -50,6 +51,7 @@
 #define IFF_TAP		0x0002
 #define IFF_NO_PI	0x1000
 #define IFF_ONE_QUEUE	0x2000
+#define IFF_VNET_HDR	0x4000
 
 /* Features for GSO (TUNSETOFFLOAD). */
 #define TUN_F_CSUM	0x01	/* You can hand me unchecksummed packets. */

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 4/4] lguest: Use GSO/IFF_VNET_HDR extensions on tun/tap
  2008-06-25 14:30   ` [PATCH 3/4] tun: Allow GSO using virtio_net_hdr Rusty Russell
@ 2008-06-25 14:32     ` Rusty Russell
  2008-06-25 15:45       ` Rusty Russell
       [not found]     ` <200806260032.12359.rusty__46755.7742762894$1214404667$gmane$org@rustcorp.com.au>
                       ` (2 subsequent siblings)
  3 siblings, 1 reply; 17+ messages in thread
From: Rusty Russell @ 2008-06-25 14:32 UTC (permalink / raw)
  To: Max Krasnyansky; +Cc: Herbert Xu, netdev, virtualization, markmc

(Might not apply cleanly to current Linus, there are other lguest things
 going on, but this gives you the idea at least).

Guest -> Host 1GB TCP:
Before: Seconds 16.6282 xmit 250498 recv 3 timeout 248355
After: Seconds 9.86102 xmit 241989 recv 192014 timeout 231224

Host -> Guest 1GB TCP:
Before: Seconds 11.0831 xmit 324742 recv 1910 timeout 323429
After: Seconds 10.6626 xmit 342489 recv 24 timeout 341173

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 Documentation/lguest/lguest.c |   34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff -r 7358caf10bd8 Documentation/lguest/lguest.c
--- a/Documentation/lguest/lguest.c	Tue Jun 24 16:15:36 2008 +1000
+++ b/Documentation/lguest/lguest.c	Wed Jun 25 00:29:31 2008 +1000
@@ -928,11 +928,9 @@ static void handle_net_output(int fd, st
 	while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {
 		if (in)
 			errx(1, "Input buffers in output queue?");
-		/* Check header, but otherwise ignore it (we told the Guest we
-		 * supported no features, so it shouldn't have anything
-		 * interesting). */
-		(void)convert(&iov[0], struct virtio_net_hdr);
-		len = writev(vq->dev->fd, iov+1, out-1);
+		len = writev(vq->dev->fd, iov, out);
+		if (len < 0)
+			err(1, "Writing network packet to tun");
 		add_used_and_trigger(fd, vq, head, len);
 		num++;
 	}
@@ -949,7 +947,6 @@ static bool handle_tun_input(int fd, str
 	unsigned int head, in_num, out_num;
 	int len;
 	struct iovec iov[dev->vq->vring.num];
-	struct virtio_net_hdr *hdr;
 
 	/* First we need a network buffer from the Guests's recv virtqueue. */
 	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
@@ -970,18 +969,13 @@ static bool handle_tun_input(int fd, str
 	} else if (out_num)
 		errx(1, "Output buffers in network recv queue?");
 
-	/* First element is the header: we set it to 0 (no features). */
-	hdr = convert(&iov[0], struct virtio_net_hdr);
-	hdr->flags = 0;
-	hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
-
 	/* Read the packet from the device directly into the Guest's buffer. */
-	len = readv(dev->fd, iov+1, in_num-1);
+	len = readv(dev->fd, iov, in_num);
 	if (len <= 0)
 		err(1, "reading network");
 
 	/* Tell the Guest about the new packet. */
-	add_used_and_trigger(fd, dev->vq, head, sizeof(*hdr) + len);
+	add_used_and_trigger(fd, dev->vq, head, len);
 
 	verbose("tun input packet len %i [%02x %02x] (%s)\n", len,
 		((u8 *)iov[1].iov_base)[0], ((u8 *)iov[1].iov_base)[1],
@@ -1492,10 +1486,14 @@ static int get_tun_device(char tapif[IFN
 	 * the truth, I completely blundered my way through this code, but it
 	 * works now! */
 	netfd = open_or_die("/dev/net/tun", O_RDWR);
-	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
 	strcpy(ifr.ifr_name, "tap%d");
 	if (ioctl(netfd, TUNSETIFF, &ifr) != 0)
 		err(1, "configuring /dev/net/tun");
+
+	if (ioctl(netfd, TUNSETFEATURES,
+		  TUN_F_CSUM|TUN_F_TSO4|TUN_F_TSO6|TUN_F_TSO_ECN) != 0)
+		err(1, "Could not set features for tun device");
 
 	/* We don't need checksums calculated for packets coming in this
 	 * device: trust us! */
@@ -1563,6 +1561,16 @@ static void setup_tun_net(char *arg)
 	/* Tell Guest what MAC address to use. */
 	add_feature(dev, VIRTIO_NET_F_MAC);
 	add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
+	/* Expect Guest to handle everything except UFO */
+	add_feature(dev, VIRTIO_NET_F_CSUM);
+	add_feature(dev, VIRTIO_NET_F_GUEST_CSUM);
+	add_feature(dev, VIRTIO_NET_F_MAC);
+	add_feature(dev, VIRTIO_NET_F_GUEST_TSO4);
+	add_feature(dev, VIRTIO_NET_F_GUEST_TSO6);
+	add_feature(dev, VIRTIO_NET_F_GUEST_ECN);
+	add_feature(dev, VIRTIO_NET_F_HOST_TSO4);
+	add_feature(dev, VIRTIO_NET_F_HOST_TSO6);
+	add_feature(dev, VIRTIO_NET_F_HOST_ECN);
 	set_config(dev, sizeof(conf), &conf);
 
 	/* We don't need the socket any more; setup is done. */

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 4/4] lguest: Use GSO/IFF_VNET_HDR extensions on tun/tap
  2008-06-25 14:32     ` [PATCH 4/4] lguest: Use GSO/IFF_VNET_HDR extensions on tun/tap Rusty Russell
@ 2008-06-25 15:45       ` Rusty Russell
  0 siblings, 0 replies; 17+ messages in thread
From: Rusty Russell @ 2008-06-25 15:45 UTC (permalink / raw)
  To: virtualization; +Cc: Max Krasnyansky, markmc, netdev, Herbert Xu

On Thursday 26 June 2008 00:32:12 Rusty Russell wrote:
> (Might not apply cleanly to current Linus, there are other lguest things
>  going on, but this gives you the idea at least).
...
> +	if (ioctl(netfd, TUNSETFEATURES,
> +		  TUN_F_CSUM|TUN_F_TSO4|TUN_F_TSO6|TUN_F_TSO_ECN) != 0)
> +		err(1, "Could not set features for tun device");

This should be TUNSETOFFLOAD of course.  I renamed it at the last minute, 
because TUNSETFEATURES implies symmetry with TUNGETFEATURES, which is 
completely different.

Cheers,
Rusty.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 4/4] lguest: Use GSO/IFF_VNET_HDR extensions on tun/tap
       [not found]     ` <200806260032.12359.rusty__46755.7742762894$1214404667$gmane$org@rustcorp.com.au>
@ 2008-06-25 19:07       ` Anthony Liguori
  2008-06-26  4:40         ` Rusty Russell
  0 siblings, 1 reply; 17+ messages in thread
From: Anthony Liguori @ 2008-06-25 19:07 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Max Krasnyansky, markmc, netdev, Herbert Xu, virtualization

Rusty Russell wrote:
> @@ -1563,6 +1561,16 @@ static void setup_tun_net(char *arg)
>  	/* Tell Guest what MAC address to use. */
>  	add_feature(dev, VIRTIO_NET_F_MAC);
>  	add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
> +	/* Expect Guest to handle everything except UFO */
> +	add_feature(dev, VIRTIO_NET_F_CSUM);

You're setting this feature twice.

> +	add_feature(dev, VIRTIO_NET_F_GUEST_CSUM);

You set this feature, but I never see the virtio-net driver acknowledge 
the feature.  Curiously, my implementation with KVM is struggling 
because UDP packet checksums are not correct so the DHCP client is 
ignoring them.  If I disable CSUM offload, things work fine (using 
the virtio-net header).  The problem is only host=>guest, guest=>host is 
fine.

Regards,

Anthony Liguori


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 4/4] lguest: Use GSO/IFF_VNET_HDR extensions on tun/tap
  2008-06-25 19:07       ` Anthony Liguori
@ 2008-06-26  4:40         ` Rusty Russell
  2008-06-26 18:16           ` Anthony Liguori
  2008-07-02  5:25           ` Max Krasnyansky
  0 siblings, 2 replies; 17+ messages in thread
From: Rusty Russell @ 2008-06-26  4:40 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Max Krasnyansky, markmc, netdev, Herbert Xu, virtualization

On Thursday 26 June 2008 05:07:18 Anthony Liguori wrote:
> Rusty Russell wrote:
> > @@ -1563,6 +1561,16 @@ static void setup_tun_net(char *arg)
> >  	/* Tell Guest what MAC address to use. */
> >  	add_feature(dev, VIRTIO_NET_F_MAC);
> >  	add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
> > +	/* Expect Guest to handle everything except UFO */
> > +	add_feature(dev, VIRTIO_NET_F_CSUM);
>
> You're setting this feature twice.

Hmm, not in the version here?

> > +	add_feature(dev, VIRTIO_NET_F_GUEST_CSUM);
>
> You set this feature, but I never see the virtio-net driver acknowledge
> the feature.  Curiously, my implementation with KVM is struggling
> because UDP packet checksums are not correct so the DHCP client is
> ignoring them.  If I disable CSUM offload, things it works fine (using
> the virtio-net header).  The problem is only host=>guest, guest=>host is
> fine.

OK, found this: wrong args to skb_partial_csum_set.  It was found by Mark 
McLoughlin before, I just lost the fix when I extracted this into a separate 
patch.  I chose to move the call to skb_partial_csum_set(), rather than use 
his fix (which assumed a tap not tun device).

Here's two fixes on top of previous patch:

diff -u b/drivers/net/tun.c b/drivers/net/tun.c
--- b/drivers/net/tun.c	Thu Jun 26 00:21:59 2008 +1000
+++ b/drivers/net/tun.c	Thu Jun 26 14:35:03 2008 +1000
@@ -298,11 +298,11 @@
 		if ((len -= sizeof(gso)) > count)
 			return -EINVAL;
 
-		if (gso.hdr_len > len)
-			return -EINVAL;
-
 		if (memcpy_fromiovec((void *)&gso, iv, sizeof(gso)))
 			return -EFAULT;
+
+		if (gso.hdr_len > len)
+			return -EINVAL;
 	}
 
 	if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) {
@@ -324,6 +324,16 @@
 		return -EFAULT;
 	}
 
+	if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+		if (!skb_partial_csum_set(skb, gso.csum_start,
+					  gso.csum_offset)) {
+			tun->dev->stats.rx_dropped++;
+			kfree_skb(skb);
+			return -EINVAL;
+		}
+	} else if (tun->flags & TUN_NOCHECKSUM)
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
 	switch (tun->flags & TUN_TYPE_MASK) {
 	case TUN_TUN_DEV:
 		skb_reset_mac_header(skb);
@@ -335,16 +345,6 @@
 		break;
 	};
 
-	if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
-		if (!skb_partial_csum_set(skb, gso.csum_start,
-					  gso.csum_offset)) {
-			tun->dev->stats.rx_dropped++;
-			kfree_skb(skb);
-			return -EINVAL;
-		}
-	} else if (tun->flags & TUN_NOCHECKSUM)
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-
 	if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
 		pr_debug("GSO!\n");
 		switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 4/4] lguest: Use GSO/IFF_VNET_HDR extensions on tun/tap
  2008-06-26  4:40         ` Rusty Russell
@ 2008-06-26 18:16           ` Anthony Liguori
  2008-06-27  3:50             ` Rusty Russell
  2008-07-02  5:25           ` Max Krasnyansky
  1 sibling, 1 reply; 17+ messages in thread
From: Anthony Liguori @ 2008-06-26 18:16 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Max Krasnyansky, markmc, netdev, Herbert Xu, virtualization

Rusty Russell wrote:
> On Thursday 26 June 2008 05:07:18 Anthony Liguori wrote:
>   
>> Rusty Russell wrote:
>>     
>>> @@ -1563,6 +1561,16 @@ static void setup_tun_net(char *arg)
>>>  	/* Tell Guest what MAC address to use. */
>>>  	add_feature(dev, VIRTIO_NET_F_MAC);
>>>  	add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
>>> +	/* Expect Guest to handle everything except UFO */
>>> +	add_feature(dev, VIRTIO_NET_F_CSUM);
>>>       
>> You're setting this feature twice.
>>     
>
> Hmm, not in the version here?
>   

Sorry, misread apparently.

>>> +	add_feature(dev, VIRTIO_NET_F_GUEST_CSUM);
>>>       
>> You set this feature, but I never see the virtio-net driver acknowledge
>> the feature.

I still don't see GUEST_CSUM ever get referenced in virtio_net.c.  
What's the intention of this feature bit?  Could I be missing a 
virtio_net patch?  I'm using the latest bits in Linus' tree.

>>   Curiously, my implementation with KVM is struggling
>> because UDP packet checksums are not correct so the DHCP client is
>> ignoring them.  If I disable CSUM offload, things it works fine (using
>> the virtio-net header).  The problem is only host=>guest, guest=>host is
>> fine.
>>     
>
> OK, found this: wrong args to skb_partial_csum_set.  It was found by Mark 
> McLoughlin before, I just lost the fix when I extracted this into a separate 
> patch.  I chose to move the call to skb_partial_csum_set(), rather than use 
> his fix (which assumed a tap not tun device).
>   

This still doesn't fix the problem.  I can manually assign an IP address 
and even do netperf runs but I cannot get a dhcp address (dhclient is 
picky about the udp csum).  Also, the RX performance is so low that I'm 
sure a ton of packets are getting dropped.  However, this patchset is 
extremely promising, here are the results with KVM for TX:

w/o gso
[  3]  0.0-10.0 sec    593 MBytes    498 Mbits/sec

w/gso
[  5]  0.0-10.0 sec  1.86 GBytes  1.60 Gbits/sec

So that's a huge increase.  Unfortunately, RX drops from 1.04 Gbits/sec 
to only a few hundred Kbit/sec.  I'm pretty sure this is the 
checksumming issue.

Also, when I exit KVM, QEMU zombies and I notice:

Message from syslogd@squirrel at Jun 26 13:02:07 ...
 kernel: unregister_netdevice: waiting for tap0 to become free. Usage 
count = 3

Message from syslogd@squirrel at Jun 26 13:02:17 ...
 kernel: unregister_netdevice: waiting for tap0 to become free. Usage 
count = 0

Once the refcount drops to 0, the process exits.  It looks fishy to me 
though.

Regards,

Anthony Liguori

> Here's two fixes on top of previous patch:
>
> diff -u b/drivers/net/tun.c b/drivers/net/tun.c
> --- b/drivers/net/tun.c	Thu Jun 26 00:21:59 2008 +1000
> +++ b/drivers/net/tun.c	Thu Jun 26 14:35:03 2008 +1000
> @@ -298,11 +298,11 @@
>  		if ((len -= sizeof(gso)) > count)
>  			return -EINVAL;
>  
> -		if (gso.hdr_len > len)
> -			return -EINVAL;
> -
>  		if (memcpy_fromiovec((void *)&gso, iv, sizeof(gso)))
>  			return -EFAULT;
> +
> +		if (gso.hdr_len > len)
> +			return -EINVAL;
>  	}
>  
>  	if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) {
> @@ -324,6 +324,16 @@
>  		return -EFAULT;
>  	}
>  
> +	if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
> +		if (!skb_partial_csum_set(skb, gso.csum_start,
> +					  gso.csum_offset)) {
> +			tun->dev->stats.rx_dropped++;
> +			kfree_skb(skb);
> +			return -EINVAL;
> +		}
> +	} else if (tun->flags & TUN_NOCHECKSUM)
> +		skb->ip_summed = CHECKSUM_UNNECESSARY;
> +
>  	switch (tun->flags & TUN_TYPE_MASK) {
>  	case TUN_TUN_DEV:
>  		skb_reset_mac_header(skb);
> @@ -335,16 +345,6 @@
>  		break;
>  	};
>  
> -	if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
> -		if (!skb_partial_csum_set(skb, gso.csum_start,
> -					  gso.csum_offset)) {
> -			tun->dev->stats.rx_dropped++;
> -			kfree_skb(skb);
> -			return -EINVAL;
> -		}
> -	} else if (tun->flags & TUN_NOCHECKSUM)
> -		skb->ip_summed = CHECKSUM_UNNECESSARY;
> -
>  	if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
>  		pr_debug("GSO!\n");
>  		switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
>   


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 4/4] lguest: Use GSO/IFF_VNET_HDR extensions on tun/tap
  2008-06-26 18:16           ` Anthony Liguori
@ 2008-06-27  3:50             ` Rusty Russell
  0 siblings, 0 replies; 17+ messages in thread
From: Rusty Russell @ 2008-06-27  3:50 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Max Krasnyansky, markmc, netdev, Herbert Xu, virtualization

On Friday 27 June 2008 04:16:25 Anthony Liguori wrote:
> Rusty Russell wrote:
> > On Thursday 26 June 2008 05:07:18 Anthony Liguori wrote:
> >> Rusty Russell wrote:
> >>> +	add_feature(dev, VIRTIO_NET_F_GUEST_CSUM);
> >>
> >> You set this feature, but I never see the virtio-net driver acknowledge
> >> the feature.
>
> I still don't see GUEST_CSUM ever get referenced in virtio_net.c.
> What's the intention of this feature bit?  Could I be missing a
> virtio_net patch?  I'm using the latest bits in Linus' tree.

It says that the guest can do csum offload.  It's not advertised in the 
current Linus tree (it got snuck in by Mark after the large-packets patches 
still sitting in my tree).  It should be tho, since the driver can handle it.  
See patch at end.

> >>   Curiously, my implementation with KVM is struggling
> >> because UDP packet checksums are not correct so the DHCP client is
> >> ignoring them.  If I disable CSUM offload, things it works fine (using
> >> the virtio-net header).  The problem is only host=>guest, guest=>host is
> >> fine.
> >
> > OK, found this: wrong args to skb_partial_csum_set.  It was found by Mark
> > McLoughlin before, I just lost the fix when I extracted this into a
> > separate patch.  I chose to move the call to skb_partial_csum_set(),
> > rather than use his fix (which assumed a tap not tun device).
>
> This still doesn't fix the problem.  I can manually assign an IP address
> and even do netperf runs but I cannot get a dhcp address (dhclient is
> picky about the udp csum).

I'll retest this when I'm back home with my machines.  Perhaps it's something 
to do with the csum issue.

> Also, when I exit KVM, QEMU zombies and I notice:
>
> Message from syslogd@squirrel at Jun 26 13:02:07 ...
>  kernel: unregister_netdevice: waiting for tap0 to become free. Usage
> count = 3

That seems odd; there is a leak in the vringfd interface which I know about, 
but this looks like a GSO packet from the tun device is sticking around 
somehow.

Cheers,
Rusty.

Subject: virtio_net: Set VIRTIO_NET_F_GUEST_CSUM feature
Date: Fri, 13 Jun 2008 14:27:34 +0100
From: Mark McLoughlin <markmc@redhat.com>

We can handle receiving partial csums, so set the
appropriate feature bit.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/net/virtio_net.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -550,7 +550,8 @@ static struct virtio_device_id id_table[
 };
 
 static unsigned int features[] = {
-	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
+	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
+	VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
 	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
 	VIRTIO_NET_F_HOST_ECN, VIRTIO_F_NOTIFY_ON_EMPTY,
 };


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/4] tun: Interface to query tun/tap features.
  2008-06-25 14:28 [PATCH 1/4] tun: Interface to query tun/tap features Rusty Russell
  2008-06-25 14:29 ` [PATCH 2/4] tun: TUNSETFEATURES to set gso features Rusty Russell
@ 2008-07-02  4:59 ` Max Krasnyansky
  2008-07-02  5:27   ` David Miller
  1 sibling, 1 reply; 17+ messages in thread
From: Max Krasnyansky @ 2008-07-02  4:59 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Herbert Xu, netdev, virtualization, David Miller


Rusty Russell wrote:
> The problem with introducing checksum offload and gso to tun is they
> need to set dev->features to enable GSO and/or checksumming, which is
> supposed to be done before register_netdevice(), ie. as part of
> TUNSETIFF.
> 
> Unfortunately, TUNSETIFF has always just ignored flags it doesn't
> understand, so there's no good way of detecting whether the kernel
> supports new IFF_ flags.
> 
> This patch implements a TUNGETFEATURES ioctl which returns all the valid IFF
> flags.  It could be extended later to include other features.

<snip>

Looks good.

Dave, do you want me to put all outstanding TUN patches into a git tree so
that you can pull them in one shot ?
Otherwise if you're ok with applying them one by one please apply this one.

Acked-by: Max Krasnyansky <maxk@qualcomm.com>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/4] tun: TUNSETFEATURES to set gso features.
  2008-06-25 14:29 ` [PATCH 2/4] tun: TUNSETFEATURES to set gso features Rusty Russell
  2008-06-25 14:30   ` [PATCH 3/4] tun: Allow GSO using virtio_net_hdr Rusty Russell
@ 2008-07-02  5:02   ` Max Krasnyansky
  1 sibling, 0 replies; 17+ messages in thread
From: Max Krasnyansky @ 2008-07-02  5:02 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Herbert Xu, netdev, virtualization, markmc, David Miller



Rusty Russell wrote:
> ethtool is useful for setting (some) device fields, but it's
> root-only.  Finer feature control is available through a tun-specific
> ioctl.
> 
> (Includes Mark McLoughlin <markmc@redhat.com>'s fix to hold rtnl sem).
> 
> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>


Agree. And it's often way more convenient to muck with the TUN settings
directly on the fd instead of using an external tool.

Acked-by: Max Krasnyansky <maxk@qualcomm.com>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/4] tun: Allow GSO using virtio_net_hdr
  2008-06-25 14:30   ` [PATCH 3/4] tun: Allow GSO using virtio_net_hdr Rusty Russell
  2008-06-25 14:32     ` [PATCH 4/4] lguest: Use GSO/IFF_VNET_HDR extensions on tun/tap Rusty Russell
       [not found]     ` <200806260032.12359.rusty__46755.7742762894$1214404667$gmane$org@rustcorp.com.au>
@ 2008-07-02  5:13     ` Max Krasnyansky
  2008-07-02  7:00       ` Rusty Russell
  2008-07-24 14:20     ` Herbert Xu
  3 siblings, 1 reply; 17+ messages in thread
From: Max Krasnyansky @ 2008-07-02  5:13 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Herbert Xu, netdev, virtualization, markmc



Rusty Russell wrote:
> Add a IFF_VNET_HDR flag.  This uses the same ABI as virtio_net (ie. prepending
> struct virtio_net_hdr to packets) to indicate GSO and checksum information.
> 
> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> ---
>  drivers/net/tun.c      |   90 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  include/linux/if_tun.h |    2 +
>  2 files changed, 91 insertions(+), 1 deletion(-)
> 
> diff -r d94590c1550a drivers/net/tun.c
> --- a/drivers/net/tun.c	Thu Jun 26 00:21:11 2008 +1000
> +++ b/drivers/net/tun.c	Thu Jun 26 00:21:59 2008 +1000
> @@ -63,6 +63,7 @@
>  #include <linux/if_tun.h>
>  #include <linux/crc32.h>
>  #include <linux/nsproxy.h>
> +#include <linux/virtio_net.h>
>  #include <net/net_namespace.h>
>  #include <net/netns/generic.h>
>  
> @@ -283,12 +284,24 @@ static __inline__ ssize_t tun_get_user(s
>  	struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) };
>  	struct sk_buff *skb;
>  	size_t len = count, align = 0;
> +	struct virtio_net_hdr gso = { 0 };
>  
>  	if (!(tun->flags & TUN_NO_PI)) {
>  		if ((len -= sizeof(pi)) > count)
>  			return -EINVAL;
>  
>  		if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi)))
> +			return -EFAULT;
> +	}
> +
> +	if (tun->flags & TUN_VNET_HDR) {
> +		if ((len -= sizeof(gso)) > count)
> +			return -EINVAL;
> +
> +		if (gso.hdr_len > len)
> +			return -EINVAL;
> +
> +		if (memcpy_fromiovec((void *)&gso, iv, sizeof(gso)))
>  			return -EFAULT;
>  	}

Unless I'm missing something the 'if (gso.hdr_len > len)' must be after
memcpy_fromiovec().

> @@ -322,8 +335,45 @@ static __inline__ ssize_t tun_get_user(s
>  		break;
>  	};
>  
> -	if (tun->flags & TUN_NOCHECKSUM)
> +	if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
> +		if (!skb_partial_csum_set(skb, gso.csum_start,
> +					  gso.csum_offset)) {
> +			tun->dev->stats.rx_dropped++;
> +			kfree_skb(skb);
> +			return -EINVAL;
> +		}
> +	} else if (tun->flags & TUN_NOCHECKSUM)
>  		skb->ip_summed = CHECKSUM_UNNECESSARY;
> +
> +	if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
> +		pr_debug("GSO!\n");
> +		switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
> +		case VIRTIO_NET_HDR_GSO_TCPV4:
> +			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
> +			break;
> +		case VIRTIO_NET_HDR_GSO_TCPV6:
> +			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
> +			break;
> +		default:
> +			tun->dev->stats.rx_dropped++;
> +			kfree_skb(skb);
> +			return -EINVAL;
> +		}

We should use stats.rx_frame_errors instead of stats.rx_dropped to indicate
that we dropped it because something was wrong with the framing (headers,
etc.). Applies to both of the cases above.

> +
> +		if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN)
> +			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
> +
> +		skb_shinfo(skb)->gso_size = gso.gso_size;
> +		if (skb_shinfo(skb)->gso_size == 0) {
> +			tun->dev->stats.rx_dropped++;
> +			kfree_skb(skb);
> +			return -EINVAL;
> +		}
Same here.

Everything else looks good.

Max


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 4/4] lguest: Use GSO/IFF_VNET_HDR extensions on tun/tap
  2008-06-26  4:40         ` Rusty Russell
  2008-06-26 18:16           ` Anthony Liguori
@ 2008-07-02  5:25           ` Max Krasnyansky
  1 sibling, 0 replies; 17+ messages in thread
From: Max Krasnyansky @ 2008-07-02  5:25 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Anthony Liguori, markmc, netdev, Herbert Xu, virtualization



Rusty Russell wrote:
> On Thursday 26 June 2008 05:07:18 Anthony Liguori wrote:
>> Rusty Russell wrote:
>>> @@ -1563,6 +1561,16 @@ static void setup_tun_net(char *arg)
>>>  	/* Tell Guest what MAC address to use. */
>>>  	add_feature(dev, VIRTIO_NET_F_MAC);
>>>  	add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
>>> +	/* Expect Guest to handle everything except UFO */
>>> +	add_feature(dev, VIRTIO_NET_F_CSUM);
>> You're setting this feature twice.
> 
> Hmm, not in the version here?
> 
>>> +	add_feature(dev, VIRTIO_NET_F_GUEST_CSUM);
>> You set this feature, but I never see the virtio-net driver acknowledge
>> the feature.  Curiously, my implementation with KVM is struggling
>> because UDP packet checksums are not correct so the DHCP client is
>> ignoring them.  If I disable CSUM offload, it works fine (using
>> the virtio-net header).  The problem is only host=>guest, guest=>host is
>> fine.
> 
> OK, found this: wrong args to skb_partial_csum_set.  It was found by Mark 
> McLoughlin before, I just lost the fix when I extracted this into a separate 
> patch.  I chose to move the call to skb_partial_csum_set(), rather than use 
> his fix (which assumed a tap not tun device).
> 
> Here's two fixes on top of previous patch:
> 
> diff -u b/drivers/net/tun.c b/drivers/net/tun.c
> --- b/drivers/net/tun.c	Thu Jun 26 00:21:59 2008 +1000
> +++ b/drivers/net/tun.c	Thu Jun 26 14:35:03 2008 +1000
> @@ -298,11 +298,11 @@
>  		if ((len -= sizeof(gso)) > count)
>  			return -EINVAL;
>  
> -		if (gso.hdr_len > len)
> -			return -EINVAL;
> -
>  		if (memcpy_fromiovec((void *)&gso, iv, sizeof(gso)))
>  			return -EFAULT;
> +
> +		if (gso.hdr_len > len)
> +			return -EINVAL;
>  	}

Yep, looks better now.

>  
>  	if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) {
> @@ -324,6 +324,16 @@
>  		return -EFAULT;
>  	}
>  
> +	if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
> +		if (!skb_partial_csum_set(skb, gso.csum_start,
> +					  gso.csum_offset)) {
> +			tun->dev->stats.rx_dropped++;
> +			kfree_skb(skb);
> +			return -EINVAL;
> +		}
> +	} else if (tun->flags & TUN_NOCHECKSUM)
> +		skb->ip_summed = CHECKSUM_UNNECESSARY;
> +
>  	switch (tun->flags & TUN_TYPE_MASK) {
>  	case TUN_TUN_DEV:
>  		skb_reset_mac_header(skb);
> @@ -335,16 +345,6 @@
>  		break;
>  	};
>  
> -	if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
> -		if (!skb_partial_csum_set(skb, gso.csum_start,
> -					  gso.csum_offset)) {
> -			tun->dev->stats.rx_dropped++;
> -			kfree_skb(skb);
> -			return -EINVAL;
> -		}
> -	} else if (tun->flags & TUN_NOCHECKSUM)
> -		skb->ip_summed = CHECKSUM_UNNECESSARY;
> -
>  	if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
>  		pr_debug("GSO!\n");
>  		switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {

Do you want to resend the GSO patch with all the latest fixes, i.e. the other
things (stat counters) I pointed out in the previous email?
I'll ack it.

Thanx
Max

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/4] tun: Interface to query tun/tap features.
  2008-07-02  4:59 ` [PATCH 1/4] tun: Interface to query tun/tap features Max Krasnyansky
@ 2008-07-02  5:27   ` David Miller
  0 siblings, 0 replies; 17+ messages in thread
From: David Miller @ 2008-07-02  5:27 UTC (permalink / raw)
  To: maxk; +Cc: rusty, herbert, netdev, virtualization

From: Max Krasnyansky <maxk@qualcomm.com>
Date: Tue, 01 Jul 2008 21:59:02 -0700

> Dave, do you want me to put all outstanding TUN patches into a git tree so
> that you can pull them in one shot ?
> Otherwise if you're ok with applying them one by one please apply this one.
> 
> Acked-by: Max Krasnyansky <maxk@qualcomm.com>

I'll apply Rusty's patches after I give them a review too.

Thanks Max.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/4] tun: Allow GSO using virtio_net_hdr
  2008-07-02  5:13     ` [PATCH 3/4] tun: Allow GSO using virtio_net_hdr Max Krasnyansky
@ 2008-07-02  7:00       ` Rusty Russell
  0 siblings, 0 replies; 17+ messages in thread
From: Rusty Russell @ 2008-07-02  7:00 UTC (permalink / raw)
  To: Max Krasnyansky; +Cc: Herbert Xu, netdev, virtualization, markmc

On Wednesday 02 July 2008 15:13:59 Max Krasnyansky wrote:
> Rusty Russell wrote:
> > Add a IFF_VNET_HDR flag.  This uses the same ABI as virtio_net (ie.
> > prepending struct virtio_net_hdr to packets) to indicate GSO and checksum
> > information.
> >
> > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> > ---
> >  drivers/net/tun.c      |   90
> > ++++++++++++++++++++++++++++++++++++++++++++++++- include/linux/if_tun.h
> > |    2 +
> >  2 files changed, 91 insertions(+), 1 deletion(-)
> >
> > diff -r d94590c1550a drivers/net/tun.c
> > --- a/drivers/net/tun.c	Thu Jun 26 00:21:11 2008 +1000
> > +++ b/drivers/net/tun.c	Thu Jun 26 00:21:59 2008 +1000
> > @@ -63,6 +63,7 @@
> >  #include <linux/if_tun.h>
> >  #include <linux/crc32.h>
> >  #include <linux/nsproxy.h>
> > +#include <linux/virtio_net.h>
> >  #include <net/net_namespace.h>
> >  #include <net/netns/generic.h>
> >
> > @@ -283,12 +284,24 @@ static __inline__ ssize_t tun_get_user(s
> >  	struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) };
> >  	struct sk_buff *skb;
> >  	size_t len = count, align = 0;
> > +	struct virtio_net_hdr gso = { 0 };
> >
> >  	if (!(tun->flags & TUN_NO_PI)) {
> >  		if ((len -= sizeof(pi)) > count)
> >  			return -EINVAL;
> >
> >  		if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi)))
> > +			return -EFAULT;
> > +	}
> > +
> > +	if (tun->flags & TUN_VNET_HDR) {
> > +		if ((len -= sizeof(gso)) > count)
> > +			return -EINVAL;
> > +
> > +		if (gso.hdr_len > len)
> > +			return -EINVAL;
> > +
> > +		if (memcpy_fromiovec((void *)&gso, iv, sizeof(gso)))
> >  			return -EFAULT;
> >  	}
>
> Unless I'm missing something the 'if (gso.hdr_len > len)' must be after
> memcpy_fromiovec().

Yes, this was fixed in a followup... there was another bug picked up by markmc 
too in this patch.

> > +		case VIRTIO_NET_HDR_GSO_TCPV6:
> > +			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
> > +			break;
> > +		default:
> > +			tun->dev->stats.rx_dropped++;
> > +			kfree_skb(skb);
> > +			return -EINVAL;
> > +		}
>
> We should use stats.rx_frame_errors instead of stats.rx_dropped to
> > indicate that we dropped it because something was wrong with the framing
> (headers, etc). Applies to both of the cases above.

OK, done (all three).  Will repost.

Thanks,
Rusty.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/4] tun: Allow GSO using virtio_net_hdr
  2008-06-25 14:30   ` [PATCH 3/4] tun: Allow GSO using virtio_net_hdr Rusty Russell
                       ` (2 preceding siblings ...)
  2008-07-02  5:13     ` [PATCH 3/4] tun: Allow GSO using virtio_net_hdr Max Krasnyansky
@ 2008-07-24 14:20     ` Herbert Xu
  2008-07-24 23:54       ` Rusty Russell
  3 siblings, 1 reply; 17+ messages in thread
From: Herbert Xu @ 2008-07-24 14:20 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Max Krasnyansky, netdev, virtualization, markmc

On Thu, Jun 26, 2008 at 12:30:37AM +1000, Rusty Russell wrote:
> Add a IFF_VNET_HDR flag.  This uses the same ABI as virtio_net (ie. prepending
> struct virtio_net_hdr to packets) to indicate GSO and checksum information.
> 
> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

I just noticed that we still allocate a linear skb even when GSO
is enabled.  Please fix this by allocating page frags where
necessary.  Otherwise GSO is only going to work before memory
fragmentation sets in.

IIRC I'd sent out a patch to the virt mailing list with code
that did this.

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/4] tun: Allow GSO using virtio_net_hdr
  2008-07-24 14:20     ` Herbert Xu
@ 2008-07-24 23:54       ` Rusty Russell
  0 siblings, 0 replies; 17+ messages in thread
From: Rusty Russell @ 2008-07-24 23:54 UTC (permalink / raw)
  To: Herbert Xu; +Cc: Max Krasnyansky, netdev, virtualization, markmc

On Friday 25 July 2008 00:20:44 Herbert Xu wrote:
> On Thu, Jun 26, 2008 at 12:30:37AM +1000, Rusty Russell wrote:
> > Add a IFF_VNET_HDR flag.  This uses the same ABI as virtio_net (ie.
> > prepending struct virtio_net_hdr to packets) to indicate GSO and checksum
> > information.
> >
> > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
>
> I just noticed that we still allocate a linear skb even when GSO
> is enabled.  Please fix this by allocating page frags where
> necessary.  Otherwise GSO is only going to work before memory
> fragmentation sets in.
>
> IIRC I'd sent out a patch to the virt mailing list with code
> that did this.
>
> Thanks,

Hi Herbert,

   Thanks for the reminder.  I have this code in the virtio_net patches which 
Linus failed to pull.  Will just need to share it with the tun code (maybe a 
skb_alloc_large() helper).

Cheers,
Rusty.

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2008-07-24 23:55 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-06-25 14:28 [PATCH 1/4] tun: Interface to query tun/tap features Rusty Russell
2008-06-25 14:29 ` [PATCH 2/4] tun: TUNSETFEATURES to set gso features Rusty Russell
2008-06-25 14:30   ` [PATCH 3/4] tun: Allow GSO using virtio_net_hdr Rusty Russell
2008-06-25 14:32     ` [PATCH 4/4] lguest: Use GSO/IFF_VNET_HDR extensions on tun/tap Rusty Russell
2008-06-25 15:45       ` Rusty Russell
     [not found]     ` <200806260032.12359.rusty__46755.7742762894$1214404667$gmane$org@rustcorp.com.au>
2008-06-25 19:07       ` Anthony Liguori
2008-06-26  4:40         ` Rusty Russell
2008-06-26 18:16           ` Anthony Liguori
2008-06-27  3:50             ` Rusty Russell
2008-07-02  5:25           ` Max Krasnyansky
2008-07-02  5:13     ` [PATCH 3/4] tun: Allow GSO using virtio_net_hdr Max Krasnyansky
2008-07-02  7:00       ` Rusty Russell
2008-07-24 14:20     ` Herbert Xu
2008-07-24 23:54       ` Rusty Russell
2008-07-02  5:02   ` [PATCH 2/4] tun: TUNSETFEATURES to set gso features Max Krasnyansky
2008-07-02  4:59 ` [PATCH 1/4] tun: Interface to query tun/tap features Max Krasnyansky
2008-07-02  5:27   ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).