* [PATCH net-next v3] xen-netfront: Add support for IPv6 offloads
@ 2013-11-26 16:41 Paul Durrant
From: Paul Durrant @ 2013-11-26 16:41 UTC
  To: xen-devel, netdev
  Cc: Paul Durrant, Konrad Rzeszutek Wilk, Boris Ostrovsky,
	David Vrabel, Ian Campbell, Wei Liu, Annie Li

This patch adds support for IPv6 checksum offload and GSO when those
features are available in the backend.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Annie Li <annie.li@oracle.com>
---

v3:
 - Addressed comments raised by Annie Li

v2:
 - Addressed comments raised by Ian Campbell

 drivers/net/xen-netfront.c |  239 ++++++++++++++++++++++++++++++++++++++++----
 include/linux/ipv6.h       |    2 +
 2 files changed, 224 insertions(+), 17 deletions(-)
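
For reference, the length arithmetic used by ipv6_optlen()/ipv6_ahlen() in
the extension-header parsing loop below follows the wire formats: a generic
IPv6 extension header encodes its length in 8-octet units, not counting the
first 8 octets (RFC 2460), whereas AH encodes its length in 32-bit words
minus 2 (RFC 4302). A minimal userspace sketch of that arithmetic, using
made-up header-length field values purely for illustration:

  #include <stdio.h>

  int main(void)
  {
          unsigned int opt_hdrlen = 1; /* hypothetical Hop-by-Hop hdrlen field */
          unsigned int ah_hdrlen = 4;  /* hypothetical AH payload-len field */

          /* Generic extension header: (hdrlen + 1) * 8 bytes. */
          printf("option header: %u bytes\n", (opt_hdrlen + 1) << 3); /* 16 */

          /* Authentication header: (hdrlen + 2) * 4 bytes. */
          printf("AH:            %u bytes\n", (ah_hdrlen + 2) << 2); /* 24 */

          return 0;
  }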

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index dd1011e..fe747e4 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -616,7 +616,9 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		tx->flags |= XEN_NETTXF_extra_info;
 
 		gso->u.gso.size = skb_shinfo(skb)->gso_size;
-		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+		gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
+			          XEN_NETIF_GSO_TYPE_TCPV6 :
+			          XEN_NETIF_GSO_TYPE_TCPV4;
 		gso->u.gso.pad = 0;
 		gso->u.gso.features = 0;
 
@@ -808,15 +810,18 @@ static int xennet_set_skb_gso(struct sk_buff *skb,
 		return -EINVAL;
 	}
 
-	/* Currently only TCPv4 S.O. is supported. */
-	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
+	    gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
 		if (net_ratelimit())
 			pr_warn("Bad GSO type %d\n", gso->u.gso.type);
 		return -EINVAL;
 	}
 
 	skb_shinfo(skb)->gso_size = gso->u.gso.size;
-	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+	skb_shinfo(skb)->gso_type =
+		(gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
+		SKB_GSO_TCPV4 :
+		SKB_GSO_TCPV6;
 
 	/* Header must be checked, and gso_segs computed. */
 	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
@@ -856,11 +861,42 @@ static RING_IDX xennet_fill_frags(struct netfront_info *np,
 	return cons;
 }
 
-static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
+static inline bool maybe_pull_tail(struct sk_buff *skb, unsigned int min,
+				   unsigned int max)
 {
-	struct iphdr *iph;
-	int err = -EPROTO;
+	int target;
+
+	BUG_ON(max < min);
+
+	if (!skb_is_nonlinear(skb) || skb_headlen(skb) >= min)
+		return true;
+
+	/* If we need to pull up then pull up to max, so we hopefully
+	 * won't need to do it again.
+	 */
+	target = min_t(int, skb->len, max);
+	__pskb_pull_tail(skb, target - skb_headlen(skb));
+
+	if (skb_headlen(skb) < min) {
+		net_err_ratelimited("Failed to pullup packet header\n");
+		return false;
+	}
+
+	return true;
+}
+
+/* This value should be large enough to cover a tagged ethernet header plus
+ * maximally sized IP and TCP or UDP headers.
+ */
+#define MAX_IP_HEADER 128
+
+static int checksum_setup_ip(struct net_device *dev, struct sk_buff *skb)
+{
+	struct iphdr *iph = (void *)skb->data;
+	unsigned int header_size;
+	unsigned int off;
 	int recalculate_partial_csum = 0;
+	int err = -EPROTO;
 
 	/*
 	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
@@ -879,40 +915,158 @@ static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
 		return 0;
 
-	if (skb->protocol != htons(ETH_P_IP))
+	off = sizeof(struct iphdr);
+
+	header_size = skb->network_header + off;
+	if (!maybe_pull_tail(skb, header_size, MAX_IP_HEADER))
 		goto out;
 
-	iph = (void *)skb->data;
+	off = iph->ihl * 4;
 
 	switch (iph->protocol) {
 	case IPPROTO_TCP:
-		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct tcphdr, check)))
 			goto out;
 
 		if (recalculate_partial_csum) {
 			struct tcphdr *tcph = tcp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct tcphdr);
+			if (!maybe_pull_tail(skb, header_size, MAX_IP_HEADER))
+				goto out;
+
 			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - iph->ihl*4,
+							 skb->len - off,
 							 IPPROTO_TCP, 0);
 		}
 		break;
 	case IPPROTO_UDP:
-		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct udphdr, check)))
 			goto out;
 
 		if (recalculate_partial_csum) {
 			struct udphdr *udph = udp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct udphdr);
+			if (!maybe_pull_tail(skb, header_size, MAX_IP_HEADER))
+				goto out;
+
 			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - iph->ihl*4,
+							 skb->len - off,
 							 IPPROTO_UDP, 0);
 		}
 		break;
 	default:
-		if (net_ratelimit())
-			pr_err("Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
-			       iph->protocol);
+		net_err_ratelimited("Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
+				    iph->protocol);
+		goto out;
+	}
+
+	err = 0;
+
+out:
+	return err;
+}
+
+/* This value should be large enough to cover a tagged ethernet header plus
+ * an IPv6 header, all options, and a maximal TCP or UDP header.
+ */
+#define MAX_IPV6_HEADER 256
+
+static int checksum_setup_ipv6(struct net_device *dev, struct sk_buff *skb)
+{
+	struct ipv6hdr *ipv6h = (void *)skb->data;
+	u8 nexthdr;
+	unsigned int header_size;
+	unsigned int off;
+	bool fragment = false;
+	bool done;
+	int err = -EPROTO;
+
+	done = false;
+
+	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	off = sizeof(struct ipv6hdr);
+
+	header_size = skb->network_header + off;
+	if (!maybe_pull_tail(skb, header_size, MAX_IPV6_HEADER))
+		goto out;
+
+	nexthdr = ipv6h->nexthdr;
+
+	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) &&
+	       !done) {
+		switch (nexthdr) {
+		case IPPROTO_DSTOPTS:
+		case IPPROTO_HOPOPTS:
+		case IPPROTO_ROUTING: {
+			struct ipv6_opt_hdr *hp = (void *)(skb->data + off);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct ipv6_opt_hdr);
+			if (!maybe_pull_tail(skb, header_size, MAX_IPV6_HEADER))
+				goto out;
+
+			nexthdr = hp->nexthdr;
+			off += ipv6_optlen(hp);
+			break;
+		}
+		case IPPROTO_AH: {
+			struct ip_auth_hdr *hp = (void *)(skb->data + off);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct ip_auth_hdr);
+			if (!maybe_pull_tail(skb, header_size, MAX_IPV6_HEADER))
+				goto out;
+
+			nexthdr = hp->nexthdr;
+			off += ipv6_ahlen(hp);
+			break;
+		}
+		case IPPROTO_FRAGMENT:
+			fragment = true;
+			/* fall through */
+		default:
+			done = true;
+			break;
+		}
+	}
+
+	if (!done) {
+		net_err_ratelimited("Failed to parse packet header\n");
+		goto out;
+	}
+
+	if (fragment) {
+		net_err_ratelimited("Packet is a fragment!\n");
+		goto out;
+	}
+
+	switch (nexthdr) {
+	case IPPROTO_TCP:
+		if (!skb_partial_csum_set(skb, off,
+					  offsetof(struct tcphdr, check)))
+			goto out;
+		break;
+	case IPPROTO_UDP:
+		if (!skb_partial_csum_set(skb, off,
+					  offsetof(struct udphdr, check)))
+			goto out;
+		break;
+	default:
+		net_err_ratelimited("Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
+				    nexthdr);
 		goto out;
 	}
 
@@ -922,6 +1076,25 @@ out:
 	return err;
 }
 
+static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
+{
+	int err;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		err = checksum_setup_ip(dev, skb);
+		break;
+	case htons(ETH_P_IPV6):
+		err = checksum_setup_ipv6(dev, skb);
+		break;
+	default:
+		err = -EPROTO;
+		break;
+	}
+
+	return err;
+}
+
 static int handle_incoming_queue(struct net_device *dev,
 				 struct sk_buff_head *rxq)
 {
@@ -1232,6 +1405,15 @@ static netdev_features_t xennet_fix_features(struct net_device *dev,
 			features &= ~NETIF_F_SG;
 	}
 
+	if (features & NETIF_F_IPV6_CSUM) {
+		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+				 "feature-ipv6-csum-offload", "%d", &val) < 0)
+			val = 0;
+
+		if (!val)
+			features &= ~NETIF_F_IPV6_CSUM;
+	}
+
 	if (features & NETIF_F_TSO) {
 		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
 				 "feature-gso-tcpv4", "%d", &val) < 0)
@@ -1241,6 +1423,15 @@ static netdev_features_t xennet_fix_features(struct net_device *dev,
 			features &= ~NETIF_F_TSO;
 	}
 
+	if (features & NETIF_F_TSO6) {
+		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+				 "feature-gso-tcpv6", "%d", &val) < 0)
+			val = 0;
+
+		if (!val)
+			features &= ~NETIF_F_TSO6;
+	}
+
 	return features;
 }
 
@@ -1373,7 +1564,9 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
 	netif_napi_add(netdev, &np->napi, xennet_poll, 64);
 	netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 				  NETIF_F_GSO_ROBUST;
-	netdev->hw_features	= NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO;
+	netdev->hw_features	= NETIF_F_SG |
+				  NETIF_F_IPV6_CSUM |
+				  NETIF_F_TSO | NETIF_F_TSO6;
 
 	/*
          * Assume that all hw features are available for now. This set
@@ -1751,6 +1944,18 @@ again:
 		goto abort_transaction;
 	}
 
+	err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6", "%d", 1);
+	if (err) {
+		message = "writing feature-gso-tcpv6";
+		goto abort_transaction;
+	}
+
+	err = xenbus_printf(xbt, dev->nodename, "feature-ipv6-csum-offload", "%d", 1);
+	if (err) {
+		message = "writing feature-ipv6-csum-offload";
+		goto abort_transaction;
+	}
+
 	err = xenbus_transaction_end(xbt, 0);
 	if (err) {
 		if (err == -EAGAIN)
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 5d89d1b..10f1b03 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -4,6 +4,8 @@
 #include <uapi/linux/ipv6.h>
 
 #define ipv6_optlen(p)  (((p)->hdrlen+1) << 3)
+#define ipv6_ahlen(p)   (((p)->hdrlen+2) << 2)
+
 /*
  * This structure contains configuration options per IPv6 link.
  */
-- 
1.7.10.4
