[PATCH 1/2] net: add and use skb_gso_transport

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 1/2] net: add and use skb_gso_transport_seglen()
  2014-01-25 22:48 [PATCH 0/2] Fix handling of GRO skbs in forwarding path Florian Westphal
@ 2014-01-25 22:48 ` Florian Westphal
  2014-01-26  1:28   ` Eric Dumazet
  0 siblings, 1 reply; 30+ messages in thread
From: Florian Westphal @ 2014-01-25 22:48 UTC (permalink / raw)
  To: netdev; +Cc: eric.dumazet, Florian Westphal

This moves part of Eric Dumazets skb_gso_seglen helper from tbf sched to
skbuff core so it may be reused by upcoming ip forwarding path patch.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/linux/skbuff.h |  1 +
 net/core/skbuff.c      | 26 ++++++++++++++++++++++++++
 net/sched/sch_tbf.c    | 12 +++---------
 3 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6f69b3f..3d76066 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2371,6 +2371,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
 int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
 void skb_scrub_packet(struct sk_buff *skb, bool xnet);
+unsigned int skb_gso_transport_seglen(const struct sk_buff *skb);
 struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
 
 struct skb_checksum_ops {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 06e72d3..2e1fd75 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -45,6 +45,7 @@
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/in.h>
+#include <linux/tcp.h>
 #include <linux/inet.h>
 #include <linux/slab.h>
 #include <linux/netdevice.h>
@@ -71,6 +72,8 @@
 #include <trace/events/skb.h>
 #include <linux/highmem.h>
 
+#include <uapi/linux/udp.h>
+
 struct kmem_cache *skbuff_head_cache __read_mostly;
 static struct kmem_cache *skbuff_fclone_cache __read_mostly;
 
@@ -3592,3 +3595,26 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	nf_reset_trace(skb);
 }
 EXPORT_SYMBOL_GPL(skb_scrub_packet);
+
+/**
+ * skb_gso_transport_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_transport_seglen is used to determine the real size of the
+ * individual segments, including Layer4 headers (TCP/UDP).
+ *
+ * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
+ */
+unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+	unsigned int hdr_len;
+
+	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+		hdr_len = tcp_hdrlen(skb);
+	else
+		hdr_len = sizeof(struct udphdr);
+	return hdr_len + shinfo->gso_size;
+}
+EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 887e672..837a61b 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -148,16 +148,10 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r,
  * Return length of individual segments of a gso packet,
  * including all headers (MAC, IP, TCP/UDP)
  */
-static unsigned int skb_gso_seglen(const struct sk_buff *skb)
+static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
 {
 	unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
-	const struct skb_shared_info *shinfo = skb_shinfo(skb);
-
-	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
-		hdr_len += tcp_hdrlen(skb);
-	else
-		hdr_len += sizeof(struct udphdr);
-	return hdr_len + shinfo->gso_size;
+	return hdr_len + skb_gso_transport_seglen(skb);
 }
 
 /* GSO packet is too big, segment it so that tbf can transmit
@@ -202,7 +196,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	int ret;
 
 	if (qdisc_pkt_len(skb) > q->max_size) {
-		if (skb_is_gso(skb) && skb_gso_seglen(skb) <= q->max_size)
+		if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size)
 			return tbf_segment(skb, sch);
 		return qdisc_reshape_fail(skb, sch);
 	}
-- 
1.8.1.5

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] net: add and use skb_gso_transport_seglen()
  2014-01-25 22:48 ` [PATCH 1/2] net: add and use skb_gso_transport_seglen() Florian Westphal
@ 2014-01-26  1:28   ` Eric Dumazet
  2014-01-26  9:19     ` Florian Westphal
  0 siblings, 1 reply; 30+ messages in thread
From: Eric Dumazet @ 2014-01-26  1:28 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netdev

On Sat, 2014-01-25 at 23:48 +0100, Florian Westphal wrote:
> This moves part of Eric Dumazets skb_gso_seglen helper from tbf sched to
> skbuff core so it may be reused by upcoming ip forwarding path patch.
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  include/linux/skbuff.h |  1 +
>  net/core/skbuff.c      | 26 ++++++++++++++++++++++++++
>  net/sched/sch_tbf.c    | 12 +++---------
>  3 files changed, 30 insertions(+), 9 deletions(-)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 6f69b3f..3d76066 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -2371,6 +2371,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
>  void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
>  int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
>  void skb_scrub_packet(struct sk_buff *skb, bool xnet);
> +unsigned int skb_gso_transport_seglen(const struct sk_buff *skb);
>  struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
>  
>  struct skb_checksum_ops {
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 06e72d3..2e1fd75 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -45,6 +45,7 @@
>  #include <linux/mm.h>
>  #include <linux/interrupt.h>
>  #include <linux/in.h>
> +#include <linux/tcp.h>
>  #include <linux/inet.h>
>  #include <linux/slab.h>
>  #include <linux/netdevice.h>
> @@ -71,6 +72,8 @@
>  #include <trace/events/skb.h>
>  #include <linux/highmem.h>
>  
> +#include <uapi/linux/udp.h>


Normally you should not use uapi/

#include <linux/udp.h>

> +
>  struct kmem_cache *skbuff_head_cache __read_mostly;
>  static struct kmem_cache *skbuff_fclone_cache __read_mostly;
>  
> @@ -3592,3 +3595,26 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
>  	nf_reset_trace(skb);
>  }
>  EXPORT_SYMBOL_GPL(skb_scrub_packet);
> +
> +/**
> + * skb_gso_transport_seglen - Return length of individual segments of a gso packet
> + *
> + * @skb: GSO skb
> + *
> + * skb_gso_transport_seglen is used to determine the real size of the
> + * individual segments, including Layer4 headers (TCP/UDP).
> + *
> + * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
> + */
> +unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
> +{
> +	const struct skb_shared_info *shinfo = skb_shinfo(skb);
> +	unsigned int hdr_len;
> +
> +	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
> +		hdr_len = tcp_hdrlen(skb);
> +	else
> +		hdr_len = sizeof(struct udphdr);
> +	return hdr_len + shinfo->gso_size;
> +}
> +EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
> diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
> index 887e672..837a61b 100644
> --- a/net/sched/sch_tbf.c
> +++ b/net/sched/sch_tbf.c

It seems you forgot to remove from this file this include :

#include <net/tcp.h>

> @@ -148,16 +148,10 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r,
>   * Return length of individual segments of a gso packet,
>   * including all headers (MAC, IP, TCP/UDP)
>   */
> -static unsigned int skb_gso_seglen(const struct sk_buff *skb)
> +static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
>  {
>  	unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
> -	const struct skb_shared_info *shinfo = skb_shinfo(skb);
> -
> -	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
> -		hdr_len += tcp_hdrlen(skb);
> -	else
> -		hdr_len += sizeof(struct udphdr);
> -	return hdr_len + shinfo->gso_size;
> +	return hdr_len + skb_gso_transport_seglen(skb);
>  }
>  
>  /* GSO packet is too big, segment it so that tbf can transmit
> @@ -202,7 +196,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
>  	int ret;
>  
>  	if (qdisc_pkt_len(skb) > q->max_size) {
> -		if (skb_is_gso(skb) && skb_gso_seglen(skb) <= q->max_size)
> +		if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size)
>  			return tbf_segment(skb, sch);
>  		return qdisc_reshape_fail(skb, sch);
>  	}


Otherwise, this seems good, thanks !

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] net: add and use skb_gso_transport_seglen()
  2014-01-26  1:28   ` Eric Dumazet
@ 2014-01-26  9:19     ` Florian Westphal
  0 siblings, 0 replies; 30+ messages in thread
From: Florian Westphal @ 2014-01-26  9:19 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Florian Westphal, netdev

Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Sat, 2014-01-25 at 23:48 +0100, Florian Westphal wrote:
> > --- a/net/core/skbuff.c
> > +++ b/net/core/skbuff.c
> > @@ -45,6 +45,7 @@
> >  #include <linux/mm.h>
> >  #include <linux/interrupt.h>
> >  #include <linux/in.h>
> > +#include <linux/tcp.h>
> >  #include <linux/inet.h>
> >  #include <linux/slab.h>
> >  #include <linux/netdevice.h>
> > @@ -71,6 +72,8 @@
> >  #include <trace/events/skb.h>
> >  #include <linux/highmem.h>
> >  
> > +#include <uapi/linux/udp.h>
> 
> 
> Normally you should not use uapi/

I added this include to ensure sizeof(struct udphdr) works.
I'll change it to linux/udp.h

> > diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
> > index 887e672..837a61b 100644
> > --- a/net/sched/sch_tbf.c
> > +++ b/net/sched/sch_tbf.c
> 
> It seems you forgot to remove from this file this include :
> 
> #include <net/tcp.h>

Indeed, thanks for spotting this.

> Otherwise, this seems good, thanks !

Thanks for reviewing Eric!

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH 1/2] net: add and use skb_gso_transport_seglen()
@ 2014-01-27  8:22 Florian Westphal
  2014-01-27  8:22 ` [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path Florian Westphal
  2014-01-27  8:30 ` [PATCH 1/2] net: add and use skb_gso_transport_seglen() David Miller
  0 siblings, 2 replies; 30+ messages in thread
From: Florian Westphal @ 2014-01-27  8:22 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

This moves part of Eric Dumazets skb_gso_seglen helper from tbf sched to
skbuff core so it may be reused by upcoming ip forwarding path patch.

Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 No changes since V2, resending since patch 2/2 had issues
 and I don't see 1/2 in net tree.

 Changes since V1:
  suggestions from Eric Dumazet:
  - don't use uapi udp.h
  - remove tcp.h include from tbf, its not needed anymore

 include/linux/skbuff.h |  1 +
 net/core/skbuff.c      | 25 +++++++++++++++++++++++++
 net/sched/sch_tbf.c    | 13 +++----------
 3 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1f689e6..f589c9a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2456,6 +2456,7 @@ void skb_zerocopy(struct sk_buff *to, const struct sk_buff *from,
 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
 int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
 void skb_scrub_packet(struct sk_buff *skb, bool xnet);
+unsigned int skb_gso_transport_seglen(const struct sk_buff *skb);
 struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
 
 struct skb_checksum_ops {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8f519db..9ae6d11 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -47,6 +47,8 @@
 #include <linux/in.h>
 #include <linux/inet.h>
 #include <linux/slab.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
 #include <linux/netdevice.h>
 #ifdef CONFIG_NET_CLS_ACT
 #include <net/pkt_sched.h>
@@ -3916,3 +3918,26 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	nf_reset_trace(skb);
 }
 EXPORT_SYMBOL_GPL(skb_scrub_packet);
+
+/**
+ * skb_gso_transport_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_transport_seglen is used to determine the real size of the
+ * individual segments, including Layer4 headers (TCP/UDP).
+ *
+ * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
+ */
+unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+	unsigned int hdr_len;
+
+	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+		hdr_len = tcp_hdrlen(skb);
+	else
+		hdr_len = sizeof(struct udphdr);
+	return hdr_len + shinfo->gso_size;
+}
+EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index fbba5b0..1cb413f 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -21,7 +21,6 @@
 #include <net/netlink.h>
 #include <net/sch_generic.h>
 #include <net/pkt_sched.h>
-#include <net/tcp.h>
 
 
 /*	Simple Token Bucket Filter.
@@ -148,16 +147,10 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r,
  * Return length of individual segments of a gso packet,
  * including all headers (MAC, IP, TCP/UDP)
  */
-static unsigned int skb_gso_seglen(const struct sk_buff *skb)
+static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
 {
 	unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
-	const struct skb_shared_info *shinfo = skb_shinfo(skb);
-
-	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
-		hdr_len += tcp_hdrlen(skb);
-	else
-		hdr_len += sizeof(struct udphdr);
-	return hdr_len + shinfo->gso_size;
+	return hdr_len + skb_gso_transport_seglen(skb);
 }
 
 /* GSO packet is too big, segment it so that tbf can transmit
@@ -202,7 +195,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	int ret;
 
 	if (qdisc_pkt_len(skb) > q->max_size) {
-		if (skb_is_gso(skb) && skb_gso_seglen(skb) <= q->max_size)
+		if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size)
 			return tbf_segment(skb, sch);
 		return qdisc_reshape_fail(skb, sch);
 	}
-- 
1.8.1.5

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-01-27  8:22 [PATCH 1/2] net: add and use skb_gso_transport_seglen() Florian Westphal
@ 2014-01-27  8:22 ` Florian Westphal
  2014-01-27  8:34   ` David Miller
  2014-01-27 18:22   ` Eric Dumazet
  2014-01-27  8:30 ` [PATCH 1/2] net: add and use skb_gso_transport_seglen() David Miller
  1 sibling, 2 replies; 30+ messages in thread
From: Florian Westphal @ 2014-01-27  8:22 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

Marcelo Ricardo Leitner reported problems when the forwarding link
path has a lower mtu than the incoming link if the inbound interface
supports GRO.

Given:
Host <mtu1500> R1 <mtu1200> R2

Host sends tcp stream which is routed via R1 and R2.  R1 performs GRO.

In this case, the kernel will fail to send ICMP fragmentation needed
messages (or pkt too big for ipv6), as gso packets currently bypass the
dst mtu checks in forward path. Instead, Linux tries to send out packets
exceeding R1-R2 link mtu.

When locking route MTU on Host (i.e., no ipv4 DF bit set), R1 does
not fragment the packets when forwarding, and again tries to send out
packets exceeding R1-R2 link mtu.

This alters the forwarding dstmtu checks to take the individual gso
segment lengths into account.

For ipv6, we send out pkt too big error for gso if the individual
segments are too big.

For ipv4, we either send icmp fragmentation needed, or, if the DF bit
is not set, perform software segmentation and let the output path
create fragments when the packet is leaving the machine.
It is not 100% correct as the error message will contain the headers of
the GRO skb instead of the original/segmented one, but it seems to
work fine in my (limited) tests.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
Changes since V2:
 - make this thing apply to current -net tree
 - kill unused variables in ip_forward/ip6_output

Changes since V1:
 suggestions from Eric Dumazet:
  - skip more expensive computation for small packets in fwd path
  - use netif_skb_features() feature mask and remove GSO flags
    instead of using 0 feature set.

 include/linux/skbuff.h | 17 ++++++++++++++
 net/ipv4/ip_forward.c  | 60 ++++++++++++++++++++++++++++++++++++++++++++++++--
 net/ipv6/ip6_output.c  | 17 ++++++++++++--
 3 files changed, 90 insertions(+), 4 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f589c9a..3ebbbe7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2916,5 +2916,22 @@ static inline bool skb_head_is_locked(const struct sk_buff *skb)
 {
 	return !skb->head_frag || skb_cloned(skb);
 }
+
+/**
+ * skb_gso_network_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_network_seglen is used to determine the real size of the
+ * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP).
+ *
+ * The MAC/L2 header is not accounted for.
+ */
+static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
+{
+	unsigned int hdr_len = skb_transport_header(skb) -
+			       skb_network_header(skb);
+	return hdr_len + skb_gso_transport_seglen(skb);
+}
 #endif	/* __KERNEL__ */
 #endif	/* _LINUX_SKBUFF_H */
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index e9f1217..91c8f51 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -39,6 +39,60 @@
 #include <net/route.h>
 #include <net/xfrm.h>
 
+static bool ip_may_fragment(const struct sk_buff *skb)
+{
+	return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
+	       !skb->local_df;
+}
+
+static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb)
+{
+	if (skb->local_df || !skb_is_gso(skb))
+		return false;
+	return skb_gso_network_seglen(skb) > dst_mtu(skb_dst(skb));
+}
+
+static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+	if (skb->len <= mtu || skb->local_df)
+		return false;
+
+	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+		return false;
+
+	return true;
+}
+
+/* called if GSO skb needs to be fragmented on forward.  */
+static int ip_forward_finish_gso(struct sk_buff *skb)
+{
+	netdev_features_t features = netif_skb_features(skb);
+	struct sk_buff *segs;
+	int ret = 0;
+
+	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+	if (IS_ERR(segs)) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+
+	consume_skb(skb);
+
+	do {
+		struct sk_buff *nskb = segs->next;
+		int err;
+
+		segs->next = NULL;
+		err = dst_output(segs);
+
+		if (err && ret == 0)
+			ret = err;
+		segs = nskb;
+	} while (segs);
+
+	return ret;
+}
+
 static int ip_forward_finish(struct sk_buff *skb)
 {
 	struct ip_options *opt	= &(IPCB(skb)->opt);
@@ -49,6 +103,9 @@ static int ip_forward_finish(struct sk_buff *skb)
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
 
+	if (ip_gso_exceeds_dst_mtu(skb))
+		return ip_forward_finish_gso(skb);
+
 	return dst_output(skb);
 }
 
@@ -91,8 +148,7 @@ int ip_forward(struct sk_buff *skb)
 
 	IPCB(skb)->flags |= IPSKB_FORWARDED;
 	mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
-	if (unlikely(skb->len > mtu && !skb_is_gso(skb) &&
-		     (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
+	if (!ip_may_fragment(skb) && ip_exceeds_mtu(skb, mtu)) {
 		IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 			  htonl(mtu));
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ef02b26..070a2fa 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -342,6 +342,20 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
 	return mtu;
 }
 
+static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
+{
+	if (skb->len <= mtu || skb->local_df)
+		return false;
+
+	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
+		return true;
+
+	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+		return false;
+
+	return true;
+}
+
 int ip6_forward(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
@@ -466,8 +480,7 @@ int ip6_forward(struct sk_buff *skb)
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
 
-	if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
-	    (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
+	if (ip6_pkt_too_big(skb, mtu)) {
 		/* Again, force OUTPUT device used as source address */
 		skb->dev = dst->dev;
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-- 
1.8.1.5

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/2] net: add and use skb_gso_transport_seglen()
  2014-01-27  8:22 [PATCH 1/2] net: add and use skb_gso_transport_seglen() Florian Westphal
  2014-01-27  8:22 ` [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path Florian Westphal
@ 2014-01-27  8:30 ` David Miller
  1 sibling, 0 replies; 30+ messages in thread
From: David Miller @ 2014-01-27  8:30 UTC (permalink / raw)
  To: fw; +Cc: netdev

From: Florian Westphal <fw@strlen.de>
Date: Mon, 27 Jan 2014 09:22:50 +0100

> This moves part of Eric Dumazets skb_gso_seglen helper from tbf sched to
> skbuff core so it may be reused by upcoming ip forwarding path patch.
> 
> Acked-by: Eric Dumazet <edumazet@google.com>
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  No changes since V2, resending since patch 2/2 had issues
>  and I don't see 1/2 in net tree.

Sorry, I forgot to push after my test builds, done.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-01-27  8:22 ` [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path Florian Westphal
@ 2014-01-27  8:34   ` David Miller
  2014-01-27  8:36     ` Florian Westphal
  2014-01-27 18:22   ` Eric Dumazet
  1 sibling, 1 reply; 30+ messages in thread
From: David Miller @ 2014-01-27  8:34 UTC (permalink / raw)
  To: fw; +Cc: netdev

From: Florian Westphal <fw@strlen.de>
Date: Mon, 27 Jan 2014 09:22:51 +0100

> Changes since V2:
>  - make this thing apply to current -net tree
>  - kill unused variables in ip_forward/ip6_output

Still need changes.
> +	return skb_gso_network_seglen(skb) > dst_mtu(skb_dst(skb));

You can't use dst_mtu() directly, in order to be consistent with the
rest of the forwarding code in this file you must use something like:

>  	mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-01-27  8:34   ` David Miller
@ 2014-01-27  8:36     ` Florian Westphal
  0 siblings, 0 replies; 30+ messages in thread
From: Florian Westphal @ 2014-01-27  8:36 UTC (permalink / raw)
  To: David Miller; +Cc: fw, netdev

David Miller <davem@davemloft.net> wrote:
> From: Florian Westphal <fw@strlen.de>
> Date: Mon, 27 Jan 2014 09:22:51 +0100
> > Changes since V2:
> >  - make this thing apply to current -net tree
> >  - kill unused variables in ip_forward/ip6_output
> 
> Still need changes.
> > +	return skb_gso_network_seglen(skb) > dst_mtu(skb_dst(skb));
> 
> You can't use dst_mtu() directly, in order to be consistent with the
> rest of the forwarding code in this file you must use something like:
> 
> >  	mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);

You're right of course.

Sorry.  I will fix this up and NOT resend soon, its clear I need
to do more homework (aka follow Hannes PMTU changes).

Expect a V4 in a couple of hours.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-01-27  8:22 ` [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path Florian Westphal
  2014-01-27  8:34   ` David Miller
@ 2014-01-27 18:22   ` Eric Dumazet
  2014-01-27 20:58     ` David Miller
                       ` (2 more replies)
  1 sibling, 3 replies; 30+ messages in thread
From: Eric Dumazet @ 2014-01-27 18:22 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netdev

On Mon, 2014-01-27 at 09:22 +0100, Florian Westphal wrote:

> +/* called if GSO skb needs to be fragmented on forward.  */
> +static int ip_forward_finish_gso(struct sk_buff *skb)
> +{
> +	netdev_features_t features = netif_skb_features(skb);
> +	struct sk_buff *segs;
> +	int ret = 0;
> +
> +	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
> +	if (IS_ERR(segs)) {
> +		kfree_skb(skb);
> +		return -ENOMEM;
> +	}
> +
> +	consume_skb(skb);
> +
> +	do {
> +		struct sk_buff *nskb = segs->next;
> +		int err;
> +
> +		segs->next = NULL;
> +		err = dst_output(segs);
> +
> +		if (err && ret == 0)
> +			ret = err;
> +		segs = nskb;
> +	} while (segs);
> +
> +	return ret;
> +}
> +

Its still unclear if this is the best strategy.

TCP stream not using DF flag are very unlikely to care if we adjust
their MTU (lowering gso_size) at this point ?

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-01-27 18:22   ` Eric Dumazet
@ 2014-01-27 20:58     ` David Miller
  2014-01-27 21:08       ` David Miller
  2014-01-28  0:27     ` Hannes Frederic Sowa
  2014-01-28  8:57     ` Florian Westphal
  2 siblings, 1 reply; 30+ messages in thread
From: David Miller @ 2014-01-27 20:58 UTC (permalink / raw)
  To: eric.dumazet; +Cc: fw, netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 27 Jan 2014 10:22:47 -0800

> Its still unclear if this is the best strategy.
> 
> TCP stream not using DF flag are very unlikely to care if we adjust
> their MTU (lowering gso_size) at this point ?

It's better than what happens now when the destination link has a lower
MTU, wouldn't you say?

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-01-27 20:58     ` David Miller
@ 2014-01-27 21:08       ` David Miller
  0 siblings, 0 replies; 30+ messages in thread
From: David Miller @ 2014-01-27 21:08 UTC (permalink / raw)
  To: eric.dumazet; +Cc: fw, netdev

From: David Miller <davem@davemloft.net>
Date: Mon, 27 Jan 2014 12:58:38 -0800 (PST)

> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Mon, 27 Jan 2014 10:22:47 -0800
> 
>> Its still unclear if this is the best strategy.
>> 
>> TCP stream not using DF flag are very unlikely to care if we adjust
>> their MTU (lowering gso_size) at this point ?
> 
> It's better than what happens now when the destination link has a lower
> MTU, wouldn't you say?

In the mean time I'll hold off on this patch while you guys discuss
this.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-01-27 18:22   ` Eric Dumazet
  2014-01-27 20:58     ` David Miller
@ 2014-01-28  0:27     ` Hannes Frederic Sowa
  2014-01-28  9:12       ` Florian Westphal
  2014-01-29 10:53       ` Florian Westphal
  2014-01-28  8:57     ` Florian Westphal
  2 siblings, 2 replies; 30+ messages in thread
From: Hannes Frederic Sowa @ 2014-01-28  0:27 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Florian Westphal, netdev

On Mon, Jan 27, 2014 at 10:22:47AM -0800, Eric Dumazet wrote:
> On Mon, 2014-01-27 at 09:22 +0100, Florian Westphal wrote:
> 
> > +/* called if GSO skb needs to be fragmented on forward.  */
> > +static int ip_forward_finish_gso(struct sk_buff *skb)
> > +{
> > +	netdev_features_t features = netif_skb_features(skb);

netif_skb_features uses skb->dev for determination of offloading features but
we actually need rt->dst.dev, no?

> > +	struct sk_buff *segs;
> > +	int ret = 0;
> > +
> > +	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
> > +	if (IS_ERR(segs)) {
> > +		kfree_skb(skb);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	consume_skb(skb);
> > +
> > +	do {
> > +		struct sk_buff *nskb = segs->next;
> > +		int err;
> > +
> > +		segs->next = NULL;
> > +		err = dst_output(segs);
> > +
> > +		if (err && ret == 0)
> > +			ret = err;
> > +		segs = nskb;
> > +	} while (segs);
> > +
> > +	return ret;
> > +}
> > +
> 
> Its still unclear if this is the best strategy.
> 
> TCP stream not using DF flag are very unlikely to care if we adjust
> their MTU (lowering gso_size) at this point ?

UDP shouldn't be a problem, too.

Greetings,

  Hannes

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-01-27 18:22   ` Eric Dumazet
  2014-01-27 20:58     ` David Miller
  2014-01-28  0:27     ` Hannes Frederic Sowa
@ 2014-01-28  8:57     ` Florian Westphal
  2014-01-28 16:34       ` Eric Dumazet
  2 siblings, 1 reply; 30+ messages in thread
From: Florian Westphal @ 2014-01-28  8:57 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Florian Westphal, netdev

Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > +	do {
> > +		struct sk_buff *nskb = segs->next;
> > +		int err;
> > +
> > +		segs->next = NULL;
> > +		err = dst_output(segs);
> > +
> > +		if (err && ret == 0)
> > +			ret = err;
> > +		segs = nskb;
> > +	} while (segs);
> > +
> > +	return ret;
> > +}
> > +
> 
> Its still unclear if this is the best strategy.
> 
> TCP stream not using DF flag are very unlikely to care if we adjust
> their MTU (lowering gso_size) at this point ?

Thanks for this suggestion.  It would indeed be nice to avoid sw
segmentation.  I tried:

static void ip_gso_adjust_seglen(struct sk_buff *skb)
{
        unsigned int mtu;

        if (!skb_is_gso(skb))
                return;

        mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
        skb_shinfo(skb)->gso_size = mtu - sizeof(struct iphdr);
}

But this yields

[   28.644776] kernel BUG at net/net/core/skbuff.c:2984!
[   28.644776] invalid opcode: 0000 [#1] SMP 
[   28.644776] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.13.0+ #35
[   28.644776] task: ffffffff818104c0 ti: ffffffff81800000 task.ti: ffffffff81800000
[   28.644776] RIP: 0010:[<ffffffff813b10d8>]  [<ffffffff813b10d8>] skb_segment+0x808/0x830
[   28.644776] RSP: 0018:ffff88002fc03688  EFLAGS: 00010212
[   28.644776] RAX: 000000000000047c RBX: ffff88002d614b00 RCX: ffff88002d72ab00
[   28.644776] RDX: 000000000000047c RSI: 00000000000050fa RDI: ffff88002cf9f800
[   28.644776] RBP: ffff88002fc03778 R08: 0000000000000000 R09: ffff88002cdaf300
[   28.644776] R10: 0000000000000011 R11: 0000000000004ff2 R12: ffff88002cf9ff80
[   28.644776] R13: 0000000000000011 R14: 00000000000050fa R15: 00000000000054a2
[   28.644776] FS:  00007f27db007700(0000) GS:ffff88002fc00000(0000) knlGS:0000000000000000
[   28.644776] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[   28.644776] CR2: 00007f8176cedcfc CR3: 000000002cd14000 CR4: 00000000000006b0
[   28.644776] Stack:
[   28.644776]  0000000000000046 ffffffff00000014 0000000000000001 ffffffff00000022
[   28.644776]  ffff88002cdaf300 ffff88002d72aaf0 0000000000000000 0000000000004ff2
[   28.644776]  0000000000000014 ffffffff818104c0 ffffffff81810bc8 ffffffffffffffbe
[   28.644776] Call Trace:
[   28.644776]  <IRQ> 
[   28.644776]  [<ffffffff8125e742>] ? number.isra.1+0x302/0x330
[   28.644776]  [<ffffffff8142f35e>] tcp_gso_segment+0x11e/0x3f0
[   28.644776]  [<ffffffff8143f2c9>] inet_gso_segment+0x129/0x350
[   28.644776]  [<ffffffff810832cf>] ?  __lock_acquire+0x2ef/0x1ca0
[   28.644776]  [<ffffffff813bcd9d>] skb_mac_gso_segment+0xdd/0x1e0
[   28.644776]  [<ffffffff813bcd07>] ?  skb_mac_gso_segment+0x47/0x1e0
[   28.644776]  [<ffffffff813bcf00>] __skb_gso_segment+0x60/0xc0
[   28.644776]  [<ffffffff813bd203>] dev_hard_start_xmit+0x183/0x5b0
[   28.644776]  [<ffffffff813e064e>] sch_direct_xmit+0xfe/0x280
[   28.644776]  [<ffffffff813bd843>] __dev_queue_xmit+0x213/0x6b0
[   28.644776]  [<ffffffff813bd635>] ?  __dev_queue_xmit+0x5/0x6b0
[   28.644776]  [<ffffffff813bdcf0>] dev_queue_xmit+0x10/0x20
[   28.644776]  [<ffffffff8140c2a9>] ip_finish_output+0x419/0x600
[   28.644776]  [<ffffffff8140c4de>] ? ip_output+0x4e/0xc0
[   28.644776]  [<ffffffff810803e4>] ? __lock_is_held+0x54/0x80
[   28.644776]  [<ffffffff8140c4de>] ip_output+0x4e/0xc0
[   28.644776]  [<ffffffff81407ffb>] ip_forward+0x21b/0x650

Eric, any chance you know wheter mucking with gso_size in this way
is supposed to work?

I will go through skb_segment and see if I can find out what exactly causes this
BUG_ON to trigger.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-01-28  0:27     ` Hannes Frederic Sowa
@ 2014-01-28  9:12       ` Florian Westphal
  2014-01-29 10:53       ` Florian Westphal
  1 sibling, 0 replies; 30+ messages in thread
From: Florian Westphal @ 2014-01-28  9:12 UTC (permalink / raw)
  To: Eric Dumazet, Florian Westphal, netdev

Hannes Frederic Sowa <hannes@stressinduktion.org> wrote:
> On Mon, Jan 27, 2014 at 10:22:47AM -0800, Eric Dumazet wrote:
> > On Mon, 2014-01-27 at 09:22 +0100, Florian Westphal wrote:
> > 
> > > +/* called if GSO skb needs to be fragmented on forward.  */
> > > +static int ip_forward_finish_gso(struct sk_buff *skb)
> > > +{
> > > +	netdev_features_t features = netif_skb_features(skb);
> 
> netif_skb_features uses skb->dev for determination of offloading features but
> we actually need rt->dst.dev, no?

good catch, cannot use netif_skb_features as skb->dev still
points to incoming device here...

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-01-28  8:57     ` Florian Westphal
@ 2014-01-28 16:34       ` Eric Dumazet
  2014-01-28 17:15         ` Florian Westphal
                           ` (2 more replies)
  0 siblings, 3 replies; 30+ messages in thread
From: Eric Dumazet @ 2014-01-28 16:34 UTC (permalink / raw)
  To: Florian Westphal, Herbert Xu; +Cc: netdev

On Tue, 2014-01-28 at 09:57 +0100, Florian Westphal wrote:
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > > +	do {
> > > +		struct sk_buff *nskb = segs->next;
> > > +		int err;
> > > +
> > > +		segs->next = NULL;
> > > +		err = dst_output(segs);
> > > +
> > > +		if (err && ret == 0)
> > > +			ret = err;
> > > +		segs = nskb;
> > > +	} while (segs);
> > > +
> > > +	return ret;
> > > +}
> > > +
> > 
> > Its still unclear if this is the best strategy.
> > 
> > TCP stream not using DF flag are very unlikely to care if we adjust
> > their MTU (lowering gso_size) at this point ?
> 
> Thanks for this suggestion.  It would indeed be nice to avoid sw
> segmentation.  I tried:
> 
> static void ip_gso_adjust_seglen(struct sk_buff *skb)
> {
>         unsigned int mtu;
> 
>         if (!skb_is_gso(skb))
>                 return;
> 
>         mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
>         skb_shinfo(skb)->gso_size = mtu - sizeof(struct iphdr);
> }
> 
> But this yields
> 
> [   28.644776] kernel BUG at net/net/core/skbuff.c:2984!

Yep, lets CC Herbert Xu, as he 'owns' skb_segment()

BUG_ON(skb_headlen(fskb));

I sent once a generic version of skb_segment(), but Herbert preferred a
different one.

> [   28.644776] invalid opcode: 0000 [#1] SMP 
> [   28.644776] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.13.0+ #35
> [   28.644776] task: ffffffff818104c0 ti: ffffffff81800000 task.ti: ffffffff81800000
> [   28.644776] RIP: 0010:[<ffffffff813b10d8>]  [<ffffffff813b10d8>] skb_segment+0x808/0x830
> [   28.644776] RSP: 0018:ffff88002fc03688  EFLAGS: 00010212
> [   28.644776] RAX: 000000000000047c RBX: ffff88002d614b00 RCX: ffff88002d72ab00
> [   28.644776] RDX: 000000000000047c RSI: 00000000000050fa RDI: ffff88002cf9f800
> [   28.644776] RBP: ffff88002fc03778 R08: 0000000000000000 R09: ffff88002cdaf300
> [   28.644776] R10: 0000000000000011 R11: 0000000000004ff2 R12: ffff88002cf9ff80
> [   28.644776] R13: 0000000000000011 R14: 00000000000050fa R15: 00000000000054a2
> [   28.644776] FS:  00007f27db007700(0000) GS:ffff88002fc00000(0000) knlGS:0000000000000000
> [   28.644776] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> [   28.644776] CR2: 00007f8176cedcfc CR3: 000000002cd14000 CR4: 00000000000006b0
> [   28.644776] Stack:
> [   28.644776]  0000000000000046 ffffffff00000014 0000000000000001 ffffffff00000022
> [   28.644776]  ffff88002cdaf300 ffff88002d72aaf0 0000000000000000 0000000000004ff2
> [   28.644776]  0000000000000014 ffffffff818104c0 ffffffff81810bc8 ffffffffffffffbe
> [   28.644776] Call Trace:
> [   28.644776]  <IRQ> 
> [   28.644776]  [<ffffffff8125e742>] ? number.isra.1+0x302/0x330
> [   28.644776]  [<ffffffff8142f35e>] tcp_gso_segment+0x11e/0x3f0
> [   28.644776]  [<ffffffff8143f2c9>] inet_gso_segment+0x129/0x350
> [   28.644776]  [<ffffffff810832cf>] ?  __lock_acquire+0x2ef/0x1ca0
> [   28.644776]  [<ffffffff813bcd9d>] skb_mac_gso_segment+0xdd/0x1e0
> [   28.644776]  [<ffffffff813bcd07>] ?  skb_mac_gso_segment+0x47/0x1e0
> [   28.644776]  [<ffffffff813bcf00>] __skb_gso_segment+0x60/0xc0
> [   28.644776]  [<ffffffff813bd203>] dev_hard_start_xmit+0x183/0x5b0
> [   28.644776]  [<ffffffff813e064e>] sch_direct_xmit+0xfe/0x280
> [   28.644776]  [<ffffffff813bd843>] __dev_queue_xmit+0x213/0x6b0
> [   28.644776]  [<ffffffff813bd635>] ?  __dev_queue_xmit+0x5/0x6b0
> [   28.644776]  [<ffffffff813bdcf0>] dev_queue_xmit+0x10/0x20
> [   28.644776]  [<ffffffff8140c2a9>] ip_finish_output+0x419/0x600
> [   28.644776]  [<ffffffff8140c4de>] ? ip_output+0x4e/0xc0
> [   28.644776]  [<ffffffff810803e4>] ? __lock_is_held+0x54/0x80
> [   28.644776]  [<ffffffff8140c4de>] ip_output+0x4e/0xc0
> [   28.644776]  [<ffffffff81407ffb>] ip_forward+0x21b/0x650
> 
> Eric, any chance you know wheter mucking with gso_size in this way
> is supposed to work?
> 
> I will go through skb_segment and see if I can find out what exactly causes this
> BUG_ON to trigger.

This is definitely net-next material anyway, no hurry ;)

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-01-28 16:34       ` Eric Dumazet
@ 2014-01-28 17:15         ` Florian Westphal
  2014-01-28 17:30           ` Eric Dumazet
  2014-01-29 11:00         ` Florian Westphal
  2014-02-09  2:55         ` Herbert Xu
  2 siblings, 1 reply; 30+ messages in thread
From: Florian Westphal @ 2014-01-28 17:15 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Florian Westphal, Herbert Xu, netdev

Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > Eric, any chance you know wheter mucking with gso_size in this way
> > is supposed to work?
> > 
> > I will go through skb_segment and see if I can find out what exactly causes this
> > BUG_ON to trigger.
> 
> This is definitely net-next material anyway, no hurry ;)

Yes, looks like it :)

Eric, do you mind if I re-send the patch with skb_gso_segment and a zero
feature mask?

I think thats the best solution for -net.  I would then try to come up
with a version that follows your "shrink gso_size" suggestion for -next.

Cheers,
Florian

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-01-28 17:15         ` Florian Westphal
@ 2014-01-28 17:30           ` Eric Dumazet
  2014-01-28 17:37             ` Florian Westphal
  0 siblings, 1 reply; 30+ messages in thread
From: Eric Dumazet @ 2014-01-28 17:30 UTC (permalink / raw)
  To: Florian Westphal; +Cc: Herbert Xu, netdev

On Tue, 2014-01-28 at 18:15 +0100, Florian Westphal wrote:
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > > Eric, any chance you know wheter mucking with gso_size in this way
> > > is supposed to work?
> > > 
> > > I will go through skb_segment and see if I can find out what exactly causes this
> > > BUG_ON to trigger.
> > 
> > This is definitely net-next material anyway, no hurry ;)
> 
> Yes, looks like it :)
> 
> Eric, do you mind if I re-send the patch with skb_gso_segment and a zero
> feature mask?



I think the xmit will take care of doing the fallback anyway, if skb
need to be linear or TX checksum be computed. 

> I think thats the best solution for -net.  I would then try to come up
> with a version that follows your "shrink gso_size" suggestion for -next.

Note that I mentioned this MTU thing months ago, and the bug is here
since years. I do not think its a very urgent matter :)

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-01-28 17:30           ` Eric Dumazet
@ 2014-01-28 17:37             ` Florian Westphal
  0 siblings, 0 replies; 30+ messages in thread
From: Florian Westphal @ 2014-01-28 17:37 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Florian Westphal, Herbert Xu, netdev

Eric Dumazet <eric.dumazet@gmail.com> wrote:
> I think the xmit will take care of doing the fallback anyway, if skb
> need to be linear or TX checksum be computed. 
> 
> > I think thats the best solution for -net.  I would then try to come up
> > with a version that follows your "shrink gso_size" suggestion for -next.
> 
> Note that I mentioned this MTU thing months ago, and the bug is here
> since years. I do not think its a very urgent matter :)

Fair enough.  I'll see that I have something ready when -next opens.

Thanks Eric.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-01-28  0:27     ` Hannes Frederic Sowa
  2014-01-28  9:12       ` Florian Westphal
@ 2014-01-29 10:53       ` Florian Westphal
  2014-01-29 11:04         ` Hannes Frederic Sowa
  1 sibling, 1 reply; 30+ messages in thread
From: Florian Westphal @ 2014-01-29 10:53 UTC (permalink / raw)
  To: Eric Dumazet, Florian Westphal, netdev

Hannes Frederic Sowa <hannes@stressinduktion.org> wrote:
> > TCP stream not using DF flag are very unlikely to care if we adjust
> > their MTU (lowering gso_size) at this point ?
> 
> UDP shouldn't be a problem, too.

Sorry for late reply, but how can this be safe for UDP?
We should make sure that peer sees original, unchanged datagram?

And only solution for UDP that I can see is to do sw segmentation (i.e.
create ip fragments).

Thanks,
Florian

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-01-28 16:34       ` Eric Dumazet
  2014-01-28 17:15         ` Florian Westphal
@ 2014-01-29 11:00         ` Florian Westphal
  2014-02-09  2:55         ` Herbert Xu
  2 siblings, 0 replies; 30+ messages in thread
From: Florian Westphal @ 2014-01-29 11:00 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Florian Westphal, Herbert Xu, netdev

Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > > Its still unclear if this is the best strategy.
> > > 
> > > TCP stream not using DF flag are very unlikely to care if we adjust
> > > their MTU (lowering gso_size) at this point ?
> > 
> > Thanks for this suggestion.  It would indeed be nice to avoid sw
> > segmentation.  I tried:
> > 
> > static void ip_gso_adjust_seglen(struct sk_buff *skb)
> > {
> >         unsigned int mtu;
> > 
> >         if (!skb_is_gso(skb))
> >                 return;
> > 
> >         mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
> >         skb_shinfo(skb)->gso_size = mtu - sizeof(struct iphdr);

FWIW Erics idea works fine with:

        headerlen = skb_transport_header(skb) - skb_network_header(skb);
        if (likely(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
                headerlen += tcp_hdrlen(skb);
        skb_shinfo(skb)->gso_size = mtu - headerlen;

and disabling 'sg' on outgoing (lower-mtu) interface.  [ else BUG() ]

> > [   28.644776] kernel BUG at net/net/core/skbuff.c:2984!
> 
> Yep, lets CC Herbert Xu, as he 'owns' skb_segment()
> 
> BUG_ON(skb_headlen(fskb));

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-01-29 10:53       ` Florian Westphal
@ 2014-01-29 11:04         ` Hannes Frederic Sowa
  0 siblings, 0 replies; 30+ messages in thread
From: Hannes Frederic Sowa @ 2014-01-29 11:04 UTC (permalink / raw)
  To: Florian Westphal; +Cc: Eric Dumazet, netdev

On Wed, Jan 29, 2014 at 11:53:47AM +0100, Florian Westphal wrote:
> Hannes Frederic Sowa <hannes@stressinduktion.org> wrote:
> > > TCP stream not using DF flag are very unlikely to care if we adjust
> > > their MTU (lowering gso_size) at this point ?
> > 
> > UDP shouldn't be a problem, too.
> 
> Sorry for late reply, but how can this be safe for UDP?
> We should make sure that peer sees original, unchanged datagram?

Peer as in original destination? Of course, we must not alter the datagram but
can only do fragmentation or send back frag_needed.

> And only solution for UDP that I can see is to do sw segmentation (i.e.
> create ip fragments).

Hardware(-UFO) would do fragmentation in hardware, too, because there is
no other way to split UDP data in any other way. If UFO is not supported
manual sw segmentation would create the required fragments in output
path, too.

Greetings,

  Hannes

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-01-28 16:34       ` Eric Dumazet
  2014-01-28 17:15         ` Florian Westphal
  2014-01-29 11:00         ` Florian Westphal
@ 2014-02-09  2:55         ` Herbert Xu
  2014-02-10 12:23           ` Florian Westphal
  2 siblings, 1 reply; 30+ messages in thread
From: Herbert Xu @ 2014-02-09  2:55 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Florian Westphal, netdev

On Tue, Jan 28, 2014 at 08:34:43AM -0800, Eric Dumazet wrote:
> On Tue, 2014-01-28 at 09:57 +0100, Florian Westphal wrote:
> > Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > > > +	do {
> > > > +		struct sk_buff *nskb = segs->next;
> > > > +		int err;
> > > > +
> > > > +		segs->next = NULL;
> > > > +		err = dst_output(segs);
> > > > +
> > > > +		if (err && ret == 0)
> > > > +			ret = err;
> > > > +		segs = nskb;
> > > > +	} while (segs);
> > > > +
> > > > +	return ret;
> > > > +}
> > > > +
> > > 
> > > Its still unclear if this is the best strategy.
> > > 
> > > TCP stream not using DF flag are very unlikely to care if we adjust
> > > their MTU (lowering gso_size) at this point ?
> > 
> > Thanks for this suggestion.  It would indeed be nice to avoid sw
> > segmentation.  I tried:
> > 
> > static void ip_gso_adjust_seglen(struct sk_buff *skb)
> > {
> >         unsigned int mtu;
> > 
> >         if (!skb_is_gso(skb))
> >                 return;
> > 
> >         mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
> >         skb_shinfo(skb)->gso_size = mtu - sizeof(struct iphdr);
> > }
> > 
> > But this yields
> > 
> > [   28.644776] kernel BUG at net/net/core/skbuff.c:2984!
> 
> Yep, lets CC Herbert Xu, as he 'owns' skb_segment()

IMHO we should just stop merging ~DF packets altogether, at least
for TCP.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-02-09  2:55         ` Herbert Xu
@ 2014-02-10 12:23           ` Florian Westphal
  2014-02-10 12:31             ` Herbert Xu
  0 siblings, 1 reply; 30+ messages in thread
From: Florian Westphal @ 2014-02-10 12:23 UTC (permalink / raw)
  To: Herbert Xu; +Cc: Eric Dumazet, Florian Westphal, netdev

Herbert Xu <herbert@gondor.apana.org.au> wrote:
> > > static void ip_gso_adjust_seglen(struct sk_buff *skb)
> > > {
> > >         unsigned int mtu;
> > > 
> > >         if (!skb_is_gso(skb))
> > >                 return;
> > > 
> > >         mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
> > >         skb_shinfo(skb)->gso_size = mtu - sizeof(struct iphdr);
> > > }
> > > 
> > > But this yields
> > > 
> > > [   28.644776] kernel BUG at net/net/core/skbuff.c:2984!
> > 
> > Yep, lets CC Herbert Xu, as he 'owns' skb_segment()
> 
> IMHO we should just stop merging ~DF packets altogether, at least
> for TCP.

Eric, you added DF aggregation in db8caf3dbc77599dc90f4ea0a803cd1d97116f30
(gro: should aggregate frames without DF).

I guess you don't want to revert this commit?
Any other ideas?

skb_gso_segment() is already very complex, I don't want to add more code
to it.  And that seems unavoidable if we need to de-couple nr_frags and
gso_size.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-02-10 12:23           ` Florian Westphal
@ 2014-02-10 12:31             ` Herbert Xu
  2014-02-10 12:43               ` Florian Westphal
  2014-02-10 13:12               ` Eric Dumazet
  0 siblings, 2 replies; 30+ messages in thread
From: Herbert Xu @ 2014-02-10 12:31 UTC (permalink / raw)
  To: Florian Westphal; +Cc: Eric Dumazet, netdev

On Mon, Feb 10, 2014 at 01:23:31PM +0100, Florian Westphal wrote:
> Herbert Xu <herbert@gondor.apana.org.au> wrote:
> > > > static void ip_gso_adjust_seglen(struct sk_buff *skb)
> > > > {
> > > >         unsigned int mtu;
> > > > 
> > > >         if (!skb_is_gso(skb))
> > > >                 return;
> > > > 
> > > >         mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
> > > >         skb_shinfo(skb)->gso_size = mtu - sizeof(struct iphdr);
> > > > }
> > > > 
> > > > But this yields
> > > > 
> > > > [   28.644776] kernel BUG at net/net/core/skbuff.c:2984!
> > > 
> > > Yep, lets CC Herbert Xu, as he 'owns' skb_segment()
> > 
> > IMHO we should just stop merging ~DF packets altogether, at least
> > for TCP.
> 
> Eric, you added DF aggregation in db8caf3dbc77599dc90f4ea0a803cd1d97116f30
> (gro: should aggregate frames without DF).
> 
> I guess you don't want to revert this commit?
> Any other ideas?
> 
> skb_gso_segment() is already very complex, I don't want to add more code
> to it.  And that seems unavoidable if we need to de-couple nr_frags and
> gso_size.

I don't think adding all this complexity just to be able to
aggregate ~DF packets (which are just wrong to begin with) is
worth it.

If aggregating ~DF packets was a one-liner then sure, but there
is a reason why I didn't aggregate them in the first place and
you've found it :)

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-02-10 12:31             ` Herbert Xu
@ 2014-02-10 12:43               ` Florian Westphal
  2014-02-10 12:50                 ` Herbert Xu
  2014-02-10 13:15                 ` Eric Dumazet
  2014-02-10 13:12               ` Eric Dumazet
  1 sibling, 2 replies; 30+ messages in thread
From: Florian Westphal @ 2014-02-10 12:43 UTC (permalink / raw)
  To: Herbert Xu; +Cc: Florian Westphal, Eric Dumazet, netdev

Herbert Xu <herbert@gondor.apana.org.au> wrote:
> On Mon, Feb 10, 2014 at 01:23:31PM +0100, Florian Westphal wrote:
> > Herbert Xu <herbert@gondor.apana.org.au> wrote:
> > > > > static void ip_gso_adjust_seglen(struct sk_buff *skb)
> > > > > {
> > > > >         unsigned int mtu;
> > > > > 
> > > > >         if (!skb_is_gso(skb))
> > > > >                 return;
> > > > > 
> > > > >         mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
> > > > >         skb_shinfo(skb)->gso_size = mtu - sizeof(struct iphdr);
> > > > > }
> > > > > 
> > > > > But this yields
> > > > > 
> > > > > [   28.644776] kernel BUG at net/net/core/skbuff.c:2984!
> > > > 
> > > > Yep, lets CC Herbert Xu, as he 'owns' skb_segment()
> > > 
> > > IMHO we should just stop merging ~DF packets altogether, at least
> > > for TCP.
> > 
> > Eric, you added DF aggregation in db8caf3dbc77599dc90f4ea0a803cd1d97116f30
> > (gro: should aggregate frames without DF).
> > 
> > I guess you don't want to revert this commit?
> > Any other ideas?
> > 
> > skb_gso_segment() is already very complex, I don't want to add more code
> > to it.  And that seems unavoidable if we need to de-couple nr_frags and
> > gso_size.
> 
> I don't think adding all this complexity just to be able to
> aggregate ~DF packets (which are just wrong to begin with) is
> worth it.
> 
> If aggregating ~DF packets was a one-liner then sure, but there
> is a reason why I didn't aggregate them in the first place and
> you've found it :)

Well we could go with my original patch that will do software
segmentation on ~DF packets in the forwarding path if the outmtu is too
small for the individual packets.  The output path then simply
creates fragments.

Eric suggested to shrink gso_size instead to avoid segmentation+fragments.
I think its nice idea, but skb_gso_segment makes certain assumptions about
nr_frags and gso_size (it can't handle frag size > desired mss).

Hannes pointed out that we'd also need to deal with
SKB_MAX_FRAGS * gso_size exceeding fragments.

Quite frankly, I'd prefer to go with

skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

The scenario is rare anyway given the number of bug reports (or lack
thereof) about '~DF tcp doesn't work with gro in fwd path when output
mtu is too small'.

Its not like this could never be improved later on.

Best regards,
Florian

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-02-10 12:43               ` Florian Westphal
@ 2014-02-10 12:50                 ` Herbert Xu
  2014-02-10 13:08                   ` Eric Dumazet
  2014-02-10 13:15                 ` Eric Dumazet
  1 sibling, 1 reply; 30+ messages in thread
From: Herbert Xu @ 2014-02-10 12:50 UTC (permalink / raw)
  To: Florian Westphal; +Cc: Eric Dumazet, netdev

On Mon, Feb 10, 2014 at 01:43:46PM +0100, Florian Westphal wrote:
>
> Eric suggested to shrink gso_size instead to avoid segmentation+fragments.
> I think its nice idea, but skb_gso_segment makes certain assumptions about
> nr_frags and gso_size (it can't handle frag size > desired mss).

This breaks the most important assumption behind GRO which is
to preserve end-to-end connectivity.  Resegmenting packets as
suggested on a router/bridge is just wrong.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-02-10 12:50                 ` Herbert Xu
@ 2014-02-10 13:08                   ` Eric Dumazet
  0 siblings, 0 replies; 30+ messages in thread
From: Eric Dumazet @ 2014-02-10 13:08 UTC (permalink / raw)
  To: Herbert Xu; +Cc: Florian Westphal, netdev

On Mon, 2014-02-10 at 20:50 +0800, Herbert Xu wrote:
> On Mon, Feb 10, 2014 at 01:43:46PM +0100, Florian Westphal wrote:
> >
> > Eric suggested to shrink gso_size instead to avoid segmentation+fragments.
> > I think its nice idea, but skb_gso_segment makes certain assumptions about
> > nr_frags and gso_size (it can't handle frag size > desired mss).
> 
> This breaks the most important assumption behind GRO which is
> to preserve end-to-end connectivity.  Resegmenting packets as
> suggested on a router/bridge is just wrong.

Yeah, this is the old mantra.

Sending TCP packets without DF means the sender do not care by
definition.

If you disable GRO for such packets, it slows down receivers and
increase packet drops.

I've added the segmentation for these packets for a reason, that you are
free to not understand, but there is absolutely no need reason to not
aggregate TCP packets without DF. This is what you suggested to ignore
the problem on skb_segment() being so limited.

Instead of a router being forced to segment all incoming fragments into

X+Y
X+Y
X+Y
X+Y

Its reasonable to send X+X+X+X+X

And we should be reasonable, not trying to enforce a particular view of
what _should_ the traffic looks like on the Internet.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-02-10 12:31             ` Herbert Xu
  2014-02-10 12:43               ` Florian Westphal
@ 2014-02-10 13:12               ` Eric Dumazet
  1 sibling, 0 replies; 30+ messages in thread
From: Eric Dumazet @ 2014-02-10 13:12 UTC (permalink / raw)
  To: Herbert Xu; +Cc: Florian Westphal, netdev

On Mon, 2014-02-10 at 20:31 +0800, Herbert Xu wrote:
> On Mon, Feb 10, 2014 at 01:23:31PM +0100, Florian Westphal wrote:
> > Herbert Xu <herbert@gondor.apana.org.au> wrote:
> > > > > static void ip_gso_adjust_seglen(struct sk_buff *skb)
> > > > > {
> > > > >         unsigned int mtu;
> > > > > 
> > > > >         if (!skb_is_gso(skb))
> > > > >                 return;
> > > > > 
> > > > >         mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
> > > > >         skb_shinfo(skb)->gso_size = mtu - sizeof(struct iphdr);
> > > > > }
> > > > > 
> > > > > But this yields
> > > > > 
> > > > > [   28.644776] kernel BUG at net/net/core/skbuff.c:2984!
> > > > 
> > > > Yep, lets CC Herbert Xu, as he 'owns' skb_segment()
> > > 
> > > IMHO we should just stop merging ~DF packets altogether, at least
> > > for TCP.
> > 
> > Eric, you added DF aggregation in db8caf3dbc77599dc90f4ea0a803cd1d97116f30
> > (gro: should aggregate frames without DF).
> > 
> > I guess you don't want to revert this commit?
> > Any other ideas?
> > 
> > skb_gso_segment() is already very complex, I don't want to add more code
> > to it.  And that seems unavoidable if we need to de-couple nr_frags and
> > gso_size.
> 
> I don't think adding all this complexity just to be able to
> aggregate ~DF packets (which are just wrong to begin with) is
> worth it.

Wrong by your standards. Which are not universal.

> 
> If aggregating ~DF packets was a one-liner then sure, but there
> is a reason why I didn't aggregate them in the first place and
> you've found it :)

I sent months ago a solution for skb_segment() that you ignored.

I understand you never hit cases where DF was not set, I can tell you
its happening in the real world.

GRO stack already breaks reversibility by definition since day-0

Recent tunneling support breaks it as well.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path
  2014-02-10 12:43               ` Florian Westphal
  2014-02-10 12:50                 ` Herbert Xu
@ 2014-02-10 13:15                 ` Eric Dumazet
  1 sibling, 0 replies; 30+ messages in thread
From: Eric Dumazet @ 2014-02-10 13:15 UTC (permalink / raw)
  To: Florian Westphal; +Cc: Herbert Xu, netdev

On Mon, 2014-02-10 at 13:43 +0100, Florian Westphal wrote:

> Well we could go with my original patch that will do software
> segmentation on ~DF packets in the forwarding path if the outmtu is too
> small for the individual packets.  The output path then simply
> creates fragments.

Most linux routers disable GRO anyway.

GRO is mostly used on linux hosts to improve performance, so most GRO
packets are consumed on the receiving host.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH 1/2] net: add and use skb_gso_transport_seglen()
  2014-02-22  9:33 [PATCH stable 3.4.y 0/2] gso/gro forwarding changeset Florian Westphal
@ 2014-02-22  9:33 ` Florian Westphal
  0 siblings, 0 replies; 30+ messages in thread
From: Florian Westphal @ 2014-02-22  9:33 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal, David S. Miller

commit de960aa9ab4decc3304959f69533eef64d05d8e8 upstream.

[ no skb_gso_seglen helper in 3.4, leave tbf alone ]

This moves part of Eric Dumazets skb_gso_seglen helper from tbf sched to
skbuff core so it may be reused by upcoming ip forwarding path patch.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 ++
 net/core/skbuff.c      | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 42b919c..9cfc1ff 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2159,6 +2159,8 @@ extern int	       skb_shift(struct sk_buff *tgt, struct sk_buff *skb,
 extern struct sk_buff *skb_segment(struct sk_buff *skb,
 				   netdev_features_t features);
 
+unsigned int skb_gso_transport_seglen(const struct sk_buff *skb);
+
 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
 				       int len, void *buffer)
 {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e99aedd..7a597d4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -45,6 +45,8 @@
 #include <linux/in.h>
 #include <linux/inet.h>
 #include <linux/slab.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
 #include <linux/netdevice.h>
 #ifdef CONFIG_NET_CLS_ACT
 #include <net/pkt_sched.h>
@@ -3281,3 +3283,26 @@ void __skb_warn_lro_forwarding(const struct sk_buff *skb)
 			   " while LRO is enabled\n", skb->dev->name);
 }
 EXPORT_SYMBOL(__skb_warn_lro_forwarding);
+
+/**
+ * skb_gso_transport_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_transport_seglen is used to determine the real size of the
+ * individual segments, including Layer4 headers (TCP/UDP).
+ *
+ * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
+ */
+unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+	unsigned int hdr_len;
+
+	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+		hdr_len = tcp_hdrlen(skb);
+	else
+		hdr_len = sizeof(struct udphdr);
+	return hdr_len + shinfo->gso_size;
+}
+EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
-- 
1.8.1.5

^ permalink raw reply related	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2014-02-22  9:37 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-01-27  8:22 [PATCH 1/2] net: add and use skb_gso_transport_seglen() Florian Westphal
2014-01-27  8:22 ` [PATCH 2/2] net: ip, ipv6: handle gso skbs in forwarding path Florian Westphal
2014-01-27  8:34   ` David Miller
2014-01-27  8:36     ` Florian Westphal
2014-01-27 18:22   ` Eric Dumazet
2014-01-27 20:58     ` David Miller
2014-01-27 21:08       ` David Miller
2014-01-28  0:27     ` Hannes Frederic Sowa
2014-01-28  9:12       ` Florian Westphal
2014-01-29 10:53       ` Florian Westphal
2014-01-29 11:04         ` Hannes Frederic Sowa
2014-01-28  8:57     ` Florian Westphal
2014-01-28 16:34       ` Eric Dumazet
2014-01-28 17:15         ` Florian Westphal
2014-01-28 17:30           ` Eric Dumazet
2014-01-28 17:37             ` Florian Westphal
2014-01-29 11:00         ` Florian Westphal
2014-02-09  2:55         ` Herbert Xu
2014-02-10 12:23           ` Florian Westphal
2014-02-10 12:31             ` Herbert Xu
2014-02-10 12:43               ` Florian Westphal
2014-02-10 12:50                 ` Herbert Xu
2014-02-10 13:08                   ` Eric Dumazet
2014-02-10 13:15                 ` Eric Dumazet
2014-02-10 13:12               ` Eric Dumazet
2014-01-27  8:30 ` [PATCH 1/2] net: add and use skb_gso_transport_seglen() David Miller
  -- strict thread matches above, loose matches on Subject: below --
2014-02-22  9:33 [PATCH stable 3.4.y 0/2] gso/gro forwarding changeset Florian Westphal
2014-02-22  9:33 ` [PATCH 1/2] net: add and use skb_gso_transport_seglen() Florian Westphal
2014-01-25 22:48 [PATCH 0/2] Fix handling of GRO skbs in forwarding path Florian Westphal
2014-01-25 22:48 ` [PATCH 1/2] net: add and use skb_gso_transport_seglen() Florian Westphal
2014-01-26  1:28   ` Eric Dumazet
2014-01-26  9:19     ` Florian Westphal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).