netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 2.6]: Fix suboptimal fragment sizing for last fragment
@ 2004-09-02 18:36 Patrick McHardy
  2004-09-02 19:48 ` YOSHIFUJI Hideaki / 吉藤英明
  2004-09-02 21:44 ` David S. Miller
  0 siblings, 2 replies; 11+ messages in thread
From: Patrick McHardy @ 2004-09-02 18:36 UTC (permalink / raw)
  To: David S. Miller; +Cc: yoshfuji, Herbert Xu, netdev

[-- Attachment #1: Type: text/plain, Size: 810 bytes --]

Yoshifuji's recent fragment patch prevents unnecessary fragmentation
when the data can be kept in a single packet, but only for the first
packet. When fragmenting, all fragments are still truncated to
multiples of 8 and we might end up creating an unnecessary fragment.

This dump shows the problem (MTU 1499):

172.16.1.123.32771 > 172.16.195.3.4135: udp 2937 (frag 7066:1472@0+)
172.16.1.123 > 172.16.195.3: udp (frag 7066:1472@1472+)
172.16.1.123 > 172.16.195.3: udp (frag 7066:1@2944)

This patch always builds mtu sized fragments and truncates the previous
fragment to a multiple of 8 bytes when allocating a new one. With the
patch the dump looks like this:


172.16.1.123.32772 > 172.16.195.3.4135: udp 2937 (frag 49641:1472@0+)
172.16.1.123 > 172.16.195.3: udp (frag 49641:1473@1472)

Regards
Patrick


[-- Attachment #2: frag.diff --]
[-- Type: text/x-patch, Size: 7844 bytes --]

# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2004/09/02 17:35:32+02:00 kaber@coreworks.de 
#   [IPV4/IPV6]: Fix suboptimal fragment sizing for last fragment
#   
#   Signed-off-by: Patrick McHardy <kaber@trash.net>
# 
# net/ipv6/ip6_output.c
#   2004/09/02 17:35:14+02:00 kaber@coreworks.de +13 -22
#   [IPV4/IPV6]: Fix suboptimal fragment sizing for last fragment
# 
# net/ipv4/ip_output.c
#   2004/09/02 17:35:14+02:00 kaber@coreworks.de +20 -49
#   [IPV4/IPV6]: Fix suboptimal fragment sizing for last fragment
# 
diff -Nru a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
--- a/net/ipv4/ip_output.c	2004-09-02 17:39:01 +02:00
+++ b/net/ipv4/ip_output.c	2004-09-02 17:39:01 +02:00
@@ -735,10 +735,10 @@
 	int hh_len;
 	int exthdrlen;
 	int mtu;
-	int copy = 0;
+	int copy;
 	int err;
 	int offset = 0;
-	unsigned int maxfraglen, fragheaderlen, fraggap = 0;
+	unsigned int maxfraglen, fragheaderlen;
 	int csummode = CHECKSUM_NONE;
 
 	if (flags&MSG_PROBE)
@@ -781,6 +781,7 @@
 	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
 
 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
+	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
 
 	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
 		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
@@ -788,26 +789,11 @@
 	}
 
 	/*
-	 * Let's try using as much space as possible to avoid generating
-	 * additional unnecessary small fragment of length 
-	 * (mtu-fragheaderlen)%8 if mtu-fragheaderlen is not 0 modulo 8.
-	 * -- yoshfuji
-	 */
-	if (fragheaderlen + inet->cork.length + length <= mtu)
-		maxfraglen = mtu;
-	else
-		maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
-
-	if (fragheaderlen + inet->cork.length <= mtu &&
-	    fragheaderlen + inet->cork.length + length > mtu)
-		fraggap = 1;
-
-	/*
 	 * transhdrlen > 0 means that this is the first fragment and we wish
 	 * it won't be fragmented in the future.
 	 */
 	if (transhdrlen &&
-	    length + fragheaderlen <= maxfraglen &&
+	    length + fragheaderlen <= mtu &&
 	    rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) &&
 	    !exthdrlen)
 		csummode = CHECKSUM_HW;
@@ -821,34 +807,33 @@
 	 * adding appropriate IP header.
 	 */
 
-	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
-		fraggap = 0;
+	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
 		goto alloc_new_skb;
-	}
 
 	while (length > 0) {
-		if ((copy = maxfraglen - skb->len) <= 0) {
+		if ((copy = mtu - skb->len) <= 0) {
 			char *data;
 			unsigned int datalen;
 			unsigned int fraglen;
+			unsigned int fraggap;
 			unsigned int alloclen;
 			struct sk_buff *skb_prev;
-			BUG_TRAP(fraggap || copy == 0);
+			BUG_TRAP(copy == 0);
 
 alloc_new_skb:
 			skb_prev = skb;
+			fraggap = 0;
+			if (skb_prev)
+				fraggap = mtu - maxfraglen;
 
-			if (fraggap)
-				fraggap = -copy;
-
-			datalen = maxfraglen - fragheaderlen;
+			datalen = mtu - fragheaderlen;
 			if (datalen > length + fraggap)
 				datalen = length + fraggap;
 
 			fraglen = datalen + fragheaderlen;
 			if ((flags & MSG_MORE) && 
 			    !(rt->u.dst.dev->features&NETIF_F_SG))
-				alloclen = maxfraglen;
+				alloclen = mtu;
 			else
 				alloclen = datalen + fragheaderlen;
 
@@ -913,7 +898,6 @@
 			length -= datalen - fraggap;
 			transhdrlen = 0;
 			exthdrlen = 0;
-			fraggap = 0;
 			csummode = CHECKSUM_NONE;
 
 			/*
@@ -1006,7 +990,7 @@
 	int mtu;
 	int len;
 	int err;
-	unsigned int maxfraglen, fragheaderlen, fraggap = 0;
+	unsigned int maxfraglen, fragheaderlen, fraggap;
 
 	if (inet->hdrincl)
 		return -EPERM;
@@ -1028,27 +1012,13 @@
 	mtu = inet->cork.fragsize;
 
 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
+	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
 
 	if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
 		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
 		return -EMSGSIZE;
 	}
 
-	/*
-	 * Let's try using as much space as possible to avoid generating
-	 * additional unnecessary small fragment of length 
-	 * (mtu-fragheaderlen)%8 if mtu-fragheaderlen is not 0 modulo 8.
-	 * -- yoshfuji
-	 */
-	if (fragheaderlen + inet->cork.length + size <= mtu)
-		maxfraglen = mtu;
-	else
-		maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
-
-	if (fragheaderlen + inet->cork.length <= mtu &&
-	    fragheaderlen + inet->cork.length + size > mtu)
-		fraggap = 1;
-
 	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
 		return -EINVAL;
 
@@ -1056,17 +1026,18 @@
 
 	while (size > 0) {
 		int i;
-		if ((len = maxfraglen - skb->len) <= 0) {
+		if ((len = mtu - skb->len) <= 0) {
 			struct sk_buff *skb_prev;
 			char *data;
 			struct iphdr *iph;
 			int alloclen;
 
-			BUG_TRAP(fraggap || len == 0);
+			BUG_TRAP(len == 0);
 
 			skb_prev = skb;
-			if (fraggap)
-				fraggap = -len;
+			fraggap = 0;
+			if (skb_prev)
+				fraggap = mtu - maxfraglen;
 
 			alloclen = fragheaderlen + hh_len + fraggap + 15;
 			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
diff -Nru a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
--- a/net/ipv6/ip6_output.c	2004-09-02 17:39:01 +02:00
+++ b/net/ipv6/ip6_output.c	2004-09-02 17:39:01 +02:00
@@ -814,11 +814,11 @@
 	struct inet_opt *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff *skb;
-	unsigned int maxfraglen, fragheaderlen, fraggap = 0;
+	unsigned int maxfraglen, fragheaderlen;
 	int exthdrlen;
 	int hh_len;
 	int mtu;
-	int copy = 0;
+	int copy;
 	int err;
 	int offset = 0;
 	int csummode = CHECKSUM_NONE;
@@ -867,6 +867,7 @@
 	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
 
 	fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
+	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
 
 	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
 		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
@@ -883,46 +884,37 @@
 	 *
 	 * Note that we may need to "move" the data from the tail of
 	 * of the buffer to the new fragment when we split 
-	 * the message at the first time.
+	 * the message.
 	 *
 	 * FIXME: It may be fragmented into multiple chunks 
 	 *        at once if non-fragmentable extension headers
 	 *        are too large.
 	 * --yoshfuji 
 	 */
-	if (fragheaderlen + inet->cork.length + length <= mtu)
-		maxfraglen = mtu;
-	else
-		maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen 
-			     - sizeof(struct frag_hdr);
-
-	if (fragheaderlen + inet->cork.length <= mtu &&
-	    fragheaderlen + inet->cork.length + length > mtu)
-		fraggap = 1;
 
 	inet->cork.length += length;
 
-	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
-		fraggap = 0;
+	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
 		goto alloc_new_skb;
-	}
 
 	while (length > 0) {
-		if ((copy = maxfraglen - skb->len) <= 0) {
+		if ((copy = mtu - skb->len) <= 0) {
 			char *data;
 			unsigned int datalen;
 			unsigned int fraglen;
+			unsigned int fraggap;
 			unsigned int alloclen;
 			struct sk_buff *skb_prev;
-			BUG_TRAP(fraggap || copy == 0);
+			BUG_TRAP(copy == 0);
 alloc_new_skb:
 			skb_prev = skb;
 
 			/* There's no room in the current skb */
-			if (fraggap)
-				fraggap = -copy;
+			fraggap = 0;
+			if (skb_prev)
+				fraggap = mtu - maxfraglen;
 
-			datalen = maxfraglen - fragheaderlen;
+			datalen = mtu - fragheaderlen;
 
 			if (datalen > length + fraggap)
 				datalen = length + fraggap;
@@ -930,7 +922,7 @@
 			fraglen = datalen + fragheaderlen;
 			if ((flags & MSG_MORE) &&
 			    !(rt->u.dst.dev->features&NETIF_F_SG))
-				alloclen = maxfraglen;
+				alloclen = mtu;
 			else
 				alloclen = datalen + fragheaderlen;
 
@@ -1005,7 +997,6 @@
 			length -= datalen - fraggap;
 			transhdrlen = 0;
 			exthdrlen = 0;
-			fraggap = 0;
 			csummode = CHECKSUM_NONE;
 
 			/*

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2.6]: Fix suboptimal fragment sizing for last fragment
  2004-09-02 18:36 [PATCH 2.6]: Fix suboptimal fragment sizing for last fragment Patrick McHardy
@ 2004-09-02 19:48 ` YOSHIFUJI Hideaki / 吉藤英明
  2004-09-02 21:44 ` David S. Miller
  1 sibling, 0 replies; 11+ messages in thread
From: YOSHIFUJI Hideaki / 吉藤英明 @ 2004-09-02 19:48 UTC (permalink / raw)
  To: kaber; +Cc: davem, herbert, netdev, yoshfuji

In article <4137681D.3000902@trash.net> (at Thu, 02 Sep 2004 20:36:13 +0200), Patrick McHardy <kaber@trash.net> says:

> Yoshifuji's recent fragment patch prevents unnecessary fragmentation
> when the data can be kept in a single packet, but only for the first
> packet. When fragmenting, all fragments are still truncated to
> multiples of 8 and we might end up creating an unnecessary fragment.
> 
> This dump shows the problem (MTU 1499):
> 
> 172.16.1.123.32771 > 172.16.195.3.4135: udp 2937 (frag 7066:1472@0+)
> 172.16.1.123 > 172.16.195.3: udp (frag 7066:1472@1472+)
> 172.16.1.123 > 172.16.195.3: udp (frag 7066:1@2944)
> 
> This patch always builds mtu sized fragments and truncates the previous
> fragment to a multiple of 8 bytes when allocating a new one. With the
> patch the dump looks like this:
> 
> 
> 172.16.1.123.32772 > 172.16.195.3.4135: udp 2937 (frag 49641:1472@0+)
> 172.16.1.123 > 172.16.195.3: udp (frag 49641:1473@1472)

Let me clarify. 
Are you sending payload of 2945 bytes  (= udp payload of 2937 bytes)?

Good point.

I'll check this patch today. (Let me sleep for now...)
Anyway, please update the comment instead of removing completely.

Thanks.

--yoshfuji

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2.6]: Fix suboptimal fragment sizing for last fragment
  2004-09-02 18:36 [PATCH 2.6]: Fix suboptimal fragment sizing for last fragment Patrick McHardy
  2004-09-02 19:48 ` YOSHIFUJI Hideaki / 吉藤英明
@ 2004-09-02 21:44 ` David S. Miller
  2004-09-02 22:03   ` Herbert Xu
  1 sibling, 1 reply; 11+ messages in thread
From: David S. Miller @ 2004-09-02 21:44 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: yoshfuji, herbert, netdev

On Thu, 02 Sep 2004 20:36:13 +0200
Patrick McHardy <kaber@trash.net> wrote:

> This patch always builds mtu sized fragments and truncates the previous
> fragment to a multiple of 8 bytes when allocating a new one. With the
> patch the dump looks like this:

Looks great Patrick, applied.

I see only one remaining possible improvement.  If the fraggap
area is paged data, we probably should try use page frags
in the new SKB if this split occurs in ip_append_page().

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2.6]: Fix suboptimal fragment sizing for last fragment
  2004-09-02 21:44 ` David S. Miller
@ 2004-09-02 22:03   ` Herbert Xu
  2004-09-02 22:08     ` David S. Miller
  2004-09-03  1:40     ` YOSHIFUJI Hideaki / 吉藤英明
  0 siblings, 2 replies; 11+ messages in thread
From: Herbert Xu @ 2004-09-02 22:03 UTC (permalink / raw)
  To: David S. Miller; +Cc: Patrick McHardy, yoshfuji, netdev

[-- Attachment #1: Type: text/plain, Size: 1374 bytes --]

On Thu, Sep 02, 2004 at 02:44:36PM -0700, David S. Miller wrote:
> 
> I see only one remaining possible improvement.  If the fraggap
> area is paged data, we probably should try use page frags
> in the new SKB if this split occurs in ip_append_page().

Yes.  That could also tie into another optimisation.  But it's
an ugly one :) The sk->csum values are added up at the end of
the processing so when we move bytes to and fro the total csum
value stays the same even if the fragment csum values change.

Therefore we can get rid of the csum adjustments except for the
parity bit in the getfrag call.

Is this an acceptable optimisation to you guys?

Another thing we can do is to not always fill up the frags in the middle
and then move bytes off them.  As it is if you do a send that spans
multiple packets each fragment will be filled up to the full and then
chopped off when the next one is started.

And to finish it off, here is a really trivial patch to shave off 27
bytes from the source code :) It does nothing else, well unless your
compiler decides to compile csum_block_sub out-of-line.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

[-- Attachment #2: p --]
[-- Type: text/plain, Size: 1310 bytes --]

===== net/ipv4/ip_output.c 1.65 vs edited =====
--- 1.65/net/ipv4/ip_output.c	2004-09-02 15:07:28 +10:00
+++ edited/net/ipv4/ip_output.c	2004-09-03 07:53:54 +10:00
@@ -896,8 +896,8 @@
 				skb->csum = skb_copy_and_csum_bits(
 					skb_prev, maxfraglen,
 					data + transhdrlen, fraggap, 0);
-				skb_prev->csum = csum_block_sub(
-					skb_prev->csum, skb->csum, 0);
+				skb_prev->csum = csum_sub(skb_prev->csum,
+							  skb->csum);
 				data += fraggap;
 				skb_trim(skb_prev, maxfraglen);
 			}
@@ -1094,8 +1094,8 @@
 				skb->csum = skb_copy_and_csum_bits(
 					skb_prev, maxfraglen,
 					data, fraggap, 0);
-				skb_prev->csum = csum_block_sub(
-					skb_prev->csum, skb->csum, 0);
+				skb_prev->csum = csum_sub(skb_prev->csum,
+							  skb->csum);
 				skb_trim(skb_prev, maxfraglen);
 			}
 
===== net/ipv6/ip6_output.c 1.70 vs edited =====
--- 1.70/net/ipv6/ip6_output.c	2004-09-02 15:07:29 +10:00
+++ edited/net/ipv6/ip6_output.c	2004-09-03 07:54:41 +10:00
@@ -985,8 +985,8 @@
 				skb->csum = skb_copy_and_csum_bits(
 					skb_prev, maxfraglen,
 					data + transhdrlen, fraggap, 0);
-				skb_prev->csum = csum_block_sub(
-					skb_prev->csum, skb->csum, 0);
+				skb_prev->csum = csum_sub(skb_prev->csum,
+							  skb->csum);
 				data += fraggap;
 				skb_trim(skb_prev, maxfraglen);
 			}

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2.6]: Fix suboptimal fragment sizing for last fragment
  2004-09-02 22:03   ` Herbert Xu
@ 2004-09-02 22:08     ` David S. Miller
  2004-09-03  1:40     ` YOSHIFUJI Hideaki / 吉藤英明
  1 sibling, 0 replies; 11+ messages in thread
From: David S. Miller @ 2004-09-02 22:08 UTC (permalink / raw)
  To: Herbert Xu; +Cc: kaber, yoshfuji, netdev

On Fri, 3 Sep 2004 08:03:44 +1000
Herbert Xu <herbert@gondor.apana.org.au> wrote:

> That could also tie into another optimisation.  But it's
> an ugly one :) The sk->csum values are added up at the end of
> the processing so when we move bytes to and fro the total csum
> value stays the same even if the fragment csum values change.
> 
> Therefore we can get rid of the csum adjustments except for the
> parity bit in the getfrag call.
> 
> Is this an acceptable optimisation to you guys?

I have no problems with this.

> Another thing we can do is to not always fill up the frags in the middle
> and then move bytes off them.  As it is if you do a send that spans
> multiple packets each fragment will be filled up to the full and then
> chopped off when the next one is started.

Please elaborate.

> And to finish it off, here is a really trivial patch to shave off 27
> bytes from the source code :) It does nothing else, well unless your
> compiler decides to compile csum_block_sub out-of-line.

Applied :-)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2.6]: Fix suboptimal fragment sizing for last fragment
  2004-09-02 22:03   ` Herbert Xu
  2004-09-02 22:08     ` David S. Miller
@ 2004-09-03  1:40     ` YOSHIFUJI Hideaki / 吉藤英明
  2004-09-07 20:35       ` David S. Miller
  2004-09-07 23:15       ` Herbert Xu
  1 sibling, 2 replies; 11+ messages in thread
From: YOSHIFUJI Hideaki / 吉藤英明 @ 2004-09-03  1:40 UTC (permalink / raw)
  To: davem, herbert; +Cc: kaber, netdev, yoshfuji

In article <20040902220343.GA3250@gondor.apana.org.au> (at Fri, 3 Sep 2004 08:03:44 +1000), Herbert Xu <herbert@gondor.apana.org.au> says:

> Another thing we can do is to not always fill up the frags in the middle
> and then move bytes off them.  As it is if you do a send that spans
> multiple packets each fragment will be filled up to the full and then
> chopped off when the next one is started.

I think I had similar impression when I saw the Patrick's patch.

Here's the optimization: if we know the remaining data exceeds the
mtu, we do not need to fill up full of it.

skb_prev:
                mtu
+----------+--+-+
|          |  | |
+----------+--+-+
           ^  ^
skb_prev->len |
              maxfraglen

appending data:
           +--------+
           |        |
           +--------+
           --------->
                  length

In this case, we know we need more fragment(s).
So, let's fill up to maxfraglen (instead of mtu) 
to avoid needless copy in the next loop.

Signed-off-by: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>

===== net/ipv4/ip_output.c 1.67 vs edited =====
--- 1.67/net/ipv4/ip_output.c	2004-09-03 06:50:20 +09:00
+++ edited/net/ipv4/ip_output.c	2004-09-03 10:15:53 +09:00
@@ -811,26 +811,33 @@
 		goto alloc_new_skb;
 
 	while (length > 0) {
-		if ((copy = mtu - skb->len) <= 0) {
+		/* Check if the remaining data fits into current packet. */
+		copy = mtu - skb->len;
+		if (copy < length)
+			copy = maxfraglen - skb->len;
+		if (copy <= 0) {
 			char *data;
 			unsigned int datalen;
 			unsigned int fraglen;
 			unsigned int fraggap;
 			unsigned int alloclen;
 			struct sk_buff *skb_prev;
-			BUG_TRAP(copy == 0);
-
 alloc_new_skb:
 			skb_prev = skb;
-			fraggap = 0;
 			if (skb_prev)
-				fraggap = mtu - maxfraglen;
-
-			datalen = mtu - fragheaderlen;
-			if (datalen > length + fraggap)
-				datalen = length + fraggap;
+				fraggap = skb_prev->len - maxfraglen;
+			else
+				fraggap = 0;
 
+			/*
+			 * If remaining data exceeds the mtu,
+			 * we know we need more fragment(s).
+			 */
+			datalen = length + fraggap;
+			if (datalen > mtu - fragheaderlen)
+				datalen = maxfraglen - fragheaderlen;
 			fraglen = datalen + fragheaderlen;
+
 			if ((flags & MSG_MORE) && 
 			    !(rt->u.dst.dev->features&NETIF_F_SG))
 				alloclen = mtu;
@@ -1026,18 +1033,22 @@
 
 	while (size > 0) {
 		int i;
-		if ((len = mtu - skb->len) <= 0) {
+
+		/* Check if the remaining data fits into current packet. */
+		len = mtu - skb->len;
+		if (len > size)
+			len = maxfraglen - skb->len;
+		if (len <= 0) {
 			struct sk_buff *skb_prev;
 			char *data;
 			struct iphdr *iph;
 			int alloclen;
 
-			BUG_TRAP(len == 0);
-
 			skb_prev = skb;
-			fraggap = 0;
 			if (skb_prev)
-				fraggap = mtu - maxfraglen;
+				fraggap = skb_prev->len - maxfraglen;
+			else
+				fraggap = 0;
 
 			alloclen = fragheaderlen + hh_len + fraggap + 15;
 			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
===== net/ipv6/ip6_output.c 1.72 vs edited =====
--- 1.72/net/ipv6/ip6_output.c	2004-09-03 06:50:20 +09:00
+++ edited/net/ipv6/ip6_output.c	2004-09-03 10:24:57 +09:00
@@ -898,26 +898,34 @@
 		goto alloc_new_skb;
 
 	while (length > 0) {
-		if ((copy = mtu - skb->len) <= 0) {
+		/* Check if the remaining data fits into current packet. */
+		copy = mtu - skb->len;
+		if (copy < length)
+			copy = maxfraglen - skb->len;
+
+		if (copy <= 0) {
 			char *data;
 			unsigned int datalen;
 			unsigned int fraglen;
 			unsigned int fraggap;
 			unsigned int alloclen;
 			struct sk_buff *skb_prev;
-			BUG_TRAP(copy == 0);
 alloc_new_skb:
 			skb_prev = skb;
 
 			/* There's no room in the current skb */
-			fraggap = 0;
 			if (skb_prev)
-				fraggap = mtu - maxfraglen;
-
-			datalen = mtu - fragheaderlen;
+				fraggap = skb_prev->len - maxfraglen;
+			else
+				fraggap = 0;
 
-			if (datalen > length + fraggap)
-				datalen = length + fraggap;
+			/*
+			 * If remaining data exceeds the mtu,
+			 * we know we need more fragment(s).
+			 */
+			datalen = length + fraggap;
+			if (datalen > mtu - fragheaderlen)
+				datalen = maxfraglen - fragheaderlen;
 
 			fraglen = datalen + fragheaderlen;
 			if ((flags & MSG_MORE) &&

-- 
Hideaki YOSHIFUJI @ USAGI Project <yoshfuji@linux-ipv6.org>
GPG FP: 9022 65EB 1ECF 3AD1 0BDF  80D8 4807 F894 E062 0EEA

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2.6]: Fix suboptimal fragment sizing for last fragment
  2004-09-03  1:40     ` YOSHIFUJI Hideaki / 吉藤英明
@ 2004-09-07 20:35       ` David S. Miller
  2004-09-07 23:15       ` Herbert Xu
  1 sibling, 0 replies; 11+ messages in thread
From: David S. Miller @ 2004-09-07 20:35 UTC (permalink / raw)
  To: yoshfuji; +Cc: herbert, kaber, netdev, yoshfuji

On Fri, 03 Sep 2004 10:40:50 +0900 (JST)
YOSHIFUJI Hideaki / ^[$B5HF#1QL@^[(B <yoshfuji@linux-ipv6.org> wrote:

> In this case, we know we need more fragment(s).
> So, let's fill up to maxfraglen (instead of mtu) 
> to avoid needless copy in the next loop.
> 
> Signed-off-by: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>

Has anyone tested or reviewed this patch?
I'm just walking through my backlog trying
to figure out what I need to spend time on.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2.6]: Fix suboptimal fragment sizing for last fragment
  2004-09-03  1:40     ` YOSHIFUJI Hideaki / 吉藤英明
  2004-09-07 20:35       ` David S. Miller
@ 2004-09-07 23:15       ` Herbert Xu
  2004-09-07 23:26         ` YOSHIFUJI Hideaki / 吉藤英明
  1 sibling, 1 reply; 11+ messages in thread
From: Herbert Xu @ 2004-09-07 23:15 UTC (permalink / raw)
  To: YOSHIFUJI Hideaki / ?$B5HF#1QL@; +Cc: davem, kaber, netdev

On Fri, Sep 03, 2004 at 10:40:50AM +0900, YOSHIFUJI Hideaki / ?$B5HF#1QL@ wrote:
>
> @@ -1026,18 +1033,22 @@
>  
>  	while (size > 0) {
>  		int i;
> -		if ((len = mtu - skb->len) <= 0) {
> +
> +		/* Check if the remaining data fits into current packet. */
> +		len = mtu - skb->len;
> +		if (len > size)
> +			len = maxfraglen - skb->len;

I think that should be len < size, right?

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2.6]: Fix suboptimal fragment sizing for last fragment
  2004-09-07 23:15       ` Herbert Xu
@ 2004-09-07 23:26         ` YOSHIFUJI Hideaki / 吉藤英明
  2004-09-08  3:21           ` Herbert Xu
  0 siblings, 1 reply; 11+ messages in thread
From: YOSHIFUJI Hideaki / 吉藤英明 @ 2004-09-07 23:26 UTC (permalink / raw)
  To: herbert, davem; +Cc: kaber, netdev, yoshfuji

In article <20040907231557.GA2254@gondor.apana.org.au> (at Wed, 8 Sep 2004 09:15:57 +1000), Herbert Xu <herbert@gondor.apana.org.au> says:

> > +		len = mtu - skb->len;
> > +		if (len > size)
> > +			len = maxfraglen - skb->len;
> 
> I think that should be len < size, right?

oh, right, thanks.

Signed-off-by: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>

===== net/ipv4/ip_output.c 1.67 vs edited =====
--- 1.67/net/ipv4/ip_output.c	2004-09-03 06:50:20 +09:00
+++ edited/net/ipv4/ip_output.c	2004-09-08 08:22:29 +09:00
@@ -811,26 +811,33 @@
 		goto alloc_new_skb;
 
 	while (length > 0) {
-		if ((copy = mtu - skb->len) <= 0) {
+		/* Check if the remaining data fits into current packet. */
+		copy = mtu - skb->len;
+		if (copy < length)
+			copy = maxfraglen - skb->len;
+		if (copy <= 0) {
 			char *data;
 			unsigned int datalen;
 			unsigned int fraglen;
 			unsigned int fraggap;
 			unsigned int alloclen;
 			struct sk_buff *skb_prev;
-			BUG_TRAP(copy == 0);
-
 alloc_new_skb:
 			skb_prev = skb;
-			fraggap = 0;
 			if (skb_prev)
-				fraggap = mtu - maxfraglen;
-
-			datalen = mtu - fragheaderlen;
-			if (datalen > length + fraggap)
-				datalen = length + fraggap;
+				fraggap = skb_prev->len - maxfraglen;
+			else
+				fraggap = 0;
 
+			/*
+			 * If remaining data exceeds the mtu,
+			 * we know we need more fragment(s).
+			 */
+			datalen = length + fraggap;
+			if (datalen > mtu - fragheaderlen)
+				datalen = maxfraglen - fragheaderlen;
 			fraglen = datalen + fragheaderlen;
+
 			if ((flags & MSG_MORE) && 
 			    !(rt->u.dst.dev->features&NETIF_F_SG))
 				alloclen = mtu;
@@ -1026,18 +1033,22 @@
 
 	while (size > 0) {
 		int i;
-		if ((len = mtu - skb->len) <= 0) {
+
+		/* Check if the remaining data fits into current packet. */
+		len = mtu - skb->len;
+		if (len < size)
+			len = maxfraglen - skb->len;
+		if (len <= 0) {
 			struct sk_buff *skb_prev;
 			char *data;
 			struct iphdr *iph;
 			int alloclen;
 
-			BUG_TRAP(len == 0);
-
 			skb_prev = skb;
-			fraggap = 0;
 			if (skb_prev)
-				fraggap = mtu - maxfraglen;
+				fraggap = skb_prev->len - maxfraglen;
+			else
+				fraggap = 0;
 
 			alloclen = fragheaderlen + hh_len + fraggap + 15;
 			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
===== net/ipv6/ip6_output.c 1.72 vs edited =====
--- 1.72/net/ipv6/ip6_output.c	2004-09-03 06:50:20 +09:00
+++ edited/net/ipv6/ip6_output.c	2004-09-08 08:18:28 +09:00
@@ -898,26 +898,34 @@
 		goto alloc_new_skb;
 
 	while (length > 0) {
-		if ((copy = mtu - skb->len) <= 0) {
+		/* Check if the remaining data fits into current packet. */
+		copy = mtu - skb->len;
+		if (copy < length)
+			copy = maxfraglen - skb->len;
+
+		if (copy <= 0) {
 			char *data;
 			unsigned int datalen;
 			unsigned int fraglen;
 			unsigned int fraggap;
 			unsigned int alloclen;
 			struct sk_buff *skb_prev;
-			BUG_TRAP(copy == 0);
 alloc_new_skb:
 			skb_prev = skb;
 
 			/* There's no room in the current skb */
-			fraggap = 0;
 			if (skb_prev)
-				fraggap = mtu - maxfraglen;
-
-			datalen = mtu - fragheaderlen;
+				fraggap = skb_prev->len - maxfraglen;
+			else
+				fraggap = 0;
 
-			if (datalen > length + fraggap)
-				datalen = length + fraggap;
+			/*
+			 * If remaining data exceeds the mtu,
+			 * we know we need more fragment(s).
+			 */
+			datalen = length + fraggap;
+			if (datalen > mtu - fragheaderlen)
+				datalen = maxfraglen - fragheaderlen;
 
 			fraglen = datalen + fragheaderlen;
 			if ((flags & MSG_MORE) &&


-- 
Hideaki YOSHIFUJI @ USAGI Project <yoshfuji@linux-ipv6.org>
GPG FP: 9022 65EB 1ECF 3AD1 0BDF  80D8 4807 F894 E062 0EEA

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2.6]: Fix suboptimal fragment sizing for last fragment
  2004-09-07 23:26         ` YOSHIFUJI Hideaki / 吉藤英明
@ 2004-09-08  3:21           ` Herbert Xu
  2004-09-08 20:38             ` David S. Miller
  0 siblings, 1 reply; 11+ messages in thread
From: Herbert Xu @ 2004-09-08  3:21 UTC (permalink / raw)
  To: YOSHIFUJI Hideaki / ?$B5HF#1QL@; +Cc: davem, kaber, netdev

On Wed, Sep 08, 2004 at 08:26:20AM +0900, YOSHIFUJI Hideaki / ?$B5HF#1QL@ wrote:
> 
> oh, right, thanks.
> 
> Signed-off-by: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>

Looks good.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2.6]: Fix suboptimal fragment sizing for last fragment
  2004-09-08  3:21           ` Herbert Xu
@ 2004-09-08 20:38             ` David S. Miller
  0 siblings, 0 replies; 11+ messages in thread
From: David S. Miller @ 2004-09-08 20:38 UTC (permalink / raw)
  To: Herbert Xu; +Cc: yoshfuji, kaber, netdev

On Wed, 8 Sep 2004 13:21:58 +1000
Herbert Xu <herbert@gondor.apana.org.au> wrote:

> On Wed, Sep 08, 2004 at 08:26:20AM +0900, YOSHIFUJI Hideaki / ?$B5HF#1QL@ wrote:
> > 
> > oh, right, thanks.
> > 
> > Signed-off-by: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
> 
> Looks good.
> 
> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

To me too, applied.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2004-09-08 20:38 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-09-02 18:36 [PATCH 2.6]: Fix suboptimal fragment sizing for last fragment Patrick McHardy
2004-09-02 19:48 ` YOSHIFUJI Hideaki / 吉藤英明
2004-09-02 21:44 ` David S. Miller
2004-09-02 22:03   ` Herbert Xu
2004-09-02 22:08     ` David S. Miller
2004-09-03  1:40     ` YOSHIFUJI Hideaki / 吉藤英明
2004-09-07 20:35       ` David S. Miller
2004-09-07 23:15       ` Herbert Xu
2004-09-07 23:26         ` YOSHIFUJI Hideaki / 吉藤英明
2004-09-08  3:21           ` Herbert Xu
2004-09-08 20:38             ` David S. Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).