netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] skb pcount with MTU discovery
@ 2005-04-01 21:05 John Heffner
  2005-04-01 21:10 ` David S. Miller
  0 siblings, 1 reply; 6+ messages in thread
From: John Heffner @ 2005-04-01 21:05 UTC (permalink / raw)
  To: davem, netdev

The problem is that when doing MTU discovery, the too-large segments in
the write queue will be calculated as having a pcount of >1.  When
tcp_write_xmit() is trying to send, tcp_snd_test() fails the cwnd test
when pcount > cwnd.

The segments are eventually transmitted one at a time by keepalive, but
this can take a long time.

This patch checks if TSO is enabled when setting pcount.

  -John



Signed-off-by: John Heffner <jheffner@psc.edu>


===== include/net/tcp.h 1.114 vs edited =====
--- 1.114/include/net/tcp.h	2005-03-31 11:51:09 -05:00
+++ edited/include/net/tcp.h	2005-04-01 14:44:13 -05:00
@@ -1470,19 +1470,20 @@
 		  tcp_minshall_check(tp))));
 }

-extern void tcp_set_skb_tso_segs(struct sk_buff *, unsigned int);
+extern void tcp_set_skb_tso_segs(struct sock *, struct sk_buff *);

 /* This checks if the data bearing packet SKB (usually sk->sk_send_head)
  * should be put on the wire right now.
  */
-static __inline__ int tcp_snd_test(const struct tcp_sock *tp,
+static __inline__ int tcp_snd_test(struct sock *sk,
 				   struct sk_buff *skb,
 				   unsigned cur_mss, int nonagle)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	int pkts = tcp_skb_pcount(skb);

 	if (!pkts) {
-		tcp_set_skb_tso_segs(skb, tp->mss_cache_std);
+		tcp_set_skb_tso_segs(sk, skb);
 		pkts = tcp_skb_pcount(skb);
 	}

@@ -1543,7 +1544,7 @@
 	if (skb) {
 		if (!tcp_skb_is_last(sk, skb))
 			nonagle = TCP_NAGLE_PUSH;
-		if (!tcp_snd_test(tp, skb, cur_mss, nonagle) ||
+		if (!tcp_snd_test(sk, skb, cur_mss, nonagle) ||
 		    tcp_write_xmit(sk, nonagle))
 			tcp_check_probe_timer(sk, tp);
 	}
@@ -1561,7 +1562,7 @@
 	struct sk_buff *skb = sk->sk_send_head;

 	return (skb &&
-		tcp_snd_test(tp, skb, tcp_current_mss(sk, 1),
+		tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
 			     tcp_skb_is_last(sk, skb) ? TCP_NAGLE_PUSH : tp->nonagle));
 }

===== net/ipv4/tcp_output.c 1.90 vs edited =====
--- 1.90/net/ipv4/tcp_output.c	2005-04-01 09:08:34 -05:00
+++ edited/net/ipv4/tcp_output.c	2005-04-01 14:45:27 -05:00
@@ -433,7 +433,7 @@
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb = sk->sk_send_head;

-	if (tcp_snd_test(tp, skb, cur_mss, TCP_NAGLE_PUSH)) {
+	if (tcp_snd_test(sk, skb, cur_mss, TCP_NAGLE_PUSH)) {
 		/* Send it out now. */
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 		tcp_tso_set_push(skb);
@@ -446,9 +446,12 @@
 	}
 }

-void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_std)
+void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
 {
-	if (skb->len <= mss_std) {
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (skb->len <= tp->mss_cache_std ||
+	    !(sk->sk_route_caps & NETIF_F_TSO)) {
 		/* Avoid the costly divide in the normal
 		 * non-TSO case.
 		 */
@@ -457,10 +460,10 @@
 	} else {
 		unsigned int factor;

-		factor = skb->len + (mss_std - 1);
-		factor /= mss_std;
+		factor = skb->len + (tp->mss_cache_std - 1);
+		factor /= tp->mss_cache_std;
 		skb_shinfo(skb)->tso_segs = factor;
-		skb_shinfo(skb)->tso_size = mss_std;
+		skb_shinfo(skb)->tso_size = tp->mss_cache_std;
 	}
 }

@@ -531,8 +534,8 @@
 	}

 	/* Fix up tso_factor for both original and new SKB.  */
-	tcp_set_skb_tso_segs(skb, tp->mss_cache_std);
-	tcp_set_skb_tso_segs(buff, tp->mss_cache_std);
+	tcp_set_skb_tso_segs(sk, skb);
+	tcp_set_skb_tso_segs(sk, buff);

 	if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
 		tp->lost_out += tcp_skb_pcount(skb);
@@ -607,7 +610,7 @@
 	 * factor and mss.
 	 */
 	if (tcp_skb_pcount(skb) > 1)
-		tcp_set_skb_tso_segs(skb, tcp_skb_mss(skb));
+		tcp_set_skb_tso_segs(sk, skb);

 	return 0;
 }
@@ -815,7 +818,7 @@
 			sk_stream_free_skb(sk, skb);
 		} else {
 			TCP_SKB_CB(skb)->seq += copy;
-			tcp_set_skb_tso_segs(skb, tp->mss_cache_std);
+			tcp_set_skb_tso_segs(sk, skb);
 		}

 		len += copy;
@@ -824,7 +827,7 @@

 	__skb_insert(nskb, skb->prev, skb, &sk->sk_write_queue);
 	sk->sk_send_head = nskb;
-	tcp_set_skb_tso_segs(nskb, tp->mss_cache_std);
+	tcp_set_skb_tso_segs(sk, nskb);

 	/* We're ready to send.  If this fails, the probe will
 	 * be resegmented into mss-sized pieces by tcp_write_xmit(). */
@@ -885,7 +888,7 @@
 		mss_now = tcp_current_mss(sk, 1);

 		while ((skb = sk->sk_send_head) &&
-		       tcp_snd_test(tp, skb, mss_now,
+		       tcp_snd_test(sk, skb, mss_now,
 			       	    tcp_skb_is_last(sk, skb) ? nonagle :
 				    			       TCP_NAGLE_PUSH)) {
 			if (skb->len > mss_now) {
@@ -1822,7 +1825,7 @@
 					tp->mss_cache = tp->mss_cache_std;
 				}
 			} else if (!tcp_skb_pcount(skb))
-				tcp_set_skb_tso_segs(skb, tp->mss_cache_std);
+				tcp_set_skb_tso_segs(sk, skb);

 			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
 			TCP_SKB_CB(skb)->when = tcp_time_stamp;

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] skb pcount with MTU discovery
  2005-04-01 21:05 [PATCH] skb pcount with MTU discovery John Heffner
@ 2005-04-01 21:10 ` David S. Miller
  2005-04-01 21:22   ` John Heffner
  0 siblings, 1 reply; 6+ messages in thread
From: David S. Miller @ 2005-04-01 21:10 UTC (permalink / raw)
  To: John Heffner; +Cc: netdev

On Fri, 1 Apr 2005 16:05:49 -0500 (EST)
John Heffner <jheffner@psc.edu> wrote:

> The problem is that when doing MTU discovery, the too-large segments in
> the write queue will be calculated as having a pcount of >1.  When
> tcp_write_xmit() is trying to send, tcp_snd_test() fails the cwnd test
> when pcount > cwnd.
> 
> The segments are eventually transmitted one at a time by keepalive, but
> this can take a long time.
> 
> This patch checks if TSO is enabled when setting pcount.

Why isn't the MSS properly updated at this point in time?
If it were, the pcount setting would do the right thing.

That's how this code is supposed to work.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] skb pcount with MTU discovery
  2005-04-01 21:10 ` David S. Miller
@ 2005-04-01 21:22   ` John Heffner
  2005-04-01 22:47     ` Herbert Xu
  0 siblings, 1 reply; 6+ messages in thread
From: John Heffner @ 2005-04-01 21:22 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev

On Fri, 1 Apr 2005, David S. Miller wrote:

> On Fri, 1 Apr 2005 16:05:49 -0500 (EST)
> John Heffner <jheffner@psc.edu> wrote:
>
> > The problem is that when doing MTU discovery, the too-large segments in
> > the write queue will be calculated as having a pcount of >1.  When
> > tcp_write_xmit() is trying to send, tcp_snd_test() fails the cwnd test
> > when pcount > cwnd.
> >
> > The segments are eventually transmitted one at a time by keepalive, but
> > this can take a long time.
> >
> > This patch checks if TSO is enabled when setting pcount.
>
> Why isn't the MSS properly updated at this point in time?
> If it were, the pcount setting would do the right thing.
>
> That's how this code is supposed to work.

The problem occurs when TSO is disabled.

Common case, start out with mss of 8948.  Send 2 segments; neither are
acknowledged, and we receive an ICMP can't fragment indicating a pmtu of
1500 so mss is set down to 1448.  Now tcp_set_skb_tso_segs() sets tso_segs
to 6, so tcp_snd_test thinks we are doing TSO and will send the full 6
mss, and fails the cwnd test since cwnd == 2.

  -John

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] skb pcount with MTU discovery
  2005-04-01 21:22   ` John Heffner
@ 2005-04-01 22:47     ` Herbert Xu
  2005-04-02 15:32       ` John Heffner
  0 siblings, 1 reply; 6+ messages in thread
From: Herbert Xu @ 2005-04-01 22:47 UTC (permalink / raw)
  To: John Heffner; +Cc: davem, netdev

John Heffner <jheffner@psc.edu> wrote:
> 
> Common case, start out with mss of 8948.  Send 2 segments; neither are
> acknowledged, and we receive an ICMP can't fragment indicating a pmtu of
> 1500 so mss is set down to 1448.  Now tcp_set_skb_tso_segs() sets tso_segs
> to 6, so tcp_snd_test thinks we are doing TSO and will send the full 6
> mss, and fails the cwnd test since cwnd == 2.

How about fixing tcp_snd_test directly like this?

Of course all this will be moot once Dave finishes his TSO rewrite :)
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
===== include/net/tcp.h 1.107 vs edited =====
--- 1.107/include/net/tcp.h	2005-03-16 10:15:03 +11:00
+++ edited/include/net/tcp.h	2005-04-02 08:45:48 +10:00
@@ -1433,6 +1433,9 @@
 		pkts = tcp_skb_pcount(skb);
 	}
 
+	if (!(tp->inet.sk.sk_route_caps & NETIF_F_TSO))
+		pkts = 1;
+
 	/*	RFC 1122 - section 4.2.3.4
 	 *
 	 *	We must queue if

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] skb pcount with MTU discovery
  2005-04-01 22:47     ` Herbert Xu
@ 2005-04-02 15:32       ` John Heffner
  2005-04-02 19:35         ` Herbert Xu
  0 siblings, 1 reply; 6+ messages in thread
From: John Heffner @ 2005-04-02 15:32 UTC (permalink / raw)
  To: Herbert Xu; +Cc: davem, netdev

On Sat, 2 Apr 2005, Herbert Xu wrote:

> How about fixing tcp_snd_test directly like this?

I tried that first, but it caused a panic.  I assumed some other point in
the code assumed that invariant that if TSO is disabled then tso_segs==1.
I didn't investigate though.

> Of course all this will be moot once Dave finishes his TSO rewrite :)

That will make things much simpler. ;)

  -John

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] skb pcount with MTU discovery
  2005-04-02 15:32       ` John Heffner
@ 2005-04-02 19:35         ` Herbert Xu
  0 siblings, 0 replies; 6+ messages in thread
From: Herbert Xu @ 2005-04-02 19:35 UTC (permalink / raw)
  To: John Heffner; +Cc: davem, netdev

On Sat, Apr 02, 2005 at 10:32:32AM -0500, John Heffner wrote:
> On Sat, 2 Apr 2005, Herbert Xu wrote:
> 
> > How about fixing tcp_snd_test directly like this?
> 
> I tried that first, but it caused a panic.  I assumed some other point in
> the code assumed that invariant that if TSO is disabled then tso_segs==1.
> I didn't investigate though.

Do you remember what the panic looked like? Perhaps it was because
tso_segs wasn't set at all?

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2005-04-02 19:35 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-04-01 21:05 [PATCH] skb pcount with MTU discovery John Heffner
2005-04-01 21:10 ` David S. Miller
2005-04-01 21:22   ` John Heffner
2005-04-01 22:47     ` Herbert Xu
2005-04-02 15:32       ` John Heffner
2005-04-02 19:35         ` Herbert Xu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).