All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stephen Hemminger <shemminger@osdl.org>
To: David Miller <davem@davemloft.net>, John Heffner <jheffner@psc.edu>
Cc: netdev@vger.kernel.org
Subject: [RFC] TCP limited slow start
Date: Fri, 2 Jun 2006 18:54:03 -0700	[thread overview]
Message-ID: <20060602185403.1549e3c6@localhost.localdomain> (raw)
In-Reply-To: <20060602161312.3cb2ea66@localhost.localdomain>

Rolled my sleeves up and gave this a try...

This is an implementation of Sally Floyd's Limited Slow-Start
for Large Congestion Windows (RFC 3742).

Summary from RFC:
   Limited Slow-Start introduces a parameter, "max_ssthresh", and
   modifies the slow-start mechanism for values of the congestion window
   where "cwnd" is greater than "max_ssthresh".  That is, during Slow-
   Start, when

      cwnd <= max_ssthresh,

   cwnd is increased by one MSS (MAXIMUM SEGMENT SIZE) for every
   arriving ACK (acknowledgement) during slow-start, as is always the
   case.  During Limited Slow-Start, when

      max_ssthresh < cwnd <= ssthresh,

   the invariant is maintained so that the congestion window is
   increased during slow-start by at most max_ssthresh/2 MSS per round-
   trip time.  This is done as follows:

      For each arriving ACK in slow-start:
        If (cwnd <= max_ssthresh)
           cwnd += MSS;
        else
           K = int(cwnd/(0.5 max_ssthresh));
           cwnd += int(MSS/K);

   Thus, during Limited Slow-Start the window is increased by 1/K MSS
   for each arriving ACK, for K = int(cwnd/(0.5 max_ssthresh)), instead
   of by 1 MSS as in standard slow-start [RFC2581].

---

 Documentation/networking/ip-sysctl.txt |    8 +++++-
 include/linux/sysctl.h                 |    1 +
 include/net/tcp.h                      |    1 +
 net/ipv4/sysctl_net_ipv4.c             |    8 ++++++
 net/ipv4/tcp_cong.c                    |   46 ++++++++++++++++++++------------
 net/ipv4/tcp_input.c                   |    1 +
 6 files changed, 47 insertions(+), 18 deletions(-)

0884f45c9f21c50dd9117b2fc02bf5436be3c3bf
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index f12007b..9869298 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -103,9 +103,15 @@ TCP variables: 
 
 tcp_abc - INTEGER
 	Controls Appropriate Byte Count defined in RFC3465. If set to
-	0 then does congestion avoid once per ack. 1 is conservative
+	0 then does congestion avoid once per ack. 1 (default) is conservative
 	value, and 2 is more agressive.
 
+tcp_max_ssthresh - INTEGER
+	Controls the increase of the congestion window during slow start as
+	defined in RFC3742. The purpose is to slow the growth of the congestion
+	window on high delay networks where aggressive growth can cause losses
+	of thousands of packets. Default is 100 packets.
+
 tcp_syn_retries - INTEGER
 	Number of times initial SYNs for an active TCP connection attempt
 	will be retransmitted. Should not be higher than 255. Default value
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 76eaeff..a455165 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -403,6 +403,7 @@ enum
  	NET_TCP_MTU_PROBING=113,
 	NET_TCP_BASE_MSS=114,
 	NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115,
+	NET_TCP_LIMITED_SSTHRESH=116,
 };
 
 enum {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 575636f..3a14861 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -225,6 +225,7 @@ extern int sysctl_tcp_abc;
 extern int sysctl_tcp_mtu_probing;
 extern int sysctl_tcp_base_mss;
 extern int sysctl_tcp_workaround_signed_windows;
+extern int sysctl_tcp_limited_ssthresh;
 
 extern atomic_t tcp_memory_allocated;
 extern atomic_t tcp_sockets_allocated;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 6b6c3ad..d1358d3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -688,6 +688,14 @@ #endif
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
+	{
+		.ctl_name	= NET_TCP_LIMITED_SSTHRESH,
+		.procname	= "tcp_max_ssthresh",
+		.data		= &sysctl_tcp_limited_ssthresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 	{ .ctl_name = 0 }
 };
 
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 857eefc..a27c792 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -180,25 +180,37 @@ int tcp_set_congestion_control(struct so
  */
 void tcp_slow_start(struct tcp_sock *tp)
 {
-	if (sysctl_tcp_abc) {
-		/* RFC3465: Slow Start
-		 * TCP sender SHOULD increase cwnd by the number of
-		 * previously unacknowledged bytes ACKed by each incoming
-		 * acknowledgment, provided the increase is not more than L
-		 */
-		if (tp->bytes_acked < tp->mss_cache)
-			return;
-
-		/* We MAY increase by 2 if discovered delayed ack */
-		if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
-			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-				tp->snd_cwnd++;
-		}
+	/* RFC3465: Appropriate Byte Counting Slow Start
+	 * TCP sender SHOULD increase cwnd by the number of
+	 * previously unacknowledged bytes ACKed by each incoming
+	 * acknowledgment, provided the increase is not more than L
+	 */
+	if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache)
+		return;
+
+	/* RFC3742: limited slow start
+	 * the window is increased by 1/K MSS for each arriving ACK, 
+	 * for K = int(cwnd/(0.5 max_ssthresh))
+	 */
+	if (sysctl_tcp_limited_ssthresh
+	    && tp->snd_cwnd > sysctl_tcp_limited_ssthresh) {
+		u32 k = max(tp->snd_cwnd / (sysctl_tcp_limited_ssthresh >> 1), 1U);
+ 		if (++tp->snd_cwnd_cnt >= k) {
+ 			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ 				tp->snd_cwnd++;
+ 			tp->snd_cwnd_cnt = 0;
+ 		} 
+	} else {
+		/* ABC: We MAY increase by 2 if discovered delayed ack */
+		if (sysctl_tcp_abc > 1
+		    && tp->bytes_acked > 2*tp->mss_cache 
+		    && tp->snd_cwnd < tp->snd_cwnd_clamp)
+			tp->snd_cwnd++;
+
+		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+			tp->snd_cwnd++;
 	}
 	tp->bytes_acked = 0;
-
-	if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-		tp->snd_cwnd++;
 }
 EXPORT_SYMBOL_GPL(tcp_slow_start);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 718d0f2..80dd5e4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -90,6 +90,7 @@ int sysctl_tcp_nometrics_save;
 
 int sysctl_tcp_moderate_rcvbuf = 1;
 int sysctl_tcp_abc = 1;
+int sysctl_tcp_limited_ssthresh = 100;
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
-- 
1.3.3


  reply	other threads:[~2006-06-03  1:54 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-06-02 23:13 TCP Limited slow start Stephen Hemminger
2006-06-03  1:54 ` Stephen Hemminger [this message]
2006-06-03 16:46   ` [RFC] TCP limited " John Heffner
2006-06-05 17:17     ` Stephen Hemminger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060602185403.1549e3c6@localhost.localdomain \
    --to=shemminger@osdl.org \
    --cc=davem@davemloft.net \
    --cc=jheffner@psc.edu \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.