All of lore.kernel.org
 help / color / mirror / Atom feed
From: Baruch Even <baruch@ev-en.org>
To: "David S. Miller" <davem@davemloft.net>,
	Stephen Hemminger <shemminger@osdl.org>
Cc: netdev@oss.sgi.com, linux-net@vger.kernel.org,
	Yee-Ting Li <yee-ting.li@nuim.ie>,
	Doug Leith <doug.leith@nuim.ie>
Subject: [PATCH] select congestion control with one sysctl
Date: Wed, 23 Feb 2005 21:30:13 +0000	[thread overview]
Message-ID: <421CF5E5.1060606@ev-en.org> (raw)

[-- Attachment #1: Type: text/plain, Size: 591 bytes --]

This patch makes selection of congestion control algorithm simpler by 
using a single sysctl for that purpose, rather than a cascade of sysctls.

The patch also does some minor cleanups to avoid cascading actions between
algorithms so that the control flow of the code is cleaner.

Possible improvements:
  - Use a string when reading/writing from sysctl to make it more
    friendly to humans.
  - And/Or, provide a list of all available congestion control
    algorithms.

The patch is against 2.6.11-rc4-bk9.

Signed-Off-By: Yee-Ting Li <yee-ting.li@nuim.ie>
Signed-Off-By: Baruch Even <baruch@ev-en.org>


[-- Attachment #2: cong_control_change.patch --]
[-- Type: text/x-patch, Size: 10005 bytes --]

This patch makes selection of congestion control algorithm simpler by using a
single sysctl for that purpose, rather than a cascade of sysctls.

The patch also does some minor cleanups to avoid cascading actions between
algorithms so that the control flow of the code is cleaner.

Possible improvements:
 - Use a string when reading/writing from sysctl to make it more friendly to humans
 - And/Or, provide a list of all available congestion control algorithms

The patch is against 2.6.11-rc4-bk9.

Signed-Off-By: Yee-Ting Li <yee-ting.li@nuim.ie>
Signed-Off-By: Baruch Even <baruch@ev-en.org>

Index: 2.6.11-select/include/linux/sysctl.h
===================================================================
--- 2.6.11-select.orig/include/linux/sysctl.h
+++ 2.6.11-select/include/linux/sysctl.h
@@ -344,6 +344,7 @@ enum
 	NET_TCP_DEFAULT_WIN_SCALE=105,
 	NET_TCP_MODERATE_RCVBUF=106,
 	NET_TCP_TSO_WIN_DIVISOR=107,
+	NET_TCP_ADV_CONG=108,
 };
 
 enum {
Index: 2.6.11-select/include/net/tcp.h
===================================================================
--- 2.6.11-select.orig/include/net/tcp.h
+++ 2.6.11-select/include/net/tcp.h
@@ -597,13 +597,11 @@ extern int sysctl_tcp_adv_win_scale;
 extern int sysctl_tcp_tw_reuse;
 extern int sysctl_tcp_frto;
 extern int sysctl_tcp_low_latency;
-extern int sysctl_tcp_westwood;
-extern int sysctl_tcp_vegas_cong_avoid;
 extern int sysctl_tcp_vegas_alpha;
 extern int sysctl_tcp_vegas_beta;
 extern int sysctl_tcp_vegas_gamma;
 extern int sysctl_tcp_nometrics_save;
-extern int sysctl_tcp_bic;
+extern int sysctl_tcp_adv_cong;
 extern int sysctl_tcp_bic_fast_convergence;
 extern int sysctl_tcp_bic_low_window;
 extern int sysctl_tcp_moderate_rcvbuf;
@@ -1241,7 +1239,8 @@ static __inline__ unsigned int tcp_packe
  */
 static inline __u32 tcp_recalc_ssthresh(struct tcp_sock *tp)
 {
-	if (tcp_is_bic(tp)) {
+	switch (tp->adv_cong) {
+	case TCP_BIC:
 		if (sysctl_tcp_bic_fast_convergence &&
 		    tp->snd_cwnd < tp->bictcp.last_max_cwnd)
 			tp->bictcp.last_max_cwnd
@@ -1253,9 +1252,11 @@ static inline __u32 tcp_recalc_ssthresh(
 		if (tp->snd_cwnd > sysctl_tcp_bic_low_window)
 			return max(tp->snd_cwnd - (tp->snd_cwnd/BICTCP_1_OVER_BETA),
 				   2U);
-	}
+		break;
 
-	return max(tp->snd_cwnd >> 1U, 2U);
+	default:
+		return max(tp->snd_cwnd >> 1U, 2U);
+	}
 }
 
 /* Stop taking Vegas samples for now. */
@@ -1980,24 +1981,19 @@ static inline void tcp_westwood_update_r
                 tp->westwood.rtt = rtt_seq;
 }
 
-static inline __u32 __tcp_westwood_bw_rttmin(const struct tcp_sock *tp)
+static inline __u32 tcp_westwood_bw_rttmin(const struct tcp_sock *tp)
 {
         return max((tp->westwood.bw_est) * (tp->westwood.rtt_min) /
 		   (__u32) (tp->mss_cache_std),
 		   2U);
 }
 
-static inline __u32 tcp_westwood_bw_rttmin(const struct tcp_sock *tp)
-{
-	return tcp_is_westwood(tp) ? __tcp_westwood_bw_rttmin(tp) : 0;
-}
-
 static inline int tcp_westwood_ssthresh(struct tcp_sock *tp)
 {
 	__u32 ssthresh = 0;
 
 	if (tcp_is_westwood(tp)) {
-		ssthresh = __tcp_westwood_bw_rttmin(tp);
+		ssthresh = tcp_westwood_bw_rttmin(tp);
 		if (ssthresh)
 			tp->snd_ssthresh = ssthresh;  
 	}
@@ -2010,7 +2006,7 @@ static inline int tcp_westwood_cwnd(stru
 	__u32 cwnd = 0;
 
 	if (tcp_is_westwood(tp)) {
-		cwnd = __tcp_westwood_bw_rttmin(tp);
+		cwnd = tcp_westwood_bw_rttmin(tp);
 		if (cwnd)
 			tp->snd_cwnd = cwnd;
 	}
Index: 2.6.11-select/net/ipv4/sysctl_net_ipv4.c
===================================================================
--- 2.6.11-select.orig/net/ipv4/sysctl_net_ipv4.c
+++ 2.6.11-select/net/ipv4/sysctl_net_ipv4.c
@@ -602,22 +602,14 @@ ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
-	{
-		.ctl_name	= NET_TCP_WESTWOOD, 
-		.procname	= "tcp_westwood",
-		.data		= &sysctl_tcp_westwood,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= NET_TCP_VEGAS,
-		.procname	= "tcp_vegas_cong_avoid",
-		.data		= &sysctl_tcp_vegas_cong_avoid,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
+ 	{
+		.ctl_name	= NET_TCP_ADV_CONG,
+ 		.procname	= "tcp_adv_cong",
+ 		.data		= &sysctl_tcp_adv_cong,
+ 		.maxlen		= sizeof(int),
+ 		.mode		= 0644,
+ 		.proc_handler	= &proc_dointvec,
+ 	},
 	{
 		.ctl_name	= NET_TCP_VEGAS_ALPHA,
 		.procname	= "tcp_vegas_alpha",
@@ -643,14 +635,6 @@ ctl_table ipv4_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 	{
-		.ctl_name	= NET_TCP_BIC,
-		.procname	= "tcp_bic",
-		.data		= &sysctl_tcp_bic,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
 		.ctl_name	= NET_TCP_BIC_FAST_CONVERGENCE,
 		.procname	= "tcp_bic_fast_convergence",
 		.data		= &sysctl_tcp_bic_fast_convergence,
Index: 2.6.11-select/net/ipv4/tcp_input.c
===================================================================
--- 2.6.11-select.orig/net/ipv4/tcp_input.c
+++ 2.6.11-select/net/ipv4/tcp_input.c
@@ -87,8 +87,6 @@ int sysctl_tcp_rfc1337;
 int sysctl_tcp_max_orphans = NR_FILE;
 int sysctl_tcp_frto;
 int sysctl_tcp_nometrics_save;
-int sysctl_tcp_westwood;
-int sysctl_tcp_vegas_cong_avoid;
 
 int sysctl_tcp_moderate_rcvbuf = 1;
 
@@ -99,10 +97,11 @@ int sysctl_tcp_moderate_rcvbuf = 1;
 int sysctl_tcp_vegas_alpha = 1<<V_PARAM_SHIFT;
 int sysctl_tcp_vegas_beta  = 3<<V_PARAM_SHIFT;
 int sysctl_tcp_vegas_gamma = 1<<V_PARAM_SHIFT;
-int sysctl_tcp_bic = 1;
 int sysctl_tcp_bic_fast_convergence = 1;
 int sysctl_tcp_bic_low_window = 14;
 
+int sysctl_tcp_adv_cong;
+
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
 #define FLAG_DATA_ACKED		0x04 /* This ACK acknowledged new data.		*/
@@ -561,15 +560,18 @@ static void tcp_event_data_recv(struct s
  */
 void tcp_ca_init(struct tcp_sock *tp)
 {
-	if (sysctl_tcp_westwood) 
-		tp->adv_cong = TCP_WESTWOOD;
-	else if (sysctl_tcp_bic)
-		tp->adv_cong = TCP_BIC;
-	else if (sysctl_tcp_vegas_cong_avoid) {
-		tp->adv_cong = TCP_VEGAS;
-		tp->vegas.baseRTT = 0x7fffffff;
-		tcp_vegas_enable(tp);
-	} 
+	switch (sysctl_tcp_adv_cong) {
+		case TCP_VEGAS:
+			tp->vegas.baseRTT = 0x7fffffff;
+			tcp_vegas_enable(tp);
+			/* Fallthrough */
+		case TCP_BIC:
+		case TCP_WESTWOOD:
+			tp->adv_cong = sysctl_tcp_adv_cong;
+			break;
+		default:
+			tp->adv_cong = TCP_RENO;
+	}
 }
 
 /* Do RTT sampling needed for Vegas.
@@ -1600,18 +1602,25 @@ static void tcp_cwnd_down(struct tcp_soc
 	int decr = tp->snd_cwnd_cnt + 1;
 	__u32 limit;
 
-	/*
-	 * TCP Westwood
-	 * Here limit is evaluated as BWestimation*RTTmin (for obtaining it
-	 * in packets we use mss_cache). If sysctl_tcp_westwood is off
-	 * tcp_westwood_bw_rttmin() returns 0. In such case snd_ssthresh is
-	 * still used as usual. It prevents other strange cases in which
-	 * BWE*RTTmin could assume value 0. It should not happen but...
-	 */
+	switch (tp->adv_cong) {
+		case TCP_WESTWOOD:
+			/*
+			 * TCP Westwood
+			 * Here limit is evaluated as BWestimation*RTTmin (for obtaining it
+			 * in packets we use mss_cache). The guard is against
+			 * strange cases in which BWE*RTTmin could assume value
+			 * 0. It should not happen but...
+			 */
 
-	if (!(limit = tcp_westwood_bw_rttmin(tp)))
-		limit = tp->snd_ssthresh/2;
+			if (!(limit = tcp_westwood_bw_rttmin(tp)))
+				limit = tp->snd_ssthresh/2;
+			break;
 
+		default:
+			limit = tp->snd_ssthresh/2;
+			break;
+	}
+	
 	tp->snd_cwnd_cnt = decr&1;
 	decr >>= 1;
 
@@ -2014,6 +2023,27 @@ static inline void tcp_ack_update_rtt(st
 		tcp_ack_no_tstamp(tp, seq_rtt, flag);
 }
 
+static inline void tcp_slow_start(struct tcp_sock *tp)
+{
+	/* In "safe" area, increase. */
+	if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+		tp->snd_cwnd++;
+}
+
+static inline void tcp_increase_cwnd(struct tcp_sock *tp, __u32 window)
+{
+	/* In dangerous area, increase slowly.
+	 * In theory, for standard tcp, this is tp->snd_cwnd += 1 / window
+	 * (snd_cwnd for Reno)
+	 */
+	if (tp->snd_cwnd_cnt >= window) {
+		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+			tp->snd_cwnd++;
+		tp->snd_cwnd_cnt = 0;
+	} else
+		tp->snd_cwnd_cnt++;		
+}
+
 /*
  * Compute congestion window to use.
  *
@@ -2029,10 +2059,6 @@ static inline void tcp_ack_update_rtt(st
  */
 static inline __u32 bictcp_cwnd(struct tcp_sock *tp)
 {
-	/* orignal Reno behaviour */
-	if (!tcp_is_bic(tp))
-		return tp->snd_cwnd;
-
 	if (tp->bictcp.last_cwnd == tp->snd_cwnd &&
 	   (s32)(tcp_time_stamp - tp->bictcp.last_stamp) <= (HZ>>5))
 		return tp->bictcp.cnt;
@@ -2080,23 +2106,13 @@ static inline __u32 bictcp_cwnd(struct t
 /* This is Jacobson's slow start and congestion avoidance. 
  * SIGCOMM '88, p. 328.
  */
-static inline void reno_cong_avoid(struct tcp_sock *tp)
+static inline void reno_cong_avoid(struct tcp_sock *tp, u32 snd_cwnd)
 {
-        if (tp->snd_cwnd <= tp->snd_ssthresh) {
-                /* In "safe" area, increase. */
-		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-			tp->snd_cwnd++;
-	} else {
-                /* In dangerous area, increase slowly.
-		 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
-		 */
-		if (tp->snd_cwnd_cnt >= bictcp_cwnd(tp)) {
-			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-				tp->snd_cwnd++;
-			tp->snd_cwnd_cnt=0;
-		} else
-			tp->snd_cwnd_cnt++;
-        }
+        if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+	else
+		tcp_increase_cwnd(tp, snd_cwnd);
+
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
@@ -2324,10 +2340,22 @@ static void vegas_cong_avoid(struct tcp_
 
 static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 seq_rtt)
 {
-	if (tcp_vegas_enabled(tp))
-		vegas_cong_avoid(tp, ack, seq_rtt);
-	else
-		reno_cong_avoid(tp);
+	if (tp->snd_cwnd >= tp->snd_cwnd_clamp)
+		return;
+
+	switch (sysctl_tcp_adv_cong) {
+		case TCP_VEGAS:
+			vegas_cong_avoid(tp, ack, seq_rtt);
+			break;
+
+		case TCP_BIC:
+			reno_cong_avoid(tp, bictcp_cwnd(tp));
+			break;
+
+		default:
+			reno_cong_avoid(tp, tp->snd_cwnd);
+			break;
+	}
 }
 
 /* Restart timer after forward progress on connection.

             reply	other threads:[~2005-02-23 21:30 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-02-23 21:30 Baruch Even [this message]
2005-02-23 21:57 ` [PATCH] select congestion control with one sysctl David S. Miller
2005-02-24  0:23   ` Stephen Hemminger
2005-02-24  0:33     ` David S. Miller
2005-02-26  9:41     ` Arnaldo Carvalho de Melo
     [not found]     ` <421D30FA.1060900@ev-en.org>
     [not found]       ` <20050225120814.5fa77b13@dxpl.pdx.osdl.net>
     [not found]         ` <20050309210442.3e9786a6.davem@davemloft.net>
     [not found]           ` <4230288F.1030202@ev-en.org>
     [not found]             ` <20050310182629.1eab09ec.davem@davemloft.net>
     [not found]               ` <20050311120054.4bbf675a@dxpl.pdx.osdl.net>
     [not found]                 ` <20050311201011.360c00da.davem@davemloft.net>
2005-03-14 23:17                   ` [RFC] TCP congestion schedulers Stephen Hemminger
2005-03-15 19:54                     ` John Heffner
2005-03-15 22:16                     ` John Heffner
2005-03-18  4:12                     ` David S. Miller
2005-03-18 12:53                       ` Arnaldo Carvalho de Melo
2005-03-18 13:43                         ` jamal
2005-03-18 16:13                           ` Arnaldo Carvalho de Melo
2005-03-18 16:45                             ` Stephen Hemminger
2005-03-18 16:59                               ` Arnaldo Carvalho de Melo
2005-03-19 20:19                     ` Andi Kleen
2005-03-21 21:25                       ` John Heffner
2005-03-21 21:51                         ` David S. Miller
2005-03-21 22:30                           ` Baruch Even
2005-03-22  0:10                         ` Rick Jones
2005-03-22  1:41                           ` Olaf Kirch
2005-03-22  7:41                         ` Andi Kleen
2005-03-28 23:51                           ` Stephen Hemminger
2005-03-29 15:25                             ` Andi Kleen
2005-03-29 17:17                               ` Stephen Hemminger
2005-03-29 18:58                                 ` Rick Jones
2005-03-30  9:41                                   ` Matt Mackall
2005-03-29 19:32                           ` John Heffner
2005-03-29 20:03                             ` David S. Miller
2005-03-29 20:09                               ` Rick Jones
2005-04-08 19:33                       ` John Heffner
2005-04-08 20:20                         ` Rick Jones
2005-02-24  1:05   ` [PATCH] select congestion control with one sysctl Daniele Lacamera

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=421CF5E5.1060606@ev-en.org \
    --to=baruch@ev-en.org \
    --cc=davem@davemloft.net \
    --cc=doug.leith@nuim.ie \
    --cc=linux-net@vger.kernel.org \
    --cc=netdev@oss.sgi.com \
    --cc=shemminger@osdl.org \
    --cc=yee-ting.li@nuim.ie \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.