netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] CUBIC v2.3 with new improved slow start
@ 2008-10-29 21:28 Injong Rhee
  2008-10-29 22:39 ` Stephen Hemminger
                   ` (2 more replies)
  0 siblings, 3 replies; 20+ messages in thread
From: Injong Rhee @ 2008-10-29 21:28 UTC (permalink / raw)
  To: netdev

[-- Attachment #1: Type: text/plain, Size: 1264 bytes --]

I am releasing a new patch for CUBIC. This patch implements a new slow start 
mechanism called HyStart. There were some discussions in the mailing list on 
the poor performance of TCP slow start; our patch addresses those 
performance issues arising from slow start.  For more information, please 
refer to the following technical report:

 Sangtae Ha and Injong Rhee, "Taming the Elephants: New TCP Slow
Start", NCSU Technical Report 2008. Available at
http://netsrv.csc.ncsu.edu/export/hystart_techreport_2008.pdf


The new update improves the start-up throughput of CUBIC substantially by 
avoiding system overloading during slow start and shortening the 
fast-recovery period after slow start. The key performance issues arising 
when Linux is used with Windows XP or FreeBSD receivers are also addressed. 
Our tests over Internet2 paths are very encouraging. The scheme is verified 
to work well even for asymmetric paths,  with diverse receiver settings of 
delayed acknowledgements, and with various operating systems (Windows XP and 
FreeBSD). You can find the testing results from

http://netsrv.csc.ncsu.edu/wiki/index.php/TCP_Testing

Please let us know if there are other performance issues of TCP that you 
want us to look into.

Injong and Sangtae.


[-- Attachment #2: 0001-TCP-CUBIC-v2.3.patch --]
[-- Type: application/octet-stream, Size: 7663 bytes --]

>From ee61f3e3f5aee0707eac02cd8cec2ab37e7114ee Mon Sep 17 00:00:00 2001
From: Sangtae Ha <sha2@ncsu.edu>
Date: Wed, 29 Oct 2008 00:07:18 -0400
Subject: [PATCH] [TCP] CUBIC v2.3


Signed-off-by: Sangtae Ha <sha2@ncsu.edu>
---
 net/ipv4/tcp_cubic.c |  120 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 109 insertions(+), 11 deletions(-)

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 4a1221e..ee467ec 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -1,13 +1,23 @@
 /*
- * TCP CUBIC: Binary Increase Congestion control for TCP v2.2
+ * TCP CUBIC: Binary Increase Congestion control for TCP v2.3
  * Home page:
  *      http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC
  * This is from the implementation of CUBIC TCP in
- * Injong Rhee, Lisong Xu.
- *  "CUBIC: A New TCP-Friendly High-Speed TCP Variant
- *  in PFLDnet 2005
+ * Sangtae Ha, Injong Rhee and Lisong Xu,
+ *  "CUBIC: A New TCP-Friendly High-Speed TCP Variant"
+ *  in ACM SIGOPS Operating System Review, July 2008.
  * Available from:
- *  http://netsrv.csc.ncsu.edu/export/cubic-paper.pdf
+ *  http://netsrv.csc.ncsu.edu/export/cubic_a_new_tcp_2008.pdf
+ *
+ * CUBIC integrates a new slow start algorithm, called HyStart.
+ * The details of HyStart are presented in
+ *  Sangtae Ha and Injong Rhee,
+ *  "Taming the Elephants: New TCP Slow Start", NCSU TechReport 2008.
+ * Available from:
+ *  http://netsrv.csc.ncsu.edu/export/hystart_techreport_2008.pdf
+ *
+ * All testing results are available from:
+ * http://netsrv.csc.ncsu.edu/wiki/index.php/TCP_Testing
  *
  * Unless CUBIC is enabled and congestion window is large
  * this behaves the same as the original Reno.
@@ -23,12 +33,26 @@
 					 */
 #define	BICTCP_HZ		10	/* BIC HZ 2^10 = 1024 */
 
+/* Two methods of hybrid slow start */
+#define HYSTART_ACK_TRAIN	0x1
+#define HYSTART_DELAY		0x2
+
+/* Number of delay samples for detecting the increase of delay */
+#define HYSTART_MIN_SAMPLES	8
+#define HYSTART_DELAY_MIN	(2U<<3)
+#define HYSTART_DELAY_MAX	(16U<<3)
+#define HYSTART_DELAY_THRESH(x)	clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
+
 static int fast_convergence __read_mostly = 1;
 static int beta __read_mostly = 717;	/* = 717/1024 (BICTCP_BETA_SCALE) */
 static int initial_ssthresh __read_mostly;
 static int bic_scale __read_mostly = 41;
 static int tcp_friendliness __read_mostly = 1;
 
+static int hystart __read_mostly = 1;
+static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
+static int hystart_low_window __read_mostly = 16;
+
 static u32 cube_rtt_scale __read_mostly;
 static u32 beta_scale __read_mostly;
 static u64 cube_factor __read_mostly;
@@ -44,6 +68,13 @@ module_param(bic_scale, int, 0444);
 MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
 module_param(tcp_friendliness, int, 0644);
 MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
+module_param(hystart, int, 0644);
+MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm");
+module_param(hystart_detect, int, 0644);
+MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms"
+		 " 1: packet-train 2: delay 3: both packet-train and delay");
+module_param(hystart_low_window, int, 0644);
+MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
 
 /* BIC TCP Parameters */
 struct bictcp {
@@ -59,7 +90,13 @@ struct bictcp {
 	u32	ack_cnt;	/* number of acks */
 	u32	tcp_cwnd;	/* estimated tcp cwnd */
 #define ACK_RATIO_SHIFT	4
-	u32	delayed_ack;	/* estimate the ratio of Packets/ACKs << 4 */
+	u16	delayed_ack;	/* estimate the ratio of Packets/ACKs << 4 */
+	u8	sample_cnt;	/* number of samples to decide curr_rtt */
+	u8	found;		/* the exit point is found? */
+	u32	round_start;	/* beginning of each round */
+	u32	end_seq;	/* end_seq of the round */
+	u32	last_jiffies;	/* last time when the ACK spacing is close */
+	u32	curr_rtt;	/* the minimum rtt of current round */
 };
 
 static inline void bictcp_reset(struct bictcp *ca)
@@ -76,12 +113,28 @@ static inline void bictcp_reset(struct bictcp *ca)
 	ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
 	ca->ack_cnt = 0;
 	ca->tcp_cwnd = 0;
+	ca->found = 0;
+}
+
+static inline void bictcp_hystart_reset(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bictcp *ca = inet_csk_ca(sk);
+
+	ca->round_start = ca->last_jiffies = jiffies;
+	ca->end_seq = tp->snd_nxt;
+	ca->curr_rtt = 0;
+	ca->sample_cnt = 0;
 }
 
 static void bictcp_init(struct sock *sk)
 {
 	bictcp_reset(inet_csk_ca(sk));
-	if (initial_ssthresh)
+
+	if (hystart)
+		bictcp_hystart_reset(sk);
+
+	if (!hystart && initial_ssthresh)
 		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
 }
 
@@ -235,9 +288,11 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
-	if (tp->snd_cwnd <= tp->snd_ssthresh)
+	if (tp->snd_cwnd <= tp->snd_ssthresh) {
+		if (hystart && after(ack, ca->end_seq))
+			bictcp_hystart_reset(sk);
 		tcp_slow_start(tp);
-	else {
+	} else {
 		bictcp_update(ca, tp->snd_cwnd);
 
 		/* In dangerous area, increase slowly.
@@ -281,8 +336,45 @@ static u32 bictcp_undo_cwnd(struct sock *sk)
 
 static void bictcp_state(struct sock *sk, u8 new_state)
 {
-	if (new_state == TCP_CA_Loss)
+	if (new_state == TCP_CA_Loss) {
 		bictcp_reset(inet_csk_ca(sk));
+		bictcp_hystart_reset(sk);
+	}
+}
+
+static void hystart_update(struct sock *sk, u32 delay)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bictcp *ca = inet_csk_ca(sk);
+
+	if (!(ca->found & hystart_detect)) {
+		u32 curr_jiffies = jiffies;
+
+		/* first detection parameter - ack-train detection */
+		if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) {
+			ca->last_jiffies = curr_jiffies;
+			if (curr_jiffies - ca->round_start >= ca->delay_min>>4)
+				ca->found |= HYSTART_ACK_TRAIN;
+		}
+
+		/* obtain the minimum delay of more than sampling packets */
+		if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
+			if (ca->curr_rtt == 0 || ca->curr_rtt > delay)
+				ca->curr_rtt = delay;
+
+			ca->sample_cnt++;
+		} else {
+			if (ca->curr_rtt > ca->delay_min +
+			    HYSTART_DELAY_THRESH(ca->delay_min>>4))
+				ca->found |= HYSTART_DELAY;
+		}
+		/*
+		 * Either one of two conditions are met,
+		 * we exit from slow start immediately.
+		 */
+		if (ca->found & hystart_detect)
+			tp->snd_ssthresh = tp->snd_cwnd;
+	}
 }
 
 /* Track delayed acknowledgment ratio using sliding window
@@ -291,6 +383,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
 static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
 	u32 delay;
 
@@ -314,6 +407,11 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
 	/* first time call or link delay decreases */
 	if (ca->delay_min == 0 || ca->delay_min > delay)
 		ca->delay_min = delay;
+
+	/* hystart triggers when cwnd is larger than some threshold */
+	if (hystart && tp->snd_cwnd <= tp->snd_ssthresh &&
+	    tp->snd_cwnd >= hystart_low_window)
+		hystart_update(sk, delay);
 }
 
 static struct tcp_congestion_ops cubictcp = {
@@ -372,4 +470,4 @@ module_exit(cubictcp_unregister);
 MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("CUBIC TCP");
-MODULE_VERSION("2.2");
+MODULE_VERSION("2.3");
-- 
1.5.2.2


^ permalink raw reply related	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2008-11-04  1:13 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-10-29 21:28 [PATCH] CUBIC v2.3 with new improved slow start Injong Rhee
2008-10-29 22:39 ` Stephen Hemminger
2008-10-29 23:14   ` Injong Rhee
2008-10-29 23:40     ` Stephen Hemminger
2008-10-29 23:53       ` Rick Jones
2008-10-30  0:54         ` Injong Rhee
2008-10-30  1:08         ` Rick Jones
2008-10-29 23:45     ` David Miller
2008-10-30 15:36 ` [PATCH] tcp: mark BIC as experimental Stephen Hemminger
2008-10-31  7:50   ` David Miller
2008-10-31 16:41     ` Stephen Hemminger
2008-10-31 19:04       ` David Miller
2008-10-31 19:16     ` [RFC] tcp: make H-TCP the default congestion control Stephen Hemminger
2008-10-31 19:43       ` Sangtae Ha
2008-10-31 20:02         ` rhee
2008-11-02  4:29       ` David Miller
2008-11-02  7:27 ` [PATCH] CUBIC v2.3 with new improved slow start David Miller
2008-11-03 22:19   ` Sangtae Ha
2008-11-03 22:49     ` Stephen Hemminger
2008-11-04  1:12     ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).