* [PATCH net-next 0/3] tcp: fixes some congestion control corner cases
@ 2015-07-09 20:16 Yuchung Cheng
2015-07-09 20:16 ` [PATCH net-next 1/3] tcp: add tcp_in_slow_start helper Yuchung Cheng
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: Yuchung Cheng @ 2015-07-09 20:16 UTC (permalink / raw)
To: davem; +Cc: netdev, Van Jacobson, Yuchung Cheng
This patch series fixes corner cases of TCP congestion control.
First issue is to avoid continuing slow start when cwnd reaches ssthresh.
Second issue is an incorrect processing order of congestion state and
cwnd updates when entering fast recovery or undoing cwnd.
Yuchung Cheng (3):
tcp: add tcp_in_slow_start helper
tcp: do not slow start when cwnd equals ssthresh
tcp: update congestion state first before raising cwnd
include/net/tcp.h | 7 ++++++-
net/ipv4/tcp_bic.c | 2 +-
net/ipv4/tcp_cdg.c | 2 +-
net/ipv4/tcp_cong.c | 6 ++----
net/ipv4/tcp_cubic.c | 4 ++--
net/ipv4/tcp_highspeed.c | 2 +-
net/ipv4/tcp_htcp.c | 2 +-
net/ipv4/tcp_hybla.c | 2 +-
net/ipv4/tcp_illinois.c | 2 +-
net/ipv4/tcp_input.c | 8 ++++----
net/ipv4/tcp_metrics.c | 2 +-
net/ipv4/tcp_scalable.c | 2 +-
net/ipv4/tcp_vegas.c | 6 +++---
net/ipv4/tcp_veno.c | 2 +-
14 files changed, 26 insertions(+), 23 deletions(-)
--
2.4.3.573.g4eafbef
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH net-next 1/3] tcp: add tcp_in_slow_start helper
2015-07-09 20:16 [PATCH net-next 0/3] tcp: fixes some congestion control corner cases Yuchung Cheng
@ 2015-07-09 20:16 ` Yuchung Cheng
2015-07-09 20:16 ` [PATCH net-next 2/3] tcp: do not slow start when cwnd equals ssthresh Yuchung Cheng
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Yuchung Cheng @ 2015-07-09 20:16 UTC (permalink / raw)
To: davem
Cc: netdev, Van Jacobson, Yuchung Cheng, Neal Cardwell, Eric Dumazet,
Nandita Dukkipati
Add a helper to test the slow start condition in various congestion
control modules and other places. This is to prepare a slight improvement
in policy as to exactly when to slow start.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
---
include/net/tcp.h | 7 ++++++-
net/ipv4/tcp_bic.c | 2 +-
net/ipv4/tcp_cong.c | 2 +-
net/ipv4/tcp_cubic.c | 4 ++--
net/ipv4/tcp_highspeed.c | 2 +-
net/ipv4/tcp_htcp.c | 2 +-
net/ipv4/tcp_illinois.c | 2 +-
net/ipv4/tcp_metrics.c | 2 +-
net/ipv4/tcp_scalable.c | 2 +-
net/ipv4/tcp_vegas.c | 6 +++---
net/ipv4/tcp_veno.c | 2 +-
11 files changed, 19 insertions(+), 14 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 950cfec..dba22fc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -989,6 +989,11 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
#define TCP_INFINITE_SSTHRESH 0x7fffffff
+static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
+{
+ return tp->snd_cwnd <= tp->snd_ssthresh;
+}
+
static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
{
return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
@@ -1065,7 +1070,7 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
const struct tcp_sock *tp = tcp_sk(sk);
/* If in slow start, ensure cwnd grows to twice what was ACKed. */
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
return tp->snd_cwnd < 2 * tp->max_packets_out;
return tp->is_cwnd_limited;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index c037644..fd1405d 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -146,7 +146,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!tcp_is_cwnd_limited(sk))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
else {
bictcp_update(ca, tp->snd_cwnd);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 84be008..654729a 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -413,7 +413,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
return;
/* In "safe" area, increase. */
- if (tp->snd_cwnd <= tp->snd_ssthresh) {
+ if (tcp_in_slow_start(tp)) {
acked = tcp_slow_start(tp, acked);
if (!acked)
return;
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 06d3d66..28011fb 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -320,7 +320,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!tcp_is_cwnd_limited(sk))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh) {
+ if (tcp_in_slow_start(tp)) {
if (hystart && after(ack, ca->end_seq))
bictcp_hystart_reset(sk);
acked = tcp_slow_start(tp, acked);
@@ -439,7 +439,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
ca->delay_min = delay;
/* hystart triggers when cwnd is larger than some threshold */
- if (hystart && tp->snd_cwnd <= tp->snd_ssthresh &&
+ if (hystart && tcp_in_slow_start(tp) &&
tp->snd_cwnd >= hystart_low_window)
hystart_update(sk, delay);
}
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 882c08a..db78424 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -116,7 +116,7 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!tcp_is_cwnd_limited(sk))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
else {
/* Update AIMD parameters.
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 58469ff..82f0d9e 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -236,7 +236,7 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!tcp_is_cwnd_limited(sk))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
else {
/* In dangerous area, increase slowly.
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index f71002e..2ab9bbb 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -268,7 +268,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
return;
/* In slow start */
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
else {
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index a51d63a..b3d64f6 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -461,7 +461,7 @@ void tcp_update_metrics(struct sock *sk)
tcp_metric_set(tm, TCP_METRIC_CWND,
tp->snd_cwnd);
}
- } else if (tp->snd_cwnd > tp->snd_ssthresh &&
+ } else if (!tcp_in_slow_start(tp) &&
icsk->icsk_ca_state == TCP_CA_Open) {
/* Cong. avoidance phase, cwnd is reliable. */
if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 333bcb2..bf5ea9e 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -22,7 +22,7 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!tcp_is_cwnd_limited(sk))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
else
tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index a6cea1d..13951c4 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -225,7 +225,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
*/
diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT;
- if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) {
+ if (diff > gamma && tcp_in_slow_start(tp)) {
/* Going too fast. Time to slow down
* and switch to congestion avoidance.
*/
@@ -240,7 +240,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
tp->snd_ssthresh = tcp_vegas_ssthresh(tp);
- } else if (tp->snd_cwnd <= tp->snd_ssthresh) {
+ } else if (tcp_in_slow_start(tp)) {
/* Slow start. */
tcp_slow_start(tp, acked);
} else {
@@ -281,7 +281,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
vegas->minRTT = 0x7fffffff;
}
/* Use normal slow start */
- else if (tp->snd_cwnd <= tp->snd_ssthresh)
+ else if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
}
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 112151e..0d094b9 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -150,7 +150,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd;
- if (tp->snd_cwnd <= tp->snd_ssthresh) {
+ if (tcp_in_slow_start(tp)) {
/* Slow start. */
tcp_slow_start(tp, acked);
} else {
--
2.4.3.573.g4eafbef
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH net-next 2/3] tcp: do not slow start when cwnd equals ssthresh
2015-07-09 20:16 [PATCH net-next 0/3] tcp: fixes some congestion control corner cases Yuchung Cheng
2015-07-09 20:16 ` [PATCH net-next 1/3] tcp: add tcp_in_slow_start helper Yuchung Cheng
@ 2015-07-09 20:16 ` Yuchung Cheng
2015-07-09 20:16 ` [PATCH net-next 3/3] tcp: update congestion state first before raising cwnd Yuchung Cheng
2015-07-09 21:23 ` [PATCH net-next 0/3] tcp: fixes some congestion control corner cases David Miller
3 siblings, 0 replies; 5+ messages in thread
From: Yuchung Cheng @ 2015-07-09 20:16 UTC (permalink / raw)
To: davem
Cc: netdev, Van Jacobson, Yuchung Cheng, Neal Cardwell, Eric Dumazet,
Nandita Dukkipati
In the original design slow start is only used to raise cwnd
when cwnd is strictly below ssthresh. It makes little sense
to slow start when cwnd == ssthresh: especially
when hystart has set ssthresh in the initial ramp, or after
recovery when cwnd resets to ssthresh. Not doing so will
also help reduce the buffer bloat slightly.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
---
include/net/tcp.h | 2 +-
net/ipv4/tcp_cdg.c | 2 +-
net/ipv4/tcp_cong.c | 4 +---
net/ipv4/tcp_hybla.c | 2 +-
4 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index dba22fc..364426a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -991,7 +991,7 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
{
- return tp->snd_cwnd <= tp->snd_ssthresh;
+ return tp->snd_cwnd < tp->snd_ssthresh;
}
static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 8c6fd3d..167b6a3 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -264,7 +264,7 @@ static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked)
u32 prior_snd_cwnd;
u32 incr;
- if (tp->snd_cwnd < tp->snd_ssthresh && hystart_detect)
+ if (tcp_in_slow_start(tp) && hystart_detect)
tcp_cdg_hystart_update(sk);
if (after(ack, ca->rtt_seq) && ca->rtt.v64) {
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 654729a..a2ed23c 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -365,10 +365,8 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
*/
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
{
- u32 cwnd = tp->snd_cwnd + acked;
+ u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
- if (cwnd > tp->snd_ssthresh)
- cwnd = tp->snd_ssthresh + 1;
acked -= cwnd - tp->snd_cwnd;
tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index f963b27..083831e 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -112,7 +112,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
rho_fractions = ca->rho_3ls - (ca->rho << 3);
- if (tp->snd_cwnd < tp->snd_ssthresh) {
+ if (tcp_in_slow_start(tp)) {
/*
* slow start
* INC = 2^RHO - 1
--
2.4.3.573.g4eafbef
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH net-next 3/3] tcp: update congestion state first before raising cwnd
2015-07-09 20:16 [PATCH net-next 0/3] tcp: fixes some congestion control corner cases Yuchung Cheng
2015-07-09 20:16 ` [PATCH net-next 1/3] tcp: add tcp_in_slow_start helper Yuchung Cheng
2015-07-09 20:16 ` [PATCH net-next 2/3] tcp: do not slow start when cwnd equals ssthresh Yuchung Cheng
@ 2015-07-09 20:16 ` Yuchung Cheng
2015-07-09 21:23 ` [PATCH net-next 0/3] tcp: fixes some congestion control corner cases David Miller
3 siblings, 0 replies; 5+ messages in thread
From: Yuchung Cheng @ 2015-07-09 20:16 UTC (permalink / raw)
To: davem
Cc: netdev, Van Jacobson, Yuchung Cheng, Neal Cardwell, Eric Dumazet,
Nandita Dukkipati
The congestion state and cwnd can be updated in the wrong order.
For example, upon receiving a dubious ACK, we incorrectly raise
the cwnd first (tcp_may_raise_cwnd()/tcp_cong_avoid()) because
the state is still Open, then enter recovery state to reduce cwnd.
For another example, if the ACK indicates spurious timeout or
retransmits, we first revert the cwnd reduction and congestion
state back to Open state. But we don't raise the cwnd even though
the ACK does not indicate any congestion.
To fix this problem we should first call tcp_fastretrans_alert() to
process the dubious ACK and update the congestion state, then call
tcp_may_raise_cwnd() that raises cwnd based on the current state.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
---
net/ipv4/tcp_input.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ad1482d..a6bbe0f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3566,10 +3566,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
&sack_state);
acked -= tp->packets_out;
- /* Advance cwnd if state allows */
- if (tcp_may_raise_cwnd(sk, flag))
- tcp_cong_avoid(sk, ack, acked);
-
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
tcp_fastretrans_alert(sk, acked, prior_unsacked,
@@ -3578,6 +3574,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
+ /* Advance cwnd if state allows */
+ if (tcp_may_raise_cwnd(sk, flag))
+ tcp_cong_avoid(sk, ack, acked);
+
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
struct dst_entry *dst = __sk_dst_get(sk);
if (dst)
--
2.4.3.573.g4eafbef
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH net-next 0/3] tcp: fixes some congestion control corner cases
2015-07-09 20:16 [PATCH net-next 0/3] tcp: fixes some congestion control corner cases Yuchung Cheng
` (2 preceding siblings ...)
2015-07-09 20:16 ` [PATCH net-next 3/3] tcp: update congestion state first before raising cwnd Yuchung Cheng
@ 2015-07-09 21:23 ` David Miller
3 siblings, 0 replies; 5+ messages in thread
From: David Miller @ 2015-07-09 21:23 UTC (permalink / raw)
To: ycheng; +Cc: netdev, vanj
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 9 Jul 2015 13:16:28 -0700
> This patch series fixes corner cases of TCP congestion control.
> First issue is to avoid continuing slow start when cwnd reaches ssthresh.
> Second issue is an incorrect processing order of congestion state and
> cwnd updates when entering fast recovery or undoing cwnd.
>
> Yuchung Cheng (3):
> tcp: add tcp_in_slow_start helper
> tcp: do not slow start when cwnd equals ssthresh
> tcp: update congestion state first before raising cwnd
This helper function is long overdue, nice work.
Series applied, thanks!
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2015-07-09 21:23 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-07-09 20:16 [PATCH net-next 0/3] tcp: fixes some congestion control corner cases Yuchung Cheng
2015-07-09 20:16 ` [PATCH net-next 1/3] tcp: add tcp_in_slow_start helper Yuchung Cheng
2015-07-09 20:16 ` [PATCH net-next 2/3] tcp: do not slow start when cwnd equals ssthresh Yuchung Cheng
2015-07-09 20:16 ` [PATCH net-next 3/3] tcp: update congestion state first before raising cwnd Yuchung Cheng
2015-07-09 21:23 ` [PATCH net-next 0/3] tcp: fixes some congestion control corner cases David Miller
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).