* [PATCH 1/4 net-next] tcp: consolidate PRR packet accounting
@ 2013-05-30  0:20 Yuchung Cheng
  2013-05-30  0:20 ` [PATCH 2/4 net-next] tcp: refactor undo functions Yuchung Cheng
                   ` (4 more replies)
From: Yuchung Cheng @ 2013-05-30  0:20 UTC
  To: davem, ncardwell, edumazet, nanditad; +Cc: aterzis, netdev, Yuchung Cheng

This patch series fixes an undo bug in fast recovery: the sender
mistakenly undoes the cwnd too early but continues fast retransmits
until all pending data are acked. This also multiplies the SNMP
stat PARTIALUNDO event count by the degree of network reordering.

The first patch prepares the fix by consolidating the accounting
of newly_acked_sacked in tcp_cwnd_reduction(), instead of updating
newly_acked_sacked every time sacked_out is adjusted.  It also passes
acked and prior_unsacked as const because they are read-only in the
rest of the recovery processing.
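
For example (hypothetical numbers, for illustration only): suppose a
snapshot taken before processing the ACK shows 10 packets out with 3
SACKed, so prior_unsacked = 7. If the ACK cumulatively acks 2 packets
and SACKs 1 more, leaving 8 packets out with 4 SACKed, then inside
tcp_cwnd_reduction():

    newly_acked_sacked = prior_unsacked -
                         (tp->packets_out - tp->sacked_out)
                       = 7 - (8 - 4) = 3

i.e. the 2 newly acked plus the 1 newly sacked packet, exactly what
the per-adjustment accounting used to accumulate.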

Signed-off-by: Yuchung Cheng <ycheng@google.com>
---
 net/ipv4/tcp_input.c | 45 ++++++++++++++++++++-------------------------
 1 file changed, 20 insertions(+), 25 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9579e1a..86b5fa7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2430,12 +2430,14 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
 	TCP_ECN_queue_cwr(tp);
 }
 
-static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
 			       int fast_rexmit)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int sndcnt = 0;
 	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
+	int newly_acked_sacked = prior_unsacked -
+				 (tp->packets_out - tp->sacked_out);
 
 	tp->prr_delivered += newly_acked_sacked;
 	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
@@ -2492,7 +2494,7 @@ static void tcp_try_keep_open(struct sock *sk)
 	}
 }
 
-static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
+static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2509,7 +2511,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
 		if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
 			tcp_moderate_cwnd(tp);
 	} else {
-		tcp_cwnd_reduction(sk, newly_acked_sacked, 0);
+		tcp_cwnd_reduction(sk, prior_unsacked, 0);
 	}
 }
 
@@ -2678,15 +2680,14 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
  * It does _not_ decide what to send, it is made in function
  * tcp_xmit_retransmit_queue().
  */
-static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
-				  int prior_sacked, int prior_packets,
+static void tcp_fastretrans_alert(struct sock *sk, const int acked,
+				  const int prior_unsacked,
 				  bool is_dupack, int flag)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
 				    (tcp_fackets_out(tp) > tp->reordering));
-	int newly_acked_sacked = 0;
 	int fast_rexmit = 0;
 
 	if (WARN_ON(!tp->packets_out && tp->sacked_out))
@@ -2739,9 +2740,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 			if (tcp_is_reno(tp) && is_dupack)
 				tcp_add_reno_sack(sk);
 		} else
-			do_lost = tcp_try_undo_partial(sk, pkts_acked);
-		newly_acked_sacked = prior_packets - tp->packets_out +
-				     tp->sacked_out - prior_sacked;
+			do_lost = tcp_try_undo_partial(sk, acked);
 		break;
 	case TCP_CA_Loss:
 		tcp_process_loss(sk, flag, is_dupack);
@@ -2755,14 +2754,12 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 			if (is_dupack)
 				tcp_add_reno_sack(sk);
 		}
-		newly_acked_sacked = prior_packets - tp->packets_out +
-				     tp->sacked_out - prior_sacked;
 
 		if (icsk->icsk_ca_state <= TCP_CA_Disorder)
 			tcp_try_undo_dsack(sk);
 
 		if (!tcp_time_to_recover(sk, flag)) {
-			tcp_try_to_open(sk, flag, newly_acked_sacked);
+			tcp_try_to_open(sk, flag, prior_unsacked);
 			return;
 		}
 
@@ -2784,7 +2781,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 
 	if (do_lost)
 		tcp_update_scoreboard(sk, fast_rexmit);
-	tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
+	tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit);
 	tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3268,9 +3265,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	u32 prior_in_flight;
 	u32 prior_fackets;
 	int prior_packets = tp->packets_out;
-	int prior_sacked = tp->sacked_out;
-	int pkts_acked = 0;
-	int previous_packets_out = 0;
+	const int prior_unsacked = tp->packets_out - tp->sacked_out;
+	int acked = 0; /* Number of packets newly acked */
 
 	/* If the ack is older than previous acks
 	 * then we can probably ignore it.
@@ -3345,18 +3341,17 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		goto no_queue;
 
 	/* See if we can take anything off of the retransmit queue. */
-	previous_packets_out = tp->packets_out;
+	acked = tp->packets_out;
 	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
-
-	pkts_acked = previous_packets_out - tp->packets_out;
+	acked -= tp->packets_out;
 
 	if (tcp_ack_is_dubious(sk, flag)) {
 		/* Advance CWND, if state allows this. */
 		if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
 			tcp_cong_avoid(sk, ack, prior_in_flight);
 		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
-		tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
-				      prior_packets, is_dupack, flag);
+		tcp_fastretrans_alert(sk, acked, prior_unsacked,
+				      is_dupack, flag);
 	} else {
 		if (flag & FLAG_DATA_ACKED)
 			tcp_cong_avoid(sk, ack, prior_in_flight);
@@ -3378,8 +3373,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 no_queue:
 	/* If data was DSACKed, see if we can undo a cwnd reduction. */
 	if (flag & FLAG_DSACKING_ACK)
-		tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
-				      prior_packets, is_dupack, flag);
+		tcp_fastretrans_alert(sk, acked, prior_unsacked,
+				      is_dupack, flag);
 	/* If this ack opens up a zero window, clear backoff.  It was
 	 * being used to time the probes, and is probably far higher than
 	 * it needs to be for normal retransmission.
@@ -3401,8 +3396,8 @@ old_ack:
 	 */
 	if (TCP_SKB_CB(skb)->sacked) {
 		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
-		tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
-				      prior_packets, is_dupack, flag);
+		tcp_fastretrans_alert(sk, acked, prior_unsacked,
+				      is_dupack, flag);
 	}
 
 	SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
-- 
1.8.2.1


* [PATCH 2/4 net-next] tcp: refactor undo functions
  2013-05-30  0:20 [PATCH 1/4 net-next] tcp: consolidate PRR packet accounting Yuchung Cheng
@ 2013-05-30  0:20 ` Yuchung Cheng
  2013-05-30  0:44   ` Neal Cardwell
  2013-05-31  1:07   ` David Miller
  2013-05-30  0:20 ` [PATCH 3/4 net-next] tcp: fix undo on partial ack in recovery Yuchung Cheng
                   ` (3 subsequent siblings)
From: Yuchung Cheng @ 2013-05-30  0:20 UTC
  To: davem, ncardwell, edumazet, nanditad; +Cc: aterzis, netdev, Yuchung Cheng

Refactor and relocate various functions and variables to prepare for
the undo fix.  Remove some unused function arguments. Rename
tcp_undo_cwr() to tcp_undo_cwnd_reduction() to be consistent with the
rest of the CWR-related function names.
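
The rename does not change behavior; the existing call sites map as
follows (sketch):

    tcp_undo_cwr(sk, true)  ->  tcp_undo_cwnd_reduction(sk, true, false);
    tcp_undo_cwr(sk, false) ->  tcp_undo_cwnd_reduction(sk, false, false);

Only tcp_try_undo_loss() passes unmark_loss == true, replacing the
loss-unmarking loop it previously open-coded.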

Signed-off-by: Yuchung Cheng <ycheng@google.com>
---
 net/ipv4/tcp_input.c | 97 +++++++++++++++++++++++++++-------------------------
 1 file changed, 50 insertions(+), 47 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 86b5fa7..fcb668d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2243,10 +2243,23 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 #define DBGUNDO(x...) do { } while (0)
 #endif
 
-static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
+static void tcp_undo_cwnd_reduction(struct sock *sk, const bool undo_ssthresh,
+				    bool unmark_loss)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
+	if (unmark_loss) {
+		struct sk_buff *skb;
+
+		tcp_for_write_queue(skb, sk) {
+			if (skb == tcp_send_head(sk))
+				break;
+			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+		}
+		tp->lost_out = 0;
+		tcp_clear_all_retrans_hints(tp);
+	}
+
 	if (tp->prior_ssthresh) {
 		const struct inet_connection_sock *icsk = inet_csk(sk);
 
@@ -2263,6 +2276,9 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
 		tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
 	}
 	tp->snd_cwnd_stamp = tcp_time_stamp;
+
+	if (undo_ssthresh)
+		tp->undo_marker = 0;
 }
 
 static inline bool tcp_may_undo(const struct tcp_sock *tp)
@@ -2282,14 +2298,13 @@ static bool tcp_try_undo_recovery(struct sock *sk)
 		 * or our original transmission succeeded.
 		 */
 		DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
-		tcp_undo_cwr(sk, true);
+		tcp_undo_cwnd_reduction(sk, true, false);
 		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
 			mib_idx = LINUX_MIB_TCPLOSSUNDO;
 		else
 			mib_idx = LINUX_MIB_TCPFULLUNDO;
 
 		NET_INC_STATS_BH(sock_net(sk), mib_idx);
-		tp->undo_marker = 0;
 	}
 	if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
 		/* Hold old state until something *above* high_seq
@@ -2309,8 +2324,7 @@ static void tcp_try_undo_dsack(struct sock *sk)
 
 	if (tp->undo_marker && !tp->undo_retrans) {
 		DBGUNDO(sk, "D-SACK");
-		tcp_undo_cwr(sk, true);
-		tp->undo_marker = 0;
+		tcp_undo_cwnd_reduction(sk, true, false);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
 	}
 }
@@ -2344,60 +2358,20 @@ static bool tcp_any_retrans_done(const struct sock *sk)
 	return false;
 }
 
-/* Undo during fast recovery after partial ACK. */
-
-static int tcp_try_undo_partial(struct sock *sk, int acked)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	/* Partial ACK arrived. Force Hoe's retransmit. */
-	int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
-
-	if (tcp_may_undo(tp)) {
-		/* Plain luck! Hole is filled with delayed
-		 * packet, rather than with a retransmit.
-		 */
-		if (!tcp_any_retrans_done(sk))
-			tp->retrans_stamp = 0;
-
-		tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
-
-		DBGUNDO(sk, "Hoe");
-		tcp_undo_cwr(sk, false);
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
-
-		/* So... Do not make Hoe's retransmit yet.
-		 * If the first packet was delayed, the remaining
-		 * ones are most probably delayed as well.
-		 */
-		failed = 0;
-	}
-	return failed;
-}
-
 /* Undo during loss recovery after partial ACK or using F-RTO. */
 static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (frto_undo || tcp_may_undo(tp)) {
-		struct sk_buff *skb;
-		tcp_for_write_queue(skb, sk) {
-			if (skb == tcp_send_head(sk))
-				break;
-			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
-		}
-
-		tcp_clear_all_retrans_hints(tp);
+		tcp_undo_cwnd_reduction(sk, true, true);
 
 		DBGUNDO(sk, "partial loss");
-		tp->lost_out = 0;
-		tcp_undo_cwr(sk, true);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
 		if (frto_undo)
 			NET_INC_STATS_BH(sock_net(sk),
 					 LINUX_MIB_TCPSPURIOUSRTOS);
 		inet_csk(sk)->icsk_retransmits = 0;
-		tp->undo_marker = 0;
 		if (frto_undo || tcp_is_sack(tp))
 			tcp_set_ca_state(sk, TCP_CA_Open);
 		return true;
@@ -2669,6 +2643,35 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
 	tcp_xmit_retransmit_queue(sk);
 }
 
+/* Undo during fast recovery after partial ACK. */
+static bool tcp_try_undo_partial(struct sock *sk, int acked)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	/* Partial ACK arrived. Force Hoe's retransmit. */
+	bool failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
+
+	if (tcp_may_undo(tp)) {
+		/* Plain luck! Hole is filled with delayed
+		 * packet, rather than with a retransmit.
+		 */
+		if (!tcp_any_retrans_done(sk))
+			tp->retrans_stamp = 0;
+
+		tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
+
+		DBGUNDO(sk, "Hoe");
+		tcp_undo_cwnd_reduction(sk, false, false);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
+
+		/* So... Do not make Hoe's retransmit yet.
+		 * If the first packet was delayed, the remaining
+		 * ones are most probably delayed as well.
+		 */
+		failed = false;
+	}
+	return failed;
+}
+
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
  * taking into account both packets sitting in receiver's buffer and
@@ -2686,7 +2689,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
+	bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
 				    (tcp_fackets_out(tp) > tp->reordering));
 	int fast_rexmit = 0;
 
-- 
1.8.2.1


* [PATCH 3/4 net-next] tcp: fix undo on partial ack in recovery
  2013-05-30  0:20 [PATCH 1/4 net-next] tcp: consolidate PRR packet accounting Yuchung Cheng
  2013-05-30  0:20 ` [PATCH 2/4 net-next] tcp: refactor undo functions Yuchung Cheng
@ 2013-05-30  0:20 ` Yuchung Cheng
  2013-05-30  0:49   ` Neal Cardwell
  2013-05-31  1:07   ` David Miller
  2013-05-30  0:20 ` [PATCH 4/4 net-next] tcp: undo on DSACK during recovery Yuchung Cheng
                   ` (2 subsequent siblings)
From: Yuchung Cheng @ 2013-05-30  0:20 UTC
  To: davem, ncardwell, edumazet, nanditad; +Cc: aterzis, netdev, Yuchung Cheng

Upon detecting spurious fast retransmits via timestamps during
recovery, use PRR to clock out new data packets instead of
retransmissions. Once all retransmissions are proven spurious, the
sender reverts the cwnd reduction and the congestion state to Open
or Disorder.

The current code does the opposite: it undoes the cwnd as soon as any
retransmission is proven spurious and continues to retransmit until
all data are acked. This defeats the purpose of the undo, because the
sender is still retransmitting spuriously. This patch fixes it. The
undo_ssthresh argument of tcp_undo_cwnd_reduction() is no longer
needed and is removed.
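
In outline, the new tcp_try_undo_partial() behaves as follows (a
sketch of the control flow; see the diff below for the exact code):

    if (tp->undo_marker && tcp_packet_delayed(tp)) {
            tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
            if (tp->retrans_out) {
                    /* Retransmits still in flight: keep the cwnd
                     * reduction and let PRR clock out new data.
                     */
                    tcp_cwnd_reduction(sk, prior_unsacked, 0);
                    return true;
            }
            /* All retransmits proven spurious: revert the reduction. */
            tcp_undo_cwnd_reduction(sk, true);
            tcp_try_keep_open(sk);
            return true;
    }
    return false;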

Signed-off-by: Yuchung Cheng <ycheng@google.com>
---
 net/ipv4/tcp_input.c | 59 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fcb668d..c35b227 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2243,8 +2243,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 #define DBGUNDO(x...) do { } while (0)
 #endif
 
-static void tcp_undo_cwnd_reduction(struct sock *sk, const bool undo_ssthresh,
-				    bool unmark_loss)
+static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2268,7 +2267,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, const bool undo_ssthresh,
 		else
 			tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
 
-		if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) {
+		if (tp->prior_ssthresh > tp->snd_ssthresh) {
 			tp->snd_ssthresh = tp->prior_ssthresh;
 			TCP_ECN_withdraw_cwr(tp);
 		}
@@ -2276,9 +2275,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, const bool undo_ssthresh,
 		tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
 	}
 	tp->snd_cwnd_stamp = tcp_time_stamp;
-
-	if (undo_ssthresh)
-		tp->undo_marker = 0;
+	tp->undo_marker = 0;
 }
 
 static inline bool tcp_may_undo(const struct tcp_sock *tp)
@@ -2298,7 +2295,7 @@ static bool tcp_try_undo_recovery(struct sock *sk)
 		 * or our original transmission succeeded.
 		 */
 		DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
-		tcp_undo_cwnd_reduction(sk, true, false);
+		tcp_undo_cwnd_reduction(sk, false);
 		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
 			mib_idx = LINUX_MIB_TCPLOSSUNDO;
 		else
@@ -2324,7 +2321,7 @@ static void tcp_try_undo_dsack(struct sock *sk)
 
 	if (tp->undo_marker && !tp->undo_retrans) {
 		DBGUNDO(sk, "D-SACK");
-		tcp_undo_cwnd_reduction(sk, true, false);
+		tcp_undo_cwnd_reduction(sk, false);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
 	}
 }
@@ -2364,7 +2361,7 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (frto_undo || tcp_may_undo(tp)) {
-		tcp_undo_cwnd_reduction(sk, true, true);
+		tcp_undo_cwnd_reduction(sk, true);
 
 		DBGUNDO(sk, "partial loss");
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
@@ -2644,32 +2641,37 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
 }
 
 /* Undo during fast recovery after partial ACK. */
-static bool tcp_try_undo_partial(struct sock *sk, int acked)
+static bool tcp_try_undo_partial(struct sock *sk, const int acked,
+				 const int prior_unsacked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	/* Partial ACK arrived. Force Hoe's retransmit. */
-	bool failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
 
-	if (tcp_may_undo(tp)) {
+	if (tp->undo_marker && tcp_packet_delayed(tp)) {
 		/* Plain luck! Hole is filled with delayed
 		 * packet, rather than with a retransmit.
 		 */
+		tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
+
+		/* We are getting evidence that the reordering degree is higher
+		 * than we realized. If there are no retransmits out then we
+		 * can undo. Otherwise we clock out new packets but do not
+		 * mark more packets lost or retransmit more.
+		 */
+		if (tp->retrans_out) {
+			tcp_cwnd_reduction(sk, prior_unsacked, 0);
+			return true;
+		}
+
 		if (!tcp_any_retrans_done(sk))
 			tp->retrans_stamp = 0;
 
-		tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
-
-		DBGUNDO(sk, "Hoe");
-		tcp_undo_cwnd_reduction(sk, false, false);
+		DBGUNDO(sk, "partial recovery");
+		tcp_undo_cwnd_reduction(sk, true);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
-
-		/* So... Do not make Hoe's retransmit yet.
-		 * If the first packet was delayed, the remaining
-		 * ones are most probably delayed as well.
-		 */
-		failed = false;
+		tcp_try_keep_open(sk);
+		return true;
 	}
-	return failed;
+	return false;
 }
 
 /* Process an event, which can update packets-in-flight not trivially.
@@ -2742,8 +2744,13 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 		if (!(flag & FLAG_SND_UNA_ADVANCED)) {
 			if (tcp_is_reno(tp) && is_dupack)
 				tcp_add_reno_sack(sk);
-		} else
-			do_lost = tcp_try_undo_partial(sk, acked);
+		} else {
+			if (tcp_try_undo_partial(sk, acked, prior_unsacked))
+				return;
+			/* Partial ACK arrived. Force fast retransmit. */
+			do_lost = tcp_is_reno(tp) ||
+				  tcp_fackets_out(tp) > tp->reordering;
+		}
 		break;
 	case TCP_CA_Loss:
 		tcp_process_loss(sk, flag, is_dupack);
-- 
1.8.2.1


* [PATCH 4/4 net-next] tcp: undo on DSACK during recovery
  2013-05-30  0:20 [PATCH 1/4 net-next] tcp: consolidate PRR packet accounting Yuchung Cheng
  2013-05-30  0:20 ` [PATCH 2/4 net-next] tcp: refactor undo functions Yuchung Cheng
  2013-05-30  0:20 ` [PATCH 3/4 net-next] tcp: fix undo on partial ack in recovery Yuchung Cheng
@ 2013-05-30  0:20 ` Yuchung Cheng
  2013-05-30  0:50   ` Neal Cardwell
  2013-05-31  1:07   ` David Miller
  2013-05-30  0:41 ` [PATCH 1/4 net-next] tcp: consolidate PRR packet accounting Neal Cardwell
  2013-05-31  1:06 ` David Miller
From: Yuchung Cheng @ 2013-05-30  0:20 UTC
  To: davem, ncardwell, edumazet, nanditad; +Cc: aterzis, netdev, Yuchung Cheng

If the receiver supports DSACK, the sender can detect false
recoveries and revert cwnd reductions triggered either by severe
network reordering or by concurrent reordering and loss events.
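
Concretely, while in fast recovery tcp_fastretrans_alert() now
attempts the D-SACK undo and exits recovery on success (sketch; see
the diff below):

    if (tcp_try_undo_dsack(sk)) {   /* all retransmits D-SACKed */
            tcp_try_keep_open(sk);
            return;
    }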

Signed-off-by: Yuchung Cheng <ycheng@google.com>
---
 net/ipv4/tcp_input.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c35b227..907311c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2315,7 +2315,7 @@ static bool tcp_try_undo_recovery(struct sock *sk)
 }
 
 /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
-static void tcp_try_undo_dsack(struct sock *sk)
+static bool tcp_try_undo_dsack(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2323,7 +2323,9 @@ static void tcp_try_undo_dsack(struct sock *sk)
 		DBGUNDO(sk, "D-SACK");
 		tcp_undo_cwnd_reduction(sk, false);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
+		return true;
 	}
+	return false;
 }
 
 /* We can clear retrans_stamp when there are no retransmissions in the
@@ -2751,6 +2753,10 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 			do_lost = tcp_is_reno(tp) ||
 				  tcp_fackets_out(tp) > tp->reordering;
 		}
+		if (tcp_try_undo_dsack(sk)) {
+			tcp_try_keep_open(sk);
+			return;
+		}
 		break;
 	case TCP_CA_Loss:
 		tcp_process_loss(sk, flag, is_dupack);
-- 
1.8.2.1


* Re: [PATCH 1/4 net-next] tcp: consolidate PRR packet accounting
  2013-05-30  0:20 [PATCH 1/4 net-next] tcp: consolidate PRR packet accounting Yuchung Cheng
                   ` (2 preceding siblings ...)
  2013-05-30  0:20 ` [PATCH 4/4 net-next] tcp: undo on DSACK during recovery Yuchung Cheng
@ 2013-05-30  0:41 ` Neal Cardwell
  2013-05-31  1:06 ` David Miller
From: Neal Cardwell @ 2013-05-30  0:41 UTC
  To: Yuchung Cheng
  Cc: David Miller, Eric Dumazet, Nandita Dukkipati, Andreas Terzis,
	Netdev

On Wed, May 29, 2013 at 8:20 PM, Yuchung Cheng <ycheng@google.com> wrote:
> This patch series fixes an undo bug in fast recovery: the sender
> mistakenly undoes the cwnd too early but continues fast retransmits
> until all pending data are acked. This also multiplies the SNMP
> stat PARTIALUNDO event count by the degree of network reordering.
>
> The first patch prepares the fix by consolidating the accounting
> of newly_acked_sacked in tcp_cwnd_reduction(), instead of updating
> newly_acked_sacked every time sacked_out is adjusted.  It also passes
> acked and prior_unsacked as const because they are read-only in the
> rest of the recovery processing.
>
> Signed-off-by: Yuchung Cheng <ycheng@google.com>

Acked-by: Neal Cardwell <ncardwell@google.com>


* Re: [PATCH 2/4 net-next] tcp: refactor undo functions
  2013-05-30  0:20 ` [PATCH 2/4 net-next] tcp: refactor undo functions Yuchung Cheng
@ 2013-05-30  0:44   ` Neal Cardwell
  2013-05-31  1:07   ` David Miller
From: Neal Cardwell @ 2013-05-30  0:44 UTC
  To: Yuchung Cheng
  Cc: David Miller, Eric Dumazet, Nandita Dukkipati, Andreas Terzis,
	Netdev

On Wed, May 29, 2013 at 8:20 PM, Yuchung Cheng <ycheng@google.com> wrote:
> Refactor and relocate various functions and variables to prepare for
> the undo fix.  Remove some unused function arguments. Rename
> tcp_undo_cwr() to tcp_undo_cwnd_reduction() to be consistent with the
> rest of the CWR-related function names.
>
> Signed-off-by: Yuchung Cheng <ycheng@google.com>

Acked-by: Neal Cardwell <ncardwell@google.com>


* Re: [PATCH 3/4 net-next] tcp: fix undo on partial ack in recovery
  2013-05-30  0:20 ` [PATCH 3/4 net-next] tcp: fix undo on partial ack in recovery Yuchung Cheng
@ 2013-05-30  0:49   ` Neal Cardwell
  2013-05-31  1:07   ` David Miller
From: Neal Cardwell @ 2013-05-30  0:49 UTC
  To: Yuchung Cheng
  Cc: David Miller, Eric Dumazet, Nandita Dukkipati, Andreas Terzis,
	Netdev

On Wed, May 29, 2013 at 8:20 PM, Yuchung Cheng <ycheng@google.com> wrote:
> Upon detecting spurious fast retransmits via timestamps during
> recovery, use PRR to clock out new data packets instead of
> retransmissions. Once all retransmissions are proven spurious, the
> sender reverts the cwnd reduction and the congestion state to Open
> or Disorder.
>
> The current code does the opposite: it undoes the cwnd as soon as any
> retransmission is proven spurious and continues to retransmit until
> all data are acked. This defeats the purpose of the undo, because the
> sender is still retransmitting spuriously. This patch fixes it. The
> undo_ssthresh argument of tcp_undo_cwnd_reduction() is no longer
> needed and is removed.
>
> Signed-off-by: Yuchung Cheng <ycheng@google.com>

Acked-by: Neal Cardwell <ncardwell@google.com>


* Re: [PATCH 4/4 net-next] tcp: undo on DSACK during recovery
  2013-05-30  0:20 ` [PATCH 4/4 net-next] tcp: undo on DSACK during recovery Yuchung Cheng
@ 2013-05-30  0:50   ` Neal Cardwell
  2013-05-31  1:07   ` David Miller
From: Neal Cardwell @ 2013-05-30  0:50 UTC
  To: Yuchung Cheng
  Cc: David Miller, Eric Dumazet, Nandita Dukkipati, Andreas Terzis,
	Netdev

On Wed, May 29, 2013 at 8:20 PM, Yuchung Cheng <ycheng@google.com> wrote:
> If the receiver supports DSACK, the sender can detect false
> recoveries and revert cwnd reductions triggered either by severe
> network reordering or by concurrent reordering and loss events.
>
> Signed-off-by: Yuchung Cheng <ycheng@google.com>

Acked-by: Neal Cardwell <ncardwell@google.com>


* Re: [PATCH 1/4 net-next] tcp: consolidate PRR packet accounting
  2013-05-30  0:20 [PATCH 1/4 net-next] tcp: consolidate PRR packet accounting Yuchung Cheng
                   ` (3 preceding siblings ...)
  2013-05-30  0:41 ` [PATCH 1/4 net-next] tcp: consolidate PRR packet accounting Neal Cardwell
@ 2013-05-31  1:06 ` David Miller
From: David Miller @ 2013-05-31  1:06 UTC
  To: ycheng; +Cc: ncardwell, edumazet, nanditad, aterzis, netdev

From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 29 May 2013 17:20:11 -0700

> This patch series fixes an undo bug in fast recovery: the sender
> mistakenly undoes the cwnd too early but continues fast retransmits
> until all pending data are acked. This also multiplies the SNMP
> stat PARTIALUNDO event count by the degree of network reordering.
> 
> The first patch prepares the fix by consolidating the accounting
> of newly_acked_sacked in tcp_cwnd_reduction(), instead of updating
> newly_acked_sacked every time sacked_out is adjusted.  It also passes
> acked and prior_unsacked as const because they are read-only in the
> rest of the recovery processing.
> 
> Signed-off-by: Yuchung Cheng <ycheng@google.com>

Applied.


* Re: [PATCH 2/4 net-next] tcp: refactor undo functions
  2013-05-30  0:20 ` [PATCH 2/4 net-next] tcp: refactor undo functions Yuchung Cheng
  2013-05-30  0:44   ` Neal Cardwell
@ 2013-05-31  1:07   ` David Miller
From: David Miller @ 2013-05-31  1:07 UTC
  To: ycheng; +Cc: ncardwell, edumazet, nanditad, aterzis, netdev

From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 29 May 2013 17:20:12 -0700

> Refactor and relocate various functions and variables to prepare for
> the undo fix.  Remove some unused function arguments. Rename
> tcp_undo_cwr() to tcp_undo_cwnd_reduction() to be consistent with the
> rest of the CWR-related function names.
> 
> Signed-off-by: Yuchung Cheng <ycheng@google.com>

Applied, but:

> -static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
> +static void tcp_undo_cwnd_reduction(struct sock *sk, const bool undo_ssthresh,
> +				    bool unmark_loss)

Both bool arguments seem const to me.  Please audit the rest of the
functions you changed and send a cleanup patch to consistently use
const wherever possible.
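
i.e., something like:

    static void tcp_undo_cwnd_reduction(struct sock *sk,
                                        const bool undo_ssthresh,
                                        const bool unmark_loss)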

Thanks.


* Re: [PATCH 3/4 net-next] tcp: fix undo on partial ack in recovery
  2013-05-30  0:20 ` [PATCH 3/4 net-next] tcp: fix undo on partial ack in recovery Yuchung Cheng
  2013-05-30  0:49   ` Neal Cardwell
@ 2013-05-31  1:07   ` David Miller
From: David Miller @ 2013-05-31  1:07 UTC
  To: ycheng; +Cc: ncardwell, edumazet, nanditad, aterzis, netdev

From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 29 May 2013 17:20:13 -0700

> Upon detecting spurious fast retransmits via timestamps during
> recovery, use PRR to clock out new data packets instead of
> retransmissions. Once all retransmissions are proven spurious, the
> sender reverts the cwnd reduction and the congestion state to Open
> or Disorder.
> 
> The current code does the opposite: it undoes the cwnd as soon as any
> retransmission is proven spurious and continues to retransmit until
> all data are acked. This defeats the purpose of the undo, because the
> sender is still retransmitting spuriously. This patch fixes it. The
> undo_ssthresh argument of tcp_undo_cwnd_reduction() is no longer
> needed and is removed.
> 
> Signed-off-by: Yuchung Cheng <ycheng@google.com>

Applied.


* Re: [PATCH 4/4 net-next] tcp: undo on DSACK during recovery
  2013-05-30  0:20 ` [PATCH 4/4 net-next] tcp: undo on DSACK during recovery Yuchung Cheng
  2013-05-30  0:50   ` Neal Cardwell
@ 2013-05-31  1:07   ` David Miller
From: David Miller @ 2013-05-31  1:07 UTC
  To: ycheng; +Cc: ncardwell, edumazet, nanditad, aterzis, netdev

From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 29 May 2013 17:20:14 -0700

> If the receiver supports DSACK, the sender can detect false
> recoveries and revert cwnd reductions triggered either by severe
> network reordering or by concurrent reordering and loss events.
> 
> Signed-off-by: Yuchung Cheng <ycheng@google.com>

Applied.

