[net-next 05/13] tcp: use sequence to break TS ties for RACK loss detection

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Yuchung Cheng <ycheng@google.com>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, edumazet@google.com,
	ncardwell@google.com, nanditad@google.com,
	Yuchung Cheng <ycheng@google.com>
Subject: [net-next 05/13] tcp: use sequence to break TS ties for RACK loss detection
Date: Thu, 12 Jan 2017 22:03:46 -0800	[thread overview]
Message-ID: <20170113060354.85234-6-ycheng@google.com> (raw)
In-Reply-To: <20170113060354.85234-1-ycheng@google.com>

The packets inside a jumbo skb (e.g., TSO) share the same skb
timestamp, even though they are sent sequentially on the wire. Since
RACK is based on time, it cannot detect that some packets inside the
same skb are lost.  However, we can leverage the packet sequence
numbers as extended timestamps to detect losses. Therefore, when
the RACK timestamp is identical to the skb's timestamp (i.e., one of the
packets of the skb is acked or sacked), we use the sequence numbers
of the acked and unacked packets to break ties.

We can use the same sequence logic to advance the RACK xmit time as
well, to detect more losses and avoid timeouts.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
---
 include/linux/tcp.h     |  1 +
 include/net/tcp.h       |  2 +-
 net/ipv4/tcp_input.c    |  5 +++--
 net/ipv4/tcp_recovery.c | 17 ++++++++++++++---
 4 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 1255c592719c..970d5f00589f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -208,6 +208,7 @@ struct tcp_sock {
 	struct tcp_rack {
 		struct skb_mstamp mstamp; /* (Re)sent time of the skb */
 		u32 rtt_us;  /* Associated RTT */
+		u32 end_seq; /* Ending TCP sequence of the skb */
 		u8 advanced; /* mstamp advanced since last lost marking */
 		u8 reord;    /* reordering detected */
 	} rack;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 64fcdeb3358b..5fb1e75a32a9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1867,7 +1867,7 @@ extern int sysctl_tcp_recovery;
 #define TCP_RACK_LOST_RETRANS  0x1
 
 extern void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now);
-extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked,
+extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
 			     const struct skb_mstamp *xmit_time,
 			     const struct skb_mstamp *ack_time);
 extern void tcp_rack_reo_timeout(struct sock *sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index be1191829963..e42ca11c0326 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1218,7 +1218,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
 		return sacked;
 
 	if (!(sacked & TCPCB_SACKED_ACKED)) {
-		tcp_rack_advance(tp, sacked, xmit_time, &state->ack_time);
+		tcp_rack_advance(tp, sacked, end_seq,
+				 xmit_time, &state->ack_time);
 
 		if (sacked & TCPCB_SACKED_RETRANS) {
 			/* If the segment is not tagged as lost,
@@ -3171,7 +3172,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		} else if (tcp_is_sack(tp)) {
 			tp->delivered += acked_pcount;
 			if (!tcp_skb_spurious_retrans(tp, skb))
-				tcp_rack_advance(tp, sacked,
+				tcp_rack_advance(tp, sacked, scb->end_seq,
 						 &skb->skb_mstamp,
 						 &sack->ack_time);
 		}
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index eb39b1b6d1dc..1e330a2f913d 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -16,6 +16,14 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
 	}
 }
 
+static bool tcp_rack_sent_after(const struct skb_mstamp *t1,
+				const struct skb_mstamp *t2,
+				u32 seq1, u32 seq2)
+{
+	return skb_mstamp_after(t1, t2) ||
+	       (t1->v64 == t2->v64 && after(seq1, seq2));
+}
+
 /* Marks a packet lost, if some packet sent later has been (s)acked.
  * The underlying idea is similar to the traditional dupthresh and FACK
  * but they look at different metrics:
@@ -60,7 +68,8 @@ static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now,
 		    scb->sacked & TCPCB_SACKED_ACKED)
 			continue;
 
-		if (skb_mstamp_after(&tp->rack.mstamp, &skb->skb_mstamp)) {
+		if (tcp_rack_sent_after(&tp->rack.mstamp, &skb->skb_mstamp,
+					tp->rack.end_seq, scb->end_seq)) {
 			/* Step 3 in draft-cheng-tcpm-rack-00.txt:
 			 * A packet is lost if its elapsed time is beyond
 			 * the recent RTT plus the reordering window.
@@ -113,14 +122,15 @@ void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now)
  * This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from
  * draft-cheng-tcpm-rack-00.txt
  */
-void tcp_rack_advance(struct tcp_sock *tp, u8 sacked,
+void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
 		      const struct skb_mstamp *xmit_time,
 		      const struct skb_mstamp *ack_time)
 {
 	u32 rtt_us;
 
 	if (tp->rack.mstamp.v64 &&
-	    !skb_mstamp_after(xmit_time, &tp->rack.mstamp))
+	    !tcp_rack_sent_after(xmit_time, &tp->rack.mstamp,
+				 end_seq, tp->rack.end_seq))
 		return;
 
 	rtt_us = skb_mstamp_us_delta(ack_time, xmit_time);
@@ -140,6 +150,7 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked,
 	}
 	tp->rack.rtt_us = rtt_us;
 	tp->rack.mstamp = *xmit_time;
+	tp->rack.end_seq = end_seq;
 	tp->rack.advanced = 1;
 }
 
-- 
2.11.0.483.g087da7b7c-goog

next prev parent reply	other threads:[~2017-01-13  6:04 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-13  6:03 [net-next 00/13] RACK fast recovery Yuchung Cheng
2017-01-13  6:03 ` [net-next 01/13] tcp: new helper function for RACK loss detection Yuchung Cheng
2017-01-13  6:07   ` Yuchung Cheng
2017-01-13  6:03 ` [net-next 02/13] tcp: new helper for RACK to detect loss Yuchung Cheng
2017-01-13  6:03 ` [net-next 03/13] tcp: record most recent RTT in RACK loss detection Yuchung Cheng
2017-01-13  6:03 ` [net-next 04/13] tcp: add reordering timer " Yuchung Cheng
2017-01-13  6:03 ` Yuchung Cheng [this message]
2017-01-13  6:03 ` [net-next 06/13] tcp: check undo conditions before detecting losses Yuchung Cheng
2017-01-13  6:03 ` [net-next 07/13] tcp: enable RACK loss detection to trigger recovery Yuchung Cheng
2017-01-13  6:03 ` [net-next 08/13] tcp: extend F-RTO to catch more spurious timeouts Yuchung Cheng
2017-01-13  6:03 ` [net-next 09/13] tcp: remove forward retransmit feature Yuchung Cheng
2017-01-13  6:03 ` [net-next 10/13] tcp: remove early retransmit Yuchung Cheng
2017-01-13  6:03 ` [net-next 11/13] tcp: remove RFC4653 NCR Yuchung Cheng
2017-01-13  6:03 ` [net-next 12/13] tcp: remove thin_dupack feature Yuchung Cheng
2017-01-13  6:03 ` [net-next 13/13] tcp: disable fack by default Yuchung Cheng

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:1255c592719 dfblob:970d5f00589 dfblob:64fcdeb3358
dfblob:5fb1e75a32a dfblob:be119182996 dfblob:e42ca11c032
dfblob:eb39b1b6d1d dfblob:1e330a2f913 )
 OR (
bs:"[net-next 05/13] tcp: use sequence to break TS ties for RACK loss detection" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170113060354.85234-6-ycheng@google.com \
    --to=ycheng@google.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=nanditad@google.com \
    --cc=ncardwell@google.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).