From: David Morley <morleyd.kernel@gmail.com>
To: David Miller <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>
Cc: netdev@vger.kernel.org, David Morley <morleyd@google.com>,
Neal Cardwell <ncardwell@google.com>,
Yuchung Cheng <ycheng@google.com>
Subject: [PATCH net-next 1/2] tcp: record last received ipv6 flowlabel
Date: Wed, 27 Sep 2023 18:27:45 +0000 [thread overview]
Message-ID: <20230927182747.2005960-1-morleyd.kernel@gmail.com> (raw)
From: David Morley <morleyd@google.com>
In order to better estimate whether a data packet has been
retransmitted or is the result of a TLP, we save the last received
ipv6 flowlabel.
To make space for this field we resize the "ato" field in
inet_connection_sock as the current value of TCP_DELACK_MAX can be
fully contained in 8 bits and add a compile_time_assert ensuring this
field is the required size.
Signed-off-by: David Morley <morleyd@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Tested-by: David Morley <morleyd@google.com>
---
include/net/inet_connection_sock.h | 5 ++++-
include/net/tcp.h | 2 ++
net/ipv4/tcp_input.c | 15 +++++++++++++++
net/ipv4/tcp_timer.c | 2 +-
4 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 5d2fcc137b88..d6d9d1c1985a 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -114,7 +114,10 @@ struct inet_connection_sock {
__u8 quick; /* Scheduled number of quick acks */
__u8 pingpong; /* The session is interactive */
__u8 retry; /* Number of attempts */
- __u32 ato; /* Predicted tick of soft clock */
+ #define ATO_BITS 8
+ __u32 ato:ATO_BITS, /* Predicted tick of soft clock */
+ lrcv_flowlabel:20, /* last received ipv6 flowlabel */
+ unused:4;
unsigned long timeout; /* Currently scheduled timeout */
__u32 lrcvtime; /* timestamp of last received data packet */
__u16 last_seg_size; /* Size of last incoming segment */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 91688d0dadcd..8a3720c7d082 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -131,6 +131,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */
#define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */
+static_assert(1<<ATO_BITS > TCP_DELACK_MAX);
+
#if HZ >= 100
#define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */
#define TCP_ATO_MIN ((unsigned)(HZ/25))
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 584825ddd0a0..abe7494361c0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -765,6 +765,16 @@ void tcp_rcv_space_adjust(struct sock *sk)
tp->rcvq_space.time = tp->tcp_mstamp;
}
+static void tcp_save_lrcv_flowlabel(struct sock *sk, const struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (skb->protocol == htons(ETH_P_IPV6))
+ icsk->icsk_ack.lrcv_flowlabel = ntohl(ip6_flowlabel(ipv6_hdr(skb)));
+#endif
+}
+
/* There is something which you must keep in mind when you analyze the
* behavior of the tp->ato delayed ack timeout interval. When a
* connection starts up, we want to ack as quickly as possible. The
@@ -813,6 +823,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
}
}
icsk->icsk_ack.lrcvtime = now;
+ tcp_save_lrcv_flowlabel(sk, skb);
tcp_ecn_check_ce(sk, skb);
@@ -4506,6 +4517,9 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq &&
sk_rethink_txhash(sk))
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
+
+ /* Save last flowlabel after a spurious retrans. */
+ tcp_save_lrcv_flowlabel(sk, skb);
}
static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
@@ -4822,6 +4836,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
u32 seq, end_seq;
bool fragstolen;
+ tcp_save_lrcv_flowlabel(sk, skb);
tcp_ecn_check_ce(sk, skb);
if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 3f61c6a70a1f..0862b73dd3b5 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -322,7 +322,7 @@ void tcp_delack_timer_handler(struct sock *sk)
if (inet_csk_ack_scheduled(sk)) {
if (!inet_csk_in_pingpong_mode(sk)) {
/* Delayed ACK missed: inflate ATO. */
- icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
+ icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, icsk->icsk_rto);
} else {
/* Delayed ACK missed: leave pingpong mode and
* deflate ATO.
--
2.42.0.582.g8ccd20d70d-goog
next reply other threads:[~2023-09-27 18:28 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-09-27 18:27 David Morley [this message]
2023-09-27 18:27 ` [PATCH net-next 2/2] tcp: change data receiver flowlabel after one dup David Morley
2023-09-29 0:54 ` Eric Dumazet
2023-09-29 0:52 ` [PATCH net-next 1/2] tcp: record last received ipv6 flowlabel Eric Dumazet
2023-09-29 5:57 ` kernel test robot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230927182747.2005960-1-morleyd.kernel@gmail.com \
--to=morleyd.kernel@gmail.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=morleyd@google.com \
--cc=ncardwell@google.com \
--cc=netdev@vger.kernel.org \
--cc=ycheng@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).