From: atwellwea@gmail.com
To: netdev@vger.kernel.org, davem@davemloft.net, kuba@kernel.org,
pabeni@redhat.com, edumazet@google.com, ncardwell@google.com
Cc: linux-kernel@vger.kernel.org, linux-api@vger.kernel.org,
linux-doc@vger.kernel.org, linux-kselftest@vger.kernel.org,
linux-trace-kernel@vger.kernel.org, mptcp@lists.linux.dev,
dsahern@kernel.org, horms@kernel.org, kuniyu@google.com,
andrew+netdev@lunn.ch, willemdebruijn.kernel@gmail.com,
jasowang@redhat.com, skhan@linuxfoundation.org, corbet@lwn.net,
matttbe@kernel.org, martineau@kernel.org, geliang@kernel.org,
rostedt@goodmis.org, mhiramat@kernel.org,
mathieu.desnoyers@efficios.com, 0x7f454c46@gmail.com
Subject: [PATCH net-next v2 04/14] tcp: snapshot the maximum advertised receive window
Date: Sat, 14 Mar 2026 14:13:38 -0600 [thread overview]
Message-ID: <20260314201348.1786972-5-atwellwea@gmail.com> (raw)
In-Reply-To: <20260314201348.1786972-1-atwellwea@gmail.com>
From: Wesley Atwell <atwellwea@gmail.com>
Track the maximum sender-visible receive-window right edge separately
from the live rwnd, along with the scaling basis that was in force when
that larger window was advertised.
This gives later admission and restore paths enough information to
reason about retracted windows without losing the original
sender-visible bound.
Signed-off-by: Wesley Atwell <atwellwea@gmail.com>
---
.../networking/net_cachelines/tcp_sock.rst | 1 +
include/linux/tcp.h | 1 +
include/net/tcp.h | 21 ++++++++++++++++++-
net/ipv4/tcp.c | 1 +
net/ipv4/tcp_fastopen.c | 2 +-
net/ipv4/tcp_input.c | 4 ++--
net/ipv4/tcp_minisocks.c | 2 +-
net/ipv4/tcp_output.c | 2 +-
8 files changed, 28 insertions(+), 6 deletions(-)
diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst
index 09ece1c59c2d..d58a3b1eb55d 100644
--- a/Documentation/networking/net_cachelines/tcp_sock.rst
+++ b/Documentation/networking/net_cachelines/tcp_sock.rst
@@ -11,6 +11,7 @@ Type Name fastpath_tx_access fastpa
struct inet_connection_sock inet_conn
u16 tcp_header_len read_mostly read_mostly tcp_bound_to_half_wnd,tcp_current_mss(tx);tcp_rcv_established(rx)
u16 gso_segs read_mostly tcp_xmit_size_goal
+u8 rcv_mwnd_scaling_ratio read_write read_mostly tcp_init_max_rcv_wnd_seq,tcp_update_max_rcv_wnd_seq,tcp_repair_set_window,do_tcp_getsockopt
u8 rcv_wnd_scaling_ratio read_write read_mostly tcp_set_rcv_wnd,tcp_can_ingest,tcp_repair_set_window,do_tcp_getsockopt
__be32 pred_flags read_write read_mostly tcp_select_window(tx);tcp_rcv_established(rx)
u64 bytes_received read_write tcp_rcv_nxt_update(rx)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 2ace563d59d6..e5d7a65ac439 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -297,6 +297,7 @@ struct tcp_sock {
est_ecnfield:2,/* ECN field for AccECN delivered estimates */
accecn_opt_demand:2,/* Demand AccECN option for n next ACKs */
prev_ecnfield:2; /* ECN bits from the previous segment */
+ u8 rcv_mwnd_scaling_ratio; /* 0 if unknown, else tp->rcv_mwnd_seq basis */
u8 rcv_wnd_scaling_ratio; /* 0 if unknown, else tp->rcv_wnd basis */
__be32 pred_flags;
u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6fa7cdb0979e..fc22ab6b80d5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -947,13 +947,21 @@ static inline u32 tcp_max_receive_window(const struct tcp_sock *tp)
return (u32) win;
}
+static inline void tcp_init_max_rcv_wnd_seq(struct tcp_sock *tp)
+{
+ tp->rcv_mwnd_seq = tp->rcv_wup + tp->rcv_wnd;
+ tp->rcv_mwnd_scaling_ratio = tp->rcv_wnd_scaling_ratio;
+}
+
/* Check if we need to update the maximum receive window sequence number */
static inline void tcp_update_max_rcv_wnd_seq(struct tcp_sock *tp)
{
u32 wre = tp->rcv_wup + tp->rcv_wnd;
- if (after(wre, tp->rcv_mwnd_seq))
+ if (after(wre, tp->rcv_mwnd_seq)) {
tp->rcv_mwnd_seq = wre;
+ tp->rcv_mwnd_scaling_ratio = tp->rcv_wnd_scaling_ratio;
+ }
}
/* Choose a new window, without checks for shrinking, and without
@@ -1766,6 +1774,16 @@ static inline bool tcp_space_from_rcv_wnd(const struct tcp_sock *tp, int win,
space);
}
+/* Same as tcp_space_from_rcv_wnd(), but for the remembered maximum
+ * sender-visible receive window.
+ */
+static inline bool tcp_space_from_rcv_mwnd(const struct tcp_sock *tp, int win,
+ int *space)
+{
+ return tcp_space_from_wnd_snapshot(tp->rcv_mwnd_scaling_ratio, win,
+ space);
+}
+
/* Assume a 50% default for skb->len/skb->truesize ratio.
* This may be adjusted later in tcp_measure_rcv_mss().
*/
@@ -1776,6 +1794,7 @@ static inline void tcp_scaling_ratio_init(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
tp->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
+ tp->rcv_mwnd_scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
tp->rcv_wnd_scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0383ee8d3b78..66706dbb90f5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5275,6 +5275,7 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ce);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ecn_bytes);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_mwnd_scaling_ratio);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd_scaling_ratio);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_mwnd_seq);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 4e389d609f91..56113cf2a165 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -377,7 +377,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
tcp_rsk(req)->rcv_nxt = tp->rcv_nxt;
tp->rcv_wup = tp->rcv_nxt;
- tp->rcv_mwnd_seq = tp->rcv_wup + tp->rcv_wnd;
+ tcp_init_max_rcv_wnd_seq(tp);
/* tcp_conn_request() is sending the SYNACK,
* and queues the child into listener accept queue.
*/
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b8e65e31255e..352f814a4ff6 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6902,7 +6902,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
*/
WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
- tp->rcv_mwnd_seq = tp->rcv_wup + tp->rcv_wnd;
+ tcp_init_max_rcv_wnd_seq(tp);
/* RFC1323: The window in SYN & SYN/ACK segments is
* never scaled.
@@ -7015,7 +7015,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
- tp->rcv_mwnd_seq = tp->rcv_wup + tp->rcv_wnd;
+ tcp_init_max_rcv_wnd_seq(tp);
/* RFC1323: The window in SYN & SYN/ACK segments is
* never scaled.
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 1c02c9cd13fe..85bd9580caf9 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -604,7 +604,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->window_clamp = req->rsk_window_clamp;
newtp->rcv_ssthresh = req->rsk_rcv_wnd;
tcp_set_rcv_wnd(newtp, req->rsk_rcv_wnd);
- newtp->rcv_mwnd_seq = newtp->rcv_wup + req->rsk_rcv_wnd;
+ tcp_init_max_rcv_wnd_seq(newtp);
newtp->rx_opt.wscale_ok = ireq->wscale_ok;
if (newtp->rx_opt.wscale_ok) {
newtp->rx_opt.snd_wscale = ireq->snd_wscale;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0b082726d7c4..57a2a6daaad3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -4171,7 +4171,7 @@ static void tcp_connect_init(struct sock *sk)
else
tp->rcv_tstamp = tcp_jiffies32;
tp->rcv_wup = tp->rcv_nxt;
- tp->rcv_mwnd_seq = tp->rcv_nxt + tp->rcv_wnd;
+ tcp_init_max_rcv_wnd_seq(tp);
WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
--
2.43.0
next prev parent reply other threads:[~2026-03-14 20:14 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-14 20:13 [PATCH net-next v2 00/14] tcp: preserve receive-window accounting across ratio drift atwellwea
2026-03-14 20:13 ` [PATCH net-next v2 01/14] tcp: factor receive-memory accounting helpers atwellwea
2026-03-14 20:13 ` [PATCH net-next v2 02/14] tcp: snapshot advertise-time scaling for rcv_wnd atwellwea
2026-03-14 20:13 ` [PATCH net-next v2 03/14] tcp: refresh rcv_wnd snapshots at TCP write sites atwellwea
2026-03-14 20:13 ` atwellwea [this message]
2026-03-14 20:13 ` [PATCH net-next v2 05/14] tcp: grow rcvbuf to back scaled-window quantization slack atwellwea
2026-03-16 11:04 ` Paolo Abeni
2026-03-16 11:24 ` Paolo Abeni
2026-03-16 11:31 ` Paolo Abeni
2026-03-14 20:13 ` [PATCH net-next v2 06/14] tcp: regrow rcvbuf when scaling_ratio drops after advertisement atwellwea
2026-03-14 20:13 ` [PATCH net-next v2 07/14] tcp: honor the maximum advertised window after live retraction atwellwea
2026-03-16 11:44 ` Paolo Abeni
2026-03-14 20:13 ` [PATCH net-next v2 08/14] tcp: extend TCP_REPAIR_WINDOW for live and max-window snapshots atwellwea
2026-03-14 20:13 ` [PATCH net-next v2 09/14] mptcp: refresh TCP receive-window snapshots on subflows atwellwea
2026-03-14 20:13 ` [PATCH net-next v2 10/14] tcp: expose rmem and backlog in tcp and mptcp rcvbuf_grow tracepoints atwellwea
2026-03-14 20:13 ` [PATCH net-next v2 11/14] selftests: tcp_ao: cover legacy, v1, and retracted repair windows atwellwea
2026-03-14 20:13 ` [PATCH net-next v2 12/14] tun/selftests: add RX truesize injection for TCP window tests atwellwea
2026-03-15 1:18 ` Jakub Kicinski
2026-03-14 20:13 ` [PATCH net-next v2 13/14] netdevsim: add peer RX truesize support for selftests atwellwea
2026-03-15 1:18 ` Jakub Kicinski
2026-03-14 20:13 ` [PATCH net-next v2 14/14] netdevsim: release pinned PSP ext on drop paths atwellwea
2026-03-15 1:19 ` [PATCH net-next v2 00/14] tcp: preserve receive-window accounting across ratio drift Jakub Kicinski
2026-03-16 11:09 ` Paolo Abeni
[not found] ` <CAN=sVvyNpkyok_bt8eQSmqc4f7g7QoZBUmRmNRLoFz1HasEzMA@mail.gmail.com>
2026-03-16 17:47 ` Paolo Abeni
2026-03-16 18:03 ` Wesley Atwell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260314201348.1786972-5-atwellwea@gmail.com \
--to=atwellwea@gmail.com \
--cc=0x7f454c46@gmail.com \
--cc=andrew+netdev@lunn.ch \
--cc=corbet@lwn.net \
--cc=davem@davemloft.net \
--cc=dsahern@kernel.org \
--cc=edumazet@google.com \
--cc=geliang@kernel.org \
--cc=horms@kernel.org \
--cc=jasowang@redhat.com \
--cc=kuba@kernel.org \
--cc=kuniyu@google.com \
--cc=linux-api@vger.kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=martineau@kernel.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=matttbe@kernel.org \
--cc=mhiramat@kernel.org \
--cc=mptcp@lists.linux.dev \
--cc=ncardwell@google.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=rostedt@goodmis.org \
--cc=skhan@linuxfoundation.org \
--cc=willemdebruijn.kernel@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox