All of lore.kernel.org
 help / color / mirror / Atom feed
From: atwellwea@gmail.com
To: netdev@vger.kernel.org, davem@davemloft.net, kuba@kernel.org,
	pabeni@redhat.com, edumazet@google.com, ncardwell@google.com
Cc: linux-kernel@vger.kernel.org, linux-api@vger.kernel.org,
	linux-doc@vger.kernel.org, linux-kselftest@vger.kernel.org,
	linux-trace-kernel@vger.kernel.org, mptcp@lists.linux.dev,
	dsahern@kernel.org, horms@kernel.org, kuniyu@google.com,
	andrew+netdev@lunn.ch, willemdebruijn.kernel@gmail.com,
	jasowang@redhat.com, skhan@linuxfoundation.org, corbet@lwn.net,
	matttbe@kernel.org, martineau@kernel.org, geliang@kernel.org,
	rostedt@goodmis.org, mhiramat@kernel.org,
	mathieu.desnoyers@efficios.com, 0x7f454c46@gmail.com
Subject: [PATCH net-next v2 07/14] tcp: honor the maximum advertised window after live retraction
Date: Sat, 14 Mar 2026 14:13:41 -0600	[thread overview]
Message-ID: <20260314201348.1786972-8-atwellwea@gmail.com> (raw)
In-Reply-To: <20260314201348.1786972-1-atwellwea@gmail.com>

From: Wesley Atwell <atwellwea@gmail.com>

If receive-side accounting retracts the live rwnd below a larger
sender-visible window that was already advertised, allow one in-order
skb within that historical bound to try regrowing sk_rcvbuf to back it
and then reach the normal receive path.

Hard receive-memory admission is still enforced through the existing
prune and collapse path. The rescue only changes how data already
inside sender-visible sequence space is classified and backed.

Signed-off-by: Wesley Atwell <atwellwea@gmail.com>
---
 net/ipv4/tcp_input.c | 92 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 86 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d76e4e4c0e57..4b9309c37e99 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5376,24 +5376,86 @@ static void tcp_ofo_queue(struct sock *sk)
 static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb);
 static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb);
 
+/* Sequence checks run against the sender-visible receive window before this
+ * point. If later receive-side accounting retracts the live receive window
+ * below the maximum right edge we already advertised, allow one in-order skb
+ * which still fits inside that sender-visible bound to reach the normal
+ * receive queue path.
+ *
+ * Keep receive-memory admission itself on the legacy hard-cap path so prune
+ * and collapse behavior stay aligned with the established retracted-window
+ * handling.
+ */
+static bool tcp_skb_in_retracted_window(const struct tcp_sock *tp,
+					const struct sk_buff *skb)
+{
+	u32 live_end = tp->rcv_nxt + tcp_receive_window(tp);
+	u32 max_end = tp->rcv_nxt + tcp_max_receive_window(tp);
+
+	return after(max_end, live_end) &&
+	       after(TCP_SKB_CB(skb)->end_seq, live_end) &&
+	       !after(TCP_SKB_CB(skb)->end_seq, max_end);
+}
+
 static bool tcp_can_ingest(const struct sock *sk, const struct sk_buff *skb)
 {
-	unsigned int rmem = atomic_read(&sk->sk_rmem_alloc);
+	return tcp_rmem_used(sk) <= READ_ONCE(sk->sk_rcvbuf);
+}
+
+/* Caller already established that @skb extends into the retracted-but-still-
+ * valid sender-visible window. For in-order progress, regrow sk_rcvbuf before
+ * falling into prune/forced-mem handling.
+ *
+ * This path intentionally repairs backing for one in-order skb that is already
+ * within sender-visible sequence space, rather than treating it like ordinary
+ * receive-buffer autotuning.
+ *
+ * Keep this rescue bounded to the span accepted by this skb instead of the
+ * full historical tp->rcv_mwnd_seq. However, never request less than
+ * skb->truesize, because sk_rmem_schedule() still charges hard memory, not
+ * sender-visible window bytes.
+ */
+static void tcp_try_grow_retracted_skb(struct sock *sk,
+				       const struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int needed = skb->truesize;
+	int span_space;
+	u32 span_win;
+
+	if (TCP_SKB_CB(skb)->seq != tp->rcv_nxt)
+		return;
+
+	span_win = TCP_SKB_CB(skb)->end_seq - tp->rcv_nxt;
+	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+		span_win--;
+
+	if (tcp_space_from_rcv_mwnd(tp, span_win, &span_space))
+		needed = max_t(int, needed, span_space);
 
-	return rmem <= sk->sk_rcvbuf;
+	tcp_try_grow_rcvbuf(sk, needed);
 }
 
+/* Sender-visible window rescue does not relax hard receive-memory admission.
+ * If growth did not make room, fall back to the established prune/collapse
+ * path.
+ */
 static int tcp_try_rmem_schedule(struct sock *sk, const struct sk_buff *skb,
 				 unsigned int size)
 {
-	if (!tcp_can_ingest(sk, skb) ||
-	    !sk_rmem_schedule(sk, skb, size)) {
+	bool can_ingest = tcp_can_ingest(sk, skb);
+	bool scheduled = can_ingest && sk_rmem_schedule(sk, skb, size);
+
+	if (!scheduled) {
+		int pruned = tcp_prune_queue(sk, skb);
 
-		if (tcp_prune_queue(sk, skb) < 0)
+		if (pruned < 0)
 			return -1;
 
 		while (!sk_rmem_schedule(sk, skb, size)) {
-			if (!tcp_prune_ofo_queue(sk, skb))
+			bool pruned_ofo = tcp_prune_ofo_queue(sk, skb);
+
+			if (!pruned_ofo)
 				return -1;
 		}
 	}
@@ -5629,6 +5691,7 @@ void tcp_data_ready(struct sock *sk)
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	bool retracted;
 	enum skb_drop_reason reason;
 	bool fragstolen;
 	int eaten;
@@ -5647,6 +5710,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	}
 	tcp_cleanup_skb(skb);
 	__skb_pull(skb, tcp_hdr(skb)->doff * 4);
+	retracted = skb->len && tcp_skb_in_retracted_window(tp, skb);
 
 	reason = SKB_DROP_REASON_NOT_SPECIFIED;
 	tp->rx_opt.dsack = 0;
@@ -5667,6 +5731,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 			    (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
 				goto queue_and_out;
 
+			if (retracted)
+				goto queue_and_out;
+
 			reason = SKB_DROP_REASON_TCP_ZEROWINDOW;
 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
 			goto out_of_window;
@@ -5674,7 +5741,20 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 
 		/* Ok. In sequence. In window. */
 queue_and_out:
+		if (unlikely(retracted))
+			tcp_try_grow_retracted_skb(sk, skb);
+
 		if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
+			/* If the live rwnd collapsed to zero while rescuing an
+			 * skb that still fit in sender-visible sequence space,
+			 * report zero-window rather than generic proto-mem.
+			 */
+			if (unlikely(!tcp_receive_window(tp) && retracted)) {
+				reason = SKB_DROP_REASON_TCP_ZEROWINDOW;
+				NET_INC_STATS(sock_net(sk),
+					      LINUX_MIB_TCPZEROWINDOWDROP);
+				goto out_of_window;
+			}
 			/* TODO: maybe ratelimit these WIN 0 ACK ? */
 			inet_csk(sk)->icsk_ack.pending |=
 					(ICSK_ACK_NOMEM | ICSK_ACK_NOW);
-- 
2.43.0


  parent reply	other threads:[~2026-03-14 20:14 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-14 20:13 [PATCH net-next v2 00/14] tcp: preserve receive-window accounting across ratio drift atwellwea
2026-03-14 20:13 ` [PATCH net-next v2 01/14] tcp: factor receive-memory accounting helpers atwellwea
2026-03-14 20:13 ` [PATCH net-next v2 02/14] tcp: snapshot advertise-time scaling for rcv_wnd atwellwea
2026-03-14 20:13 ` [PATCH net-next v2 03/14] tcp: refresh rcv_wnd snapshots at TCP write sites atwellwea
2026-03-14 20:13 ` [PATCH net-next v2 04/14] tcp: snapshot the maximum advertised receive window atwellwea
2026-03-14 20:13 ` [PATCH net-next v2 05/14] tcp: grow rcvbuf to back scaled-window quantization slack atwellwea
2026-03-16 11:04   ` Paolo Abeni
2026-03-16 11:24   ` Paolo Abeni
2026-03-16 11:31   ` Paolo Abeni
2026-03-14 20:13 ` [PATCH net-next v2 06/14] tcp: regrow rcvbuf when scaling_ratio drops after advertisement atwellwea
2026-03-14 20:13 ` atwellwea [this message]
2026-03-16 11:44   ` [PATCH net-next v2 07/14] tcp: honor the maximum advertised window after live retraction Paolo Abeni
2026-03-14 20:13 ` [PATCH net-next v2 08/14] tcp: extend TCP_REPAIR_WINDOW for live and max-window snapshots atwellwea
2026-03-14 20:13 ` [PATCH net-next v2 09/14] mptcp: refresh TCP receive-window snapshots on subflows atwellwea
2026-03-14 20:13 ` [PATCH net-next v2 10/14] tcp: expose rmem and backlog in tcp and mptcp rcvbuf_grow tracepoints atwellwea
2026-03-14 20:13 ` [PATCH net-next v2 11/14] selftests: tcp_ao: cover legacy, v1, and retracted repair windows atwellwea
2026-03-14 20:13 ` [PATCH net-next v2 12/14] tun/selftests: add RX truesize injection for TCP window tests atwellwea
2026-03-15  1:18   ` Jakub Kicinski
2026-03-14 20:13 ` [PATCH net-next v2 13/14] netdevsim: add peer RX truesize support for selftests atwellwea
2026-03-15  1:18   ` Jakub Kicinski
2026-03-14 20:13 ` [PATCH net-next v2 14/14] netdevsim: release pinned PSP ext on drop paths atwellwea
2026-03-15  1:19 ` [PATCH net-next v2 00/14] tcp: preserve receive-window accounting across ratio drift Jakub Kicinski
2026-03-16 11:09 ` Paolo Abeni
     [not found]   ` <CAN=sVvyNpkyok_bt8eQSmqc4f7g7QoZBUmRmNRLoFz1HasEzMA@mail.gmail.com>
2026-03-16 17:47     ` Paolo Abeni
2026-03-16 18:03       ` Wesley Atwell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260314201348.1786972-8-atwellwea@gmail.com \
    --to=atwellwea@gmail.com \
    --cc=0x7f454c46@gmail.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=corbet@lwn.net \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=edumazet@google.com \
    --cc=geliang@kernel.org \
    --cc=horms@kernel.org \
    --cc=jasowang@redhat.com \
    --cc=kuba@kernel.org \
    --cc=kuniyu@google.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-trace-kernel@vger.kernel.org \
    --cc=martineau@kernel.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=matttbe@kernel.org \
    --cc=mhiramat@kernel.org \
    --cc=mptcp@lists.linux.dev \
    --cc=ncardwell@google.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=rostedt@goodmis.org \
    --cc=skhan@linuxfoundation.org \
    --cc=willemdebruijn.kernel@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.