From: Eric Dumazet <edumazet@google.com>
To: "David S . Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>,
Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <horms@kernel.org>,
Neal Cardwell <ncardwell@google.com>,
Kuniyuki Iwashima <kuniyu@google.com>,
netdev@vger.kernel.org, eric.dumazet@gmail.com,
Eric Dumazet <edumazet@google.com>
Subject: [PATCH v2 net] tcp: fix stale per-CPU tcp_tw_isn leak enabling ISN prediction
Date: Tue, 12 May 2026 17:52:50 +0000 [thread overview]
Message-ID: <20260512175250.346486-1-edumazet@google.com> (raw)
The blamed commit moved the TIME_WAIT-derived ISN from the skb control
block to a per-CPU variable, assuming the value would always be consumed
by tcp_conn_request() for the same packet that wrote it. That assumption
is violated by multiple drop paths between the producer
(__this_cpu_write(tcp_tw_isn, isn) in tcp_v{4,6}_rcv()) and the consumer
(tcp_conn_request()):
- min_ttl / min_hopcount check
- xfrm policy check
- tcp_inbound_hash() MD5/AO mismatch
- tcp_filter() eBPF/SO_ATTACH_FILTER drop
- th->syn && th->fin discard in tcp_rcv_state_process() TCP_LISTEN
- psp_sk_rx_policy_check() in tcp_v{4,6}_do_rcv()
- tcp_checksum_complete() in tcp_v{4,6}_do_rcv()
- tcp_v{4,6}_cookie_check() returning NULL
When a packet is dropped on any of these paths, tcp_tw_isn is left set.
The next SYN processed on the same CPU then consumes the non-zero value in
tcp_conn_request(), receiving a predictable ISN.
We could fix this by clearing tcp_tw_isn at the start of
tcp_v{4,6}_do_rcv(), at the expense of a slower fast path.
This patch moves tcp_tw_isn back into the skb, but not into skb->cb[],
which was the original problem. Instead, union it with skb->mark /
skb->reserved_tailroom, which are unused in the TCP receive path.
Fixes: 41eecbd712b7 ("tcp: replace TCP_SKB_CB(skb)->tcp_tw_isn with a per-cpu field")
Reported-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
v2: get rid of the per-cpu storage, this was too much hassle.
include/linux/skbuff.h | 1 +
include/net/tcp.h | 3 ---
net/ipv4/tcp.c | 3 ---
net/ipv4/tcp_input.c | 15 ++++++---------
net/ipv4/tcp_ipv4.c | 5 ++---
net/ipv4/tcp_minisocks.c | 5 +++--
net/ipv6/tcp_ipv6.c | 5 ++---
7 files changed, 14 insertions(+), 23 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2bcf78a4de7b9edb0d1342319d4340c0a9997eeb..93c6072c97b65ebabb5dd0245edff0728ff082d1 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1066,6 +1066,7 @@ struct sk_buff {
union {
__u32 mark;
__u32 reserved_tailroom;
+ u32 tcp_tw_isn;
};
union {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ecbadcb3a7446cb18c245e670ba49ff574dfaff7..74835c51e0d55ee9b35d422d442f951e37d95cbc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -65,8 +65,6 @@ static inline void tcp_orphan_count_dec(void)
this_cpu_dec(tcp_orphan_count);
}
-DECLARE_PER_CPU(u32, tcp_tw_isn);
-
void tcp_time_wait(struct sock *sk, int state, int timeo);
#define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER)
@@ -479,7 +477,6 @@ enum tcp_tw_status {
enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
struct sk_buff *skb,
const struct tcphdr *th,
- u32 *tw_isn,
enum skb_drop_reason *drop_reason);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req, bool fastopen,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 432fa28e47d4c8ef5d50339bfdf7da0ea8772b94..389a7cc17110daa5b3b490b3c339e53e212969f8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -299,9 +299,6 @@ enum {
DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
-DEFINE_PER_CPU(u32, tcp_tw_isn);
-EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn);
-
long sysctl_tcp_mem[3] __read_mostly;
DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d5c9e65d97606d8eb57aba8ebc2373adf1bed62b..203660dc94efddfb6a79db8b5fce4d54dbe0b4ab 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -7594,25 +7594,22 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct net *net = sock_net(sk);
struct sock *fastopen_sk = NULL;
union tcp_seq_and_ts_off st;
+ u32 isn = skb->tcp_tw_isn;
struct request_sock *req;
bool want_cookie = false;
struct dst_entry *dst;
struct flowi fl;
u8 syncookies;
- u32 isn;
#ifdef CONFIG_TCP_AO
const struct tcp_ao_hdr *aoh;
#endif
- isn = __this_cpu_read(tcp_tw_isn);
- if (isn) {
- /* TW buckets are converted to open requests without
- * limitations, they conserve resources and peer is
- * evidently real one.
- */
- __this_cpu_write(tcp_tw_isn, 0);
- } else {
+ /* TW buckets are converted to open requests without
+ * limitations, they conserve resources and peer is
+ * evidently real one.
+ */
+ if (!isn) {
syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
if (syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c0526cc0398049fb34b5de20a1175d54942e80cd..719cfab3cd0296652791b9720fc25c9e437ca403 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2077,7 +2077,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
struct sock *sk = NULL;
bool refcounted;
int ret;
- u32 isn;
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (skb->pkt_type != PACKET_HOST)
@@ -2198,6 +2197,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
}
}
+ skb->tcp_tw_isn = 0;
process:
if (static_branch_unlikely(&ip4_min_ttl)) {
/* min_ttl can be changed concurrently from do_ip_setsockopt() */
@@ -2299,7 +2299,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
goto csum_error;
}
- tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn,
+ tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th,
&drop_reason);
switch (tw_status) {
case TCP_TW_SYN: {
@@ -2313,7 +2313,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
sk = sk2;
tcp_v4_restore_cb(skb);
refcounted = false;
- __this_cpu_write(tcp_tw_isn, isn);
goto process;
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e6092c3ac840bdc1f62d4435c414e7f79edc10c2..8396f396f326336a31bde408dfffcdd7de95187b 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -99,7 +99,7 @@ static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq,
*/
enum tcp_tw_status
tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
- const struct tcphdr *th, u32 *tw_isn,
+ const struct tcphdr *th,
enum skb_drop_reason *drop_reason)
{
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
@@ -255,9 +255,10 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
(tmp_opt.saw_tstamp &&
(s32)(READ_ONCE(tcptw->tw_ts_recent) - tmp_opt.rcv_tsval) < 0))) {
u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
+
if (isn == 0)
isn++;
- *tw_isn = isn;
+ skb->tcp_tw_isn = isn;
return TCP_TW_SYN;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d13d49bfef19457cc5902cb556605a80f4c0ab2c..208ec75703df17d5e53e133b888b6b93a5ec04e6 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1722,7 +1722,6 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
struct sock *sk = NULL;
bool refcounted;
int ret;
- u32 isn;
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (skb->pkt_type != PACKET_HOST)
@@ -1839,6 +1838,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
}
}
+ skb->tcp_tw_isn = 0;
process:
if (static_branch_unlikely(&ip6_min_hopcount)) {
/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
@@ -1937,7 +1937,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
goto csum_error;
}
- tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn,
+ tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th,
&drop_reason);
switch (tw_status) {
case TCP_TW_SYN:
@@ -1956,7 +1956,6 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
sk = sk2;
tcp_v6_restore_cb(skb);
refcounted = false;
- __this_cpu_write(tcp_tw_isn, isn);
goto process;
}
--
2.54.0.563.g4f69b47b94-goog
next reply other threads:[~2026-05-12 17:52 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-12 17:52 Eric Dumazet [this message]
2026-05-15 0:25 ` [PATCH v2 net] tcp: fix stale per-CPU tcp_tw_isn leak enabling ISN prediction Jakub Kicinski
2026-05-15 7:07 ` Eric Dumazet
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260512175250.346486-1-edumazet@google.com \
--to=edumazet@google.com \
--cc=davem@davemloft.net \
--cc=eric.dumazet@gmail.com \
--cc=horms@kernel.org \
--cc=kuba@kernel.org \
--cc=kuniyu@google.com \
--cc=ncardwell@google.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.