Netdev List
 help / color / mirror / Atom feed
* [PATCH net] tcp: secure_seq: add back ports to TS offset
@ 2026-03-02 20:55 Eric Dumazet
  2026-03-02 21:47 ` Kuniyuki Iwashima
                   ` (4 more replies)
  0 siblings, 5 replies; 13+ messages in thread
From: Eric Dumazet @ 2026-03-02 20:55 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Neal Cardwell, Kuniyuki Iwashima, Willy Tarreau,
	netdev, eric.dumazet, Eric Dumazet, Zhouyan Deng,
	Florian Westphal

This reverts 28ee1b746f49 ("secure_seq: downgrade to per-host timestamp offsets")

tcp_tw_recycle went away in 2017.

Zhouyan Deng reported off-path TCP source port leakage via
SYN cookie side-channel that can be fixed in multiple ways.

One of them is to bring back TCP ports in TS offset randomization.

As a bonus, we perform a single siphash() computation
to provide both an ISN and a TS offset.

Fixes: 28ee1b746f49 ("secure_seq: downgrade to per-host timestamp offsets")
Reported-by: Zhouyan Deng <dengzhouyan_nwpu@163.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
---
 include/net/secure_seq.h | 45 ++++++++++++++++++----
 include/net/tcp.h        |  6 ++-
 net/core/secure_seq.c    | 80 +++++++++++++++-------------------------
 net/ipv4/syncookies.c    | 11 ++++--
 net/ipv4/tcp_input.c     |  8 +++-
 net/ipv4/tcp_ipv4.c      | 37 +++++++++----------
 net/ipv6/syncookies.c    | 11 ++++--
 net/ipv6/tcp_ipv6.c      | 37 +++++++++----------
 8 files changed, 127 insertions(+), 108 deletions(-)

diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index cddebafb9f779ebd5d9c02e8ff26c13b5697c7d1..6f996229167b3c3f7861b2d5693ef81b5eed0d74 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -5,16 +5,47 @@
 #include <linux/types.h>
 
 struct net;
+extern struct net init_net;
+
+union tcp_seq_and_ts_off {
+	struct {
+		u32 seq;
+		u32 ts_off;
+	};
+	u64 hash64;
+};
 
 u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
 u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 			       __be16 dport);
-u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
-		   __be16 sport, __be16 dport);
-u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr);
-u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
-		     __be16 sport, __be16 dport);
-u32 secure_tcpv6_ts_off(const struct net *net,
-			const __be32 *saddr, const __be32 *daddr);
+union tcp_seq_and_ts_off
+secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr,
+			  __be16 sport, __be16 dport);
+
+static inline u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
+				 __be16 sport, __be16 dport)
+{
+	union tcp_seq_and_ts_off ts;
+
+	ts = secure_tcp_seq_and_ts_off(&init_net, saddr, daddr,
+				       sport, dport);
+
+	return ts.seq;
+}
+
+union tcp_seq_and_ts_off
+secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr,
+			    const __be32 *daddr,
+			    __be16 sport, __be16 dport);
+
+static inline u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
+				   __be16 sport, __be16 dport)
+{
+	union tcp_seq_and_ts_off ts;
+
+	ts = secure_tcpv6_seq_and_ts_off(&init_net, saddr, daddr,
+					 sport, dport);
 
+	return ts.seq;
+}
 #endif /* _NET_SECURE_SEQ */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index eb8bf63fdafc3243469f293fd06aef0ce086c5a4..978eea2d5df04f378dceb251025bee3101120f69 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -43,6 +43,7 @@
 #include <net/dst.h>
 #include <net/mptcp.h>
 #include <net/xfrm.h>
+#include <net/secure_seq.h>
 
 #include <linux/seq_file.h>
 #include <linux/memcontrol.h>
@@ -2464,8 +2465,9 @@ struct tcp_request_sock_ops {
 				       struct flowi *fl,
 				       struct request_sock *req,
 				       u32 tw_isn);
-	u32 (*init_seq)(const struct sk_buff *skb);
-	u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb);
+	union tcp_seq_and_ts_off (*init_seq_and_ts_off)(
+					const struct net *net,
+					const struct sk_buff *skb);
 	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
 			   struct flowi *fl, struct request_sock *req,
 			   struct tcp_fastopen_cookie *foc,
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 9a39656804513dcef0888d280d8289913ef27eea..6a6f2cda5aaef82074718439920c75a75592e967 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -20,7 +20,6 @@
 #include <net/tcp.h>
 
 static siphash_aligned_key_t net_secret;
-static siphash_aligned_key_t ts_secret;
 
 #define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ)
 
@@ -28,11 +27,6 @@ static __always_inline void net_secret_init(void)
 {
 	net_get_random_once(&net_secret, sizeof(net_secret));
 }
-
-static __always_inline void ts_secret_init(void)
-{
-	net_get_random_once(&ts_secret, sizeof(ts_secret));
-}
 #endif
 
 #ifdef CONFIG_INET
@@ -53,28 +47,9 @@ static u32 seq_scale(u32 seq)
 #endif
 
 #if IS_ENABLED(CONFIG_IPV6)
-u32 secure_tcpv6_ts_off(const struct net *net,
-			const __be32 *saddr, const __be32 *daddr)
-{
-	const struct {
-		struct in6_addr saddr;
-		struct in6_addr daddr;
-	} __aligned(SIPHASH_ALIGNMENT) combined = {
-		.saddr = *(struct in6_addr *)saddr,
-		.daddr = *(struct in6_addr *)daddr,
-	};
-
-	if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
-		return 0;
-
-	ts_secret_init();
-	return siphash(&combined, offsetofend(typeof(combined), daddr),
-		       &ts_secret);
-}
-EXPORT_IPV6_MOD(secure_tcpv6_ts_off);
-
-u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
-		     __be16 sport, __be16 dport)
+union tcp_seq_and_ts_off
+secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr,
+			    const __be32 *daddr, __be16 sport, __be16 dport)
 {
 	const struct {
 		struct in6_addr saddr;
@@ -87,14 +62,20 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
 		.sport = sport,
 		.dport = dport
 	};
-	u32 hash;
+	union tcp_seq_and_ts_off st;
 
 	net_secret_init();
-	hash = siphash(&combined, offsetofend(typeof(combined), dport),
-		       &net_secret);
-	return seq_scale(hash);
+
+	st.hash64 = siphash(&combined, offsetofend(typeof(combined), dport),
+			    &net_secret);
+
+	if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
+		st.ts_off = 0;
+
+	st.seq = seq_scale(st.seq);
+	return st;
 }
-EXPORT_SYMBOL(secure_tcpv6_seq);
+EXPORT_SYMBOL(secure_tcpv6_seq_and_ts_off);
 
 u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 			       __be16 dport)
@@ -118,33 +99,30 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
 #endif
 
 #ifdef CONFIG_INET
-u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
-{
-	if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
-		return 0;
-
-	ts_secret_init();
-	return siphash_2u32((__force u32)saddr, (__force u32)daddr,
-			    &ts_secret);
-}
-
 /* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
  * but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
  * it would be easy enough to have the former function use siphash_4u32, passing
  * the arguments as separate u32.
  */
-u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
-		   __be16 sport, __be16 dport)
+union tcp_seq_and_ts_off
+secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr,
+			  __be16 sport, __be16 dport)
 {
-	u32 hash;
+	u32 ports = (__force u32)sport << 16 | (__force u32)dport;
+	union tcp_seq_and_ts_off st;
 
 	net_secret_init();
-	hash = siphash_3u32((__force u32)saddr, (__force u32)daddr,
-			    (__force u32)sport << 16 | (__force u32)dport,
-			    &net_secret);
-	return seq_scale(hash);
+
+	st.hash64 = siphash_3u32((__force u32)saddr, (__force u32)daddr,
+				 ports, &net_secret);
+
+	if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
+		st.ts_off = 0;
+
+	st.seq = seq_scale(st.seq);
+	return st;
 }
-EXPORT_SYMBOL_GPL(secure_tcp_seq);
+EXPORT_SYMBOL_GPL(secure_tcp_seq_and_ts_off);
 
 u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
 {
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 061751aabc8e16c5d536a19f7b920d1bca2b0f4f..fc3affd9c8014b1d4e9f161421a7753717cdcd73 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -378,9 +378,14 @@ static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
 	tcp_parse_options(net, skb, &tcp_opt, 0, NULL);
 
 	if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
-		tsoff = secure_tcp_ts_off(net,
-					  ip_hdr(skb)->daddr,
-					  ip_hdr(skb)->saddr);
+		union tcp_seq_and_ts_off st;
+
+		st = secure_tcp_seq_and_ts_off(net,
+					       ip_hdr(skb)->daddr,
+					       ip_hdr(skb)->saddr,
+					       tcp_hdr(skb)->dest,
+					       tcp_hdr(skb)->source);
+		tsoff = st.ts_off;
 		tcp_opt.rcv_tsecr -= tsoff;
 	}
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7b03f2460751f366dd6cf15505e49ae26cd6466e..cba89733d1216bc2663758b4bda21984835e6055 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -7646,6 +7646,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct net *net = sock_net(sk);
 	struct sock *fastopen_sk = NULL;
+	union tcp_seq_and_ts_off st;
 	struct request_sock *req;
 	bool want_cookie = false;
 	struct dst_entry *dst;
@@ -7715,9 +7716,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	if (!dst)
 		goto drop_and_free;
 
+	if (tmp_opt.tstamp_ok || (!want_cookie && !isn))
+		st = af_ops->init_seq_and_ts_off(net, skb);
+
 	if (tmp_opt.tstamp_ok) {
 		tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst);
-		tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
+		tcp_rsk(req)->ts_off = st.ts_off;
 	}
 	if (!want_cookie && !isn) {
 		int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
@@ -7739,7 +7743,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 			goto drop_and_release;
 		}
 
-		isn = af_ops->init_seq(skb);
+		isn = st.seq;
 	}
 
 	tcp_ecn_create_request(req, skb, sk, dst);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d53d39be291a5750af3ab2a160b35f0f8a28ff9d..56c0db955177edd3fdd04d26d6cd07b5e379e7bc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -104,17 +104,14 @@ static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = {
 
 static DEFINE_MUTEX(tcp_exit_batch_mutex);
 
-static u32 tcp_v4_init_seq(const struct sk_buff *skb)
+static union tcp_seq_and_ts_off
+tcp_v4_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb)
 {
-	return secure_tcp_seq(ip_hdr(skb)->daddr,
-			      ip_hdr(skb)->saddr,
-			      tcp_hdr(skb)->dest,
-			      tcp_hdr(skb)->source);
-}
-
-static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
-{
-	return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
+	return secure_tcp_seq_and_ts_off(net,
+					 ip_hdr(skb)->daddr,
+					 ip_hdr(skb)->saddr,
+					 tcp_hdr(skb)->dest,
+					 tcp_hdr(skb)->source);
 }
 
 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -326,15 +323,16 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len
 	rt = NULL;
 
 	if (likely(!tp->repair)) {
+		union tcp_seq_and_ts_off st;
+
+		st = secure_tcp_seq_and_ts_off(net,
+					       inet->inet_saddr,
+					       inet->inet_daddr,
+					       inet->inet_sport,
+					       usin->sin_port);
 		if (!tp->write_seq)
-			WRITE_ONCE(tp->write_seq,
-				   secure_tcp_seq(inet->inet_saddr,
-						  inet->inet_daddr,
-						  inet->inet_sport,
-						  usin->sin_port));
-		WRITE_ONCE(tp->tsoffset,
-			   secure_tcp_ts_off(net, inet->inet_saddr,
-					     inet->inet_daddr));
+			WRITE_ONCE(tp->write_seq, st.seq);
+		WRITE_ONCE(tp->tsoffset, st.ts_off);
 	}
 
 	atomic_set(&inet->inet_id, get_random_u16());
@@ -1676,8 +1674,7 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
 	.cookie_init_seq =	cookie_v4_init_sequence,
 #endif
 	.route_req	=	tcp_v4_route_req,
-	.init_seq	=	tcp_v4_init_seq,
-	.init_ts_off	=	tcp_v4_init_ts_off,
+	.init_seq_and_ts_off	=	tcp_v4_init_seq_and_ts_off,
 	.send_synack	=	tcp_v4_send_synack,
 };
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 7e007f013ec827c99bcab4ceb85eb35e9242b439..4f6f0d751d6c533231ca0397319935dc90ba4dba 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -151,9 +151,14 @@ static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
 	tcp_parse_options(net, skb, &tcp_opt, 0, NULL);
 
 	if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
-		tsoff = secure_tcpv6_ts_off(net,
-					    ipv6_hdr(skb)->daddr.s6_addr32,
-					    ipv6_hdr(skb)->saddr.s6_addr32);
+		union tcp_seq_and_ts_off st;
+
+		st = secure_tcpv6_seq_and_ts_off(net,
+						 ipv6_hdr(skb)->daddr.s6_addr32,
+						 ipv6_hdr(skb)->saddr.s6_addr32,
+						 tcp_hdr(skb)->dest,
+						 tcp_hdr(skb)->source);
+		tsoff = st.ts_off;
 		tcp_opt.rcv_tsecr -= tsoff;
 	}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e46a0efae01235ae7430ed268b92cb47309b8d28..5d95f9ab86973777c29399348adcc4b07e98d2b4 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -104,18 +104,14 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 	}
 }
 
-static u32 tcp_v6_init_seq(const struct sk_buff *skb)
+static union tcp_seq_and_ts_off
+tcp_v6_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb)
 {
-	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
-				ipv6_hdr(skb)->saddr.s6_addr32,
-				tcp_hdr(skb)->dest,
-				tcp_hdr(skb)->source);
-}
-
-static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
-{
-	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
-				   ipv6_hdr(skb)->saddr.s6_addr32);
+	return secure_tcpv6_seq_and_ts_off(net,
+					   ipv6_hdr(skb)->daddr.s6_addr32,
+					   ipv6_hdr(skb)->saddr.s6_addr32,
+					   tcp_hdr(skb)->dest,
+					   tcp_hdr(skb)->source);
 }
 
 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
@@ -319,14 +315,16 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
 	sk_set_txhash(sk);
 
 	if (likely(!tp->repair)) {
+		union tcp_seq_and_ts_off st;
+
+		st = secure_tcpv6_seq_and_ts_off(net,
+						 np->saddr.s6_addr32,
+						 sk->sk_v6_daddr.s6_addr32,
+						 inet->inet_sport,
+						 inet->inet_dport);
 		if (!tp->write_seq)
-			WRITE_ONCE(tp->write_seq,
-				   secure_tcpv6_seq(np->saddr.s6_addr32,
-						    sk->sk_v6_daddr.s6_addr32,
-						    inet->inet_sport,
-						    inet->inet_dport));
-		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
-						   sk->sk_v6_daddr.s6_addr32);
+			WRITE_ONCE(tp->write_seq, st.seq);
+		tp->tsoffset = st.ts_off;
 	}
 
 	if (tcp_fastopen_defer_connect(sk, &err))
@@ -816,8 +814,7 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
 	.cookie_init_seq =	cookie_v6_init_sequence,
 #endif
 	.route_req	=	tcp_v6_route_req,
-	.init_seq	=	tcp_v6_init_seq,
-	.init_ts_off	=	tcp_v6_init_ts_off,
+	.init_seq_and_ts_off	= tcp_v6_init_seq_and_ts_off,
 	.send_synack	=	tcp_v6_send_synack,
 };
 
-- 
2.53.0.473.g4a7958ca14-goog


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH net] tcp: secure_seq: add back ports to TS offset
  2026-03-02 20:55 [PATCH net] tcp: secure_seq: add back ports to TS offset Eric Dumazet
@ 2026-03-02 21:47 ` Kuniyuki Iwashima
  2026-03-03  1:41 ` Florian Westphal
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 13+ messages in thread
From: Kuniyuki Iwashima @ 2026-03-02 21:47 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Neal Cardwell, Willy Tarreau, netdev, eric.dumazet, Zhouyan Deng,
	Florian Westphal

On Mon, Mar 2, 2026 at 12:55 PM Eric Dumazet <edumazet@google.com> wrote:
>
> This reverts 28ee1b746f49 ("secure_seq: downgrade to per-host timestamp offsets")
>
> tcp_tw_recycle went away in 2017.
>
> Zhouyan Deng reported off-path TCP source port leakage via
> SYN cookie side-channel that can be fixed in multiple ways.
>
> One of them is to bring back TCP ports in TS offset randomization.
>
> As a bonus, we perform a single siphash() computation
> to provide both an ISN and a TS offset.
>
> Fixes: 28ee1b746f49 ("secure_seq: downgrade to per-host timestamp offsets")
> Reported-by: Zhouyan Deng <dengzhouyan_nwpu@163.com>
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net] tcp: secure_seq: add back ports to TS offset
  2026-03-02 20:55 [PATCH net] tcp: secure_seq: add back ports to TS offset Eric Dumazet
  2026-03-02 21:47 ` Kuniyuki Iwashima
@ 2026-03-03  1:41 ` Florian Westphal
  2026-03-03  7:39 ` Jörg Sommer
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 13+ messages in thread
From: Florian Westphal @ 2026-03-03  1:41 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, Willy Tarreau, netdev,
	eric.dumazet, Zhouyan Deng

Eric Dumazet <edumazet@google.com> wrote:
> This reverts 28ee1b746f49 ("secure_seq: downgrade to per-host timestamp offsets")
> 
> tcp_tw_recycle went away in 2017.

Indeed.  Thanks Eric.  The original situation that prompted per-host
offsets might not be applicable anymore.

I think its worth a try.

Acked-by: Florian Westphal <fw@strlen.de>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net] tcp: secure_seq: add back ports to TS offset
  2026-03-02 20:55 [PATCH net] tcp: secure_seq: add back ports to TS offset Eric Dumazet
  2026-03-02 21:47 ` Kuniyuki Iwashima
  2026-03-03  1:41 ` Florian Westphal
@ 2026-03-03  7:39 ` Jörg Sommer
  2026-03-05  2:00 ` patchwork-bot+netdevbpf
  2026-06-06 11:04 ` xietangxin
  4 siblings, 0 replies; 13+ messages in thread
From: Jörg Sommer @ 2026-03-03  7:39 UTC (permalink / raw)
  To: Eric Dumazet, Kuniyuki Iwashima
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Neal Cardwell, Willy Tarreau, netdev, eric.dumazet, Zhouyan Deng,
	Florian Westphal

[-- Attachment #1: Type: text/plain, Size: 2496 bytes --]

Eric Dumazet schrieb am Mo 02. Mär, 20:55 (+0000):
> This reverts 28ee1b746f49 ("secure_seq: downgrade to per-host timestamp offsets")
> 
> tcp_tw_recycle went away in 2017.
> 
> Zhouyan Deng reported off-path TCP source port leakage via
> SYN cookie side-channel that can be fixed in multiple ways.
> 
> One of them is to bring back TCP ports in TS offset randomization.
> 
> As a bonus, we perform a single siphash() computation
> to provide both an ISN and a TS offset.

This sounds great! I was questioning myself if the grace period for
tcp_tw_recycle isn't over.

> @@ -118,33 +99,30 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
>  #endif
>  
>  #ifdef CONFIG_INET
> -u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
> -{
> -	if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
> -		return 0;
> -
> -	ts_secret_init();
> -	return siphash_2u32((__force u32)saddr, (__force u32)daddr,
> -			    &ts_secret);
> -}
> -
>  /* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
>   * but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
>   * it would be easy enough to have the former function use siphash_4u32, passing
>   * the arguments as separate u32.
>   */
> -u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
> -		   __be16 sport, __be16 dport)
> +union tcp_seq_and_ts_off
> +secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr,
> +			  __be16 sport, __be16 dport)
>  {
> -	u32 hash;
> +	u32 ports = (__force u32)sport << 16 | (__force u32)dport;
> +	union tcp_seq_and_ts_off st;
>  
>  	net_secret_init();
> -	hash = siphash_3u32((__force u32)saddr, (__force u32)daddr,
> -			    (__force u32)sport << 16 | (__force u32)dport,
> -			    &net_secret);
> -	return seq_scale(hash);
> +
> +	st.hash64 = siphash_3u32((__force u32)saddr, (__force u32)daddr,
> +				 ports, &net_secret);

Sorry, if this is a dump question, but does this make the ts_off unique per
connection or only per quadruple (saddr, sport, daddr, dport), i.e. the same
remote port gets the same ts_off. The documentation says ‘per connection’
and it might be helpful to say it gets the same ts_off if the addresses and
ports are the same.


Kind regards, Jörg

-- 
“Computer games don't affect kids. If Pacman would have affected us as
children, we would now run around in darkened rooms, munching yellow
pills and listening to repetetive music.”

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net] tcp: secure_seq: add back ports to TS offset
  2026-03-02 20:55 [PATCH net] tcp: secure_seq: add back ports to TS offset Eric Dumazet
                   ` (2 preceding siblings ...)
  2026-03-03  7:39 ` Jörg Sommer
@ 2026-03-05  2:00 ` patchwork-bot+netdevbpf
  2026-06-06 11:04 ` xietangxin
  4 siblings, 0 replies; 13+ messages in thread
From: patchwork-bot+netdevbpf @ 2026-03-05  2:00 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: davem, kuba, pabeni, horms, ncardwell, kuniyu, w, netdev,
	eric.dumazet, dengzhouyan_nwpu, fw

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Mon,  2 Mar 2026 20:55:27 +0000 you wrote:
> This reverts 28ee1b746f49 ("secure_seq: downgrade to per-host timestamp offsets")
> 
> tcp_tw_recycle went away in 2017.
> 
> Zhouyan Deng reported off-path TCP source port leakage via
> SYN cookie side-channel that can be fixed in multiple ways.
> 
> [...]

Here is the summary with links:
  - [net] tcp: secure_seq: add back ports to TS offset
    https://git.kernel.org/netdev/net/c/165573e41f2f

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net] tcp: secure_seq: add back ports to TS offset
  2026-03-02 20:55 [PATCH net] tcp: secure_seq: add back ports to TS offset Eric Dumazet
                   ` (3 preceding siblings ...)
  2026-03-05  2:00 ` patchwork-bot+netdevbpf
@ 2026-06-06 11:04 ` xietangxin
  2026-06-08  8:51   ` Eric Dumazet
  4 siblings, 1 reply; 13+ messages in thread
From: xietangxin @ 2026-06-06 11:04 UTC (permalink / raw)
  To: Eric Dumazet, David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Neal Cardwell, Kuniyuki Iwashima, Willy Tarreau,
	netdev, eric.dumazet, Zhouyan Deng, Florian Westphal



On 3/3/2026 4:55 AM, Eric Dumazet wrote:
> This reverts 28ee1b746f49 ("secure_seq: downgrade to per-host timestamp offsets")
> 
> tcp_tw_recycle went away in 2017.
> 
> Zhouyan Deng reported off-path TCP source port leakage via
> SYN cookie side-channel that can be fixed in multiple ways.
> 
> One of them is to bring back TCP ports in TS offset randomization.
> 
> As a bonus, we perform a single siphash() computation
> to provide both an ISN and a TS offset.
> 
> Fixes: 28ee1b746f49 ("secure_seq: downgrade to per-host timestamp offsets")
> Reported-by: Zhouyan Deng <dengzhouyan_nwpu@163.com>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Florian Westphal <fw@strlen.de>
> ---
>  include/net/secure_seq.h | 45 ++++++++++++++++++----
>  include/net/tcp.h        |  6 ++-
>  net/core/secure_seq.c    | 80 +++++++++++++++-------------------------
>  net/ipv4/syncookies.c    | 11 ++++--
>  net/ipv4/tcp_input.c     |  8 +++-
>  net/ipv4/tcp_ipv4.c      | 37 +++++++++----------
>  net/ipv6/syncookies.c    | 11 ++++--
>  net/ipv6/tcp_ipv6.c      | 37 +++++++++----------
>  8 files changed, 127 insertions(+), 108 deletions(-)
> 
> diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
> index cddebafb9f779ebd5d9c02e8ff26c13b5697c7d1..6f996229167b3c3f7861b2d5693ef81b5eed0d74 100644
> --- a/include/net/secure_seq.h
> +++ b/include/net/secure_seq.h
> @@ -5,16 +5,47 @@
>  #include <linux/types.h>
>  
>  struct net;
> +extern struct net init_net;
> +
> +union tcp_seq_and_ts_off {
> +	struct {
> +		u32 seq;
> +		u32 ts_off;
> +	};
> +	u64 hash64;
> +};
>  
>  u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
>  u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
>  			       __be16 dport);
> -u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
> -		   __be16 sport, __be16 dport);
> -u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr);
> -u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
> -		     __be16 sport, __be16 dport);
> -u32 secure_tcpv6_ts_off(const struct net *net,
> -			const __be32 *saddr, const __be32 *daddr);
> +union tcp_seq_and_ts_off
> +secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr,
> +			  __be16 sport, __be16 dport);
> +
> +static inline u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
> +				 __be16 sport, __be16 dport)
> +{
> +	union tcp_seq_and_ts_off ts;
> +
> +	ts = secure_tcp_seq_and_ts_off(&init_net, saddr, daddr,
> +				       sport, dport);
> +
> +	return ts.seq;
> +}
> +
> +union tcp_seq_and_ts_off
> +secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr,
> +			    const __be32 *daddr,
> +			    __be16 sport, __be16 dport);
> +
> +static inline u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
> +				   __be16 sport, __be16 dport)
> +{
> +	union tcp_seq_and_ts_off ts;
> +
> +	ts = secure_tcpv6_seq_and_ts_off(&init_net, saddr, daddr,
> +					 sport, dport);
>  
> +	return ts.seq;
> +}
>  #endif /* _NET_SECURE_SEQ */
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index eb8bf63fdafc3243469f293fd06aef0ce086c5a4..978eea2d5df04f378dceb251025bee3101120f69 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -43,6 +43,7 @@
>  #include <net/dst.h>
>  #include <net/mptcp.h>
>  #include <net/xfrm.h>
> +#include <net/secure_seq.h>
>  
>  #include <linux/seq_file.h>
>  #include <linux/memcontrol.h>
> @@ -2464,8 +2465,9 @@ struct tcp_request_sock_ops {
>  				       struct flowi *fl,
>  				       struct request_sock *req,
>  				       u32 tw_isn);
> -	u32 (*init_seq)(const struct sk_buff *skb);
> -	u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb);
> +	union tcp_seq_and_ts_off (*init_seq_and_ts_off)(
> +					const struct net *net,
> +					const struct sk_buff *skb);
>  	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
>  			   struct flowi *fl, struct request_sock *req,
>  			   struct tcp_fastopen_cookie *foc,
> diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
> index 9a39656804513dcef0888d280d8289913ef27eea..6a6f2cda5aaef82074718439920c75a75592e967 100644
> --- a/net/core/secure_seq.c
> +++ b/net/core/secure_seq.c
> @@ -20,7 +20,6 @@
>  #include <net/tcp.h>
>  
>  static siphash_aligned_key_t net_secret;
> -static siphash_aligned_key_t ts_secret;
>  
>  #define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ)
>  
> @@ -28,11 +27,6 @@ static __always_inline void net_secret_init(void)
>  {
>  	net_get_random_once(&net_secret, sizeof(net_secret));
>  }
> -
> -static __always_inline void ts_secret_init(void)
> -{
> -	net_get_random_once(&ts_secret, sizeof(ts_secret));
> -}
>  #endif
>  
>  #ifdef CONFIG_INET
> @@ -53,28 +47,9 @@ static u32 seq_scale(u32 seq)
>  #endif
>  
>  #if IS_ENABLED(CONFIG_IPV6)
> -u32 secure_tcpv6_ts_off(const struct net *net,
> -			const __be32 *saddr, const __be32 *daddr)
> -{
> -	const struct {
> -		struct in6_addr saddr;
> -		struct in6_addr daddr;
> -	} __aligned(SIPHASH_ALIGNMENT) combined = {
> -		.saddr = *(struct in6_addr *)saddr,
> -		.daddr = *(struct in6_addr *)daddr,
> -	};
> -
> -	if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
> -		return 0;
> -
> -	ts_secret_init();
> -	return siphash(&combined, offsetofend(typeof(combined), daddr),
> -		       &ts_secret);
> -}
> -EXPORT_IPV6_MOD(secure_tcpv6_ts_off);
> -
> -u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
> -		     __be16 sport, __be16 dport)
> +union tcp_seq_and_ts_off
> +secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr,
> +			    const __be32 *daddr, __be16 sport, __be16 dport)
>  {
>  	const struct {
>  		struct in6_addr saddr;
> @@ -87,14 +62,20 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
>  		.sport = sport,
>  		.dport = dport
>  	};
> -	u32 hash;
> +	union tcp_seq_and_ts_off st;
>  
>  	net_secret_init();
> -	hash = siphash(&combined, offsetofend(typeof(combined), dport),
> -		       &net_secret);
> -	return seq_scale(hash);
> +
> +	st.hash64 = siphash(&combined, offsetofend(typeof(combined), dport),
> +			    &net_secret);
> +
> +	if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
> +		st.ts_off = 0;
> +
> +	st.seq = seq_scale(st.seq);
> +	return st;
>  }
> -EXPORT_SYMBOL(secure_tcpv6_seq);
> +EXPORT_SYMBOL(secure_tcpv6_seq_and_ts_off);
>  
>  u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
>  			       __be16 dport)
> @@ -118,33 +99,30 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
>  #endif
>  
>  #ifdef CONFIG_INET
> -u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
> -{
> -	if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
> -		return 0;
> -
> -	ts_secret_init();
> -	return siphash_2u32((__force u32)saddr, (__force u32)daddr,
> -			    &ts_secret);
> -}
> -
>  /* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
>   * but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
>   * it would be easy enough to have the former function use siphash_4u32, passing
>   * the arguments as separate u32.
>   */
> -u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
> -		   __be16 sport, __be16 dport)
> +union tcp_seq_and_ts_off
> +secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr,
> +			  __be16 sport, __be16 dport)
>  {
> -	u32 hash;
> +	u32 ports = (__force u32)sport << 16 | (__force u32)dport;
> +	union tcp_seq_and_ts_off st;
>  
>  	net_secret_init();
> -	hash = siphash_3u32((__force u32)saddr, (__force u32)daddr,
> -			    (__force u32)sport << 16 | (__force u32)dport,
> -			    &net_secret);
> -	return seq_scale(hash);
> +
> +	st.hash64 = siphash_3u32((__force u32)saddr, (__force u32)daddr,
> +				 ports, &net_secret);
> +
> +	if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
> +		st.ts_off = 0;
> +
> +	st.seq = seq_scale(st.seq);
> +	return st;
>  }
> -EXPORT_SYMBOL_GPL(secure_tcp_seq);
> +EXPORT_SYMBOL_GPL(secure_tcp_seq_and_ts_off);
>  
>  u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
>  {
> diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
> index 061751aabc8e16c5d536a19f7b920d1bca2b0f4f..fc3affd9c8014b1d4e9f161421a7753717cdcd73 100644
> --- a/net/ipv4/syncookies.c
> +++ b/net/ipv4/syncookies.c
> @@ -378,9 +378,14 @@ static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
>  	tcp_parse_options(net, skb, &tcp_opt, 0, NULL);
>  
>  	if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
> -		tsoff = secure_tcp_ts_off(net,
> -					  ip_hdr(skb)->daddr,
> -					  ip_hdr(skb)->saddr);
> +		union tcp_seq_and_ts_off st;
> +
> +		st = secure_tcp_seq_and_ts_off(net,
> +					       ip_hdr(skb)->daddr,
> +					       ip_hdr(skb)->saddr,
> +					       tcp_hdr(skb)->dest,
> +					       tcp_hdr(skb)->source);
> +		tsoff = st.ts_off;
>  		tcp_opt.rcv_tsecr -= tsoff;
>  	}
>  
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 7b03f2460751f366dd6cf15505e49ae26cd6466e..cba89733d1216bc2663758b4bda21984835e6055 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -7646,6 +7646,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
>  	const struct tcp_sock *tp = tcp_sk(sk);
>  	struct net *net = sock_net(sk);
>  	struct sock *fastopen_sk = NULL;
> +	union tcp_seq_and_ts_off st;
>  	struct request_sock *req;
>  	bool want_cookie = false;
>  	struct dst_entry *dst;
> @@ -7715,9 +7716,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
>  	if (!dst)
>  		goto drop_and_free;
>  
> +	if (tmp_opt.tstamp_ok || (!want_cookie && !isn))
> +		st = af_ops->init_seq_and_ts_off(net, skb);
> +
>  	if (tmp_opt.tstamp_ok) {
>  		tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst);
> -		tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
> +		tcp_rsk(req)->ts_off = st.ts_off;
>  	}
>  	if (!want_cookie && !isn) {
>  		int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
> @@ -7739,7 +7743,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
>  			goto drop_and_release;
>  		}
>  
> -		isn = af_ops->init_seq(skb);
> +		isn = st.seq;
>  	}
>  
>  	tcp_ecn_create_request(req, skb, sk, dst);
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index d53d39be291a5750af3ab2a160b35f0f8a28ff9d..56c0db955177edd3fdd04d26d6cd07b5e379e7bc 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -104,17 +104,14 @@ static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = {
>  
>  static DEFINE_MUTEX(tcp_exit_batch_mutex);
>  
> -static u32 tcp_v4_init_seq(const struct sk_buff *skb)
> +static union tcp_seq_and_ts_off
> +tcp_v4_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb)
>  {
> -	return secure_tcp_seq(ip_hdr(skb)->daddr,
> -			      ip_hdr(skb)->saddr,
> -			      tcp_hdr(skb)->dest,
> -			      tcp_hdr(skb)->source);
> -}
> -
> -static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
> -{
> -	return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
> +	return secure_tcp_seq_and_ts_off(net,
> +					 ip_hdr(skb)->daddr,
> +					 ip_hdr(skb)->saddr,
> +					 tcp_hdr(skb)->dest,
> +					 tcp_hdr(skb)->source);
>  }
>  
>  int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
> @@ -326,15 +323,16 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len
>  	rt = NULL;
>  
>  	if (likely(!tp->repair)) {
> +		union tcp_seq_and_ts_off st;
> +
> +		st = secure_tcp_seq_and_ts_off(net,
> +					       inet->inet_saddr,
> +					       inet->inet_daddr,
> +					       inet->inet_sport,
> +					       usin->sin_port);
>  		if (!tp->write_seq)
> -			WRITE_ONCE(tp->write_seq,
> -				   secure_tcp_seq(inet->inet_saddr,
> -						  inet->inet_daddr,
> -						  inet->inet_sport,
> -						  usin->sin_port));
> -		WRITE_ONCE(tp->tsoffset,
> -			   secure_tcp_ts_off(net, inet->inet_saddr,
> -					     inet->inet_daddr));
> +			WRITE_ONCE(tp->write_seq, st.seq);
> +		WRITE_ONCE(tp->tsoffset, st.ts_off);
>  	}
>  
>  	atomic_set(&inet->inet_id, get_random_u16());
> @@ -1676,8 +1674,7 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
>  	.cookie_init_seq =	cookie_v4_init_sequence,
>  #endif
>  	.route_req	=	tcp_v4_route_req,
> -	.init_seq	=	tcp_v4_init_seq,
> -	.init_ts_off	=	tcp_v4_init_ts_off,
> +	.init_seq_and_ts_off	=	tcp_v4_init_seq_and_ts_off,
>  	.send_synack	=	tcp_v4_send_synack,
>  };
>  
> diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
> index 7e007f013ec827c99bcab4ceb85eb35e9242b439..4f6f0d751d6c533231ca0397319935dc90ba4dba 100644
> --- a/net/ipv6/syncookies.c
> +++ b/net/ipv6/syncookies.c
> @@ -151,9 +151,14 @@ static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
>  	tcp_parse_options(net, skb, &tcp_opt, 0, NULL);
>  
>  	if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
> -		tsoff = secure_tcpv6_ts_off(net,
> -					    ipv6_hdr(skb)->daddr.s6_addr32,
> -					    ipv6_hdr(skb)->saddr.s6_addr32);
> +		union tcp_seq_and_ts_off st;
> +
> +		st = secure_tcpv6_seq_and_ts_off(net,
> +						 ipv6_hdr(skb)->daddr.s6_addr32,
> +						 ipv6_hdr(skb)->saddr.s6_addr32,
> +						 tcp_hdr(skb)->dest,
> +						 tcp_hdr(skb)->source);
> +		tsoff = st.ts_off;
>  		tcp_opt.rcv_tsecr -= tsoff;
>  	}
>  
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index e46a0efae01235ae7430ed268b92cb47309b8d28..5d95f9ab86973777c29399348adcc4b07e98d2b4 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -104,18 +104,14 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
>  	}
>  }
>  
> -static u32 tcp_v6_init_seq(const struct sk_buff *skb)
> +static union tcp_seq_and_ts_off
> +tcp_v6_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb)
>  {
> -	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
> -				ipv6_hdr(skb)->saddr.s6_addr32,
> -				tcp_hdr(skb)->dest,
> -				tcp_hdr(skb)->source);
> -}
> -
> -static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
> -{
> -	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
> -				   ipv6_hdr(skb)->saddr.s6_addr32);
> +	return secure_tcpv6_seq_and_ts_off(net,
> +					   ipv6_hdr(skb)->daddr.s6_addr32,
> +					   ipv6_hdr(skb)->saddr.s6_addr32,
> +					   tcp_hdr(skb)->dest,
> +					   tcp_hdr(skb)->source);
>  }
>  
>  static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
> @@ -319,14 +315,16 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
>  	sk_set_txhash(sk);
>  
>  	if (likely(!tp->repair)) {
> +		union tcp_seq_and_ts_off st;
> +
> +		st = secure_tcpv6_seq_and_ts_off(net,
> +						 np->saddr.s6_addr32,
> +						 sk->sk_v6_daddr.s6_addr32,
> +						 inet->inet_sport,
> +						 inet->inet_dport);
>  		if (!tp->write_seq)
> -			WRITE_ONCE(tp->write_seq,
> -				   secure_tcpv6_seq(np->saddr.s6_addr32,
> -						    sk->sk_v6_daddr.s6_addr32,
> -						    inet->inet_sport,
> -						    inet->inet_dport));
> -		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
> -						   sk->sk_v6_daddr.s6_addr32);
> +			WRITE_ONCE(tp->write_seq, st.seq);
> +		tp->tsoffset = st.ts_off;
>  	}
>  
>  	if (tcp_fastopen_defer_connect(sk, &err))
> @@ -816,8 +814,7 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
>  	.cookie_init_seq =	cookie_v6_init_sequence,
>  #endif
>  	.route_req	=	tcp_v6_route_req,
> -	.init_seq	=	tcp_v6_init_seq,
> -	.init_ts_off	=	tcp_v6_init_ts_off,
> +	.init_seq_and_ts_off	= tcp_v6_init_seq_and_ts_off,
>  	.send_synack	=	tcp_v6_send_synack,
>  };
>  
Hi Eric and netdev,

I noticed a significant TCP performance regression (QPS drop) when using
iptables MASQUERADE with the `--random-fully` option, and I have bisected
it down to commit 165573e41f2f66ef98940cf65f838b2cb575d9d1
(tcp: secure_seq: add back ports to TS offset).

Here is the benchmark environment and test results.
Environment:
- Client & Server: 2 VMs
- Server: Nginx listening on port 80 (HTTP), and ip 10.0.0.1
- Benchmark tool: wrk (short-lived connections with "Connection: close")

Test Commands
1. With random-fully:
   # iptables -t nat -A POSTROUTING -d 10.0.0.1 -p tcp --dport 80 -j MASQUERADE --random-fully
   # wrk -t8 -c200 -H "Connection: close" -d10s --latency http://10.0.0.1:80
2. Without random-fully:
   # iptables -t nat -A POSTROUTING -d 10.0.0.1 -p tcp --dport 80 -j MASQUERADE
   # wrk -t8 -c200 -H "Connection: close" -d10s --latency http://10.0.0.1:80

Test Results (QPS):
1. Parent Commit (7f083faf59d14c04e01ec05a7507f036c965acf8):
   - with random-fully:    18145.74, 15006.39, 15716.67
   - without random-fully: 18556.36, 16339.22, 21506.02

2. Bad Commit (165573e41f2f66ef98940cf65f838b2cb575d9d1):
   - with random-fully:    11074.76, 10383.20, 10164.81  <-- (~35% drop)
   - without random-fully: 17310.75, 20279.85, 18399.48

Is this performance degradation an expected side-effect of the security fix,
or is there any sysctl param we should tune when `--random-fully` is
required for high-concurrency short connections?

Looking forward to your insights.


-- 
Best regards,
Tangxin Xie


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net] tcp: secure_seq: add back ports to TS offset
  2026-06-06 11:04 ` xietangxin
@ 2026-06-08  8:51   ` Eric Dumazet
  2026-06-08  9:42     ` Willy Tarreau
                       ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Eric Dumazet @ 2026-06-08  8:51 UTC (permalink / raw)
  To: xietangxin, Pablo Neira Ayuso
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Neal Cardwell, Kuniyuki Iwashima, Willy Tarreau, netdev,
	eric.dumazet, Zhouyan Deng, Florian Westphal

On Sat, Jun 6, 2026 at 4:06 AM xietangxin <xietangxin@yeah.net> wrote:
>
>

> Hi Eric and netdev,
>
> I noticed a significant TCP performance regression (QPS drop) when using
> iptables MASQUERADE with the `--random-fully` option, and I have bisected
> it down to commit 165573e41f2f66ef98940cf65f838b2cb575d9d1
> (tcp: secure_seq: add back ports to TS offset).
>
> Here is the benchmark environment and test results.
> Environment:
> - Client & Server: 2 VMs
> - Server: Nginx listening on port 80 (HTTP), and ip 10.0.0.1
> - Benchmark tool: wrk (short-lived connections with "Connection: close")
>
> Test Commands
> 1. With random-fully:
>    # iptables -t nat -A POSTROUTING -d 10.0.0.1 -p tcp --dport 80 -j MASQUERADE --random-fully
>    # wrk -t8 -c200 -H "Connection: close" -d10s --latency http://10.0.0.1:80
> 2. Without random-fully:
>    # iptables -t nat -A POSTROUTING -d 10.0.0.1 -p tcp --dport 80 -j MASQUERADE
>    # wrk -t8 -c200 -H "Connection: close" -d10s --latency http://10.0.0.1:80
>
> Test Results (QPS):
> 1. Parent Commit (7f083faf59d14c04e01ec05a7507f036c965acf8):
>    - with random-fully:    18145.74, 15006.39, 15716.67
>    - without random-fully: 18556.36, 16339.22, 21506.02
>
> 2. Bad Commit (165573e41f2f66ef98940cf65f838b2cb575d9d1):
>    - with random-fully:    11074.76, 10383.20, 10164.81  <-- (~35% drop)
>    - without random-fully: 17310.75, 20279.85, 18399.48
>
> Is this performance degradation an expected side-effect of the security fix,
> or is there any sysctl param we should tune when `--random-fully` is
> required for high-concurrency short connections?

Hi Tangxin

I do not know why that patch would affect MASQUERADE performance.

Pablo, Florian, do you have an idea?

Thanks.

>
> Looking forward to your insights.
>
>
> --
> Best regards,
> Tangxin Xie
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net] tcp: secure_seq: add back ports to TS offset
  2026-06-08  8:51   ` Eric Dumazet
@ 2026-06-08  9:42     ` Willy Tarreau
  2026-06-08 12:51       ` xietangxin
  2026-06-08 11:30     ` Pablo Neira Ayuso
  2026-06-08 12:11     ` Florian Westphal
  2 siblings, 1 reply; 13+ messages in thread
From: Willy Tarreau @ 2026-06-08  9:42 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: xietangxin, Pablo Neira Ayuso, David S . Miller, Jakub Kicinski,
	Paolo Abeni, Simon Horman, Neal Cardwell, Kuniyuki Iwashima,
	netdev, eric.dumazet, Zhouyan Deng, Florian Westphal

On Mon, Jun 08, 2026 at 01:51:49AM -0700, Eric Dumazet wrote:
> On Sat, Jun 6, 2026 at 4:06 AM xietangxin <xietangxin@yeah.net> wrote:
> >
> >
> 
> > Hi Eric and netdev,
> >
> > I noticed a significant TCP performance regression (QPS drop) when using
> > iptables MASQUERADE with the `--random-fully` option, and I have bisected
> > it down to commit 165573e41f2f66ef98940cf65f838b2cb575d9d1
> > (tcp: secure_seq: add back ports to TS offset).
> >
> > Here is the benchmark environment and test results.
> > Environment:
> > - Client & Server: 2 VMs
> > - Server: Nginx listening on port 80 (HTTP), and ip 10.0.0.1
> > - Benchmark tool: wrk (short-lived connections with "Connection: close")
> >
> > Test Commands
> > 1. With random-fully:
> >    # iptables -t nat -A POSTROUTING -d 10.0.0.1 -p tcp --dport 80 -j MASQUERADE --random-fully
> >    # wrk -t8 -c200 -H "Connection: close" -d10s --latency http://10.0.0.1:80
> > 2. Without random-fully:
> >    # iptables -t nat -A POSTROUTING -d 10.0.0.1 -p tcp --dport 80 -j MASQUERADE
> >    # wrk -t8 -c200 -H "Connection: close" -d10s --latency http://10.0.0.1:80
> >
> > Test Results (QPS):
> > 1. Parent Commit (7f083faf59d14c04e01ec05a7507f036c965acf8):
> >    - with random-fully:    18145.74, 15006.39, 15716.67
> >    - without random-fully: 18556.36, 16339.22, 21506.02
> >
> > 2. Bad Commit (165573e41f2f66ef98940cf65f838b2cb575d9d1):
> >    - with random-fully:    11074.76, 10383.20, 10164.81  <-- (~35% drop)
> >    - without random-fully: 17310.75, 20279.85, 18399.48
> >
> > Is this performance degradation an expected side-effect of the security fix,
> > or is there any sysctl param we should tune when `--random-fully` is
> > required for high-concurrency short connections?
> 
> Hi Tangxin
> 
> I do not know why that patch would affect MASQUERADE performance.
> 
> Pablo, Florian, do you have an idea?

I suspect it's because MASQUERADE can shuffle the ports around and
break the end-to-end mapping. With host-based ISN the increments
remain positive regardless of the ports, while with port-based
increments if you shuffle ports around, two consecutive uses of
the same port can end up showing a decreasing ISN, and some
outgoing SYN will get an ACK instead of a SYN-ACK, then send an
RST, and a SYN again, causing a degradation.

I'm not saying this is necessarily what happens here but based on the
commit message description I suspect that this is what's happening
here. There's always a tradeoff between ISN secrecy and reliability
unfortunately.

Willy

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net] tcp: secure_seq: add back ports to TS offset
  2026-06-08  8:51   ` Eric Dumazet
  2026-06-08  9:42     ` Willy Tarreau
@ 2026-06-08 11:30     ` Pablo Neira Ayuso
  2026-06-08 12:11     ` Florian Westphal
  2 siblings, 0 replies; 13+ messages in thread
From: Pablo Neira Ayuso @ 2026-06-08 11:30 UTC (permalink / raw)
  To: xietangxin
  Cc: Eric Dumazet, David S . Miller, Jakub Kicinski, Paolo Abeni,
	Simon Horman, Neal Cardwell, Kuniyuki Iwashima, Willy Tarreau,
	netdev, eric.dumazet, Zhouyan Deng, Florian Westphal

Hi,

On Mon, Jun 08, 2026 at 01:51:49AM -0700, Eric Dumazet wrote:
> On Sat, Jun 6, 2026 at 4:06 AM xietangxin <xietangxin@yeah.net> wrote:
> >
> >
> 
> > Hi Eric and netdev,
> >
> > I noticed a significant TCP performance regression (QPS drop) when using
> > iptables MASQUERADE with the `--random-fully` option, and I have bisected
> > it down to commit 165573e41f2f66ef98940cf65f838b2cb575d9d1
> > (tcp: secure_seq: add back ports to TS offset).
> >
> > Here is the benchmark environment and test results.
> > Environment:
> > - Client & Server: 2 VMs
> > - Server: Nginx listening on port 80 (HTTP), and ip 10.0.0.1
> > - Benchmark tool: wrk (short-lived connections with "Connection: close")
> >
> > Test Commands
> > 1. With random-fully:
> >    # iptables -t nat -A POSTROUTING -d 10.0.0.1 -p tcp --dport 80 -j MASQUERADE --random-fully
> >    # wrk -t8 -c200 -H "Connection: close" -d10s --latency http://10.0.0.1:80
> > 2. Without random-fully:
> >    # iptables -t nat -A POSTROUTING -d 10.0.0.1 -p tcp --dport 80 -j MASQUERADE
> >    # wrk -t8 -c200 -H "Connection: close" -d10s --latency http://10.0.0.1:80
> >
> > Test Results (QPS):
> > 1. Parent Commit (7f083faf59d14c04e01ec05a7507f036c965acf8):
> >    - with random-fully:    18145.74, 15006.39, 15716.67
> >    - without random-fully: 18556.36, 16339.22, 21506.02
> >
> > 2. Bad Commit (165573e41f2f66ef98940cf65f838b2cb575d9d1):
> >    - with random-fully:    11074.76, 10383.20, 10164.81  <-- (~35% drop)

Can you check where the packet drops are coming? There are several
tools (iptables trace if it is the ruleset, dropwatch, etc.) to check this.

> >    - without random-fully: 17310.75, 20279.85, 18399.48
> >
> > Is this performance degradation an expected side-effect of the security fix,
> > or is there any sysctl param we should tune when `--random-fully` is
> > required for high-concurrency short connections?
> 
> Hi Tangxin
> 
> I do not know why that patch would affect MASQUERADE performance.
> 
> Pablo, Florian, do you have an idea?

With this information, not yet.

Thanks.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net] tcp: secure_seq: add back ports to TS offset
  2026-06-08  8:51   ` Eric Dumazet
  2026-06-08  9:42     ` Willy Tarreau
  2026-06-08 11:30     ` Pablo Neira Ayuso
@ 2026-06-08 12:11     ` Florian Westphal
  2 siblings, 0 replies; 13+ messages in thread
From: Florian Westphal @ 2026-06-08 12:11 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: xietangxin, Pablo Neira Ayuso, David S . Miller, Jakub Kicinski,
	Paolo Abeni, Simon Horman, Neal Cardwell, Kuniyuki Iwashima,
	Willy Tarreau, netdev, eric.dumazet, Zhouyan Deng

Eric Dumazet <edumazet@google.com> wrote:
> > Is this performance degradation an expected side-effect of the security fix,
> > or is there any sysctl param we should tune when `--random-fully` is
> > required for high-concurrency short connections?
> 
> Hi Tangxin
> 
> I do not know why that patch would affect MASQUERADE performance.
> 
> Pablo, Florian, do you have an idea?

tcpdump would help.  Willy could be right, tcpdump would confirm
increase in retries.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net] tcp: secure_seq: add back ports to TS offset
  2026-06-08  9:42     ` Willy Tarreau
@ 2026-06-08 12:51       ` xietangxin
  2026-06-08 13:14         ` Jiayuan Chen
  0 siblings, 1 reply; 13+ messages in thread
From: xietangxin @ 2026-06-08 12:51 UTC (permalink / raw)
  To: Willy Tarreau, Eric Dumazet
  Cc: Pablo Neira Ayuso, David S . Miller, Jakub Kicinski, Paolo Abeni,
	Simon Horman, Neal Cardwell, Kuniyuki Iwashima, netdev,
	eric.dumazet, Zhouyan Deng, Florian Westphal



On 6/8/2026 5:42 PM, Willy Tarreau wrote:
> On Mon, Jun 08, 2026 at 01:51:49AM -0700, Eric Dumazet wrote:
>> On Sat, Jun 6, 2026 at 4:06 AM xietangxin <xietangxin@yeah.net> wrote:
>>>
>>>
>>
>>> Hi Eric and netdev,
>>>
>>> I noticed a significant TCP performance regression (QPS drop) when using
>>> iptables MASQUERADE with the `--random-fully` option, and I have bisected
>>> it down to commit 165573e41f2f66ef98940cf65f838b2cb575d9d1
>>> (tcp: secure_seq: add back ports to TS offset).
>>>
>>> Here is the benchmark environment and test results.
>>> Environment:
>>> - Client & Server: 2 VMs
>>> - Server: Nginx listening on port 80 (HTTP), and ip 10.0.0.1
>>> - Benchmark tool: wrk (short-lived connections with "Connection: close")
>>>
>>> Test Commands
>>> 1. With random-fully:
>>>    # iptables -t nat -A POSTROUTING -d 10.0.0.1 -p tcp --dport 80 -j MASQUERADE --random-fully
>>>    # wrk -t8 -c200 -H "Connection: close" -d10s --latency http://10.0.0.1:80
>>> 2. Without random-fully:
>>>    # iptables -t nat -A POSTROUTING -d 10.0.0.1 -p tcp --dport 80 -j MASQUERADE
>>>    # wrk -t8 -c200 -H "Connection: close" -d10s --latency http://10.0.0.1:80
>>>
>>> Test Results (QPS):
>>> 1. Parent Commit (7f083faf59d14c04e01ec05a7507f036c965acf8):
>>>    - with random-fully:    18145.74, 15006.39, 15716.67
>>>    - without random-fully: 18556.36, 16339.22, 21506.02
>>>
>>> 2. Bad Commit (165573e41f2f66ef98940cf65f838b2cb575d9d1):
>>>    - with random-fully:    11074.76, 10383.20, 10164.81  <-- (~35% drop)
>>>    - without random-fully: 17310.75, 20279.85, 18399.48
>>>
>>> Is this performance degradation an expected side-effect of the security fix,
>>> or is there any sysctl param we should tune when `--random-fully` is
>>> required for high-concurrency short connections?
>>
>> Hi Tangxin
>>
>> I do not know why that patch would affect MASQUERADE performance.
>>
>> Pablo, Florian, do you have an idea?
> 
> I suspect it's because MASQUERADE can shuffle the ports around and
> break the end-to-end mapping. With host-based ISN the increments
> remain positive regardless of the ports, while with port-based
> increments if you shuffle ports around, two consecutive uses of
> the same port can end up showing a decreasing ISN, and some
> outgoing SYN will get an ACK instead of a SYN-ACK, then send an
> RST, and a SYN again, causing a degradation.
> 
> I'm not saying this is necessarily what happens here but based on the
> commit message description I suspect that this is what's happening
> here. There's always a tradeoff between ISN secrecy and reliability
> unfortunately.
> 
> Willy
Hi,

Willy, your hypothesis is 100% correct!
I captured the packets during the benchmark on the bad commit,
and the trace perfectly shows the "SYN -> ACK -> RST".

Here is the key snippet of the packet trace (Client: 10.0.0.2, Server: 10.0.0.1):

// 1. First connection closes, Server sends last ACK(410615916), entering TIME_WAIT.
12105 08:54:39.128861 10.0.0.1 -> 10.0.0.2 TCP 80 → 47824 [ACK] Seq=3315216203 Ack=410615916 TSval=273827652 TSecr=370383870

// 2. ~200ms later, next short-conn reuses port 47824 via MASQUERADE --random-fully
47637 08:54:39.332281 10.0.0.2 -> 10.0.0.1 TCP 47824 → 80 [SYN] Seq=559739866 TSval=4137539723 TSecr=0

// 3. Server is sends a ACK with the old connection's expected ACK(410615916).
48591 08:54:39.337692 10.0.0.1 -> 10.0.0.2 TCP 80 → 47824 [ACK] Seq=3315216203 Ack=410615916 TSval=273827858 TSecr=370383870

// 4. Client receives the unexpected old ACK, responds with RST, and has to retry the connection.
48600 08:54:39.337799 10.0.0.2 -> 10.0.0.1 TCP 47824 → 80 [RST] Seq=410615916 Win=0


Are there any architectural recommendations we should consider here,
or is this considered an acceptable trade-off for security?

-- 
Best regards,
Tangxin Xie


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net] tcp: secure_seq: add back ports to TS offset
  2026-06-08 12:51       ` xietangxin
@ 2026-06-08 13:14         ` Jiayuan Chen
  2026-06-08 15:06           ` Willy Tarreau
  0 siblings, 1 reply; 13+ messages in thread
From: Jiayuan Chen @ 2026-06-08 13:14 UTC (permalink / raw)
  To: xietangxin, Willy Tarreau, Eric Dumazet
  Cc: Pablo Neira Ayuso, David S . Miller, Jakub Kicinski, Paolo Abeni,
	Simon Horman, Neal Cardwell, Kuniyuki Iwashima, netdev,
	eric.dumazet, Zhouyan Deng, Florian Westphal


On 6/8/26 8:51 PM, xietangxin wrote:
>
> On 6/8/2026 5:42 PM, Willy Tarreau wrote:
>> On Mon, Jun 08, 2026 at 01:51:49AM -0700, Eric Dumazet wrote:
>>> On Sat, Jun 6, 2026 at 4:06 AM xietangxin <xietangxin@yeah.net> wrote:
>>>>
>>>> Hi Eric and netdev,
>>>>
>>>> I noticed a significant TCP performance regression (QPS drop) when using
>>>> iptables MASQUERADE with the `--random-fully` option, and I have bisected
>>>> it down to commit 165573e41f2f66ef98940cf65f838b2cb575d9d1
>>>> (tcp: secure_seq: add back ports to TS offset).
>>>>
>>>> Here is the benchmark environment and test results.
>>>> Environment:
>>>> - Client & Server: 2 VMs
>>>> - Server: Nginx listening on port 80 (HTTP), and ip 10.0.0.1
>>>> - Benchmark tool: wrk (short-lived connections with "Connection: close")
>>>>
>>>> Test Commands
>>>> 1. With random-fully:
>>>>     # iptables -t nat -A POSTROUTING -d 10.0.0.1 -p tcp --dport 80 -j MASQUERADE --random-fully
>>>>     # wrk -t8 -c200 -H "Connection: close" -d10s --latency http://10.0.0.1:80
>>>> 2. Without random-fully:
>>>>     # iptables -t nat -A POSTROUTING -d 10.0.0.1 -p tcp --dport 80 -j MASQUERADE
>>>>     # wrk -t8 -c200 -H "Connection: close" -d10s --latency http://10.0.0.1:80
>>>>
>>>> Test Results (QPS):
>>>> 1. Parent Commit (7f083faf59d14c04e01ec05a7507f036c965acf8):
>>>>     - with random-fully:    18145.74, 15006.39, 15716.67
>>>>     - without random-fully: 18556.36, 16339.22, 21506.02
>>>>
>>>> 2. Bad Commit (165573e41f2f66ef98940cf65f838b2cb575d9d1):
>>>>     - with random-fully:    11074.76, 10383.20, 10164.81  <-- (~35% drop)
>>>>     - without random-fully: 17310.75, 20279.85, 18399.48
>>>>
>>>> Is this performance degradation an expected side-effect of the security fix,
>>>> or is there any sysctl param we should tune when `--random-fully` is
>>>> required for high-concurrency short connections?
>>> Hi Tangxin
>>>
>>> I do not know why that patch would affect MASQUERADE performance.
>>>
>>> Pablo, Florian, do you have an idea?
>> I suspect it's because MASQUERADE can shuffle the ports around and
>> break the end-to-end mapping. With host-based ISN the increments
>> remain positive regardless of the ports, while with port-based
>> increments if you shuffle ports around, two consecutive uses of
>> the same port can end up showing a decreasing ISN, and some
>> outgoing SYN will get an ACK instead of a SYN-ACK, then send an
>> RST, and a SYN again, causing a degradation.
>>
>> I'm not saying this is necessarily what happens here but based on the
>> commit message description I suspect that this is what's happening
>> here. There's always a tradeoff between ISN secrecy and reliability
>> unfortunately.
>>
>> Willy
> Hi,
>
> Willy, your hypothesis is 100% correct!
> I captured the packets during the benchmark on the bad commit,
> and the trace perfectly shows the "SYN -> ACK -> RST".
>
> Here is the key snippet of the packet trace (Client: 10.0.0.2, Server: 10.0.0.1):
>
> // 1. First connection closes, Server sends last ACK(410615916), entering TIME_WAIT.
> 12105 08:54:39.128861 10.0.0.1 -> 10.0.0.2 TCP 80 → 47824 [ACK] Seq=3315216203 Ack=410615916 TSval=273827652 TSecr=370383870
>
> // 2. ~200ms later, next short-conn reuses port 47824 via MASQUERADE --random-fully
> 47637 08:54:39.332281 10.0.0.2 -> 10.0.0.1 TCP 47824 → 80 [SYN] Seq=559739866 TSval=4137539723 TSecr=0
>
> // 3. Server is sends a ACK with the old connection's expected ACK(410615916).
> 48591 08:54:39.337692 10.0.0.1 -> 10.0.0.2 TCP 80 → 47824 [ACK] Seq=3315216203 Ack=410615916 TSval=273827858 TSecr=370383870
>
> // 4. Client receives the unexpected old ACK, responds with RST, and has to retry the connection.
> 48600 08:54:39.337799 10.0.0.2 -> 10.0.0.1 TCP 47824 → 80 [RST] Seq=410615916 Win=0
>
>
> Are there any architectural recommendations we should consider here,
> or is this considered an acceptable trade-off for security?


It's classic PAWS problem when packets go through NAT/Gateway.

Can you test the performance with following different two configs (client) ?

     sysctl -w net.ipv4.tcp_timestamps=2

     sysctl -w net.ipv4.tcp_timestamps=0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net] tcp: secure_seq: add back ports to TS offset
  2026-06-08 13:14         ` Jiayuan Chen
@ 2026-06-08 15:06           ` Willy Tarreau
  0 siblings, 0 replies; 13+ messages in thread
From: Willy Tarreau @ 2026-06-08 15:06 UTC (permalink / raw)
  To: Jiayuan Chen
  Cc: xietangxin, Eric Dumazet, Pablo Neira Ayuso, David S . Miller,
	Jakub Kicinski, Paolo Abeni, Simon Horman, Neal Cardwell,
	Kuniyuki Iwashima, netdev, eric.dumazet, Zhouyan Deng,
	Florian Westphal

On Mon, Jun 08, 2026 at 09:14:26PM +0800, Jiayuan Chen wrote:
> 
> On 6/8/26 8:51 PM, xietangxin wrote:
> > 
> > On 6/8/2026 5:42 PM, Willy Tarreau wrote:
> > > On Mon, Jun 08, 2026 at 01:51:49AM -0700, Eric Dumazet wrote:
> > > > On Sat, Jun 6, 2026 at 4:06 AM xietangxin <xietangxin@yeah.net> wrote:
> > > > > 
> > > > > Hi Eric and netdev,
> > > > > 
> > > > > I noticed a significant TCP performance regression (QPS drop) when using
> > > > > iptables MASQUERADE with the `--random-fully` option, and I have bisected
> > > > > it down to commit 165573e41f2f66ef98940cf65f838b2cb575d9d1
> > > > > (tcp: secure_seq: add back ports to TS offset).
> > > > > 
> > > > > Here is the benchmark environment and test results.
> > > > > Environment:
> > > > > - Client & Server: 2 VMs
> > > > > - Server: Nginx listening on port 80 (HTTP), and ip 10.0.0.1
> > > > > - Benchmark tool: wrk (short-lived connections with "Connection: close")
> > > > > 
> > > > > Test Commands
> > > > > 1. With random-fully:
> > > > >     # iptables -t nat -A POSTROUTING -d 10.0.0.1 -p tcp --dport 80 -j MASQUERADE --random-fully
> > > > >     # wrk -t8 -c200 -H "Connection: close" -d10s --latency http://10.0.0.1:80
> > > > > 2. Without random-fully:
> > > > >     # iptables -t nat -A POSTROUTING -d 10.0.0.1 -p tcp --dport 80 -j MASQUERADE
> > > > >     # wrk -t8 -c200 -H "Connection: close" -d10s --latency http://10.0.0.1:80
> > > > > 
> > > > > Test Results (QPS):
> > > > > 1. Parent Commit (7f083faf59d14c04e01ec05a7507f036c965acf8):
> > > > >     - with random-fully:    18145.74, 15006.39, 15716.67
> > > > >     - without random-fully: 18556.36, 16339.22, 21506.02
> > > > > 
> > > > > 2. Bad Commit (165573e41f2f66ef98940cf65f838b2cb575d9d1):
> > > > >     - with random-fully:    11074.76, 10383.20, 10164.81  <-- (~35% drop)
> > > > >     - without random-fully: 17310.75, 20279.85, 18399.48
> > > > > 
> > > > > Is this performance degradation an expected side-effect of the security fix,
> > > > > or is there any sysctl param we should tune when `--random-fully` is
> > > > > required for high-concurrency short connections?
> > > > Hi Tangxin
> > > > 
> > > > I do not know why that patch would affect MASQUERADE performance.
> > > > 
> > > > Pablo, Florian, do you have an idea?
> > > I suspect it's because MASQUERADE can shuffle the ports around and
> > > break the end-to-end mapping. With host-based ISN the increments
> > > remain positive regardless of the ports, while with port-based
> > > increments if you shuffle ports around, two consecutive uses of
> > > the same port can end up showing a decreasing ISN, and some
> > > outgoing SYN will get an ACK instead of a SYN-ACK, then send an
> > > RST, and a SYN again, causing a degradation.
> > > 
> > > I'm not saying this is necessarily what happens here but based on the
> > > commit message description I suspect that this is what's happening
> > > here. There's always a tradeoff between ISN secrecy and reliability
> > > unfortunately.
> > > 
> > > Willy
> > Hi,
> > 
> > Willy, your hypothesis is 100% correct!
> > I captured the packets during the benchmark on the bad commit,
> > and the trace perfectly shows the "SYN -> ACK -> RST".
> > 
> > Here is the key snippet of the packet trace (Client: 10.0.0.2, Server: 10.0.0.1):
> > 
> > // 1. First connection closes, Server sends last ACK(410615916), entering TIME_WAIT.
> > 12105 08:54:39.128861 10.0.0.1 -> 10.0.0.2 TCP 80 -> 47824 [ACK] Seq=3315216203 Ack=410615916 TSval=273827652 TSecr=370383870
> > 
> > // 2. ~200ms later, next short-conn reuses port 47824 via MASQUERADE --random-fully
> > 47637 08:54:39.332281 10.0.0.2 -> 10.0.0.1 TCP 47824 -> 80 [SYN] Seq=559739866 TSval=4137539723 TSecr=0
> > 
> > // 3. Server is sends a ACK with the old connection's expected ACK(410615916).
> > 48591 08:54:39.337692 10.0.0.1 -> 10.0.0.2 TCP 80 -> 47824 [ACK] Seq=3315216203 Ack=410615916 TSval=273827858 TSecr=370383870
> > 
> > // 4. Client receives the unexpected old ACK, responds with RST, and has to retry the connection.
> > 48600 08:54:39.337799 10.0.0.2 -> 10.0.0.1 TCP 47824 -> 80 [RST] Seq=410615916 Win=0
> > 
> > 
> > Are there any architectural recommendations we should consider here,
> > or is this considered an acceptable trade-off for security?
> 
> 
> It's classic PAWS problem when packets go through NAT/Gateway.
> 
> Can you test the performance with following different two configs (client) ?
> 
>     sysctl -w net.ipv4.tcp_timestamps=2
> 
>     sysctl -w net.ipv4.tcp_timestamps=0

The thing is, nothing forces a server to use PAWS to distinguish SYNs,
and some OSes only apply the spec to the letter (at least Solaris in
my experience). The spec basically says that PAWS protects against
duplicate non-SYN segments, and duplicate SYNs while there is a
connection. So while Linux (and other OSes) nicely covers the case
of the transition from TIME_WAIT to SYN_RECV, others do not always do
it.

Regardless, while I hate it when we play borderline games with TCP for
the sake of supposed security issues that can only be demonstrated in
a lab, we must admit that masquerading remains an issue when the same
port range is shared between multiple hosts, even with PAWS since there
is no reason for multiple hosts to have the same clock.

Willy

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2026-06-08 15:06 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-02 20:55 [PATCH net] tcp: secure_seq: add back ports to TS offset Eric Dumazet
2026-03-02 21:47 ` Kuniyuki Iwashima
2026-03-03  1:41 ` Florian Westphal
2026-03-03  7:39 ` Jörg Sommer
2026-03-05  2:00 ` patchwork-bot+netdevbpf
2026-06-06 11:04 ` xietangxin
2026-06-08  8:51   ` Eric Dumazet
2026-06-08  9:42     ` Willy Tarreau
2026-06-08 12:51       ` xietangxin
2026-06-08 13:14         ` Jiayuan Chen
2026-06-08 15:06           ` Willy Tarreau
2026-06-08 11:30     ` Pablo Neira Ayuso
2026-06-08 12:11     ` Florian Westphal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox