From: Eric Dumazet <edumazet@google.com>
To: "David S . Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>,
Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <horms@kernel.org>,
Neal Cardwell <ncardwell@google.com>,
Kuniyuki Iwashima <kuniyu@google.com>,
netdev@vger.kernel.org, eric.dumazet@gmail.com,
Eric Dumazet <edumazet@google.com>
Subject: [PATCH net 04/14] tcp: annotate data-races around tp->snd_ssthresh
Date: Thu, 16 Apr 2026 20:03:09 +0000 [thread overview]
Message-ID: <20260416200319.3608680-5-edumazet@google.com> (raw)
In-Reply-To: <20260416200319.3608680-1-edumazet@google.com>
tcp_get_timestamping_opt_stats() intentionally runs lockless, we must
add READ_ONCE() and WRITE_ONCE() annotations to keep KCSAN happy.
Fixes: 7156d194a077 ("tcp: add snd_ssthresh stat in SCM_TIMESTAMPING_OPT_STATS")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/core/filter.c | 2 +-
net/ipv4/tcp.c | 4 ++--
net/ipv4/tcp_bbr.c | 6 +++---
net/ipv4/tcp_bic.c | 2 +-
net/ipv4/tcp_cdg.c | 4 ++--
net/ipv4/tcp_cubic.c | 6 +++---
net/ipv4/tcp_dctcp.c | 2 +-
net/ipv4/tcp_input.c | 8 ++++----
net/ipv4/tcp_metrics.c | 4 ++--
net/ipv4/tcp_nv.c | 4 ++--
net/ipv4/tcp_output.c | 4 ++--
net/ipv4/tcp_vegas.c | 9 +++++----
net/ipv4/tcp_westwood.c | 4 ++--
net/ipv4/tcp_yeah.c | 3 ++-
14 files changed, 32 insertions(+), 30 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index fcfcb72663ca3798bb33d33275d18fc73071c8d4..3b5609fb96de5e92880c6170a7fcf54da4612818 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5396,7 +5396,7 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
if (val <= 0)
return -EINVAL;
tp->snd_cwnd_clamp = val;
- tp->snd_ssthresh = val;
+ WRITE_ONCE(tp->snd_ssthresh, val);
break;
case TCP_BPF_DELACK_MAX:
timeout = usecs_to_jiffies(val);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 24ba80d244b1fb69102b587b568cebe7b78dff9d..802a9ea05211f8eab30b6f937a459a270476974d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3425,7 +3425,7 @@ int tcp_disconnect(struct sock *sk, int flags)
icsk->icsk_rto = TCP_TIMEOUT_INIT;
WRITE_ONCE(icsk->icsk_rto_min, TCP_RTO_MIN);
WRITE_ONCE(icsk->icsk_delack_max, TCP_DELACK_MAX);
- tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
+ WRITE_ONCE(tp->snd_ssthresh, TCP_INFINITE_SSTHRESH);
tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
tp->snd_cwnd_cnt = 0;
tp->is_cwnd_limited = 0;
@@ -4452,7 +4452,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
nla_put_u8(stats, TCP_NLA_RECUR_RETRANS,
READ_ONCE(inet_csk(sk)->icsk_retransmits));
nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, data_race(!!tp->rate_app_limited));
- nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
+ nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, READ_ONCE(tp->snd_ssthresh));
nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered);
nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce);
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 1ddc20a399b07054f8175b5f6459f8ae6dbf34bb..aec7805b1d37634783491649da1fa01602061781 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -897,8 +897,8 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
bbr->mode = BBR_DRAIN; /* drain queue we created */
- tcp_sk(sk)->snd_ssthresh =
- bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT);
+ WRITE_ONCE(tcp_sk(sk)->snd_ssthresh,
+ bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT));
} /* fall through to check if in-flight is already small: */
if (bbr->mode == BBR_DRAIN &&
bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <=
@@ -1043,7 +1043,7 @@ __bpf_kfunc static void bbr_init(struct sock *sk)
struct bbr *bbr = inet_csk_ca(sk);
bbr->prior_cwnd = 0;
- tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
+ WRITE_ONCE(tp->snd_ssthresh, TCP_INFINITE_SSTHRESH);
bbr->rtt_cnt = 0;
bbr->next_rtt_delivered = tp->delivered;
bbr->prev_ca_state = TCP_CA_Open;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 58358bf92e1b8ac43c07789dac9f6031fa2e03dd..65444ff142413aa7ea6151f1cb3cef7d14f253eb 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -74,7 +74,7 @@ static void bictcp_init(struct sock *sk)
bictcp_reset(ca);
if (initial_ssthresh)
- tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
+ WRITE_ONCE(tcp_sk(sk)->snd_ssthresh, initial_ssthresh);
}
/*
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index ceabfd690a296739323a795da5e1fc7453229b3f..0812c390aee5643ee39209f47e8ae901045dc498 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -162,7 +162,7 @@ static void tcp_cdg_hystart_update(struct sock *sk)
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINCWND,
tcp_snd_cwnd(tp));
- tp->snd_ssthresh = tcp_snd_cwnd(tp);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp));
return;
}
}
@@ -181,7 +181,7 @@ static void tcp_cdg_hystart_update(struct sock *sk)
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTDELAYCWND,
tcp_snd_cwnd(tp));
- tp->snd_ssthresh = tcp_snd_cwnd(tp);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp));
}
}
}
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index ab78b5ae8d0e3d13a39bd1adf1e105b84f806b63..119bf8cbb007c22993367f0f1452a2db4ed9d92b 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -136,7 +136,7 @@ __bpf_kfunc static void cubictcp_init(struct sock *sk)
bictcp_hystart_reset(sk);
if (!hystart && initial_ssthresh)
- tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
+ WRITE_ONCE(tcp_sk(sk)->snd_ssthresh, initial_ssthresh);
}
__bpf_kfunc static void cubictcp_cwnd_event_tx_start(struct sock *sk)
@@ -420,7 +420,7 @@ static void hystart_update(struct sock *sk, u32 delay)
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINCWND,
tcp_snd_cwnd(tp));
- tp->snd_ssthresh = tcp_snd_cwnd(tp);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp));
}
}
}
@@ -440,7 +440,7 @@ static void hystart_update(struct sock *sk, u32 delay)
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTDELAYCWND,
tcp_snd_cwnd(tp));
- tp->snd_ssthresh = tcp_snd_cwnd(tp);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp));
}
}
}
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 96c99999e09dde9de9c337e4d6c692f517467c7b..274e628e7cf8621fd955528c8353001e773efb21 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -177,7 +177,7 @@ static void dctcp_react_to_loss(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
ca->loss_cwnd = tcp_snd_cwnd(tp);
- tp->snd_ssthresh = max(tcp_snd_cwnd(tp) >> 1U, 2U);
+ WRITE_ONCE(tp->snd_ssthresh, max(tcp_snd_cwnd(tp) >> 1U, 2U));
}
__bpf_kfunc static void dctcp_state(struct sock *sk, u8 new_state)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6bb6bf049a35ac91fd53e3e66691f64fc4c93648..c6361447535f0a2b72eccb6fede4618471e38ae5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2567,7 +2567,7 @@ void tcp_enter_loss(struct sock *sk)
(icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
tp->prior_ssthresh = tcp_current_ssthresh(sk);
tp->prior_cwnd = tcp_snd_cwnd(tp);
- tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+ WRITE_ONCE(tp->snd_ssthresh, icsk->icsk_ca_ops->ssthresh(sk));
tcp_ca_event(sk, CA_EVENT_LOSS);
tcp_init_undo(tp);
}
@@ -2860,7 +2860,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
tcp_snd_cwnd_set(tp, icsk->icsk_ca_ops->undo_cwnd(sk));
if (tp->prior_ssthresh > tp->snd_ssthresh) {
- tp->snd_ssthresh = tp->prior_ssthresh;
+ WRITE_ONCE(tp->snd_ssthresh, tp->prior_ssthresh);
tcp_ecn_withdraw_cwr(tp);
}
}
@@ -2978,7 +2978,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk)
tp->prior_cwnd = tcp_snd_cwnd(tp);
tp->prr_delivered = 0;
tp->prr_out = 0;
- tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+ WRITE_ONCE(tp->snd_ssthresh, inet_csk(sk)->icsk_ca_ops->ssthresh(sk));
tcp_ecn_queue_cwr(tp);
}
@@ -3120,7 +3120,7 @@ static void tcp_non_congestion_loss_retransmit(struct sock *sk)
if (icsk->icsk_ca_state != TCP_CA_Loss) {
tp->high_seq = tp->snd_nxt;
- tp->snd_ssthresh = tcp_current_ssthresh(sk);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk));
tp->prior_ssthresh = 0;
tp->undo_marker = 0;
tcp_set_ca_state(sk, TCP_CA_Loss);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 7a9d6d9006f651e91054d3369b47758a6c35253b..dc0c081fc1f33f735a38aaae8a7b4ab3d633b148 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -490,9 +490,9 @@ void tcp_init_metrics(struct sock *sk)
val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val) {
- tp->snd_ssthresh = val;
+ WRITE_ONCE(tp->snd_ssthresh, val);
if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
- tp->snd_ssthresh = tp->snd_cwnd_clamp;
+ WRITE_ONCE(tp->snd_ssthresh, tp->snd_cwnd_clamp);
}
val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
if (val && tp->reordering != val)
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index a60662f4bdf92cba1d92a3eedd7c607d1537d7f2..f345897a68dfcfe0f620ba50ff88f08b1c23e43f 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -396,8 +396,8 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
/* We have enough data to determine we are congested */
ca->nv_allow_cwnd_growth = 0;
- tp->snd_ssthresh =
- (nv_ssthresh_factor * max_win) >> 3;
+ WRITE_ONCE(tp->snd_ssthresh,
+ (nv_ssthresh_factor * max_win) >> 3);
if (tcp_snd_cwnd(tp) - max_win > 2) {
/* gap > 2, we do exponential cwnd decrease */
int dec;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d8e8bba2d03a3be5e7a9ebac16e39f4a29ae6037..2663505a0dd7a0eae69f6a91250b4a0b6f357f7d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -171,7 +171,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta)
tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
- tp->snd_ssthresh = tcp_current_ssthresh(sk);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk));
restart_cwnd = min(restart_cwnd, cwnd);
while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
@@ -2143,7 +2143,7 @@ static void tcp_cwnd_application_limited(struct sock *sk)
u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
u32 win_used = max(tp->snd_cwnd_used, init_win);
if (win_used < tcp_snd_cwnd(tp)) {
- tp->snd_ssthresh = tcp_current_ssthresh(sk);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk));
tcp_snd_cwnd_set(tp, (tcp_snd_cwnd(tp) + win_used) >> 1);
}
tp->snd_cwnd_used = 0;
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 950a66966059e89fc108a31c106805a0f76fc6f0..574453af6bc03c95e7dee69bff7dde2b63fe65c4 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -245,7 +245,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
*/
tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp),
(u32)target_cwnd + 1));
- tp->snd_ssthresh = tcp_vegas_ssthresh(tp);
+ WRITE_ONCE(tp->snd_ssthresh,
+ tcp_vegas_ssthresh(tp));
} else if (tcp_in_slow_start(tp)) {
/* Slow start. */
@@ -261,8 +262,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* we slow down.
*/
tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1);
- tp->snd_ssthresh
- = tcp_vegas_ssthresh(tp);
+ WRITE_ONCE(tp->snd_ssthresh,
+ tcp_vegas_ssthresh(tp));
} else if (diff < alpha) {
/* We don't have enough extra packets
* in the network, so speed up.
@@ -280,7 +281,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp)
tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp);
- tp->snd_ssthresh = tcp_current_ssthresh(sk);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk));
}
/* Wipe the slate clean for the next RTT. */
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index c6e97141eef2591c5ab50f4058a5377e0855313f..b5a42adfd6ca1fd93a91b99d7aa16bb4e64a9b3e 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -244,11 +244,11 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
switch (event) {
case CA_EVENT_COMPLETE_CWR:
- tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_westwood_bw_rttmin(sk));
tcp_snd_cwnd_set(tp, tp->snd_ssthresh);
break;
case CA_EVENT_LOSS:
- tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
+ WRITE_ONCE(tp->snd_ssthresh, tcp_westwood_bw_rttmin(sk));
/* Update RTT_min when next ack arrives */
w->reset_rtt_min = 1;
break;
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index b22b3dccd05efddfe11203578950eddecee14887..9e581154f18f11832fa832544217fc9072b4f452 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -147,7 +147,8 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp),
yeah->reno_count));
- tp->snd_ssthresh = tcp_snd_cwnd(tp);
+ WRITE_ONCE(tp->snd_ssthresh,
+ tcp_snd_cwnd(tp));
}
if (yeah->reno_count <= 2)
--
2.54.0.rc1.513.gad8abe7a5a-goog
next prev parent reply other threads:[~2026-04-16 20:03 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-16 20:03 [PATCH net 00/14] tcp: take care of tcp_get_timestamping_opt_stats() races Eric Dumazet
2026-04-16 20:03 ` [PATCH net 01/14] tcp: annotate data-races in tcp_get_info_chrono_stats() Eric Dumazet
2026-04-16 20:03 ` [PATCH net 02/14] tcp: add data-race annotations around tp->data_segs_out and tp->total_retrans Eric Dumazet
2026-04-16 20:03 ` [PATCH net 03/14] tcp: add data-races annotations around tp->reordering, tp->snd_cwnd Eric Dumazet
2026-04-16 20:03 ` Eric Dumazet [this message]
2026-04-16 20:03 ` [PATCH net 05/14] tcp: annotate data-races around tp->delivered and tp->delivered_ce Eric Dumazet
2026-04-16 20:03 ` [PATCH net 06/14] tcp: add data-race annotations for TCP_NLA_SNDQ_SIZE Eric Dumazet
2026-04-16 20:03 ` [PATCH net 07/14] tcp: annotate data-races around tp->bytes_sent Eric Dumazet
2026-04-16 20:03 ` [PATCH net 08/14] tcp: annotate data-races around tp->bytes_retrans Eric Dumazet
2026-04-16 20:03 ` [PATCH net 09/14] tcp: annotate data-races around tp->dsack_dups Eric Dumazet
2026-04-16 20:03 ` [PATCH net 10/14] tcp: annotate data-races around tp->reord_seen Eric Dumazet
2026-04-16 20:03 ` [PATCH net 11/14] tcp: annotate data-races around tp->srtt_us Eric Dumazet
2026-04-16 20:03 ` [PATCH net 12/14] tcp: annotate data-races around tp->timeout_rehash Eric Dumazet
2026-04-16 20:03 ` [PATCH net 13/14] tcp: annotate data-races around (tp->write_seq - tp->snd_nxt) Eric Dumazet
2026-04-16 20:03 ` [PATCH net 14/14] tcp: annotate data-races around tp->plb_rehash Eric Dumazet
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260416200319.3608680-5-edumazet@google.com \
--to=edumazet@google.com \
--cc=davem@davemloft.net \
--cc=eric.dumazet@gmail.com \
--cc=horms@kernel.org \
--cc=kuba@kernel.org \
--cc=kuniyu@google.com \
--cc=ncardwell@google.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox