* [PATCH net-next] tcp: annotate data-races around tp->window_clamp
@ 2024-04-04 11:42 Eric Dumazet
2024-04-05 14:29 ` Jason Xing
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Eric Dumazet @ 2024-04-04 11:42 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: netdev, eric.dumazet, Eric Dumazet
tp->window_clamp can be read locklessly, add READ_ONCE()
and WRITE_ONCE() annotations.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv4/syncookies.c | 3 ++-
net/ipv4/tcp.c | 8 ++++----
net/ipv4/tcp_input.c | 17 ++++++++++-------
net/ipv4/tcp_output.c | 18 ++++++++++--------
net/ipv6/syncookies.c | 2 +-
net/mptcp/protocol.c | 2 +-
net/mptcp/sockopt.c | 2 +-
7 files changed, 29 insertions(+), 23 deletions(-)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 500f665f98cbce4a3d681f8e39ecd368fe4013b1..b61d36810fe3fd62b1e5c5885bbaf20185f1abf0 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -462,7 +462,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
}
/* Try to redo what tcp_v4_send_synack did. */
- req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
+ req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? :
+ dst_metric(&rt->dst, RTAX_WINDOW);
/* limit the window selection if the user enforce a smaller rx buffer */
full_space = tcp_full_space(sk);
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e767721b3a588b5d56567ae7badf5dffcd35a76a..92ee60492314a1483cfbfa2f73d32fcad5632773 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1721,7 +1721,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
space = tcp_space_from_win(sk, val);
if (space > sk->sk_rcvbuf) {
WRITE_ONCE(sk->sk_rcvbuf, space);
- tcp_sk(sk)->window_clamp = val;
+ WRITE_ONCE(tcp_sk(sk)->window_clamp, val);
}
return 0;
}
@@ -3379,7 +3379,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
if (!val) {
if (sk->sk_state != TCP_CLOSE)
return -EINVAL;
- tp->window_clamp = 0;
+ WRITE_ONCE(tp->window_clamp, 0);
} else {
u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp;
u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
@@ -3388,7 +3388,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
if (new_window_clamp == old_window_clamp)
return 0;
- tp->window_clamp = new_window_clamp;
+ WRITE_ONCE(tp->window_clamp, new_window_clamp);
if (new_window_clamp < old_window_clamp) {
/* need to apply the reserved mem provisioning only
* when shrinking the window clamp
@@ -4057,7 +4057,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
TCP_RTO_MAX / HZ);
break;
case TCP_WINDOW_CLAMP:
- val = tp->window_clamp;
+ val = READ_ONCE(tp->window_clamp);
break;
case TCP_INFO: {
struct tcp_info info;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1b6cd384001202df5f8e8e8c73adff0db89ece63..8d44ab5671eacd4bc06647c7cca387a79e346618 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -563,19 +563,20 @@ static void tcp_init_buffer_space(struct sock *sk)
maxwin = tcp_full_space(sk);
if (tp->window_clamp >= maxwin) {
- tp->window_clamp = maxwin;
+ WRITE_ONCE(tp->window_clamp, maxwin);
if (tcp_app_win && maxwin > 4 * tp->advmss)
- tp->window_clamp = max(maxwin -
- (maxwin >> tcp_app_win),
- 4 * tp->advmss);
+ WRITE_ONCE(tp->window_clamp,
+ max(maxwin - (maxwin >> tcp_app_win),
+ 4 * tp->advmss));
}
/* Force reservation of one segment. */
if (tcp_app_win &&
tp->window_clamp > 2 * tp->advmss &&
tp->window_clamp + tp->advmss > maxwin)
- tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
+ WRITE_ONCE(tp->window_clamp,
+ max(2 * tp->advmss, maxwin - tp->advmss));
tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
tp->snd_cwnd_stamp = tcp_jiffies32;
@@ -773,7 +774,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
/* Make the window clamp follow along. */
- tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
+ WRITE_ONCE(tp->window_clamp,
+ tcp_win_from_space(sk, rcvbuf));
}
}
tp->rcvq_space.space = copied;
@@ -6426,7 +6428,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
if (!tp->rx_opt.wscale_ok) {
tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
- tp->window_clamp = min(tp->window_clamp, 65535U);
+ WRITE_ONCE(tp->window_clamp,
+ min(tp->window_clamp, 65535U));
}
if (tp->rx_opt.saw_tstamp) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e3167ad965676facaacd8f82848c52cf966f97c3..9282fafc0e6109f3ac86d1641740f24588b2d75d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -203,16 +203,17 @@ static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt)
* This MUST be enforced by all callers.
*/
void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
- __u32 *rcv_wnd, __u32 *window_clamp,
+ __u32 *rcv_wnd, __u32 *__window_clamp,
int wscale_ok, __u8 *rcv_wscale,
__u32 init_rcv_wnd)
{
unsigned int space = (__space < 0 ? 0 : __space);
+ u32 window_clamp = READ_ONCE(*__window_clamp);
/* If no clamp set the clamp to the max possible scaled window */
- if (*window_clamp == 0)
- (*window_clamp) = (U16_MAX << TCP_MAX_WSCALE);
- space = min(*window_clamp, space);
+ if (window_clamp == 0)
+ window_clamp = (U16_MAX << TCP_MAX_WSCALE);
+ space = min(window_clamp, space);
/* Quantize space offering to a multiple of mss if possible. */
if (space > mss)
@@ -239,12 +240,13 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
/* Set window scaling on max possible window */
space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
space = max_t(u32, space, READ_ONCE(sysctl_rmem_max));
- space = min_t(u32, space, *window_clamp);
+ space = min_t(u32, space, window_clamp);
*rcv_wscale = clamp_t(int, ilog2(space) - 15,
0, TCP_MAX_WSCALE);
}
/* Set the clamp no higher than max representable value */
- (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
+ WRITE_ONCE(*__window_clamp,
+ min_t(__u32, U16_MAX << (*rcv_wscale), window_clamp));
}
EXPORT_SYMBOL(tcp_select_initial_window);
@@ -3855,7 +3857,7 @@ static void tcp_connect_init(struct sock *sk)
tcp_ca_dst_init(sk, dst);
if (!tp->window_clamp)
- tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
+ WRITE_ONCE(tp->window_clamp, dst_metric(dst, RTAX_WINDOW));
tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
tcp_initialize_rcv_mss(sk);
@@ -3863,7 +3865,7 @@ static void tcp_connect_init(struct sock *sk)
/* limit the window selection if the user enforce a smaller rx buffer */
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
(tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
- tp->window_clamp = tcp_full_space(sk);
+ WRITE_ONCE(tp->window_clamp, tcp_full_space(sk));
rcv_wnd = tcp_rwnd_init_bpf(sk);
if (rcv_wnd == 0)
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 6d8286c299c9d139938ef6751d9958c80d3031e9..bfad1e89b6a6bb99c28b9ef14c142a6c4aeae54b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -246,7 +246,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
}
}
- req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
+ req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? :dst_metric(dst, RTAX_WINDOW);
/* limit the window selection if the user enforce a smaller rx buffer */
full_space = tcp_full_space(sk);
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3a1967bc7bad63d5a8a628b3f3b868e3a27baaca..3897a03bb8cb88f7869180b5ec261158e8e5d027 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2056,7 +2056,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
ssk = mptcp_subflow_tcp_sock(subflow);
slow = lock_sock_fast(ssk);
WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf);
- tcp_sk(ssk)->window_clamp = window_clamp;
+ WRITE_ONCE(tcp_sk(ssk)->window_clamp, window_clamp);
tcp_cleanup_rbuf(ssk, 1);
unlock_sock_fast(ssk, slow);
}
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index dcd1c76d2a3ba1ccc31a3e9279f725cd6d433782..b702e994633788183ad95b2e12859ee6b60bf208 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -1519,7 +1519,7 @@ int mptcp_set_rcvlowat(struct sock *sk, int val)
slow = lock_sock_fast(ssk);
WRITE_ONCE(ssk->sk_rcvbuf, space);
- tcp_sk(ssk)->window_clamp = val;
+ WRITE_ONCE(tcp_sk(ssk)->window_clamp, val);
unlock_sock_fast(ssk, slow);
}
return 0;
--
2.44.0.478.gd926399ef9-goog
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH net-next] tcp: annotate data-races around tp->window_clamp
2024-04-04 11:42 [PATCH net-next] tcp: annotate data-races around tp->window_clamp Eric Dumazet
@ 2024-04-05 14:29 ` Jason Xing
2024-04-05 14:49 ` Eric Dumazet
2024-04-05 14:58 ` Jason Xing
2024-04-06 6:10 ` patchwork-bot+netdevbpf
2 siblings, 1 reply; 6+ messages in thread
From: Jason Xing @ 2024-04-05 14:29 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, netdev,
eric.dumazet
On Thu, Apr 4, 2024 at 7:53 PM Eric Dumazet <edumazet@google.com> wrote:
>
> tp->window_clamp can be read locklessly, add READ_ONCE()
> and WRITE_ONCE() annotations.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
> net/ipv4/syncookies.c | 3 ++-
> net/ipv4/tcp.c | 8 ++++----
> net/ipv4/tcp_input.c | 17 ++++++++++-------
> net/ipv4/tcp_output.c | 18 ++++++++++--------
> net/ipv6/syncookies.c | 2 +-
> net/mptcp/protocol.c | 2 +-
> net/mptcp/sockopt.c | 2 +-
> 7 files changed, 29 insertions(+), 23 deletions(-)
>
> diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
> index 500f665f98cbce4a3d681f8e39ecd368fe4013b1..b61d36810fe3fd62b1e5c5885bbaf20185f1abf0 100644
> --- a/net/ipv4/syncookies.c
> +++ b/net/ipv4/syncookies.c
> @@ -462,7 +462,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
> }
>
> /* Try to redo what tcp_v4_send_synack did. */
> - req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
> + req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? :
> + dst_metric(&rt->dst, RTAX_WINDOW);
> /* limit the window selection if the user enforce a smaller rx buffer */
> full_space = tcp_full_space(sk);
> if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index e767721b3a588b5d56567ae7badf5dffcd35a76a..92ee60492314a1483cfbfa2f73d32fcad5632773 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -1721,7 +1721,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
> space = tcp_space_from_win(sk, val);
> if (space > sk->sk_rcvbuf) {
> WRITE_ONCE(sk->sk_rcvbuf, space);
> - tcp_sk(sk)->window_clamp = val;
> + WRITE_ONCE(tcp_sk(sk)->window_clamp, val);
> }
> return 0;
> }
> @@ -3379,7 +3379,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
> if (!val) {
> if (sk->sk_state != TCP_CLOSE)
> return -EINVAL;
> - tp->window_clamp = 0;
> + WRITE_ONCE(tp->window_clamp, 0);
> } else {
> u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp;
> u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
> @@ -3388,7 +3388,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
> if (new_window_clamp == old_window_clamp)
> return 0;
>
> - tp->window_clamp = new_window_clamp;
> + WRITE_ONCE(tp->window_clamp, new_window_clamp);
> if (new_window_clamp < old_window_clamp) {
> /* need to apply the reserved mem provisioning only
> * when shrinking the window clamp
> @@ -4057,7 +4057,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
> TCP_RTO_MAX / HZ);
> break;
> case TCP_WINDOW_CLAMP:
> - val = tp->window_clamp;
> + val = READ_ONCE(tp->window_clamp);
> break;
> case TCP_INFO: {
> struct tcp_info info;
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 1b6cd384001202df5f8e8e8c73adff0db89ece63..8d44ab5671eacd4bc06647c7cca387a79e346618 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -563,19 +563,20 @@ static void tcp_init_buffer_space(struct sock *sk)
> maxwin = tcp_full_space(sk);
>
> if (tp->window_clamp >= maxwin) {
I wonder if it is necessary to locklessly protect the above line with
READ_ONCE() because I saw the full reader protection in the
tcp_select_initial_window()? There are some other places like this.
Any special reason?
Thanks,
Jason
> - tp->window_clamp = maxwin;
> + WRITE_ONCE(tp->window_clamp, maxwin);
>
> if (tcp_app_win && maxwin > 4 * tp->advmss)
> - tp->window_clamp = max(maxwin -
> - (maxwin >> tcp_app_win),
> - 4 * tp->advmss);
> + WRITE_ONCE(tp->window_clamp,
> + max(maxwin - (maxwin >> tcp_app_win),
> + 4 * tp->advmss));
> }
>
> /* Force reservation of one segment. */
> if (tcp_app_win &&
> tp->window_clamp > 2 * tp->advmss &&
> tp->window_clamp + tp->advmss > maxwin)
> - tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
> + WRITE_ONCE(tp->window_clamp,
> + max(2 * tp->advmss, maxwin - tp->advmss));
>
> tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
> tp->snd_cwnd_stamp = tcp_jiffies32;
> @@ -773,7 +774,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
> WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
>
> /* Make the window clamp follow along. */
> - tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
> + WRITE_ONCE(tp->window_clamp,
> + tcp_win_from_space(sk, rcvbuf));
> }
> }
> tp->rcvq_space.space = copied;
> @@ -6426,7 +6428,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
>
> if (!tp->rx_opt.wscale_ok) {
> tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
> - tp->window_clamp = min(tp->window_clamp, 65535U);
> + WRITE_ONCE(tp->window_clamp,
> + min(tp->window_clamp, 65535U));
> }
>
> if (tp->rx_opt.saw_tstamp) {
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index e3167ad965676facaacd8f82848c52cf966f97c3..9282fafc0e6109f3ac86d1641740f24588b2d75d 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -203,16 +203,17 @@ static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt)
> * This MUST be enforced by all callers.
> */
> void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
> - __u32 *rcv_wnd, __u32 *window_clamp,
> + __u32 *rcv_wnd, __u32 *__window_clamp,
> int wscale_ok, __u8 *rcv_wscale,
> __u32 init_rcv_wnd)
> {
> unsigned int space = (__space < 0 ? 0 : __space);
> + u32 window_clamp = READ_ONCE(*__window_clamp);
>
> /* If no clamp set the clamp to the max possible scaled window */
> - if (*window_clamp == 0)
> - (*window_clamp) = (U16_MAX << TCP_MAX_WSCALE);
> - space = min(*window_clamp, space);
> + if (window_clamp == 0)
> + window_clamp = (U16_MAX << TCP_MAX_WSCALE);
> + space = min(window_clamp, space);
>
> /* Quantize space offering to a multiple of mss if possible. */
> if (space > mss)
> @@ -239,12 +240,13 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
> /* Set window scaling on max possible window */
> space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
> space = max_t(u32, space, READ_ONCE(sysctl_rmem_max));
> - space = min_t(u32, space, *window_clamp);
> + space = min_t(u32, space, window_clamp);
> *rcv_wscale = clamp_t(int, ilog2(space) - 15,
> 0, TCP_MAX_WSCALE);
> }
> /* Set the clamp no higher than max representable value */
> - (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
> + WRITE_ONCE(*__window_clamp,
> + min_t(__u32, U16_MAX << (*rcv_wscale), window_clamp));
> }
> EXPORT_SYMBOL(tcp_select_initial_window);
>
> @@ -3855,7 +3857,7 @@ static void tcp_connect_init(struct sock *sk)
> tcp_ca_dst_init(sk, dst);
>
> if (!tp->window_clamp)
> - tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
> + WRITE_ONCE(tp->window_clamp, dst_metric(dst, RTAX_WINDOW));
> tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
>
> tcp_initialize_rcv_mss(sk);
> @@ -3863,7 +3865,7 @@ static void tcp_connect_init(struct sock *sk)
> /* limit the window selection if the user enforce a smaller rx buffer */
> if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
> (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
> - tp->window_clamp = tcp_full_space(sk);
> + WRITE_ONCE(tp->window_clamp, tcp_full_space(sk));
>
> rcv_wnd = tcp_rwnd_init_bpf(sk);
> if (rcv_wnd == 0)
> diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
> index 6d8286c299c9d139938ef6751d9958c80d3031e9..bfad1e89b6a6bb99c28b9ef14c142a6c4aeae54b 100644
> --- a/net/ipv6/syncookies.c
> +++ b/net/ipv6/syncookies.c
> @@ -246,7 +246,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
> }
> }
>
> - req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
> + req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? :dst_metric(dst, RTAX_WINDOW);
> /* limit the window selection if the user enforce a smaller rx buffer */
> full_space = tcp_full_space(sk);
> if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 3a1967bc7bad63d5a8a628b3f3b868e3a27baaca..3897a03bb8cb88f7869180b5ec261158e8e5d027 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -2056,7 +2056,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
> ssk = mptcp_subflow_tcp_sock(subflow);
> slow = lock_sock_fast(ssk);
> WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf);
> - tcp_sk(ssk)->window_clamp = window_clamp;
> + WRITE_ONCE(tcp_sk(ssk)->window_clamp, window_clamp);
> tcp_cleanup_rbuf(ssk, 1);
> unlock_sock_fast(ssk, slow);
> }
> diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
> index dcd1c76d2a3ba1ccc31a3e9279f725cd6d433782..b702e994633788183ad95b2e12859ee6b60bf208 100644
> --- a/net/mptcp/sockopt.c
> +++ b/net/mptcp/sockopt.c
> @@ -1519,7 +1519,7 @@ int mptcp_set_rcvlowat(struct sock *sk, int val)
>
> slow = lock_sock_fast(ssk);
> WRITE_ONCE(ssk->sk_rcvbuf, space);
> - tcp_sk(ssk)->window_clamp = val;
> + WRITE_ONCE(tcp_sk(ssk)->window_clamp, val);
> unlock_sock_fast(ssk, slow);
> }
> return 0;
> --
> 2.44.0.478.gd926399ef9-goog
>
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH net-next] tcp: annotate data-races around tp->window_clamp
2024-04-05 14:29 ` Jason Xing
@ 2024-04-05 14:49 ` Eric Dumazet
2024-04-05 14:57 ` Jason Xing
0 siblings, 1 reply; 6+ messages in thread
From: Eric Dumazet @ 2024-04-05 14:49 UTC (permalink / raw)
To: Jason Xing
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, netdev,
eric.dumazet
On Fri, Apr 5, 2024 at 4:29 PM Jason Xing <kerneljasonxing@gmail.com> wrote:
>
> On Thu, Apr 4, 2024 at 7:53 PM Eric Dumazet <edumazet@google.com> wrote:
> >
> > tp->window_clamp can be read locklessly, add READ_ONCE()
> > and WRITE_ONCE() annotations.
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > ---
> > net/ipv4/syncookies.c | 3 ++-
> > net/ipv4/tcp.c | 8 ++++----
> > net/ipv4/tcp_input.c | 17 ++++++++++-------
> > net/ipv4/tcp_output.c | 18 ++++++++++--------
> > net/ipv6/syncookies.c | 2 +-
> > net/mptcp/protocol.c | 2 +-
> > net/mptcp/sockopt.c | 2 +-
> > 7 files changed, 29 insertions(+), 23 deletions(-)
> >
> > diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
> > index 500f665f98cbce4a3d681f8e39ecd368fe4013b1..b61d36810fe3fd62b1e5c5885bbaf20185f1abf0 100644
> > --- a/net/ipv4/syncookies.c
> > +++ b/net/ipv4/syncookies.c
> > @@ -462,7 +462,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
> > }
> >
> > /* Try to redo what tcp_v4_send_synack did. */
> > - req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
> > + req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? :
> > + dst_metric(&rt->dst, RTAX_WINDOW);
> > /* limit the window selection if the user enforce a smaller rx buffer */
> > full_space = tcp_full_space(sk);
> > if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
> > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> > index e767721b3a588b5d56567ae7badf5dffcd35a76a..92ee60492314a1483cfbfa2f73d32fcad5632773 100644
> > --- a/net/ipv4/tcp.c
> > +++ b/net/ipv4/tcp.c
> > @@ -1721,7 +1721,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
> > space = tcp_space_from_win(sk, val);
> > if (space > sk->sk_rcvbuf) {
> > WRITE_ONCE(sk->sk_rcvbuf, space);
> > - tcp_sk(sk)->window_clamp = val;
> > + WRITE_ONCE(tcp_sk(sk)->window_clamp, val);
> > }
> > return 0;
> > }
> > @@ -3379,7 +3379,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
> > if (!val) {
> > if (sk->sk_state != TCP_CLOSE)
> > return -EINVAL;
> > - tp->window_clamp = 0;
> > + WRITE_ONCE(tp->window_clamp, 0);
> > } else {
> > u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp;
> > u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
> > @@ -3388,7 +3388,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
> > if (new_window_clamp == old_window_clamp)
> > return 0;
> >
> > - tp->window_clamp = new_window_clamp;
> > + WRITE_ONCE(tp->window_clamp, new_window_clamp);
> > if (new_window_clamp < old_window_clamp) {
> > /* need to apply the reserved mem provisioning only
> > * when shrinking the window clamp
> > @@ -4057,7 +4057,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
> > TCP_RTO_MAX / HZ);
> > break;
> > case TCP_WINDOW_CLAMP:
> > - val = tp->window_clamp;
> > + val = READ_ONCE(tp->window_clamp);
> > break;
> > case TCP_INFO: {
> > struct tcp_info info;
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index 1b6cd384001202df5f8e8e8c73adff0db89ece63..8d44ab5671eacd4bc06647c7cca387a79e346618 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -563,19 +563,20 @@ static void tcp_init_buffer_space(struct sock *sk)
> > maxwin = tcp_full_space(sk);
> >
> > if (tp->window_clamp >= maxwin) {
>
> I wonder if it is necessary to locklessly protect the above line with
> READ_ONCE() because I saw the full reader protection in the
> tcp_select_initial_window()? There are some other places like this.
> Any special reason?
We hold the socket lock at this point.
READ_ONCE() is only needed if another thread can potentially change
the value under us.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH net-next] tcp: annotate data-races around tp->window_clamp
2024-04-05 14:49 ` Eric Dumazet
@ 2024-04-05 14:57 ` Jason Xing
0 siblings, 0 replies; 6+ messages in thread
From: Jason Xing @ 2024-04-05 14:57 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, netdev,
eric.dumazet
On Fri, Apr 5, 2024 at 10:49 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Fri, Apr 5, 2024 at 4:29 PM Jason Xing <kerneljasonxing@gmail.com> wrote:
> >
> > On Thu, Apr 4, 2024 at 7:53 PM Eric Dumazet <edumazet@google.com> wrote:
> > >
> > > tp->window_clamp can be read locklessly, add READ_ONCE()
> > > and WRITE_ONCE() annotations.
> > >
> > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > ---
> > > net/ipv4/syncookies.c | 3 ++-
> > > net/ipv4/tcp.c | 8 ++++----
> > > net/ipv4/tcp_input.c | 17 ++++++++++-------
> > > net/ipv4/tcp_output.c | 18 ++++++++++--------
> > > net/ipv6/syncookies.c | 2 +-
> > > net/mptcp/protocol.c | 2 +-
> > > net/mptcp/sockopt.c | 2 +-
> > > 7 files changed, 29 insertions(+), 23 deletions(-)
> > >
> > > diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
> > > index 500f665f98cbce4a3d681f8e39ecd368fe4013b1..b61d36810fe3fd62b1e5c5885bbaf20185f1abf0 100644
> > > --- a/net/ipv4/syncookies.c
> > > +++ b/net/ipv4/syncookies.c
> > > @@ -462,7 +462,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
> > > }
> > >
> > > /* Try to redo what tcp_v4_send_synack did. */
> > > - req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
> > > + req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? :
> > > + dst_metric(&rt->dst, RTAX_WINDOW);
> > > /* limit the window selection if the user enforce a smaller rx buffer */
> > > full_space = tcp_full_space(sk);
> > > if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
> > > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> > > index e767721b3a588b5d56567ae7badf5dffcd35a76a..92ee60492314a1483cfbfa2f73d32fcad5632773 100644
> > > --- a/net/ipv4/tcp.c
> > > +++ b/net/ipv4/tcp.c
> > > @@ -1721,7 +1721,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
> > > space = tcp_space_from_win(sk, val);
> > > if (space > sk->sk_rcvbuf) {
> > > WRITE_ONCE(sk->sk_rcvbuf, space);
> > > - tcp_sk(sk)->window_clamp = val;
> > > + WRITE_ONCE(tcp_sk(sk)->window_clamp, val);
> > > }
> > > return 0;
> > > }
> > > @@ -3379,7 +3379,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
> > > if (!val) {
> > > if (sk->sk_state != TCP_CLOSE)
> > > return -EINVAL;
> > > - tp->window_clamp = 0;
> > > + WRITE_ONCE(tp->window_clamp, 0);
> > > } else {
> > > u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp;
> > > u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
> > > @@ -3388,7 +3388,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
> > > if (new_window_clamp == old_window_clamp)
> > > return 0;
> > >
> > > - tp->window_clamp = new_window_clamp;
> > > + WRITE_ONCE(tp->window_clamp, new_window_clamp);
> > > if (new_window_clamp < old_window_clamp) {
> > > /* need to apply the reserved mem provisioning only
> > > * when shrinking the window clamp
> > > @@ -4057,7 +4057,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
> > > TCP_RTO_MAX / HZ);
> > > break;
> > > case TCP_WINDOW_CLAMP:
> > > - val = tp->window_clamp;
> > > + val = READ_ONCE(tp->window_clamp);
> > > break;
> > > case TCP_INFO: {
> > > struct tcp_info info;
> > > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > > index 1b6cd384001202df5f8e8e8c73adff0db89ece63..8d44ab5671eacd4bc06647c7cca387a79e346618 100644
> > > --- a/net/ipv4/tcp_input.c
> > > +++ b/net/ipv4/tcp_input.c
> > > @@ -563,19 +563,20 @@ static void tcp_init_buffer_space(struct sock *sk)
> > > maxwin = tcp_full_space(sk);
> > >
> > > if (tp->window_clamp >= maxwin) {
> >
> > I wonder if it is necessary to locklessly protect the above line with
> > READ_ONCE() because I saw the full reader protection in the
> > tcp_select_initial_window()? There are some other places like this.
> > Any special reason?
>
> We hold the socket lock at this point.
>
> READ_ONCE() is only needed if another thread can potentially change
> the value under us.
Oh right, thanks. The socket is locked as soon as the skb enters
the TCP layer.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH net-next] tcp: annotate data-races around tp->window_clamp
2024-04-04 11:42 [PATCH net-next] tcp: annotate data-races around tp->window_clamp Eric Dumazet
2024-04-05 14:29 ` Jason Xing
@ 2024-04-05 14:58 ` Jason Xing
2024-04-06 6:10 ` patchwork-bot+netdevbpf
2 siblings, 0 replies; 6+ messages in thread
From: Jason Xing @ 2024-04-05 14:58 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, netdev,
eric.dumazet
On Thu, Apr 4, 2024 at 7:53 PM Eric Dumazet <edumazet@google.com> wrote:
>
> tp->window_clamp can be read locklessly, add READ_ONCE()
> and WRITE_ONCE() annotations.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Thanks!
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH net-next] tcp: annotate data-races around tp->window_clamp
2024-04-04 11:42 [PATCH net-next] tcp: annotate data-races around tp->window_clamp Eric Dumazet
2024-04-05 14:29 ` Jason Xing
2024-04-05 14:58 ` Jason Xing
@ 2024-04-06 6:10 ` patchwork-bot+netdevbpf
2 siblings, 0 replies; 6+ messages in thread
From: patchwork-bot+netdevbpf @ 2024-04-06 6:10 UTC (permalink / raw)
To: Eric Dumazet; +Cc: davem, kuba, pabeni, netdev, eric.dumazet
Hello:
This patch was applied to netdev/net-next.git (main)
by Jakub Kicinski <kuba@kernel.org>:
On Thu, 4 Apr 2024 11:42:31 +0000 you wrote:
> tp->window_clamp can be read locklessly, add READ_ONCE()
> and WRITE_ONCE() annotations.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
> net/ipv4/syncookies.c | 3 ++-
> net/ipv4/tcp.c | 8 ++++----
> net/ipv4/tcp_input.c | 17 ++++++++++-------
> net/ipv4/tcp_output.c | 18 ++++++++++--------
> net/ipv6/syncookies.c | 2 +-
> net/mptcp/protocol.c | 2 +-
> net/mptcp/sockopt.c | 2 +-
> 7 files changed, 29 insertions(+), 23 deletions(-)
Here is the summary with links:
- [net-next] tcp: annotate data-races around tp->window_clamp
https://git.kernel.org/netdev/net-next/c/f410cbea9f3d
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2024-04-06 6:10 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-04 11:42 [PATCH net-next] tcp: annotate data-races around tp->window_clamp Eric Dumazet
2024-04-05 14:29 ` Jason Xing
2024-04-05 14:49 ` Eric Dumazet
2024-04-05 14:57 ` Jason Xing
2024-04-05 14:58 ` Jason Xing
2024-04-06 6:10 ` patchwork-bot+netdevbpf
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.