netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next] tcp: avoid atomic operations on sk->sk_rmem_alloc
@ 2025-03-20 12:16 Eric Dumazet
  2025-03-20 14:43 ` Neal Cardwell
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Eric Dumazet @ 2025-03-20 12:16 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni, Neal Cardwell
  Cc: Kuniyuki Iwashima, Simon Horman, netdev, eric.dumazet,
	Eric Dumazet

TCP uses generic skb_set_owner_r() and sock_rfree()
for received packets, with socket lock being owned.

Switch to private versions, avoiding two atomic operations
per packet.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/tcp.h       | 15 +++++++++++++++
 net/ipv4/tcp.c          | 18 ++++++++++++++++--
 net/ipv4/tcp_fastopen.c |  2 +-
 net/ipv4/tcp_input.c    |  6 +++---
 4 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index d08fbf90495de69b157d3c87c50e82d781a365df..dd6d63a6f42b99774e9461b69d3e7932cf629082 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -779,6 +779,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
 
 /* tcp.c */
 void tcp_get_info(struct sock *, struct tcp_info *);
+void tcp_sock_rfree(struct sk_buff *skb);
 
 /* Read 'sendfile()'-style from a TCP socket */
 int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
@@ -2898,4 +2899,18 @@ enum skb_drop_reason tcp_inbound_hash(struct sock *sk,
 		const void *saddr, const void *daddr,
 		int family, int dif, int sdif);
 
+/* version of skb_set_owner_r() avoiding one atomic_add() */
+static inline void tcp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
+{
+	skb_orphan(skb);
+	skb->sk = sk;
+	skb->destructor = tcp_sock_rfree;
+
+	sock_owned_by_me(sk);
+	atomic_set(&sk->sk_rmem_alloc,
+		   atomic_read(&sk->sk_rmem_alloc) + skb->truesize);
+
+	sk_forward_alloc_add(sk, -skb->truesize);
+}
+
 #endif	/* _TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 989c3c3d8e757361a0ac4a9f039a3cfca10d9612..b1306038b8e6e8c55fd1b4803c5d8ca626491aae 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1525,11 +1525,25 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
 	__tcp_cleanup_rbuf(sk, copied);
 }
 
+/* private version of sock_rfree() avoiding one atomic_sub() */
+void tcp_sock_rfree(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+	unsigned int len = skb->truesize;
+
+	sock_owned_by_me(sk);
+	atomic_set(&sk->sk_rmem_alloc,
+		   atomic_read(&sk->sk_rmem_alloc) - len);
+
+	sk_forward_alloc_add(sk, len);
+	sk_mem_reclaim(sk);
+}
+
 static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	__skb_unlink(skb, &sk->sk_receive_queue);
-	if (likely(skb->destructor == sock_rfree)) {
-		sock_rfree(skb);
+	if (likely(skb->destructor == tcp_sock_rfree)) {
+		tcp_sock_rfree(skb);
 		skb->destructor = NULL;
 		skb->sk = NULL;
 		return skb_attempt_defer_free(skb);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 1a6b1bc5424514e27a99cbb2fcedf001afd51d98..ca40665145c692ce0de518886bb366406606f7ac 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -189,7 +189,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
 	tcp_segs_in(tp, skb);
 	__skb_pull(skb, tcp_hdrlen(skb));
 	sk_forced_mem_schedule(sk, skb->truesize);
-	skb_set_owner_r(skb, sk);
+	tcp_skb_set_owner_r(skb, sk);
 
 	TCP_SKB_CB(skb)->seq++;
 	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 72382ee4456dbd89fd1b69f3bdbf6b9c8ef5aa78..cac6e86196975cc793f499126302fc5220a1dfc7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5171,7 +5171,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		if (tcp_is_sack(tp))
 			tcp_grow_window(sk, skb, false);
 		skb_condense(skb);
-		skb_set_owner_r(skb, sk);
+		tcp_skb_set_owner_r(skb, sk);
 	}
 }
 
@@ -5187,7 +5187,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
 	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
 	if (!eaten) {
 		tcp_add_receive_queue(sk, skb);
-		skb_set_owner_r(skb, sk);
+		tcp_skb_set_owner_r(skb, sk);
 	}
 	return eaten;
 }
@@ -5504,7 +5504,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
 			__skb_queue_before(list, skb, nskb);
 		else
 			__skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
-		skb_set_owner_r(nskb, sk);
+		tcp_skb_set_owner_r(nskb, sk);
 		mptcp_skb_ext_move(nskb, skb);
 
 		/* Copy data, releasing collapsed skbs. */
-- 
2.49.0.rc1.451.g8f38331e32-goog


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next] tcp: avoid atomic operations on sk->sk_rmem_alloc
  2025-03-20 12:16 [PATCH net-next] tcp: avoid atomic operations on sk->sk_rmem_alloc Eric Dumazet
@ 2025-03-20 14:43 ` Neal Cardwell
  2025-03-24 22:32 ` Kuniyuki Iwashima
  2025-03-25 15:00 ` patchwork-bot+netdevbpf
  2 siblings, 0 replies; 4+ messages in thread
From: Neal Cardwell @ 2025-03-20 14:43 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Kuniyuki Iwashima,
	Simon Horman, netdev, eric.dumazet

On Thu, Mar 20, 2025 at 8:16 AM Eric Dumazet <edumazet@google.com> wrote:
>
> TCP uses generic skb_set_owner_r() and sock_rfree()
> for received packets, with socket lock being owned.
>
> Switch to private versions, avoiding two atomic operations
> per packet.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  include/net/tcp.h       | 15 +++++++++++++++
>  net/ipv4/tcp.c          | 18 ++++++++++++++++--
>  net/ipv4/tcp_fastopen.c |  2 +-
>  net/ipv4/tcp_input.c    |  6 +++---
>  4 files changed, 35 insertions(+), 6 deletions(-)
>
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index d08fbf90495de69b157d3c87c50e82d781a365df..dd6d63a6f42b99774e9461b69d3e7932cf629082 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h

Very nice. Thanks!

Reviewed-by: Neal Cardwell <ncardwell@google.com>

A couple quick thoughts:

> @@ -779,6 +779,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
>
>  /* tcp.c */
>  void tcp_get_info(struct sock *, struct tcp_info *);
> +void tcp_sock_rfree(struct sk_buff *skb);
>
>  /* Read 'sendfile()'-style from a TCP socket */
>  int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
> @@ -2898,4 +2899,18 @@ enum skb_drop_reason tcp_inbound_hash(struct sock *sk,
>                 const void *saddr, const void *daddr,
>                 int family, int dif, int sdif);
>
> +/* version of skb_set_owner_r() avoiding one atomic_add() */
> +static inline void tcp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
> +{
> +       skb_orphan(skb);
> +       skb->sk = sk;
> +       skb->destructor = tcp_sock_rfree;
> +
> +       sock_owned_by_me(sk);
> +       atomic_set(&sk->sk_rmem_alloc,
> +                  atomic_read(&sk->sk_rmem_alloc) + skb->truesize);
> +
> +       sk_forward_alloc_add(sk, -skb->truesize);
> +}
> +
>  #endif /* _TCP_H */
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 989c3c3d8e757361a0ac4a9f039a3cfca10d9612..b1306038b8e6e8c55fd1b4803c5d8ca626491aae 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -1525,11 +1525,25 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
>         __tcp_cleanup_rbuf(sk, copied);
>  }
>
> +/* private version of sock_rfree() avoiding one atomic_sub() */
> +void tcp_sock_rfree(struct sk_buff *skb)
> +{
> +       struct sock *sk = skb->sk;
> +       unsigned int len = skb->truesize;
> +
> +       sock_owned_by_me(sk);
> +       atomic_set(&sk->sk_rmem_alloc,
> +                  atomic_read(&sk->sk_rmem_alloc) - len);
> +
> +       sk_forward_alloc_add(sk, len);
> +       sk_mem_reclaim(sk);

One thought on readability: it might be nice to make these functions
both use skb->truesize rather than having one use skb->truesize and
one use len (particularly since "len" in the skb context often refers
to the payload length). I realize the "len" helper variable was
inherited from sock_rfree() but it might be nice to make the TCP
versions easier to read and audit?

Also, it might be nice to have the comments above
tcp_skb_set_owner_r() and tcp_sock_rfree() reference the other
function, so maintainers can be reminded of the fact that the
arithmetic in the two functions needs to be kept exactly in sync?
Perhaps something like:

/* A version of skb_set_owner_r() avoiding one atomic_add().
 * These adjustments are later inverted by tcp_sock_rfree().
 */
static inline void tcp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
...

/* A private version of sock_rfree() avoiding one atomic_sub().
 * Inverts the earlier adjustments made by tcp_skb_set_owner_r().
 */
void tcp_sock_rfree(struct sk_buff *skb)

Anyway, the patches LGTM. Those were just some thoughts. :-)

Thanks!

neal

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next] tcp: avoid atomic operations on sk->sk_rmem_alloc
  2025-03-20 12:16 [PATCH net-next] tcp: avoid atomic operations on sk->sk_rmem_alloc Eric Dumazet
  2025-03-20 14:43 ` Neal Cardwell
@ 2025-03-24 22:32 ` Kuniyuki Iwashima
  2025-03-25 15:00 ` patchwork-bot+netdevbpf
  2 siblings, 0 replies; 4+ messages in thread
From: Kuniyuki Iwashima @ 2025-03-24 22:32 UTC (permalink / raw)
  To: edumazet
  Cc: davem, eric.dumazet, horms, kuba, kuniyu, ncardwell, netdev,
	pabeni

From: Eric Dumazet <edumazet@google.com>
Date: Thu, 20 Mar 2025 12:16:04 +0000
> TCP uses generic skb_set_owner_r() and sock_rfree()
> for received packets, with socket lock being owned.
> 
> Switch to private versions, avoiding two atomic operations
> per packet.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  include/net/tcp.h       | 15 +++++++++++++++
>  net/ipv4/tcp.c          | 18 ++++++++++++++++--
>  net/ipv4/tcp_fastopen.c |  2 +-
>  net/ipv4/tcp_input.c    |  6 +++---
>  4 files changed, 35 insertions(+), 6 deletions(-)
> 
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index d08fbf90495de69b157d3c87c50e82d781a365df..dd6d63a6f42b99774e9461b69d3e7932cf629082 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -779,6 +779,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
>  
>  /* tcp.c */
>  void tcp_get_info(struct sock *, struct tcp_info *);
> +void tcp_sock_rfree(struct sk_buff *skb);
>  
>  /* Read 'sendfile()'-style from a TCP socket */
>  int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
> @@ -2898,4 +2899,18 @@ enum skb_drop_reason tcp_inbound_hash(struct sock *sk,
>  		const void *saddr, const void *daddr,
>  		int family, int dif, int sdif);
>  
> +/* version of skb_set_owner_r() avoiding one atomic_add() */
> +static inline void tcp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
> +{
> +	skb_orphan(skb);
> +	skb->sk = sk;
> +	skb->destructor = tcp_sock_rfree;
> +
> +	sock_owned_by_me(sk);
> +	atomic_set(&sk->sk_rmem_alloc,
> +		   atomic_read(&sk->sk_rmem_alloc) + skb->truesize);

It's nice to learn that atomic_set() and _read() are just __WRITE_ONCE()
and __READ_ONCE() and have no LOCK_PREFIX :)

Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>

Thanks!

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next] tcp: avoid atomic operations on sk->sk_rmem_alloc
  2025-03-20 12:16 [PATCH net-next] tcp: avoid atomic operations on sk->sk_rmem_alloc Eric Dumazet
  2025-03-20 14:43 ` Neal Cardwell
  2025-03-24 22:32 ` Kuniyuki Iwashima
@ 2025-03-25 15:00 ` patchwork-bot+netdevbpf
  2 siblings, 0 replies; 4+ messages in thread
From: patchwork-bot+netdevbpf @ 2025-03-25 15:00 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: davem, kuba, pabeni, ncardwell, kuniyu, horms, netdev,
	eric.dumazet

Hello:

This patch was applied to netdev/net-next.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Thu, 20 Mar 2025 12:16:04 +0000 you wrote:
> TCP uses generic skb_set_owner_r() and sock_rfree()
> for received packets, with socket lock being owned.
> 
> Switch to private versions, avoiding two atomic operations
> per packet.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> 
> [...]

Here is the summary with links:
  - [net-next] tcp: avoid atomic operations on sk->sk_rmem_alloc
    https://git.kernel.org/netdev/net-next/c/0de2a5c4b824

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-03-25 15:00 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-03-20 12:16 [PATCH net-next] tcp: avoid atomic operations on sk->sk_rmem_alloc Eric Dumazet
2025-03-20 14:43 ` Neal Cardwell
2025-03-24 22:32 ` Kuniyuki Iwashima
2025-03-25 15:00 ` patchwork-bot+netdevbpf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).