From: Kuniyuki Iwashima <kuniyu@amazon.com>
To: <pabeni@redhat.com>
Cc: <davem@davemloft.net>, <dsahern@kernel.org>,
<edumazet@google.com>, <kuba@kernel.org>, <kuniyu@amazon.com>,
<mathew.j.martineau@linux.intel.com>,
<matthieu.baerts@tessares.net>, <mptcp@lists.linux.dev>,
<netdev@vger.kernel.org>
Subject: Re: [PATCH net-next v2 2/2] udp: track the forward memory release threshold in an hot cacheline
Date: Thu, 20 Oct 2022 11:20:08 -0700 [thread overview]
Message-ID: <20221020182008.293-1-kuniyu@amazon.com> (raw)
In-Reply-To: <2dede94e742d8096d6ac5e0f1979054ee158d9a8.1666287924.git.pabeni@redhat.com>
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 20 Oct 2022 19:48:52 +0200
> When the receiver process and the BH run on different cores,
> udp_rmem_release() experiences a cache miss while accessing sk_rcvbuf,
> as the latter shares the same cacheline with sk_forward_alloc, written
> by the BH.
>
> With this patch, UDP tracks the rcvbuf value and its update via custom
> SOL_SOCKET socket options, and copies the forward memory threshold value
> used by udp_rmem_release() in a different cacheline, already accessed by
> the above function and uncontended.
>
> Since the UDP socket init operation has grown a bit, factor out the common
> code between v4 and v6 in a shared helper.
>
> Overall the above gives a 10% peak throughput increase under UDP flood.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Thank you!
> ---
> v1 -> v2:
> - factor out common init helper for udp && udpv6 sock (Kuniyuki)
> ---
> include/linux/udp.h | 3 +++
> include/net/udp.h | 9 +++++++++
> net/ipv4/udp.c | 18 +++++++++++++++---
> net/ipv6/udp.c | 4 ++--
> 4 files changed, 29 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/udp.h b/include/linux/udp.h
> index e96da4157d04..5cdba00a904a 100644
> --- a/include/linux/udp.h
> +++ b/include/linux/udp.h
> @@ -87,6 +87,9 @@ struct udp_sock {
>
> /* This field is dirtied by udp_recvmsg() */
> int forward_deficit;
> +
> + /* This fields follows rcvbuf value, and is touched by udp_recvmsg */
> + int forward_threshold;
> };
>
> #define UDP_MAX_SEGMENTS (1 << 6UL)
> diff --git a/include/net/udp.h b/include/net/udp.h
> index fee053bcd17c..de4b528522bb 100644
> --- a/include/net/udp.h
> +++ b/include/net/udp.h
> @@ -174,6 +174,15 @@ INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
> struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
> netdev_features_t features, bool is_ipv6);
>
> +static inline void udp_lib_init_sock(struct sock *sk)
> +{
> + struct udp_sock *up = udp_sk(sk);
> +
> + skb_queue_head_init(&up->reader_queue);
> + up->forward_threshold = sk->sk_rcvbuf >> 2;
> + set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
> +}
> +
> /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */
> static inline int udp_lib_hash(struct sock *sk)
> {
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 8126f67d18b3..e361ad93999e 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -1448,7 +1448,7 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
> if (likely(partial)) {
> up->forward_deficit += size;
> size = up->forward_deficit;
> - if (size < (sk->sk_rcvbuf >> 2) &&
> + if (size < READ_ONCE(up->forward_threshold) &&
> !skb_queue_empty(&up->reader_queue))
> return;
> } else {
> @@ -1622,7 +1622,7 @@ static void udp_destruct_sock(struct sock *sk)
>
> int udp_init_sock(struct sock *sk)
> {
> - skb_queue_head_init(&udp_sk(sk)->reader_queue);
> + udp_lib_init_sock(sk);
> sk->sk_destruct = udp_destruct_sock;
> return 0;
> }
> @@ -2671,6 +2671,18 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
> int err = 0;
> int is_udplite = IS_UDPLITE(sk);
>
> + if (level == SOL_SOCKET) {
> + err = sk_setsockopt(sk, level, optname, optval, optlen);
> +
> + if (optname == SO_RCVBUF || optname == SO_RCVBUFFORCE) {
> + sockopt_lock_sock(sk);
> + /* paired with READ_ONCE in udp_rmem_release() */
> + WRITE_ONCE(up->forward_threshold, sk->sk_rcvbuf >> 2);
> + sockopt_release_sock(sk);
> + }
> + return err;
> + }
> +
> if (optlen < sizeof(int))
> return -EINVAL;
>
> @@ -2784,7 +2796,7 @@ EXPORT_SYMBOL(udp_lib_setsockopt);
> int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
> unsigned int optlen)
> {
> - if (level == SOL_UDP || level == SOL_UDPLITE)
> + if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET)
> return udp_lib_setsockopt(sk, level, optname,
> optval, optlen,
> udp_push_pending_frames);
> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
> index 8d09f0ea5b8c..b0bc4e27ec2f 100644
> --- a/net/ipv6/udp.c
> +++ b/net/ipv6/udp.c
> @@ -64,7 +64,7 @@ static void udpv6_destruct_sock(struct sock *sk)
>
> int udpv6_init_sock(struct sock *sk)
> {
> - skb_queue_head_init(&udp_sk(sk)->reader_queue);
> + udp_lib_init_sock(sk);
> sk->sk_destruct = udpv6_destruct_sock;
> return 0;
> }
> @@ -1671,7 +1671,7 @@ void udpv6_destroy_sock(struct sock *sk)
> int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
> unsigned int optlen)
> {
> - if (level == SOL_UDP || level == SOL_UDPLITE)
> + if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET)
> return udp_lib_setsockopt(sk, level, optname,
> optval, optlen,
> udp_v6_push_pending_frames);
> --
> 2.37.3
next prev parent reply other threads:[~2022-10-20 18:20 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-20 17:48 [PATCH net-next v2 0/2] udp: avoid false sharing on receive Paolo Abeni
2022-10-20 17:48 ` [PATCH net-next v2 1/2] net: introduce and use custom sockopt socket flag Paolo Abeni
2022-10-20 18:11 ` Eric Dumazet
2022-10-20 18:19 ` Kuniyuki Iwashima
2022-10-20 17:48 ` [PATCH net-next v2 2/2] udp: track the forward memory release threshold in an hot cacheline Paolo Abeni
2022-10-20 18:10 ` Eric Dumazet
2022-10-20 18:20 ` Kuniyuki Iwashima [this message]
2022-10-20 21:14 ` udp: track the forward memory release threshold in an hot cacheline: Tests Results MPTCP CI
2022-10-24 10:30 ` [PATCH net-next v2 0/2] udp: avoid false sharing on receive patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221020182008.293-1-kuniyu@amazon.com \
--to=kuniyu@amazon.com \
--cc=davem@davemloft.net \
--cc=dsahern@kernel.org \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=mathew.j.martineau@linux.intel.com \
--cc=matthieu.baerts@tessares.net \
--cc=mptcp@lists.linux.dev \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.