From: sdf@google.com
To: Martin KaFai Lau <kafai@fb.com>
Cc: bpf@vger.kernel.org, netdev@vger.kernel.org,
Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
David Miller <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>,
kernel-team@fb.com, Paolo Abeni <pabeni@redhat.com>
Subject: Re: [PATCH bpf-next 14/17] bpf: Change bpf_getsockopt(SOL_TCP) to reuse do_tcp_getsockopt()
Date: Thu, 25 Aug 2022 11:36:36 -0700 [thread overview]
Message-ID: <YwfBNEVrxkafzpYE@google.com> (raw)
In-Reply-To: <20220824222730.1923992-1-kafai@fb.com>
On 08/24, Martin KaFai Lau wrote:
> This patch changes bpf_getsockopt(SOL_TCP) to reuse
> do_tcp_getsockopt(). It removes the duplicated code from
> bpf_getsockopt(SOL_TCP).
> Before this patch, there were some optnames available to
> bpf_setsockopt(SOL_TCP) but missing in bpf_getsockopt(SOL_TCP).
> For example, TCP_NODELAY, TCP_MAXSEG, TCP_KEEPIDLE, TCP_KEEPINTVL,
> and a few more. It surprises users from time to time. This patch
> automatically closes this gap without duplicating more code.
> bpf_getsockopt(TCP_SAVED_SYN) does not free the saved_syn,
> so it stays in sol_tcp_sockopt().
> For string name value like TCP_CONGESTION, bpf expects it
> is always null terminated, so sol_tcp_sockopt() decrements
> optlen by one before calling do_tcp_getsockopt() and
> the 'if (optlen < saved_optlen) memset(..,0,..);'
> in __bpf_getsockopt() will always do a null termination.
> Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> ---
> include/net/tcp.h | 2 ++
> net/core/filter.c | 70 ++++++++++++++++++++++++++---------------------
> net/ipv4/tcp.c | 4 +--
> 3 files changed, 43 insertions(+), 33 deletions(-)
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index c03a50c72f40..735e957f7f4b 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -402,6 +402,8 @@ void tcp_init_sock(struct sock *sk);
> void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb);
> __poll_t tcp_poll(struct file *file, struct socket *sock,
> struct poll_table_struct *wait);
> +int do_tcp_getsockopt(struct sock *sk, int level,
> + int optname, sockptr_t optval, sockptr_t optlen);
> int tcp_getsockopt(struct sock *sk, int level, int optname,
> char __user *optval, int __user *optlen);
> bool tcp_bpf_bypass_getsockopt(int level, int optname);
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 68b52243b306..cdbbcec46e8b 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5096,8 +5096,9 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk,
> int optname,
> return 0;
> }
> -static int sol_tcp_setsockopt(struct sock *sk, int optname,
> - char *optval, int optlen)
> +static int sol_tcp_sockopt(struct sock *sk, int optname,
> + char *optval, int *optlen,
> + bool getopt)
> {
> if (sk->sk_prot->setsockopt != tcp_setsockopt)
> return -EINVAL;
> @@ -5114,17 +5115,47 @@ static int sol_tcp_setsockopt(struct sock *sk,
> int optname,
> case TCP_USER_TIMEOUT:
> case TCP_NOTSENT_LOWAT:
> case TCP_SAVE_SYN:
> - if (optlen != sizeof(int))
> + if (*optlen != sizeof(int))
> return -EINVAL;
> break;
[..]
> case TCP_CONGESTION:
> + if (*optlen < 2)
> + return -EINVAL;
> + break;
> + case TCP_SAVED_SYN:
> + if (*optlen < 1)
> + return -EINVAL;
> break;
This looks a bit inconsistent vs '*optlen != sizeof(int)' above. Maybe
if (*optlen < sizeof(u16))
if (*optlen < sizeof(u8))
?
> default:
> - return bpf_sol_tcp_setsockopt(sk, optname, optval, optlen);
> + if (getopt)
> + return -EINVAL;
> + return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen);
> + }
> +
> + if (getopt) {
> + if (optname == TCP_SAVED_SYN) {
> + struct tcp_sock *tp = tcp_sk(sk);
> +
> + if (!tp->saved_syn ||
> + *optlen > tcp_saved_syn_len(tp->saved_syn))
> + return -EINVAL;
You mention in the description that bpf doesn't free saved_syn;
maybe worth putting a comment with the rationale here as well?
I'm assuming we don't free from bpf because we want userspace to
have an opportunity to read it as well?
> + memcpy(optval, tp->saved_syn->data, *optlen);
> + return 0;
> + }
> +
> + if (optname == TCP_CONGESTION) {
> + if (!inet_csk(sk)->icsk_ca_ops)
> + return -EINVAL;
Is it worth doing the null termination more explicitly here?
For readability's sake:
/* BPF always expects NULL-terminated strings. */
optval[*optlen-1] = '\0';
> + (*optlen)--;
> + }
> +
> + return do_tcp_getsockopt(sk, SOL_TCP, optname,
> + KERNEL_SOCKPTR(optval),
> + KERNEL_SOCKPTR(optlen));
> }
> return do_tcp_setsockopt(sk, SOL_TCP, optname,
> - KERNEL_SOCKPTR(optval), optlen);
> + KERNEL_SOCKPTR(optval), *optlen);
> }
> static int sol_ip_setsockopt(struct sock *sk, int optname,
> @@ -5179,7 +5210,7 @@ static int __bpf_setsockopt(struct sock *sk, int
> level, int optname,
> else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6)
> return sol_ipv6_setsockopt(sk, optname, optval, optlen);
> else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP)
> - return sol_tcp_setsockopt(sk, optname, optval, optlen);
> + return sol_tcp_sockopt(sk, optname, optval, &optlen, false);
> return -EINVAL;
> }
> @@ -5202,31 +5233,8 @@ static int __bpf_getsockopt(struct sock *sk, int
> level, int optname,
> if (level == SOL_SOCKET) {
> err = sol_socket_sockopt(sk, optname, optval, &optlen, true);
> - } else if (IS_ENABLED(CONFIG_INET) &&
> - level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
> - struct inet_connection_sock *icsk;
> - struct tcp_sock *tp;
> -
> - switch (optname) {
> - case TCP_CONGESTION:
> - icsk = inet_csk(sk);
> -
> - if (!icsk->icsk_ca_ops || optlen <= 1)
> - goto err_clear;
> - strncpy(optval, icsk->icsk_ca_ops->name, optlen);
> - optval[optlen - 1] = 0;
> - break;
> - case TCP_SAVED_SYN:
> - tp = tcp_sk(sk);
> -
> - if (optlen <= 0 || !tp->saved_syn ||
> - optlen > tcp_saved_syn_len(tp->saved_syn))
> - goto err_clear;
> - memcpy(optval, tp->saved_syn->data, optlen);
> - break;
> - default:
> - goto err_clear;
> - }
> + } else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) {
> + err = sol_tcp_sockopt(sk, optname, optval, &optlen, true);
> } else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) {
> struct inet_sock *inet = inet_sk(sk);
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index ab8118225797..a47cb5662be6 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -4043,8 +4043,8 @@ struct sk_buff
> *tcp_get_timestamping_opt_stats(const struct sock *sk,
> return stats;
> }
> -static int do_tcp_getsockopt(struct sock *sk, int level,
> - int optname, sockptr_t optval, sockptr_t optlen)
> +int do_tcp_getsockopt(struct sock *sk, int level,
> + int optname, sockptr_t optval, sockptr_t optlen)
> {
> struct inet_connection_sock *icsk = inet_csk(sk);
> struct tcp_sock *tp = tcp_sk(sk);
> --
> 2.30.2
next prev parent reply other threads:[~2022-08-25 18:36 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20220824222601.1916776-1-kafai@fb.com>
[not found] ` <20220824222614.1918332-1-kafai@fb.com>
2022-08-25 18:07 ` [PATCH bpf-next 02/17] bpf: net: Change sk_getsockopt() to take the sockptr_t argument Stanislav Fomichev
2022-08-26 19:15 ` Martin KaFai Lau
2022-08-26 20:40 ` Stanislav Fomichev
[not found] ` <20220824222730.1923992-1-kafai@fb.com>
2022-08-25 18:36 ` sdf [this message]
2022-08-26 19:24 ` [PATCH bpf-next 14/17] bpf: Change bpf_getsockopt(SOL_TCP) to reuse do_tcp_getsockopt() Martin KaFai Lau
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=YwfBNEVrxkafzpYE@google.com \
--to=sdf@google.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kafai@fb.com \
--cc=kernel-team@fb.com \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).