From: Martin KaFai Lau <kafai@fb.com>
To: <bpf@vger.kernel.org>, <netdev@vger.kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
David Miller <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, <kernel-team@fb.com>,
Paolo Abeni <pabeni@redhat.com>,
Martin KaFai Lau <martin.lau@kernel.org>
Subject: [PATCH v2 bpf-next 14/17] bpf: Change bpf_getsockopt(SOL_TCP) to reuse do_tcp_getsockopt()
Date: Thu, 1 Sep 2022 17:29:18 -0700 [thread overview]
Message-ID: <20220902002918.2894511-1-kafai@fb.com> (raw)
In-Reply-To: <20220902002750.2887415-1-kafai@fb.com>
From: Martin KaFai Lau <martin.lau@kernel.org>
This patch changes bpf_getsockopt(SOL_TCP) to reuse
do_tcp_getsockopt(). It removes the duplicated code from
bpf_getsockopt(SOL_TCP).
Before this patch, there were some optnames available to
bpf_setsockopt(SOL_TCP) but missing in bpf_getsockopt(SOL_TCP).
For example, TCP_NODELAY, TCP_MAXSEG, TCP_KEEPIDLE, TCP_KEEPINTVL,
and a few more. It surprises users from time to time. This patch
automatically closes this gap without duplicating more code.
bpf_getsockopt(TCP_SAVED_SYN) does not free the saved_syn,
so it stays in sol_tcp_sockopt().
For string name value like TCP_CONGESTION, bpf expects it
is always null terminated, so sol_tcp_sockopt() decrements
optlen by one before calling do_tcp_getsockopt() and
the 'if (optlen < saved_optlen) memset(..,0,..);'
in __bpf_getsockopt() will always do a null termination.
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
include/net/tcp.h | 2 ++
net/core/filter.c | 74 +++++++++++++++++++++++++++--------------------
net/ipv4/tcp.c | 4 +--
3 files changed, 47 insertions(+), 33 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c03a50c72f40..735e957f7f4b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -402,6 +402,8 @@ void tcp_init_sock(struct sock *sk);
void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb);
__poll_t tcp_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait);
+int do_tcp_getsockopt(struct sock *sk, int level,
+ int optname, sockptr_t optval, sockptr_t optlen);
int tcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
bool tcp_bpf_bypass_getsockopt(int level, int optname);
diff --git a/net/core/filter.c b/net/core/filter.c
index 68b52243b306..e427f40f0cee 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5096,8 +5096,9 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
return 0;
}
-static int sol_tcp_setsockopt(struct sock *sk, int optname,
- char *optval, int optlen)
+static int sol_tcp_sockopt(struct sock *sk, int optname,
+ char *optval, int *optlen,
+ bool getopt)
{
if (sk->sk_prot->setsockopt != tcp_setsockopt)
return -EINVAL;
@@ -5114,17 +5115,51 @@ static int sol_tcp_setsockopt(struct sock *sk, int optname,
case TCP_USER_TIMEOUT:
case TCP_NOTSENT_LOWAT:
case TCP_SAVE_SYN:
- if (optlen != sizeof(int))
+ if (*optlen != sizeof(int))
return -EINVAL;
break;
case TCP_CONGESTION:
+ if (*optlen < 2)
+ return -EINVAL;
+ break;
+ case TCP_SAVED_SYN:
+ if (*optlen < 1)
+ return -EINVAL;
break;
default:
- return bpf_sol_tcp_setsockopt(sk, optname, optval, optlen);
+ if (getopt)
+ return -EINVAL;
+ return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen);
+ }
+
+ if (getopt) {
+ if (optname == TCP_SAVED_SYN) {
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (!tp->saved_syn ||
+ *optlen > tcp_saved_syn_len(tp->saved_syn))
+ return -EINVAL;
+ memcpy(optval, tp->saved_syn->data, *optlen);
+ /* It cannot free tp->saved_syn here because it
+ * does not know if the user space still needs it.
+ */
+ return 0;
+ }
+
+ if (optname == TCP_CONGESTION) {
+ if (!inet_csk(sk)->icsk_ca_ops)
+ return -EINVAL;
+ /* BPF expects NULL-terminated tcp-cc string */
+ optval[--(*optlen)] = '\0';
+ }
+
+ return do_tcp_getsockopt(sk, SOL_TCP, optname,
+ KERNEL_SOCKPTR(optval),
+ KERNEL_SOCKPTR(optlen));
}
return do_tcp_setsockopt(sk, SOL_TCP, optname,
- KERNEL_SOCKPTR(optval), optlen);
+ KERNEL_SOCKPTR(optval), *optlen);
}
static int sol_ip_setsockopt(struct sock *sk, int optname,
@@ -5179,7 +5214,7 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname,
else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6)
return sol_ipv6_setsockopt(sk, optname, optval, optlen);
else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP)
- return sol_tcp_setsockopt(sk, optname, optval, optlen);
+ return sol_tcp_sockopt(sk, optname, optval, &optlen, false);
return -EINVAL;
}
@@ -5202,31 +5237,8 @@ static int __bpf_getsockopt(struct sock *sk, int level, int optname,
if (level == SOL_SOCKET) {
err = sol_socket_sockopt(sk, optname, optval, &optlen, true);
- } else if (IS_ENABLED(CONFIG_INET) &&
- level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
- struct inet_connection_sock *icsk;
- struct tcp_sock *tp;
-
- switch (optname) {
- case TCP_CONGESTION:
- icsk = inet_csk(sk);
-
- if (!icsk->icsk_ca_ops || optlen <= 1)
- goto err_clear;
- strncpy(optval, icsk->icsk_ca_ops->name, optlen);
- optval[optlen - 1] = 0;
- break;
- case TCP_SAVED_SYN:
- tp = tcp_sk(sk);
-
- if (optlen <= 0 || !tp->saved_syn ||
- optlen > tcp_saved_syn_len(tp->saved_syn))
- goto err_clear;
- memcpy(optval, tp->saved_syn->data, optlen);
- break;
- default:
- goto err_clear;
- }
+ } else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) {
+ err = sol_tcp_sockopt(sk, optname, optval, &optlen, true);
} else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) {
struct inet_sock *inet = inet_sk(sk);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 45c737ee95a1..a822cc627e2a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4043,8 +4043,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
return stats;
}
-static int do_tcp_getsockopt(struct sock *sk, int level,
- int optname, sockptr_t optval, sockptr_t optlen)
+int do_tcp_getsockopt(struct sock *sk, int level,
+ int optname, sockptr_t optval, sockptr_t optlen)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
--
2.30.2
next prev parent reply other threads:[~2022-09-02 0:31 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-09-02 0:27 [PATCH v2 bpf-next 00/17] bpf: net: Remove duplicated code from bpf_getsockopt() Martin KaFai Lau
2022-09-02 0:27 ` [PATCH v2 bpf-next 01/17] net: Change sock_getsockopt() to take the sk ptr instead of the sock ptr Martin KaFai Lau
2022-09-02 0:28 ` [PATCH v2 bpf-next 02/17] bpf: net: Change sk_getsockopt() to take the sockptr_t argument Martin KaFai Lau
2022-09-02 0:28 ` [PATCH v2 bpf-next 03/17] bpf: net: Avoid sk_getsockopt() taking sk lock when called from bpf Martin KaFai Lau
2022-09-02 0:28 ` [PATCH v2 bpf-next 04/17] bpf: net: Change do_tcp_getsockopt() to take the sockptr_t argument Martin KaFai Lau
2022-09-02 0:28 ` [PATCH v2 bpf-next 05/17] bpf: net: Avoid do_tcp_getsockopt() taking sk lock when called from bpf Martin KaFai Lau
2022-09-02 0:28 ` [PATCH v2 bpf-next 06/17] bpf: net: Change do_ip_getsockopt() to take the sockptr_t argument Martin KaFai Lau
2022-09-02 0:28 ` [PATCH v2 bpf-next 07/17] bpf: net: Avoid do_ip_getsockopt() taking sk lock when called from bpf Martin KaFai Lau
2022-09-02 0:28 ` [PATCH v2 bpf-next 08/17] net: Remove unused flags argument from do_ipv6_getsockopt Martin KaFai Lau
2022-09-02 0:28 ` [PATCH v2 bpf-next 09/17] net: Add a len argument to compat_ipv6_get_msfilter() Martin KaFai Lau
2022-09-02 0:28 ` [PATCH v2 bpf-next 10/17] bpf: net: Change do_ipv6_getsockopt() to take the sockptr_t argument Martin KaFai Lau
2022-09-02 0:28 ` [PATCH v2 bpf-next 11/17] bpf: net: Avoid do_ipv6_getsockopt() taking sk lock when called from bpf Martin KaFai Lau
2022-09-02 0:29 ` [PATCH v2 bpf-next 12/17] bpf: Embed kernel CONFIG check into the if statement in bpf_getsockopt Martin KaFai Lau
2022-09-02 0:29 ` [PATCH v2 bpf-next 13/17] bpf: Change bpf_getsockopt(SOL_SOCKET) to reuse sk_getsockopt() Martin KaFai Lau
2022-09-02 0:29 ` Martin KaFai Lau [this message]
2022-09-02 0:29 ` [PATCH v2 bpf-next 15/17] bpf: Change bpf_getsockopt(SOL_IP) to reuse do_ip_getsockopt() Martin KaFai Lau
2022-09-02 0:29 ` [PATCH v2 bpf-next 16/17] bpf: Change bpf_getsockopt(SOL_IPV6) to reuse do_ipv6_getsockopt() Martin KaFai Lau
2022-09-02 0:29 ` [PATCH v2 bpf-next 17/17] selftest/bpf: Add test for bpf_getsockopt() Martin KaFai Lau
2022-09-02 22:16 ` [PATCH v2 bpf-next 00/17] bpf: net: Remove duplicated code from bpf_getsockopt() sdf
2022-09-03 3:50 ` patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220902002918.2894511-1-kafai@fb.com \
--to=kafai@fb.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kernel-team@fb.com \
--cc=kuba@kernel.org \
--cc=martin.lau@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox