From: Jason Xing <kerneljasonxing@gmail.com>
To: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, dsahern@kernel.org,
willemdebruijn.kernel@gmail.com, willemb@google.com,
ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
martin.lau@linux.dev, eddyz87@gmail.com, song@kernel.org,
yonghong.song@linux.dev, john.fastabend@gmail.com,
kpsingh@kernel.org, sdf@fomichev.me, haoluo@google.com,
jolsa@kernel.org, horms@kernel.org
Cc: bpf@vger.kernel.org, netdev@vger.kernel.org,
Jason Xing <kerneljasonxing@gmail.com>
Subject: [PATCH bpf-next v7 03/13] bpf: stop unsafely accessing TCP fields in bpf callbacks
Date: Tue, 28 Jan 2025 16:46:10 +0800 [thread overview]
Message-ID: <20250128084620.57547-4-kerneljasonxing@gmail.com> (raw)
In-Reply-To: <20250128084620.57547-1-kerneljasonxing@gmail.com>
The "allow_tcp_access" flag is added to indicate that the callback
site has a tcp_sock locked.
Applying the new member allow_tcp_access in the existing callbacks
where is_fullsock is set to 1 can help us stop UDP socket accessing
struct tcp_sock and stop TCP socket without sk lock protecting does
the similar thing, or else it could be catastrophe leading to panic.
To keep it simple, instead of distinguishing between read and write
access, we disallow all read/write access to the tcp_sock through
the older bpf_sock_ops ctx. The new timestamping callbacks can use
newer helpers to read everything from a sk (e.g. bpf_core_cast), so
nothing is lost.
Signed-off-by: Jason Xing <kerneljasonxing@gmail.com>
---
include/linux/filter.h | 5 +++++
include/net/tcp.h | 1 +
net/core/filter.c | 8 ++++----
net/ipv4/tcp_input.c | 2 ++
net/ipv4/tcp_output.c | 2 ++
5 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index a3ea46281595..1569e9f31a8c 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1508,6 +1508,11 @@ struct bpf_sock_ops_kern {
void *skb_data_end;
u8 op;
u8 is_fullsock;
+ u8 allow_tcp_access; /* Indicate that the callback site
+ * has a tcp_sock locked. Then it
+ * would be safe to access struct
+ * tcp_sock.
+ */
u8 remaining_opt_len;
u64 temp; /* temp and everything after is not
* initialized to 0 before calling
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5b2b04835688..293047694710 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2649,6 +2649,7 @@ static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
if (sk_fullsock(sk)) {
sock_ops.is_fullsock = 1;
+ sock_ops.allow_tcp_access = 1;
sock_owned_by_me(sk);
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 1c6c07507a78..dc0e67c5776a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -10381,10 +10381,10 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
} \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct bpf_sock_ops_kern, \
- is_fullsock), \
+ allow_tcp_access), \
fullsock_reg, si->src_reg, \
offsetof(struct bpf_sock_ops_kern, \
- is_fullsock)); \
+ allow_tcp_access)); \
*insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \
if (si->dst_reg == si->src_reg) \
*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
@@ -10469,10 +10469,10 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
temp)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct bpf_sock_ops_kern, \
- is_fullsock), \
+ allow_tcp_access), \
reg, si->dst_reg, \
offsetof(struct bpf_sock_ops_kern, \
- is_fullsock)); \
+ allow_tcp_access)); \
*insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct bpf_sock_ops_kern, sk),\
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eb82e01da911..77185479ed5e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -169,6 +169,7 @@ static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB;
sock_ops.is_fullsock = 1;
+ sock_ops.allow_tcp_access = 1;
sock_ops.sk = sk;
bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
@@ -185,6 +186,7 @@ static void bpf_skops_established(struct sock *sk, int bpf_op,
memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
sock_ops.op = bpf_op;
sock_ops.is_fullsock = 1;
+ sock_ops.allow_tcp_access = 1;
sock_ops.sk = sk;
/* sk with TCP_REPAIR_ON does not have skb in tcp_finish_connect */
if (skb)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0e5b9a654254..695749807c09 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -522,6 +522,7 @@ static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb,
sock_owned_by_me(sk);
sock_ops.is_fullsock = 1;
+ sock_ops.allow_tcp_access = 1;
sock_ops.sk = sk;
}
@@ -567,6 +568,7 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
sock_owned_by_me(sk);
sock_ops.is_fullsock = 1;
+ sock_ops.allow_tcp_access = 1;
sock_ops.sk = sk;
}
--
2.43.5
next prev parent reply other threads:[~2025-01-28 8:46 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-01-28 8:46 [PATCH bpf-next v7 00/13] net-timestamp: bpf extension to equip applications transparently Jason Xing
2025-01-28 8:46 ` [PATCH bpf-next v7 01/13] net-timestamp: add support for bpf_setsockopt() Jason Xing
2025-01-28 8:46 ` [PATCH bpf-next v7 02/13] net-timestamp: prepare for timestamping callbacks use Jason Xing
2025-01-28 8:46 ` Jason Xing [this message]
2025-01-28 8:46 ` [PATCH bpf-next v7 04/13] bpf: stop calling some sock_op BPF CALLs in new timestamping callbacks Jason Xing
2025-01-28 8:46 ` [PATCH bpf-next v7 05/13] net-timestamp: prepare for isolating two modes of SO_TIMESTAMPING Jason Xing
2025-02-03 23:14 ` Martin KaFai Lau
2025-02-04 0:18 ` Jason Xing
2025-01-28 8:46 ` [PATCH bpf-next v7 06/13] net-timestamp: support SCM_TSTAMP_SCHED for bpf extension Jason Xing
2025-02-03 23:23 ` Martin KaFai Lau
2025-02-04 0:19 ` Jason Xing
2025-01-28 8:46 ` [PATCH bpf-next v7 07/13] net-timestamp: support sw SCM_TSTAMP_SND " Jason Xing
2025-01-28 8:46 ` [PATCH bpf-next v7 08/13] net-timestamp: support hw " Jason Xing
2025-02-04 0:56 ` Martin KaFai Lau
2025-02-04 1:13 ` Jason Xing
2025-01-28 8:46 ` [PATCH bpf-next v7 09/13] net-timestamp: support SCM_TSTAMP_ACK " Jason Xing
2025-01-28 8:46 ` [PATCH bpf-next v7 10/13] net-timestamp: make TCP tx timestamp bpf extension work Jason Xing
2025-02-04 1:03 ` Martin KaFai Lau
2025-02-04 1:15 ` Jason Xing
2025-01-28 8:46 ` [PATCH bpf-next v7 11/13] net-timestamp: add a new callback in tcp_tx_timestamp() Jason Xing
2025-02-04 1:16 ` Martin KaFai Lau
2025-02-04 1:25 ` Jason Xing
2025-02-04 17:08 ` Willem de Bruijn
2025-02-04 18:09 ` Jason Xing
2025-02-05 3:05 ` Jason Xing
2025-02-05 5:13 ` Jason Xing
2025-02-05 15:20 ` Willem de Bruijn
2025-02-05 15:47 ` Jason Xing
2025-02-05 21:02 ` Willem de Bruijn
2025-02-06 0:33 ` Jason Xing
2025-02-06 3:00 ` Willem de Bruijn
2025-02-06 4:03 ` Jason Xing
2025-02-06 16:22 ` Willem de Bruijn
2025-02-07 0:35 ` Jason Xing
2025-01-28 8:46 ` [PATCH bpf-next v7 12/13] net-timestamp: introduce cgroup lock to avoid affecting non-bpf cases Jason Xing
2025-02-04 1:21 ` Martin KaFai Lau
2025-02-04 1:25 ` Jason Xing
2025-01-28 8:46 ` [PATCH bpf-next v7 13/13] bpf: add simple bpf tests in the tx path for so_timestamping feature Jason Xing
2025-02-04 2:02 ` Martin KaFai Lau
2025-02-04 5:32 ` Jason Xing
2025-02-04 2:27 ` [PATCH bpf-next v7 00/13] net-timestamp: bpf extension to equip applications transparently Martin KaFai Lau
2025-02-04 2:44 ` Jason Xing
2025-02-04 17:11 ` Willem de Bruijn
2025-02-04 18:12 ` Jason Xing
2025-02-04 17:06 ` Willem de Bruijn
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250128084620.57547-4-kerneljasonxing@gmail.com \
--to=kerneljasonxing@gmail.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=dsahern@kernel.org \
--cc=eddyz87@gmail.com \
--cc=edumazet@google.com \
--cc=haoluo@google.com \
--cc=horms@kernel.org \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kpsingh@kernel.org \
--cc=kuba@kernel.org \
--cc=martin.lau@linux.dev \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sdf@fomichev.me \
--cc=song@kernel.org \
--cc=willemb@google.com \
--cc=willemdebruijn.kernel@gmail.com \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).