[PATCH AUTOSEL 5.10 077/114] bpf: Prevent unsafe access to the sock fields in the BPF timestamping callback

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Jason Xing <kerneljasonxing@gmail.com>,
	Martin KaFai Lau <martin.lau@kernel.org>,
	Sasha Levin <sashal@kernel.org>,
	ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	edumazet@google.com, ncardwell@google.com, davem@davemloft.net,
	kuba@kernel.org, pabeni@redhat.com, martin.lau@linux.dev,
	dsahern@kernel.org, bpf@vger.kernel.org, netdev@vger.kernel.org
Subject: [PATCH AUTOSEL 5.10 077/114] bpf: Prevent unsafe access to the sock fields in the BPF timestamping callback
Date: Mon,  5 May 2025 19:17:40 -0400	[thread overview]
Message-ID: <20250505231817.2697367-77-sashal@kernel.org> (raw)
In-Reply-To: <20250505231817.2697367-1-sashal@kernel.org>

From: Jason Xing <kerneljasonxing@gmail.com>

[ Upstream commit fd93eaffb3f977b23bc0a48d4c8616e654fcf133 ]

The subsequent patch will implement BPF TX timestamping. It will
call the sockops BPF program without holding the sock lock.

This breaks the current assumption that all sock ops programs will
hold the sock lock. The sock's fields of the uapi's bpf_sock_ops
requires this assumption.

To address this, a new "u8 is_locked_tcp_sock;" field is added. This
patch sets it in the current sock_ops callbacks. The "is_fullsock"
test is then replaced by the "is_locked_tcp_sock" test during
sock_ops_convert_ctx_access().

The new TX timestamping callbacks added in the subsequent patch will
not have this set. This will prevent unsafe access from the new
timestamping callbacks.

Potentially, we could allow read-only access. However, this would
require identifying which callback is read-safe-only and also requires
additional BPF instruction rewrites in the covert_ctx. Since the BPF
program can always read everything from a socket (e.g., by using
bpf_core_cast), this patch keeps it simple and disables all read
and write access to any socket fields through the bpf_sock_ops
UAPI from the new TX timestamping callback.

Moreover, note that some of the fields in bpf_sock_ops are specific
to tcp_sock, and sock_ops currently only supports tcp_sock. In
the future, UDP timestamping will be added, which will also break
this assumption. The same idea used in this patch will be reused.
Considering that the current sock_ops only supports tcp_sock, the
variable is named is_locked_"tcp"_sock.

Signed-off-by: Jason Xing <kerneljasonxing@gmail.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://patch.msgid.link/20250220072940.99994-4-kerneljasonxing@gmail.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 include/linux/filter.h | 1 +
 include/net/tcp.h      | 1 +
 net/core/filter.c      | 8 ++++----
 net/ipv4/tcp_input.c   | 2 ++
 net/ipv4/tcp_output.c  | 2 ++
 5 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index e3aca0dc7d9c6..a963a4495b0d0 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1277,6 +1277,7 @@ struct bpf_sock_ops_kern {
 	void	*skb_data_end;
 	u8	op;
 	u8	is_fullsock;
+	u8	is_locked_tcp_sock;
 	u8	remaining_opt_len;
 	u64	temp;			/* temp and everything after is not
 					 * initialized to 0 before calling
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 2aad2e79ac6ad..02e8ef3a49192 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2311,6 +2311,7 @@ static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
 	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
 	if (sk_fullsock(sk)) {
 		sock_ops.is_fullsock = 1;
+		sock_ops.is_locked_tcp_sock = 1;
 		sock_owned_by_me(sk);
 	}
 
diff --git a/net/core/filter.c b/net/core/filter.c
index b262cad02bad9..73df612426a2a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -9194,10 +9194,10 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 		}							      \
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
 						struct bpf_sock_ops_kern,     \
-						is_fullsock),		      \
+						is_locked_tcp_sock),	      \
 				      fullsock_reg, si->src_reg,	      \
 				      offsetof(struct bpf_sock_ops_kern,      \
-					       is_fullsock));		      \
+					       is_locked_tcp_sock));	      \
 		*insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp);	      \
 		if (si->dst_reg == si->src_reg)				      \
 			*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg,	      \
@@ -9282,10 +9282,10 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 					       temp));			      \
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
 						struct bpf_sock_ops_kern,     \
-						is_fullsock),		      \
+						is_locked_tcp_sock),	      \
 				      reg, si->dst_reg,			      \
 				      offsetof(struct bpf_sock_ops_kern,      \
-					       is_fullsock));		      \
+					       is_locked_tcp_sock));	      \
 		*insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2);		      \
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
 						struct bpf_sock_ops_kern, sk),\
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7c2e714527f68..5b751f9c6fd16 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -167,6 +167,7 @@ static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
 	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
 	sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB;
 	sock_ops.is_fullsock = 1;
+	sock_ops.is_locked_tcp_sock = 1;
 	sock_ops.sk = sk;
 	bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
 
@@ -183,6 +184,7 @@ static void bpf_skops_established(struct sock *sk, int bpf_op,
 	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
 	sock_ops.op = bpf_op;
 	sock_ops.is_fullsock = 1;
+	sock_ops.is_locked_tcp_sock = 1;
 	sock_ops.sk = sk;
 	/* sk with TCP_REPAIR_ON does not have skb in tcp_finish_connect */
 	if (skb)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 32e38ac5ee2bd..ae4f23455f985 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -506,6 +506,7 @@ static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb,
 		sock_owned_by_me(sk);
 
 		sock_ops.is_fullsock = 1;
+		sock_ops.is_locked_tcp_sock = 1;
 		sock_ops.sk = sk;
 	}
 
@@ -551,6 +552,7 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
 		sock_owned_by_me(sk);
 
 		sock_ops.is_fullsock = 1;
+		sock_ops.is_locked_tcp_sock = 1;
 		sock_ops.sk = sk;
 	}
 
-- 
2.39.5

next prev parent reply	other threads:[~2025-05-05 23:20 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20250505231817.2697367-1-sashal@kernel.org>
2025-05-05 23:16 ` [PATCH AUTOSEL 5.10 008/114] SUNRPC: rpc_clnt_set_transport() must not change the autobind setting Sasha Levin
2025-05-05 23:16 ` [PATCH AUTOSEL 5.10 009/114] SUNRPC: rpcbind should never reset the port to the value '0' Sasha Levin
2025-05-05 23:16 ` [PATCH AUTOSEL 5.10 027/114] tcp: reorganize tcp_in_ack_event() and tcp_count_delivered() Sasha Levin
2025-05-05 23:16 ` [PATCH AUTOSEL 5.10 033/114] netfilter: conntrack: Bound nf_conntrack sysctl writes Sasha Levin
2025-05-05 23:16 ` [PATCH AUTOSEL 5.10 036/114] ipv6: save dontfrag in cork Sasha Levin
2025-05-05 23:17 ` [PATCH AUTOSEL 5.10 039/114] tcp: bring back NUMA dispersion in inet_ehash_locks_alloc() Sasha Levin
2025-05-05 23:17 ` [PATCH AUTOSEL 5.10 041/114] ieee802154: ca8210: Use proper setters and getters for bitwise types Sasha Levin
2025-05-05 23:17 ` [PATCH AUTOSEL 5.10 049/114] net: ethernet: ti: cpsw_new: populate netdev of_node Sasha Levin
2025-05-05 23:17 ` [PATCH AUTOSEL 5.10 050/114] net: pktgen: fix mpls maximum labels list parsing Sasha Levin
2025-05-05 23:17 ` [PATCH AUTOSEL 5.10 051/114] ipv4: fib: Move fib_valid_key_len() to rtm_to_fib_config() Sasha Levin
2025-05-05 23:17 ` [PATCH AUTOSEL 5.10 059/114] net/mlx5: Avoid report two health errors on same syndrome Sasha Levin
2025-05-05 23:17 ` [PATCH AUTOSEL 5.10 061/114] net: xgene-v2: remove incorrect ACPI_PTR annotation Sasha Levin
2025-05-05 23:17 ` [PATCH AUTOSEL 5.10 062/114] bonding: report duplicate MAC address in all situations Sasha Levin
2025-05-05 23:17 ` [PATCH AUTOSEL 5.10 075/114] net: pktgen: fix access outside of user given buffer in pktgen_thread_write() Sasha Levin
2025-05-05 23:17 ` Sasha Levin [this message]
2025-05-05 23:17 ` [PATCH AUTOSEL 5.10 079/114] eth: mlx4: don't try to complete XDP frames in netpoll Sasha Levin
2025-05-05 23:17 ` [PATCH AUTOSEL 5.10 082/114] net/mlx5: Modify LSB bitmask in temperature event to include only the first bit Sasha Levin
2025-05-05 23:17 ` [PATCH AUTOSEL 5.10 083/114] net/mlx5: Apply rate-limiting to high temperature warning Sasha Levin
2025-05-05 23:17 ` [PATCH AUTOSEL 5.10 090/114] net/mlx4_core: Avoid impossible mlx4_db_alloc() order value Sasha Levin
2025-05-05 23:17 ` [PATCH AUTOSEL 5.10 093/114] net/mlx5: Extend Ethtool loopback selftest to support non-linear SKB Sasha Levin
2025-05-05 23:17 ` [PATCH AUTOSEL 5.10 094/114] net/mlx5e: set the tx_queue_len for pfifo_fast Sasha Levin
2025-05-05 23:17 ` [PATCH AUTOSEL 5.10 095/114] net/mlx5e: reduce rep rxq depth to 256 for ECPF Sasha Levin
2025-05-05 23:17 ` [PATCH AUTOSEL 5.10 096/114] ip: fib_rules: Fetch net from fib_rule in fib[46]_rule_configure() Sasha Levin
2025-05-05 23:18 ` [PATCH AUTOSEL 5.10 100/114] vxlan: Annotate FDB data races Sasha Levin
2025-05-05 23:18 ` [PATCH AUTOSEL 5.10 101/114] net-sysfs: prevent uncleared queues from being re-added Sasha Levin

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:e3aca0dc7d9c dfblob:a963a4495b0d dfblob:2aad2e79ac6a
dfblob:02e8ef3a4919 dfblob:b262cad02bad dfblob:73df612426a2
dfblob:7c2e714527f6 dfblob:5b751f9c6fd1 dfblob:32e38ac5ee2b
dfblob:ae4f23455f98 )
 OR (
bs:"[PATCH AUTOSEL 5.10 077/114] bpf: Prevent unsafe access to the sock fields in the BPF timestamping callback" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250505231817.2697367-77-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=edumazet@google.com \
    --cc=kerneljasonxing@gmail.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=martin.lau@kernel.org \
    --cc=martin.lau@linux.dev \
    --cc=ncardwell@google.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).