From: Werner Kasselman <werner@verivus.ai>
To: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Andrii Nakryiko <andrii@kernel.org>,
John Fastabend <john.fastabend@gmail.com>,
"David S . Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Shuah Khan <shuah@kernel.org>,
"bpf@vger.kernel.org" <bpf@vger.kernel.org>,
"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
"linux-kselftest@vger.kernel.org"
<linux-kselftest@vger.kernel.org>,
"stable@vger.kernel.org" <stable@vger.kernel.org>,
Werner Kasselman <werner@verivus.ai>
Subject: [PATCH v2] bpf: guard sock_ops rtt_min access with is_locked_tcp_sock
Date: Thu, 9 Apr 2026 06:10:28 +0000 [thread overview]
Message-ID: <20260409061026.3926858-1-werner@verivus.com> (raw)
In-Reply-To: <20260406224953.2787289-1-werner@verivus.com>
sock_ops_convert_ctx_access() emits guarded reads for tcp_sock-backed
bpf_sock_ops fields such as snd_cwnd, srtt_us, snd_ssthresh, rcv_nxt,
snd_nxt, snd_una, mss_cache, ecn_flags, rate_delivered, and
rate_interval_us. Those accesses go through SOCK_OPS_GET_TCP_SOCK_FIELD(),
which checks is_locked_tcp_sock before dereferencing sock_ops.sk.
The rtt_min case is different. Because it reads a subfield of
struct minmax, it uses a custom open-coded load sequence instead of the
usual helper macro, and that sequence currently dereferences sock_ops.sk
without checking is_locked_tcp_sock first.
This is unsafe when sock_ops.sk points to a request_sock-backed object
instead of a locked full tcp_sock. That is reachable not only from the
SYNACK header option callbacks, but also from other request_sock-backed
sock_ops callbacks such as BPF_SOCK_OPS_TIMEOUT_INIT,
BPF_SOCK_OPS_RWND_INIT, and BPF_SOCK_OPS_NEEDS_ECN. In those cases,
reading ctx->rtt_min makes the generated code treat a request_sock as a
tcp_sock and read beyond the end of the request_sock allocation.
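Fix the rtt_min conversion by adding the same is_locked_tcp_sock guard
used for the other tcp_sock field reads. Also make the accessed subfield
explicit by using offsetof(struct minmax_sample, v) rather than deriving
its offset from sizeof_field(struct minmax_sample, t), which silently
depends on the layout of struct minmax_sample, and add a BUILD_BUG_ON
that the size of bpf_sock_ops.rtt_min matches the size of that subfield.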
Add a selftest that verifies request_sock-backed sock_ops callbacks see
ctx->rtt_min as zero after the fix.
Found via AST-based call-graph analysis using sqry.
Fixes: 44f0e43037d3 ("bpf: Add support for reading sk_state and more")
Cc: stable@vger.kernel.org
Signed-off-by: Werner Kasselman <werner@verivus.com>
---
net/core/filter.c | 53 +++++++++++++++----
.../selftests/bpf/prog_tests/tcpbpf_user.c | 9 ++++
.../selftests/bpf/progs/test_tcpbpf_kern.c | 21 ++++++++
tools/testing/selftests/bpf/test_tcpbpf.h | 6 +++
4 files changed, 79 insertions(+), 10 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index 78b548158..5040bf7e4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -10827,16 +10827,49 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct bpf_sock_ops, rtt_min):
BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
sizeof(struct minmax));
- BUILD_BUG_ON(sizeof(struct minmax) <
- sizeof(struct minmax_sample));
-
- *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
- struct bpf_sock_ops_kern, sk),
- si->dst_reg, si->src_reg,
- offsetof(struct bpf_sock_ops_kern, sk));
- *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
- offsetof(struct tcp_sock, rtt_min) +
- sizeof_field(struct minmax_sample, t));
+ BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, rtt_min) !=
+ sizeof_field(struct minmax_sample, v));
+ off = offsetof(struct tcp_sock, rtt_min) +
+ offsetof(struct minmax_sample, v);
+
+ {
+ int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 2;
+
+ if (si->dst_reg == reg || si->src_reg == reg)
+ reg--;
+ if (si->dst_reg == reg || si->src_reg == reg)
+ reg--;
+ if (si->dst_reg == si->src_reg) {
+ *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg,
+ offsetof(struct bpf_sock_ops_kern,
+ temp));
+ fullsock_reg = reg;
+ jmp += 2;
+ }
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern,
+ is_locked_tcp_sock),
+ fullsock_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern,
+ is_locked_tcp_sock));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp);
+ if (si->dst_reg == si->src_reg)
+ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern,
+ temp));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ off);
+ if (si->dst_reg == si->src_reg) {
+ *insn++ = BPF_JMP_A(1);
+ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern,
+ temp));
+ }
+ }
break;
case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
index 7e8fe1bad..d243d6713 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -42,6 +42,15 @@ static void verify_result(struct tcpbpf_globals *result)
/* check getsockopt for window_clamp */
ASSERT_EQ(result->window_clamp_client, 9216, "window_clamp_client");
ASSERT_EQ(result->window_clamp_server, 9216, "window_clamp_server");
+
+ ASSERT_EQ(result->timeout_init_req_seen, 1, "timeout_init_req_seen");
+ ASSERT_EQ(result->timeout_init_req_rtt_min, 0, "timeout_init_req_rtt_min");
+
+ ASSERT_EQ(result->rwnd_init_req_seen, 1, "rwnd_init_req_seen");
+ ASSERT_EQ(result->rwnd_init_req_rtt_min, 0, "rwnd_init_req_rtt_min");
+
+ ASSERT_EQ(result->needs_ecn_req_seen, 1, "needs_ecn_req_seen");
+ ASSERT_EQ(result->needs_ecn_req_rtt_min, 0, "needs_ecn_req_rtt_min");
}
static void run_test(struct tcpbpf_globals *result)
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index 6935f32ee..79757a19b 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -85,6 +85,27 @@ int bpf_testcb(struct bpf_sock_ops *skops)
global.event_map |= (1 << op);
switch (op) {
+ case BPF_SOCK_OPS_TIMEOUT_INIT:
+ if (!skops->is_fullsock) {
+ global.timeout_init_req_seen = 1;
+ global.timeout_init_req_rtt_min = skops->rtt_min;
+ }
+ rv = -1;
+ break;
+ case BPF_SOCK_OPS_RWND_INIT:
+ if (!skops->is_fullsock) {
+ global.rwnd_init_req_seen = 1;
+ global.rwnd_init_req_rtt_min = skops->rtt_min;
+ }
+ rv = 0;
+ break;
+ case BPF_SOCK_OPS_NEEDS_ECN:
+ if (!skops->is_fullsock) {
+ global.needs_ecn_req_seen = 1;
+ global.needs_ecn_req_rtt_min = skops->rtt_min;
+ }
+ rv = 0;
+ break;
case BPF_SOCK_OPS_TCP_CONNECT_CB:
rv = bpf_setsockopt(skops, SOL_TCP, TCP_WINDOW_CLAMP,
&window_clamp, sizeof(window_clamp));
diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h
index 9dd9b5590..46500c1d6 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf.h
+++ b/tools/testing/selftests/bpf/test_tcpbpf.h
@@ -18,5 +18,11 @@ struct tcpbpf_globals {
__u32 tcp_saved_syn;
__u32 window_clamp_client;
__u32 window_clamp_server;
+ __u32 timeout_init_req_seen;
+ __u32 timeout_init_req_rtt_min;
+ __u32 rwnd_init_req_seen;
+ __u32 rwnd_init_req_rtt_min;
+ __u32 needs_ecn_req_seen;
+ __u32 needs_ecn_req_rtt_min;
};
#endif
--
2.43.0
prev parent reply other threads:[~2026-04-09 6:10 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-06 22:49 [PATCH] bpf: add is_locked_tcp_sock guard for sock_ops rtt_min access Werner Kasselman
2026-04-07 1:25 ` Martin KaFai Lau
2026-04-07 1:56 ` Werner Kasselman
2026-04-09 6:10 ` Werner Kasselman [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260409061026.3926858-1-werner@verivus.com \
--to=werner@verivus.ai \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=john.fastabend@gmail.com \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=martin.lau@linux.dev \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=shuah@kernel.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.