* [PATCH bpf-next v2 1/2] net: clear the dst when performing encap / decap
@ 2026-03-29 18:04 Jakub Kicinski
2026-03-29 18:04 ` [PATCH bpf-next v2 2/2] selftests/bpf: test that dst is cleared on same-protocol encap Jakub Kicinski
0 siblings, 1 reply; 2+ messages in thread
From: Jakub Kicinski @ 2026-03-29 18:04 UTC (permalink / raw)
To: bpf
Cc: netdev, davem, edumazet, pabeni, andrew+netdev, horms,
Jakub Kicinski, maze, willemdebruijn.kernel, ast, daniel, andrii,
martin.lau, eddyz87, song, yonghong.song, john.fastabend, kpsingh,
sdf, haoluo, jolsa
Commit ba9db6f907ac ("net: clear the dst when changing skb protocol")
added dst clearing when a BPF program changes the skb protocol
(e.g. IPv4 to IPv6). Since that was a fix we only cleared the dst when
the L3 protocol actually changes to keep it minimal. As suggested during
the discussion (see Link) encap or decap operation which wraps or unwraps
a same-protocol header may also render the existing dst incorrect - even
if that doesn't result in a crash, just the wrong route for the now-outermost
IP dst.
Make dropping dst unconditional for bpf_skb_change_proto() and all
L3 encap / decap ops.
Link: https://lore.kernel.org/CANP3RGfRaYwve_xgxH6Tp2zenzKn2-DjZ9tg023WVzfdJF3p_w@mail.gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
v2:
- move the pointer recalc
v1: https://lore.kernel.org/20260319234531.3022634-1-kuba@kernel.org
CC: maze@google.com
CC: willemdebruijn.kernel@gmail.com
CC: ast@kernel.org
CC: daniel@iogearbox.net
CC: andrii@kernel.org
CC: martin.lau@linux.dev
CC: eddyz87@gmail.com
CC: song@kernel.org
CC: yonghong.song@linux.dev
CC: john.fastabend@gmail.com
CC: kpsingh@kernel.org
CC: sdf@fomichev.me
CC: haoluo@google.com
CC: jolsa@kernel.org
CC: bpf@vger.kernel.org
---
net/core/filter.c | 50 +++++++++++++++++++++++++----------------------
1 file changed, 27 insertions(+), 23 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index 78b548158fb0..8a352257656e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3257,13 +3257,6 @@ static const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
-static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto)
-{
- skb->protocol = htons(proto);
- if (skb_valid_dst(skb))
- skb_dst_drop(skb);
-}
-
static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
{
/* Caller already did skb_cow() with meta_len+len as headroom,
@@ -3362,7 +3355,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
shinfo->gso_type |= SKB_GSO_DODGY;
}
- bpf_skb_change_protocol(skb, ETH_P_IPV6);
+ skb->protocol = htons(ETH_P_IPV6);
skb_clear_hash(skb);
return 0;
@@ -3393,7 +3386,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
shinfo->gso_type |= SKB_GSO_DODGY;
}
- bpf_skb_change_protocol(skb, ETH_P_IP);
+ skb->protocol = htons(ETH_P_IP);
skb_clear_hash(skb);
return 0;
@@ -3441,7 +3434,13 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
*/
ret = bpf_skb_proto_xlat(skb, proto);
bpf_compute_data_pointers(skb);
- return ret;
+ if (ret)
+ return ret;
+
+ if (skb_valid_dst(skb))
+ skb_dst_drop(skb);
+
+ return 0;
}
static const struct bpf_func_proto bpf_skb_change_proto_proto = {
@@ -3583,12 +3582,13 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
}
/* Match skb->protocol to new outer l3 protocol */
- if (skb->protocol == htons(ETH_P_IP) &&
- flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
- bpf_skb_change_protocol(skb, ETH_P_IPV6);
- else if (skb->protocol == htons(ETH_P_IPV6) &&
- flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
- bpf_skb_change_protocol(skb, ETH_P_IP);
+ if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+ skb->protocol = htons(ETH_P_IPV6);
+ else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
+ skb->protocol = htons(ETH_P_IP);
+
+ if (skb_valid_dst(skb))
+ skb_dst_drop(skb);
}
if (skb_is_gso(skb)) {
@@ -3616,6 +3616,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
u64 flags)
{
+ bool decap = flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK;
int ret;
if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
@@ -3638,13 +3639,16 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
if (unlikely(ret < 0))
return ret;
- /* Match skb->protocol to new outer l3 protocol */
- if (skb->protocol == htons(ETH_P_IP) &&
- flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
- bpf_skb_change_protocol(skb, ETH_P_IPV6);
- else if (skb->protocol == htons(ETH_P_IPV6) &&
- flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
- bpf_skb_change_protocol(skb, ETH_P_IP);
+ if (decap) {
+ /* Match skb->protocol to new outer l3 protocol */
+ if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
+ skb->protocol = htons(ETH_P_IPV6);
+ else if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
+ skb->protocol = htons(ETH_P_IP);
+
+ if (skb_valid_dst(skb))
+ skb_dst_drop(skb);
+ }
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
--
2.53.0
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH bpf-next v2 2/2] selftests/bpf: test that dst is cleared on same-protocol encap
2026-03-29 18:04 [PATCH bpf-next v2 1/2] net: clear the dst when performing encap / decap Jakub Kicinski
@ 2026-03-29 18:04 ` Jakub Kicinski
0 siblings, 0 replies; 2+ messages in thread
From: Jakub Kicinski @ 2026-03-29 18:04 UTC (permalink / raw)
To: bpf
Cc: netdev, davem, edumazet, pabeni, andrew+netdev, horms,
Jakub Kicinski, andrii, eddyz87, ast, daniel, martin.lau, song,
yonghong.song, john.fastabend, kpsingh, sdf, haoluo, jolsa, shuah,
linux-kselftest
Verify that bpf_skb_adjust_room() clears the routing dst even when
the encap L3 protocol matches the original packet (e.g. IPIP).
The dst selected for the inner packet is not valid for the
encapsulated result; a stale dst could lead to misrouting.
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
CC: andrii@kernel.org
CC: eddyz87@gmail.com
CC: ast@kernel.org
CC: daniel@iogearbox.net
CC: martin.lau@linux.dev
CC: song@kernel.org
CC: yonghong.song@linux.dev
CC: john.fastabend@gmail.com
CC: kpsingh@kernel.org
CC: sdf@fomichev.me
CC: haoluo@google.com
CC: jolsa@kernel.org
CC: shuah@kernel.org
CC: bpf@vger.kernel.org
CC: linux-kselftest@vger.kernel.org
---
.../selftests/bpf/prog_tests/test_dst_clear.c | 75 +++++++++++++++++++
.../selftests/bpf/progs/test_dst_clear.c | 57 ++++++++++++++
2 files changed, 132 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/test_dst_clear.c
create mode 100644 tools/testing/selftests/bpf/progs/test_dst_clear.c
diff --git a/tools/testing/selftests/bpf/prog_tests/test_dst_clear.c b/tools/testing/selftests/bpf/prog_tests/test_dst_clear.c
new file mode 100644
index 000000000000..8190c56556fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_dst_clear.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_dst_clear.skel.h"
+
+#define NS_TEST "dst_clear_ns"
+#define IPV4_IFACE_ADDR "1.0.0.1"
+#define UDP_TEST_PORT 7777
+
+void test_dst_clear(void)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_hook, .attach_point = BPF_TC_EGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach);
+ struct nstoken *nstoken = NULL;
+ struct test_dst_clear *skel;
+ struct sockaddr_in addr;
+ socklen_t addrlen;
+ char buf[128] = {};
+ int sockfd, err;
+
+ skel = test_dst_clear__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+ return;
+
+ SYS(fail, "ip netns add %s", NS_TEST);
+ SYS(fail, "ip -net %s addr add %s/8 dev lo", NS_TEST, IPV4_IFACE_ADDR);
+ SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
+
+ nstoken = open_netns(NS_TEST);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto fail;
+
+ qdisc_hook.ifindex = if_nametoindex("lo");
+ if (!ASSERT_GT(qdisc_hook.ifindex, 0, "if_nametoindex lo"))
+ goto fail;
+
+ err = bpf_tc_hook_create(&qdisc_hook);
+ if (!ASSERT_OK(err, "create qdisc hook"))
+ goto fail;
+
+ tc_attach.prog_fd = bpf_program__fd(skel->progs.dst_clear);
+ err = bpf_tc_attach(&qdisc_hook, &tc_attach);
+ if (!ASSERT_OK(err, "attach filter"))
+ goto fail;
+
+ addrlen = sizeof(addr);
+ err = make_sockaddr(AF_INET, IPV4_IFACE_ADDR, UDP_TEST_PORT,
+ (void *)&addr, &addrlen);
+ if (!ASSERT_OK(err, "make_sockaddr"))
+ goto fail;
+ sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (!ASSERT_NEQ(sockfd, -1, "socket"))
+ goto fail;
+ err = sendto(sockfd, buf, sizeof(buf), 0, (void *)&addr, addrlen);
+ close(sockfd);
+ if (!ASSERT_EQ(err, sizeof(buf), "send"))
+ goto fail;
+
+ ASSERT_TRUE(skel->bss->had_dst, "had_dst");
+ ASSERT_TRUE(skel->bss->dst_cleared, "dst_cleared");
+
+fail:
+ if (nstoken) {
+ bpf_tc_hook_destroy(&qdisc_hook);
+ close_netns(nstoken);
+ }
+ SYS_NOFAIL("ip netns del " NS_TEST);
+ test_dst_clear__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_dst_clear.c b/tools/testing/selftests/bpf/progs/test_dst_clear.c
new file mode 100644
index 000000000000..7ac9604fd99c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_dst_clear.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define UDP_TEST_PORT 7777
+
+void *bpf_cast_to_kern_ctx(void *) __ksym;
+
+bool had_dst = false;
+bool dst_cleared = false;
+
+SEC("tc")
+int dst_clear(struct __sk_buff *skb)
+{
+ struct sk_buff *kskb;
+ struct iphdr iph;
+ struct udphdr udph;
+ int err;
+
+ if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+ return TC_ACT_OK;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph, sizeof(iph)))
+ return TC_ACT_OK;
+
+ if (iph.protocol != IPPROTO_UDP)
+ return TC_ACT_OK;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph), &udph, sizeof(udph)))
+ return TC_ACT_OK;
+
+ if (udph.dest != __bpf_constant_htons(UDP_TEST_PORT))
+ return TC_ACT_OK;
+
+ kskb = bpf_cast_to_kern_ctx(skb);
+ had_dst = (kskb->_skb_refdst != 0);
+
+ /* Same-protocol encap (IPIP): protocol stays IPv4, but the dst
+ * from the original routing is no longer valid for the outer hdr.
+ */
+ err = bpf_skb_adjust_room(skb, (s32)sizeof(struct iphdr),
+ BPF_ADJ_ROOM_MAC,
+ BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_ENCAP_L3_IPV4);
+ if (err)
+ return TC_ACT_SHOT;
+
+ dst_cleared = (kskb->_skb_refdst == 0);
+
+ return TC_ACT_SHOT;
+}
+
+char __license[] SEC("license") = "GPL";
--
2.53.0
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2026-03-29 18:04 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-29 18:04 [PATCH bpf-next v2 1/2] net: clear the dst when performing encap / decap Jakub Kicinski
2026-03-29 18:04 ` [PATCH bpf-next v2 2/2] selftests/bpf: test that dst is cleared on same-protocol encap Jakub Kicinski
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox