* [PATCH bpf v4 1/2] bpf: Run generic devmap egress prog on private skb
2026-06-11 8:08 [PATCH bpf v4 0/2] bpf: Fix generic devmap egress skb sharing Sun Jian
@ 2026-06-11 8:08 ` Sun Jian
2026-06-11 8:30 ` Toke Høiland-Jørgensen
2026-06-11 8:08 ` [PATCH bpf v4 2/2] selftests/bpf: Cover generic devmap egress last-dst rewrite Sun Jian
1 sibling, 1 reply; 5+ messages in thread
From: Sun Jian @ 2026-06-11 8:08 UTC (permalink / raw)
To: bpf
Cc: netdev, linux-kernel, linux-kselftest, ast, daniel, andrii,
martin.lau, davem, kuba, hawk, john.fastabend, sdf, shuah,
jiayuan.chen, toke, menglong.dong, emil, Sun Jian
Generic XDP devmap multi redirect uses skb_clone() for intermediate
destinations and sends the original skb to the last destination. This
can leave multiple destinations sharing the same packet data.
This becomes visible after generic devmap egress-program support was
added: a devmap egress program may mutate packet data, and another
destination sharing the same data can observe that mutation.
Native XDP broadcast redirect does not have this issue because
xdpf_clone() copies the frame data for each destination. Generic XDP
should provide the same per-destination isolation before running a
devmap egress program.
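Fix this by making cloned skbs private before running the generic devmap
egress program. Use skb_copy() instead of skb_unshare(): skb_unshare()
frees the skb when the copy cannot be allocated, whereas a failed
skb_copy() leaves the original skb untouched, so allocation failure does
not consume the skb and the existing caller error paths keep their
ownership semantics.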
Fixes: 2ea5eabaf04a ("bpf: devmap: Implement devmap prog execution for generic XDP")
Suggested-by: Jiayuan Chen <jiayuan.chen@linux.dev>
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Sun Jian <sun.jian.kdev@gmail.com>
---
kernel/bpf/devmap.c | 41 +++++++++++++++++++++++++++++++----------
1 file changed, 31 insertions(+), 10 deletions(-)
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index cc0a43ebab6b..a3d6c60dbddb 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -512,35 +512,52 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
return 0;
}
-static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev *dst)
+static int dev_map_bpf_prog_run_skb(struct sk_buff **pskb,
+ struct bpf_dtab_netdev *dst,
+ u32 *act)
{
+ struct sk_buff *skb = *pskb;
struct xdp_txq_info txq = { .dev = dst->dev };
struct xdp_buff xdp;
- u32 act;
- if (!dst->xdp_prog)
- return XDP_PASS;
+ if (!dst->xdp_prog) {
+ *act = XDP_PASS;
+ return 0;
+ }
+
+ if (skb_cloned(skb)) {
+ struct sk_buff *nskb;
+
+ nskb = skb_copy(skb, GFP_ATOMIC);
+ if (!nskb)
+ return -ENOMEM;
+
+ nskb->mac_len = skb->mac_len;
+ consume_skb(skb);
+ skb = nskb;
+ *pskb = nskb;
+ }
__skb_pull(skb, skb->mac_len);
xdp.txq = &txq;
- act = bpf_prog_run_generic_xdp(skb, &xdp, dst->xdp_prog);
- switch (act) {
+ *act = bpf_prog_run_generic_xdp(skb, &xdp, dst->xdp_prog);
+ switch (*act) {
case XDP_PASS:
__skb_push(skb, skb->mac_len);
break;
default:
- bpf_warn_invalid_xdp_action(NULL, dst->xdp_prog, act);
+ bpf_warn_invalid_xdp_action(NULL, dst->xdp_prog, *act);
fallthrough;
case XDP_ABORTED:
- trace_xdp_exception(dst->dev, dst->xdp_prog, act);
+ trace_xdp_exception(dst->dev, dst->xdp_prog, *act);
fallthrough;
case XDP_DROP:
kfree_skb(skb);
break;
}
- return act;
+ return 0;
}
int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
@@ -700,6 +717,7 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
const struct bpf_prog *xdp_prog)
{
+ u32 act;
int err;
err = xdp_ok_fwd_dev(dst->dev, skb->len);
@@ -710,7 +728,10 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
* return 0 even if packet is dropped. Helper below takes care of
* freeing skb.
*/
- if (dev_map_bpf_prog_run_skb(skb, dst) != XDP_PASS)
+ err = dev_map_bpf_prog_run_skb(&skb, dst, &act);
+ if (err)
+ return err;
+ if (act != XDP_PASS)
return 0;
skb->dev = dst->dev;
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH bpf v4 2/2] selftests/bpf: Cover generic devmap egress last-dst rewrite
2026-06-11 8:08 [PATCH bpf v4 0/2] bpf: Fix generic devmap egress skb sharing Sun Jian
2026-06-11 8:08 ` [PATCH bpf v4 1/2] bpf: Run generic devmap egress prog on private skb Sun Jian
@ 2026-06-11 8:08 ` Sun Jian
1 sibling, 0 replies; 5+ messages in thread
From: Sun Jian @ 2026-06-11 8:08 UTC (permalink / raw)
To: bpf
Cc: netdev, linux-kernel, linux-kselftest, ast, daniel, andrii,
martin.lau, davem, kuba, hawk, john.fastabend, sdf, shuah,
jiayuan.chen, toke, menglong.dong, emil, Sun Jian
Strengthen xdp_veth_egress to check that each destination observes the
MAC selected for its own egress ifindex, instead of only checking that
the observed MAC matches a single magic value shared by all
destinations.
Add a generic XDP last-destination test where earlier destinations do
not have a devmap egress program while the final destination does. This
covers the case where the final destination runs on the original skb and
could otherwise rewrite packet data still shared with earlier cloned
skbs.
Use deterministic DEVMAP_HASH keys for the egress map so the intended
last destination is stable.
Suggested-by: Jiayuan Chen <jiayuan.chen@linux.dev>
Signed-off-by: Sun Jian <sun.jian.kdev@gmail.com>
---
.../selftests/bpf/prog_tests/test_xdp_veth.c | 152 +++++++++++++++++-
1 file changed, 149 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
index 3e98a1665936..0fa2cf835c62 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
@@ -456,7 +456,11 @@ static void xdp_veth_egress(u32 flags)
.remote_flags = flags,
}
};
- const char magic_mac[6] = { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF};
+ const unsigned char egress_macs[VETH_PAIRS_COUNT][ETH_ALEN] = {
+ { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x01 },
+ { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x02 },
+ { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x03 },
+ };
struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB];
struct xdp_redirect_map *xdp_redirect_map;
@@ -512,7 +516,7 @@ static void xdp_veth_egress(u32 flags)
&net_config, prog_cfg, i))
goto destroy_xdp_redirect_map;
- err = bpf_map_update_elem(mac_map, &ifindex, magic_mac, 0);
+ err = bpf_map_update_elem(mac_map, &ifindex, egress_macs[i], 0);
if (!ASSERT_OK(err, "bpf_map_update_elem"))
goto destroy_xdp_redirect_map;
@@ -530,16 +534,155 @@ static void xdp_veth_egress(u32 flags)
goto destroy_xdp_redirect_map;
for (i = 0; i < 2; i++) {
+ u32 key = i;
+ __be64 expected = 0;
+ u64 res;
+
+ err = bpf_map_lookup_elem(res_map, &key, &res);
+ if (!ASSERT_OK(err, "get MAC res"))
+ goto destroy_xdp_redirect_map;
+
+ /* store_mac_1/2 run on the second/third remote veths. */
+ memcpy(&expected, egress_macs[i + 1], ETH_ALEN);
+ ASSERT_EQ(res, expected, "compare mac");
+ }
+
+destroy_xdp_redirect_map:
+ close_netns(nstoken);
+ xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_redirect_multi_kern:
+ xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern);
+destroy_xdp_dummy:
+ xdp_dummy__destroy(xdp_dummy);
+
+ cleanup_network(&net_config);
+}
+
+static void xdp_veth_egress_last_dst(u32 flags)
+{
+ struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = {
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "store_mac_1",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "store_mac_2",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ }
+ };
+ const unsigned char egress_macs[VETH_PAIRS_COUNT][ETH_ALEN] = {
+ { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x01 },
+ { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x02 },
+ { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x03 },
+ };
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+ struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB];
+ struct xdp_redirect_map *xdp_redirect_map;
+ struct net_configuration net_config = {};
+ int mac_map, egress_map, res_map;
+ struct nstoken *nstoken = NULL;
+ struct xdp_dummy *xdp_dummy;
+ __be64 last_mac = 0;
+ bool found = false;
+ int err;
+ int i;
+
+ xdp_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load"))
+ return;
+
+ xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load"))
+ goto destroy_xdp_dummy;
+
+ xdp_redirect_map = xdp_redirect_map__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+ goto destroy_xdp_redirect_multi_kern;
+
+ if (!ASSERT_OK(create_network(&net_config), "create network"))
+ goto destroy_xdp_redirect_map;
+
+ mac_map = bpf_map__fd(xdp_redirect_multi_kern->maps.mac_map);
+ if (!ASSERT_OK_FD(mac_map, "open mac_map"))
+ goto destroy_xdp_redirect_map;
+
+ egress_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_egress);
+ if (!ASSERT_OK_FD(egress_map, "open map_egress"))
+ goto destroy_xdp_redirect_map;
+
+ bpf_objs[0] = xdp_dummy->obj;
+ bpf_objs[1] = xdp_redirect_multi_kern->obj;
+ bpf_objs[2] = xdp_redirect_map->obj;
+
+ nstoken = open_netns(net_config.ns0_name);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ struct bpf_devmap_val devmap_val = {};
+ int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth);
+ u32 key = i;
+
+ SYS(destroy_xdp_redirect_map,
+ "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s",
+ net_config.veth_cfg[i].namespace, IP_NEIGH,
+ net_config.veth_cfg[i].remote_veth);
+
+ if (attach_programs_to_veth_pair(bpf_objs, VETH_EGRESS_SKEL_NB,
+ &net_config, prog_cfg, i))
+ goto destroy_xdp_redirect_map;
+
+ err = bpf_map_update_elem(mac_map, &ifindex, egress_macs[i], 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+
+ devmap_val.ifindex = ifindex;
+ devmap_val.bpf_prog.fd = -1;
+
+ if (i == VETH_PAIRS_COUNT - 1)
+ devmap_val.bpf_prog.fd =
+ bpf_program__fd(xdp_redirect_multi_kern->progs.xdp_devmap_prog);
+
+ err = bpf_map_update_elem(egress_map, &key, &devmap_val, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+ }
+
+ SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ",
+ net_config.veth_cfg[0].namespace, IP_NEIGH);
+
+ res_map = bpf_map__fd(xdp_redirect_map->maps.rx_mac);
+ if (!ASSERT_OK_FD(res_map, "open rx_map"))
+ goto destroy_xdp_redirect_map;
+
+ memcpy(&last_mac, egress_macs[VETH_PAIRS_COUNT - 1], ETH_ALEN);
+
+ for (i = 0; i < VETH_PAIRS_COUNT - 1; i++) {
u32 key = i;
u64 res;
err = bpf_map_lookup_elem(res_map, &key, &res);
+ if (err == -ENOENT)
+ continue;
if (!ASSERT_OK(err, "get MAC res"))
goto destroy_xdp_redirect_map;
- ASSERT_STRNEQ((const char *)&res, magic_mac, ETH_ALEN, "compare mac");
+ found = true;
+ ASSERT_NEQ(res, last_mac, "compare last dst mac");
}
+ ASSERT_TRUE(found, "found earlier dst mac");
+
destroy_xdp_redirect_map:
close_netns(nstoken);
xdp_redirect_map__destroy(xdp_redirect_map);
@@ -596,4 +739,7 @@ void test_xdp_veth_egress(void)
if (test__start_subtest("SKB_MODE/egress"))
xdp_veth_egress(XDP_FLAGS_SKB_MODE);
+
+ if (test__start_subtest("SKB_MODE/egress_last_dst"))
+ xdp_veth_egress_last_dst(XDP_FLAGS_SKB_MODE);
}
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread