Netdev List
 help / color / mirror / Atom feed
* [PATCH bpf v2] bpf: Run generic devmap egress prog on private skb
@ 2026-06-10 10:28 Sun Jian
  2026-06-10 10:52 ` Toke Høiland-Jørgensen
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Sun Jian @ 2026-06-10 10:28 UTC (permalink / raw)
  To: bpf
  Cc: Menglong Dong, Emil Tsalapatis, Sun Jian, Jiayuan Chen,
	Alexei Starovoitov, Daniel Borkmann, David S. Miller,
	Jakub Kicinski, Jesper Dangaard Brouer, John Fastabend,
	Stanislav Fomichev, Andrii Nakryiko, Martin KaFai Lau,
	Eduard Zingerman, Kumar Kartikeya Dwivedi, Song Liu,
	Yonghong Song, Jiri Olsa, Shuah Khan, Hangbin Liu,
	Toke Høiland-Jørgensen, netdev, linux-kernel,
	linux-kselftest

Generic XDP devmap multi redirect uses skb_clone() for the
intermediate destinations and sends the last destination with the
original skb. This can leave multiple destinations sharing the same
packet data.

This becomes visible when a devmap egress program mutates packet data.
One destination can observe changes made for another destination. The
last-destination path has the same problem: the last destination runs on
the original skb, so its egress program can modify packet data still
shared with earlier cloned skbs.

Native XDP broadcast redirect does not have this issue because
xdpf_clone() copies the frame data for each destination. Generic XDP
should provide the same per-destination isolation before running a
devmap egress program.

Fix this by making cloned skbs private in dev_map_generic_redirect()
before running the devmap egress program. Use skb_copy() instead of
skb_unshare() so that allocation failure does not consume the skb and
the existing caller error paths keep their ownership semantics.

Add a selftest that covers the last-destination case where earlier
destinations do not have a devmap egress program, while the final
destination does.

Tested with:
  ./test_progs -t xdp_veth_egress
  ./test_progs -t xdp_veth
  ./test_progs -t xdp

Fixes: e624d4ed4aa8 ("xdp: Extend xdp_redirect_map with broadcast support")
Suggested-by: Jiayuan Chen <jiayuan.chen@linux.dev>
Signed-off-by: Sun Jian <sun.jian.kdev@gmail.com>
---
v1: https://lore.kernel.org/bpf/CABFUUZFimdrZdq=NWi+N-0sJZWvMwY=f4iF6-3TVMS8=m07Zmw@mail.gmail.com/

Changes in v2:
- Move the private-copy step into dev_map_generic_redirect() so the
  last-destination path is covered as well.
- Use skb_copy() instead of skb_unshare() to keep caller ownership
  unchanged on allocation failure.
- Add a generic XDP last-destination selftest case.

 kernel/bpf/devmap.c                           |  10 ++
 .../selftests/bpf/prog_tests/test_xdp_veth.c  | 151 +++++++++++++++++-
 2 files changed, 158 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index cc0a43ebab6b..59f267685bc6 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -700,12 +700,22 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 			     const struct bpf_prog *xdp_prog)
 {
+	struct sk_buff *nskb;
 	int err;
 
 	err = xdp_ok_fwd_dev(dst->dev, skb->len);
 	if (unlikely(err))
 		return err;
 
+	if (dst->xdp_prog && skb_cloned(skb)) {
+		nskb = skb_copy(skb, GFP_ATOMIC);
+		if (!nskb)
+			return -ENOMEM;
+
+		consume_skb(skb);
+		skb = nskb;
+	}
+
 	/* Redirect has already succeeded semantically at this point, so we just
 	 * return 0 even if packet is dropped. Helper below takes care of
 	 * freeing skb.
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
index 3e98a1665936..1f0b9ade12fe 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
@@ -456,7 +456,11 @@ static void xdp_veth_egress(u32 flags)
 			.remote_flags = flags,
 		}
 	};
-	const char magic_mac[6] = { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF};
+	const unsigned char egress_macs[VETH_PAIRS_COUNT][ETH_ALEN] = {
+		{ 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x01 },
+		{ 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x02 },
+		{ 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x03 },
+	};
 	struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
 	struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB];
 	struct xdp_redirect_map *xdp_redirect_map;
@@ -512,7 +516,7 @@ static void xdp_veth_egress(u32 flags)
 						 &net_config, prog_cfg, i))
 			goto destroy_xdp_redirect_map;
 
-		err = bpf_map_update_elem(mac_map, &ifindex, magic_mac, 0);
+		err = bpf_map_update_elem(mac_map, &ifindex, egress_macs[i], 0);
 		if (!ASSERT_OK(err, "bpf_map_update_elem"))
 			goto destroy_xdp_redirect_map;
 
@@ -531,13 +535,16 @@ static void xdp_veth_egress(u32 flags)
 
 	for (i = 0; i < 2; i++) {
 		u32 key = i;
+		__be64 expected = 0;
 		u64 res;
 
 		err = bpf_map_lookup_elem(res_map, &key, &res);
 		if (!ASSERT_OK(err, "get MAC res"))
 			goto destroy_xdp_redirect_map;
 
-		ASSERT_STRNEQ((const char *)&res, magic_mac, ETH_ALEN, "compare mac");
+		/* store_mac_1/2 run on the second/third remote veths. */
+		memcpy(&expected, egress_macs[i + 1], ETH_ALEN);
+		ASSERT_EQ(res, expected, "compare mac");
 	}
 
 destroy_xdp_redirect_map:
@@ -551,6 +558,141 @@ static void xdp_veth_egress(u32 flags)
 	cleanup_network(&net_config);
 }
 
+static void xdp_veth_egress_last_dst(u32 flags)
+{
+	struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = {
+		{
+			.local_name = "xdp_redirect_map_all_prog",
+			.remote_name = "store_mac_1",
+			.local_flags = flags,
+			.remote_flags = flags,
+		},
+		{
+			.local_name = "xdp_redirect_map_all_prog",
+			.remote_name = "store_mac_2",
+			.local_flags = flags,
+			.remote_flags = flags,
+		},
+		{
+			.local_name = "xdp_redirect_map_all_prog",
+			.remote_name = "xdp_dummy_prog",
+			.local_flags = flags,
+			.remote_flags = flags,
+		}
+	};
+	const unsigned char egress_macs[VETH_PAIRS_COUNT][ETH_ALEN] = {
+		{ 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x01 },
+		{ 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x02 },
+		{ 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x03 },
+	};
+	struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+	struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB];
+	struct xdp_redirect_map *xdp_redirect_map;
+	struct net_configuration net_config;
+	int mac_map, egress_map, res_map;
+	struct nstoken *nstoken = NULL;
+	struct xdp_dummy *xdp_dummy;
+	__be64 last_mac = 0;
+	bool found = false;
+	int err;
+	int i;
+
+	xdp_dummy = xdp_dummy__open_and_load();
+	if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load"))
+		return;
+
+	xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+	if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load"))
+		goto destroy_xdp_dummy;
+
+	xdp_redirect_map = xdp_redirect_map__open_and_load();
+	if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+		goto destroy_xdp_redirect_multi_kern;
+
+	if (!ASSERT_OK(create_network(&net_config), "create network"))
+		goto destroy_xdp_redirect_map;
+
+	mac_map = bpf_map__fd(xdp_redirect_multi_kern->maps.mac_map);
+	if (!ASSERT_OK_FD(mac_map, "open mac_map"))
+		goto destroy_xdp_redirect_map;
+
+	egress_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_egress);
+	if (!ASSERT_OK_FD(egress_map, "open map_egress"))
+		goto destroy_xdp_redirect_map;
+
+	bpf_objs[0] = xdp_dummy->obj;
+	bpf_objs[1] = xdp_redirect_multi_kern->obj;
+	bpf_objs[2] = xdp_redirect_map->obj;
+
+	nstoken = open_netns(net_config.ns0_name);
+	if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+		goto destroy_xdp_redirect_map;
+
+	for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+		struct bpf_devmap_val devmap_val = {};
+		int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth);
+
+		SYS(destroy_xdp_redirect_map,
+		    "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s",
+		    net_config.veth_cfg[i].namespace, IP_NEIGH,
+		    net_config.veth_cfg[i].remote_veth);
+
+		if (attach_programs_to_veth_pair(bpf_objs, VETH_EGRESS_SKEL_NB,
+						 &net_config, prog_cfg, i))
+			goto destroy_xdp_redirect_map;
+
+		err = bpf_map_update_elem(mac_map, &ifindex, egress_macs[i], 0);
+		if (!ASSERT_OK(err, "bpf_map_update_elem"))
+			goto destroy_xdp_redirect_map;
+
+		devmap_val.ifindex = ifindex;
+		devmap_val.bpf_prog.fd = -1;
+
+		if (i == VETH_PAIRS_COUNT - 1)
+			devmap_val.bpf_prog.fd =
+				bpf_program__fd(xdp_redirect_multi_kern->progs.xdp_devmap_prog);
+
+		err = bpf_map_update_elem(egress_map, &ifindex, &devmap_val, 0);
+		if (!ASSERT_OK(err, "bpf_map_update_elem"))
+			goto destroy_xdp_redirect_map;
+	}
+
+	SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ",
+		   net_config.veth_cfg[0].namespace, IP_NEIGH);
+
+	res_map = bpf_map__fd(xdp_redirect_map->maps.rx_mac);
+	if (!ASSERT_OK_FD(res_map, "open rx_map"))
+		goto destroy_xdp_redirect_map;
+
+	memcpy(&last_mac, egress_macs[VETH_PAIRS_COUNT - 1], ETH_ALEN);
+
+	for (i = 0; i < VETH_PAIRS_COUNT - 1; i++) {
+		u32 key = i;
+		u64 res;
+
+		err = bpf_map_lookup_elem(res_map, &key, &res);
+		if (err == -ENOENT)
+			continue;
+		if (!ASSERT_OK(err, "get MAC res"))
+			goto destroy_xdp_redirect_map;
+
+		found = true;
+		ASSERT_NEQ(res, last_mac, "compare last dst mac");
+	}
+
+	ASSERT_TRUE(found, "found earlier dst mac");
+
+destroy_xdp_redirect_map:
+	close_netns(nstoken);
+	xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_redirect_multi_kern:
+	xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern);
+destroy_xdp_dummy:
+	xdp_dummy__destroy(xdp_dummy);
+
+	cleanup_network(&net_config);
+}
+
 void test_xdp_veth_redirect(void)
 {
 	if (test__start_subtest("0"))
@@ -596,4 +738,7 @@ void test_xdp_veth_egress(void)
 
 	if (test__start_subtest("SKB_MODE/egress"))
 		xdp_veth_egress(XDP_FLAGS_SKB_MODE);
+
+	if (test__start_subtest("SKB_MODE/egress_last_dst"))
+		xdp_veth_egress_last_dst(XDP_FLAGS_SKB_MODE);
 }

base-commit: e7ae89a0c97ce2b68b0983cd01eda67cf373517d
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH bpf v2] bpf: Run generic devmap egress prog on private skb
  2026-06-10 10:28 [PATCH bpf v2] bpf: Run generic devmap egress prog on private skb Sun Jian
@ 2026-06-10 10:52 ` Toke Høiland-Jørgensen
  2026-06-10 11:04 ` bot+bpf-ci
  2026-06-10 12:59 ` Jiayuan Chen
  2 siblings, 0 replies; 4+ messages in thread
From: Toke Høiland-Jørgensen @ 2026-06-10 10:52 UTC (permalink / raw)
  To: Sun Jian, bpf
  Cc: Menglong Dong, Emil Tsalapatis, Sun Jian, Jiayuan Chen,
	Alexei Starovoitov, Daniel Borkmann, David S. Miller,
	Jakub Kicinski, Jesper Dangaard Brouer, John Fastabend,
	Stanislav Fomichev, Andrii Nakryiko, Martin KaFai Lau,
	Eduard Zingerman, Kumar Kartikeya Dwivedi, Song Liu,
	Yonghong Song, Jiri Olsa, Shuah Khan, Hangbin Liu, netdev,
	linux-kernel, linux-kselftest

Sun Jian <sun.jian.kdev@gmail.com> writes:

> Generic XDP devmap multi redirect uses skb_clone() for the
> intermediate destinations and sends the last destination with the
> original skb. This can leave multiple destinations sharing the same
> packet data.
>
> This becomes visible when a devmap egress program mutates packet data.
> One destination can observe changes made for another destination. The
> last-destination path has the same problem: the last destination runs on
> the original skb, so its egress program can modify packet data still
> shared with earlier cloned skbs.
>
> Native XDP broadcast redirect does not have this issue because
> xdpf_clone() copies the frame data for each destination. Generic XDP
> should provide the same per-destination isolation before running a
> devmap egress program.
>
> Fix this by making cloned skbs private in dev_map_generic_redirect()
> before running the devmap egress program. Use skb_copy() instead of
> skb_unshare() so that allocation failure does not consume the skb and
> the existing caller error paths keep their ownership semantics.
>
> Add a selftest that covers the last-destination case where earlier
> destinations do not have a devmap egress program, while the final
> destination does.
>
> Tested with:
>   ./test_progs -t xdp_veth_egress
>   ./test_progs -t xdp_veth
>   ./test_progs -t xdp
>
> Fixes: e624d4ed4aa8 ("xdp: Extend xdp_redirect_map with broadcast support")
> Suggested-by: Jiayuan Chen <jiayuan.chen@linux.dev>
> Signed-off-by: Sun Jian <sun.jian.kdev@gmail.com>

With a few nits (see below):

Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>

> ---
> v1: https://lore.kernel.org/bpf/CABFUUZFimdrZdq=NWi+N-0sJZWvMwY=f4iF6-3TVMS8=m07Zmw@mail.gmail.com/
>
> Changes in v2:
> - Move the private-copy step into dev_map_generic_redirect() so the
>   last-destination path is covered as well.
> - Use skb_copy() instead of skb_unshare() to keep caller ownership
>   unchanged on allocation failure.
> - Add a generic XDP last-destination selftest case.
>
>  kernel/bpf/devmap.c                           |  10 ++
>  .../selftests/bpf/prog_tests/test_xdp_veth.c  | 151 +++++++++++++++++-
>  2 files changed, 158 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
> index cc0a43ebab6b..59f267685bc6 100644
> --- a/kernel/bpf/devmap.c
> +++ b/kernel/bpf/devmap.c
> @@ -700,12 +700,22 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
>  int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
>  			     const struct bpf_prog *xdp_prog)
>  {
> +	struct sk_buff *nskb;

nit: this definition could go inside the if statement block below, to
make it obvious that nskb is not used outside that branch.

>  	int err;
>  
>  	err = xdp_ok_fwd_dev(dst->dev, skb->len);
>  	if (unlikely(err))
>  		return err;
>  
> +	if (dst->xdp_prog && skb_cloned(skb)) {
> +		nskb = skb_copy(skb, GFP_ATOMIC);
> +		if (!nskb)
> +			return -ENOMEM;
> +
> +		consume_skb(skb);
> +		skb = nskb;
> +	}
> +
>  	/* Redirect has already succeeded semantically at this point, so we just
>  	 * return 0 even if packet is dropped. Helper below takes care of
>  	 * freeing skb.
> diff --git a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
> index 3e98a1665936..1f0b9ade12fe 100644
> --- a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
> +++ b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c

It's customary to split changes to the selftests into their own commits.

-Toke


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH bpf v2] bpf: Run generic devmap egress prog on private skb
  2026-06-10 10:28 [PATCH bpf v2] bpf: Run generic devmap egress prog on private skb Sun Jian
  2026-06-10 10:52 ` Toke Høiland-Jørgensen
@ 2026-06-10 11:04 ` bot+bpf-ci
  2026-06-10 12:59 ` Jiayuan Chen
  2 siblings, 0 replies; 4+ messages in thread
From: bot+bpf-ci @ 2026-06-10 11:04 UTC (permalink / raw)
  To: sun.jian.kdev, bpf
  Cc: menglong.dong, emil, sun.jian.kdev, jiayuan.chen, ast, daniel,
	davem, kuba, hawk, john.fastabend, sdf, andrii, martin.lau,
	eddyz87, memxor, song, yonghong.song, jolsa, shuah, liuhangbin,
	toke, netdev, linux-kernel, linux-kselftest, ast, andrii, daniel,
	martin.lau, eddyz87, yonghong.song, clm, ihor.solodrai

[-- Attachment #1: Type: text/plain, Size: 2947 bytes --]

>     Generic XDP devmap multi redirect uses skb_clone() for the
>     intermediate destinations and sends the last destination with the
>     original skb. This can leave multiple destinations sharing the same
>     packet data.
>     This becomes visible when a devmap egress program mutates packet data.
>     One destination can observe changes made for another destination. The
>     last-destination path has the same problem: the last destination runs on
>     the original skb, so its egress program can modify packet data still
>     shared with earlier cloned skbs.
>     Native XDP broadcast redirect does not have this issue because
>     xdpf_clone() copies the frame data for each destination. Generic XDP
>     should provide the same per-destination isolation before running a
>     devmap egress program.
>     Fix this by making cloned skbs private in dev_map_generic_redirect()
>     before running the devmap egress program. Use skb_copy() instead of
>     skb_unshare() so that allocation failure does not consume the skb and
>     the existing caller error paths keep their ownership semantics.
>     Add a selftest that covers the last-destination case where earlier
>     destinations do not have a devmap egress program, while the final
>     destination does.
>     Fixes: e624d4ed4aa8 ("xdp: Extend xdp_redirect_map with broadcast support")
>     Suggested-by: Jiayuan Chen <jiayuan.chen@linux.dev>
>     Signed-off-by: Sun Jian <sun.jian.kdev@gmail.com>

Is the Fixes: tag pointing at the right commit?

The tag references e624d4ed4aa8 ("xdp: Extend xdp_redirect_map with
broadcast support"), which introduced the generic-XDP multi/broadcast
redirect that clones skbs and sends the last destination on the original
skb. At that commit, though, dev_map_generic_redirect() only called
generic_xdp_tx() and ran no per-destination program, so sharing skb data
between clones was harmless.

The corruption being fixed only appears once a devmap egress program
runs on the shared skb data. That egress-program execution in the
generic path (dev_map_bpf_prog_run_skb(), gated on dst->xdp_prog) was
added later by 2ea5eabaf04a ("bpf: devmap: Implement devmap prog
execution for generic XDP"). The fix here is itself gated on
dst->xdp_prog && skb_cloned(skb), matching the commit message statement
that the problem "becomes visible when a devmap egress program mutates
packet data".

Would this be more accurate?

  Fixes: 2ea5eabaf04a ("bpf: devmap: Implement devmap prog execution for generic XDP")

e624d4ed4aa8 provides the clone-sharing infrastructure and could be kept
as an additional reference, but 2ea5eabaf04a is the commit that
introduced the writer into the shared-data path.

---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/27270693851

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH bpf v2] bpf: Run generic devmap egress prog on private skb
  2026-06-10 10:28 [PATCH bpf v2] bpf: Run generic devmap egress prog on private skb Sun Jian
  2026-06-10 10:52 ` Toke Høiland-Jørgensen
  2026-06-10 11:04 ` bot+bpf-ci
@ 2026-06-10 12:59 ` Jiayuan Chen
  2 siblings, 0 replies; 4+ messages in thread
From: Jiayuan Chen @ 2026-06-10 12:59 UTC (permalink / raw)
  To: Sun Jian, bpf
  Cc: Menglong Dong, Emil Tsalapatis, Jiayuan Chen, Alexei Starovoitov,
	Daniel Borkmann, David S. Miller, Jakub Kicinski,
	Jesper Dangaard Brouer, John Fastabend, Stanislav Fomichev,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman,
	Kumar Kartikeya Dwivedi, Song Liu, Yonghong Song, Jiri Olsa,
	Shuah Khan, Hangbin Liu, Toke Høiland-Jørgensen, netdev,
	linux-kernel, linux-kselftest


On 6/10/26 6:28 PM, Sun Jian wrote:
> Generic XDP devmap multi redirect uses skb_clone() for the
> intermediate destinations and sends the last destination with the
> original skb. This can leave multiple destinations sharing the same
> packet data.
>
> This becomes visible when a devmap egress program mutates packet data.
> One destination can observe changes made for another destination. The
> last-destination path has the same problem: the last destination runs on
> the original skb, so its egress program can modify packet data still
> shared with earlier cloned skbs.
>
> Native XDP broadcast redirect does not have this issue because
> xdpf_clone() copies the frame data for each destination. Generic XDP
> should provide the same per-destination isolation before running a
> devmap egress program.
>
> Fix this by making cloned skbs private in dev_map_generic_redirect()
> before running the devmap egress program. Use skb_copy() instead of
> skb_unshare() so that allocation failure does not consume the skb and
> the existing caller error paths keep their ownership semantics.
>
> Add a selftest that covers the last-destination case where earlier
> destinations do not have a devmap egress program, while the final
> destination does.
>
> Tested with:
>    ./test_progs -t xdp_veth_egress
>    ./test_progs -t xdp_veth
>    ./test_progs -t xdp
>
> Fixes: e624d4ed4aa8 ("xdp: Extend xdp_redirect_map with broadcast support")
> Suggested-by: Jiayuan Chen <jiayuan.chen@linux.dev>
> Signed-off-by: Sun Jian <sun.jian.kdev@gmail.com>
> ---
> v1: https://lore.kernel.org/bpf/CABFUUZFimdrZdq=NWi+N-0sJZWvMwY=f4iF6-3TVMS8=m07Zmw@mail.gmail.com/
>
> Changes in v2:
> - Move the private-copy step into dev_map_generic_redirect() so the
>    last-destination path is covered as well.
> - Use skb_copy() instead of skb_unshare() to keep caller ownership
>    unchanged on allocation failure.
> - Add a generic XDP last-destination selftest case.
>
>   kernel/bpf/devmap.c                           |  10 ++
>   .../selftests/bpf/prog_tests/test_xdp_veth.c  | 151 +++++++++++++++++-
>   2 files changed, 158 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
> index cc0a43ebab6b..59f267685bc6 100644
> --- a/kernel/bpf/devmap.c
> +++ b/kernel/bpf/devmap.c
> @@ -700,12 +700,22 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
>   int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
>   			     const struct bpf_prog *xdp_prog)
>   {
> +	struct sk_buff *nskb;
>   	int err;
>   
>   	err = xdp_ok_fwd_dev(dst->dev, skb->len);
>   	if (unlikely(err))
>   		return err;
>   
> +	if (dst->xdp_prog && skb_cloned(skb)) {
> +		nskb = skb_copy(skb, GFP_ATOMIC);
> +		if (!nskb)
> +			return -ENOMEM;
> +
> +		consume_skb(skb);
> +		skb = nskb;
> +	}
> +


LGTM.


Please split selftest as a seprated patch of a patchset.

>   	/* Redirect has already succeeded semantically at this point, so we just
>   	 * return 0 even if packet is dropped. Helper below takes care of
>   	 * freeing skb.
> diff --git a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
> index 3e98a1665936..1f0b9ade12fe 100644
> --- a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
> +++ b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
> @@ -456,7 +456,11 @@ static void xdp_veth_egress(u32 flags)
>   			.remote_flags = flags,
>   		


[...]

> +	for (i = 0; i < VETH_PAIRS_COUNT; i++) {
> +		struct bpf_devmap_val devmap_val = {};
> +		int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth);
> +
> +		SYS(destroy_xdp_redirect_map,
> +		    "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s",
> +		    net_config.veth_cfg[i].namespace, IP_NEIGH,
> +		    net_config.veth_cfg[i].remote_veth);
> +
> +		if (attach_programs_to_veth_pair(bpf_objs, VETH_EGRESS_SKEL_NB,
> +						 &net_config, prog_cfg, i))
> +			goto destroy_xdp_redirect_map;
> +
> +		err = bpf_map_update_elem(mac_map, &ifindex, egress_macs[i], 0);
> +		if (!ASSERT_OK(err, "bpf_map_update_elem"))
> +			goto destroy_xdp_redirect_map;
> +
> +		devmap_val.ifindex = ifindex;
> +		devmap_val.bpf_prog.fd = -1;
> +
> +		if (i == VETH_PAIRS_COUNT - 1)
> +			devmap_val.bpf_prog.fd =
> +				bpf_program__fd(xdp_redirect_multi_kern->progs.xdp_devmap_prog);
> +
> +		err = bpf_map_update_elem(egress_map, &ifindex, &devmap_val, 0);


The kernel walks DEVMAP_HASH in bucket order so I don't think we can 
guarantee that the entry at VETH_PAIRS_COUNT - 1 is the last dst.

Could we use i as the key instead, so the entry order becomes 
deterministic? (bucket is key & (n_buckets - 1).)

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-06-10 13:00 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-10 10:28 [PATCH bpf v2] bpf: Run generic devmap egress prog on private skb Sun Jian
2026-06-10 10:52 ` Toke Høiland-Jørgensen
2026-06-10 11:04 ` bot+bpf-ci
2026-06-10 12:59 ` Jiayuan Chen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox