* [PATCH bpf-next v9 1/5] bpf: add bpf_icmp_send kfunc
2026-06-24 18:55 [PATCH bpf-next v9 0/5] bpf: add icmp_send kfunc Mahe Tardy
@ 2026-06-24 18:55 ` Mahe Tardy
2026-06-24 19:10 ` Emil Tsalapatis
` (2 more replies)
2026-06-24 18:55 ` [PATCH bpf-next v9 2/5] selftests/bpf: add bpf_icmp_send kfunc cgroup_skb tests Mahe Tardy
` (3 subsequent siblings)
4 siblings, 3 replies; 14+ messages in thread
From: Mahe Tardy @ 2026-06-24 18:55 UTC (permalink / raw)
To: bpf
Cc: andrii, ast, daniel, john.fastabend, jordan, martin.lau,
yonghong.song, emil, Mahe Tardy
This is needed in the context of Tetragon to provide improved feedback
(in contrast to just dropping packets) to east-west traffic when blocked
by policies using cgroup_skb programs.
This reuses concepts from the netfilter reject target code path, with the
following differences:
* Packets are cloned since the BPF user can still let the packet pass
(SK_PASS from the cgroup_skb progs for example) and the current skb
needs to stay untouched (cgroup_skb hooks only allow read-only skb
payload).
* We protect against recursion since the kfunc, by generating an ICMP
error message, could retrigger the BPF prog that invoked it.
Only ICMP_DEST_UNREACH and ICMPV6_DEST_UNREACH are currently supported.
The interface accepts a type parameter to facilitate future extension to
other ICMP control message types.
Signed-off-by: Mahe Tardy <mahe.tardy@gmail.com>
---
net/core/filter.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 91 insertions(+)
diff --git a/net/core/filter.c b/net/core/filter.c
index 2e96b4b847ce..f3aa494ed105 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -84,6 +84,9 @@
#include <linux/un.h>
#include <net/xdp_sock_drv.h>
#include <net/inet_dscp.h>
+#include <linux/icmpv6.h>
+#include <net/icmp.h>
+#include <net/ip6_route.h>
#include "dev.h"
@@ -12546,6 +12549,84 @@ __bpf_kfunc int bpf_xdp_pull_data(struct xdp_md *x, u32 len)
return 0;
}
+/**
+ * bpf_icmp_send - Send an ICMP control message
+ * @skb_ctx: Packet that triggered the control message
+ * @type: ICMP type (only ICMP_DEST_UNREACH/ICMPV6_DEST_UNREACH supported)
+ * @code: ICMP code (0-15 except ICMP_FRAG_NEEDED for IPv4, 0-6 for IPv6)
+ *
+ * Sends an ICMP control message in response to the packet. The original packet
+ * is cloned before sending the ICMP message, so the BPF program can still let
+ * the packet pass if desired.
+ *
+ * Currently only ICMP_DEST_UNREACH (IPv4) and ICMPV6_DEST_UNREACH (IPv6) are
+ * supported.
+ *
+ * Return: 0 on success (send attempt), negative error code on failure:
+ * -EBUSY: Recursion detected
+ * -EPROTONOSUPPORT: Non-IP protocol
+ * -EOPNOTSUPP: Unsupported ICMP type
+ * -EINVAL: Invalid code parameter
+ * -ENETUNREACH: Unusable IPv4 route/dst attached to the skb
+ * -ENOMEM: Memory allocation failed
+ */
+__bpf_kfunc int bpf_icmp_send(struct __sk_buff *skb_ctx, int type, int code)
+{
+ struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+ struct sk_buff *nskb;
+ struct sock *sk;
+
+ sk = skb_to_full_sk(skb);
+ if (sk && sk->sk_kern_sock &&
+ (sk->sk_protocol == IPPROTO_ICMP || sk->sk_protocol == IPPROTO_ICMPV6))
+ return -EBUSY;
+
+ switch (skb->protocol) {
+#if IS_ENABLED(CONFIG_INET)
+ case htons(ETH_P_IP): {
+ if (type != ICMP_DEST_UNREACH)
+ return -EOPNOTSUPP;
+ if (code < 0 || code > NR_ICMP_UNREACH ||
+ code == ICMP_FRAG_NEEDED) /* needs a valid next-hop MTU */
+ return -EINVAL;
+
+ /* icmp_send requires a rtable; test-run synthetic skbs lack one. */
+ if (!skb_valid_dst(skb))
+ return -ENETUNREACH;
+
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return -ENOMEM;
+
+ memset(IPCB(nskb), 0, sizeof(*IPCB(nskb)));
+ icmp_send(nskb, type, code, 0);
+ consume_skb(nskb);
+ break;
+ }
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ if (type != ICMPV6_DEST_UNREACH)
+ return -EOPNOTSUPP;
+ if (code < 0 || code > ICMPV6_REJECT_ROUTE)
+ return -EINVAL;
+
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return -ENOMEM;
+
+ memset(IP6CB(nskb), 0, sizeof(*IP6CB(nskb)));
+ icmpv6_send(nskb, type, code, 0);
+ consume_skb(nskb);
+ break;
+#endif
+ default:
+ return -EPROTONOSUPPORT;
+ }
+
+ return 0;
+}
+
__bpf_kfunc_end_defs();
int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
@@ -12588,6 +12669,10 @@ BTF_KFUNCS_START(bpf_kfunc_check_set_sock_ops)
BTF_ID_FLAGS(func, bpf_sock_ops_enable_tx_tstamp)
BTF_KFUNCS_END(bpf_kfunc_check_set_sock_ops)
+BTF_KFUNCS_START(bpf_kfunc_check_set_icmp_send)
+BTF_ID_FLAGS(func, bpf_icmp_send)
+BTF_KFUNCS_END(bpf_kfunc_check_set_icmp_send)
+
static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
.owner = THIS_MODULE,
.set = &bpf_kfunc_check_set_skb,
@@ -12618,6 +12703,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_sock_ops = {
.set = &bpf_kfunc_check_set_sock_ops,
};
+static const struct btf_kfunc_id_set bpf_kfunc_set_icmp_send = {
+ .owner = THIS_MODULE,
+ .set = &bpf_kfunc_check_set_icmp_send,
+};
+
static int __init bpf_kfunc_init(void)
{
int ret;
@@ -12639,6 +12729,7 @@ static int __init bpf_kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
&bpf_kfunc_set_sock_addr);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_tcp_reqsk);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &bpf_kfunc_set_icmp_send);
return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCK_OPS, &bpf_kfunc_set_sock_ops);
}
late_initcall(bpf_kfunc_init);
--
2.34.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH bpf-next v9 1/5] bpf: add bpf_icmp_send kfunc
2026-06-24 18:55 ` [PATCH bpf-next v9 1/5] bpf: add bpf_icmp_send kfunc Mahe Tardy
@ 2026-06-24 19:10 ` Emil Tsalapatis
2026-06-24 19:19 ` sashiko-bot
2026-06-24 21:24 ` Jordan Rife
2 siblings, 0 replies; 14+ messages in thread
From: Emil Tsalapatis @ 2026-06-24 19:10 UTC (permalink / raw)
To: Mahe Tardy, bpf
Cc: andrii, ast, daniel, john.fastabend, jordan, martin.lau,
yonghong.song, emil
On Wed Jun 24, 2026 at 2:55 PM EDT, Mahe Tardy wrote:
> This is needed in the context of Tetragon to provide improved feedback
> (in contrast to just dropping packets) to east-west traffic when blocked
> by policies using cgroup_skb programs.
>
> This reuses concepts from netfilter reject target codepath with the
> differences that:
> * Packets are cloned since the BPF user can still let the packet pass
> (SK_PASS from the cgroup_skb progs for example) and the current skb
> need to stay untouched (cgroup_skb hooks only allow read-only skb
> payload).
> * We protect against recursion since the kfunc, by generating an ICMP
> error message, could retrigger the BPF prog that invoked it.
>
> Only ICMP_DEST_UNREACH and ICMPV6_DEST_UNREACH are currently supported.
> The interface accepts a type parameter to facilitate future extension to
> other ICMP control message types.
>
> Signed-off-by: Mahe Tardy <mahe.tardy@gmail.com>
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
> ---
> net/core/filter.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 91 insertions(+)
>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 2e96b4b847ce..f3aa494ed105 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -84,6 +84,9 @@
> #include <linux/un.h>
> #include <net/xdp_sock_drv.h>
> #include <net/inet_dscp.h>
> +#include <linux/icmpv6.h>
> +#include <net/icmp.h>
> +#include <net/ip6_route.h>
>
> #include "dev.h"
>
> @@ -12546,6 +12549,84 @@ __bpf_kfunc int bpf_xdp_pull_data(struct xdp_md *x, u32 len)
> return 0;
> }
>
> +/**
> + * bpf_icmp_send - Send an ICMP control message
> + * @skb_ctx: Packet that triggered the control message
> + * @type: ICMP type (only ICMP_DEST_UNREACH/ICMPV6_DEST_UNREACH supported)
> + * @code: ICMP code (0-15 except ICMP_FRAG_NEEDED for IPv4, 0-6 for IPv6)
> + *
> + * Sends an ICMP control message in response to the packet. The original packet
> + * is cloned before sending the ICMP message, so the BPF program can still let
> + * the packet pass if desired.
> + *
> + * Currently only ICMP_DEST_UNREACH (IPv4) and ICMPV6_DEST_UNREACH (IPv6) are
> + * supported.
> + *
> + * Return: 0 on success (send attempt), negative error code on failure:
> + * -EBUSY: Recursion detected
> + * -EPROTONOSUPPORT: Non-IP protocol
> + * -EOPNOTSUPP: Unsupported ICMP type
> + * -EINVAL: Invalid code parameter
> + * -ENETUNREACH: Unusable IPv4 route/dst attached to the skb
> + * -ENOMEM: Memory allocation failed
> + */
> +__bpf_kfunc int bpf_icmp_send(struct __sk_buff *skb_ctx, int type, int code)
> +{
> + struct sk_buff *skb = (struct sk_buff *)skb_ctx;
> + struct sk_buff *nskb;
> + struct sock *sk;
> +
> + sk = skb_to_full_sk(skb);
> + if (sk && sk->sk_kern_sock &&
> + (sk->sk_protocol == IPPROTO_ICMP || sk->sk_protocol == IPPROTO_ICMPV6))
> + return -EBUSY;
> +
> + switch (skb->protocol) {
> +#if IS_ENABLED(CONFIG_INET)
> + case htons(ETH_P_IP): {
> + if (type != ICMP_DEST_UNREACH)
> + return -EOPNOTSUPP;
> + if (code < 0 || code > NR_ICMP_UNREACH ||
> + code == ICMP_FRAG_NEEDED) /* needs a valid next-hop MTU */
> + return -EINVAL;
> +
> + /* icmp_send requires a rtable; test-run synthetic skbs lack one. */
> + if (!skb_valid_dst(skb))
> + return -ENETUNREACH;
> +
> + nskb = skb_clone(skb, GFP_ATOMIC);
> + if (!nskb)
> + return -ENOMEM;
> +
> + memset(IPCB(nskb), 0, sizeof(*IPCB(nskb)));
> + icmp_send(nskb, type, code, 0);
> + consume_skb(nskb);
> + break;
> + }
> +#endif
> +#if IS_ENABLED(CONFIG_IPV6)
> + case htons(ETH_P_IPV6):
> + if (type != ICMPV6_DEST_UNREACH)
> + return -EOPNOTSUPP;
> + if (code < 0 || code > ICMPV6_REJECT_ROUTE)
> + return -EINVAL;
> +
> + nskb = skb_clone(skb, GFP_ATOMIC);
> + if (!nskb)
> + return -ENOMEM;
> +
> + memset(IP6CB(nskb), 0, sizeof(*IP6CB(nskb)));
> + icmpv6_send(nskb, type, code, 0);
> + consume_skb(nskb);
> + break;
> +#endif
> + default:
> + return -EPROTONOSUPPORT;
> + }
> +
> + return 0;
> +}
> +
> __bpf_kfunc_end_defs();
>
> int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
> @@ -12588,6 +12669,10 @@ BTF_KFUNCS_START(bpf_kfunc_check_set_sock_ops)
> BTF_ID_FLAGS(func, bpf_sock_ops_enable_tx_tstamp)
> BTF_KFUNCS_END(bpf_kfunc_check_set_sock_ops)
>
> +BTF_KFUNCS_START(bpf_kfunc_check_set_icmp_send)
> +BTF_ID_FLAGS(func, bpf_icmp_send)
> +BTF_KFUNCS_END(bpf_kfunc_check_set_icmp_send)
> +
> static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
> .owner = THIS_MODULE,
> .set = &bpf_kfunc_check_set_skb,
> @@ -12618,6 +12703,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_sock_ops = {
> .set = &bpf_kfunc_check_set_sock_ops,
> };
>
> +static const struct btf_kfunc_id_set bpf_kfunc_set_icmp_send = {
> + .owner = THIS_MODULE,
> + .set = &bpf_kfunc_check_set_icmp_send,
> +};
> +
> static int __init bpf_kfunc_init(void)
> {
> int ret;
> @@ -12639,6 +12729,7 @@ static int __init bpf_kfunc_init(void)
> ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
> &bpf_kfunc_set_sock_addr);
> ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_tcp_reqsk);
> + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &bpf_kfunc_set_icmp_send);
> return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCK_OPS, &bpf_kfunc_set_sock_ops);
> }
> late_initcall(bpf_kfunc_init);
> --
> 2.34.1
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH bpf-next v9 1/5] bpf: add bpf_icmp_send kfunc
2026-06-24 18:55 ` [PATCH bpf-next v9 1/5] bpf: add bpf_icmp_send kfunc Mahe Tardy
2026-06-24 19:10 ` Emil Tsalapatis
@ 2026-06-24 19:19 ` sashiko-bot
2026-06-24 20:51 ` Alexei Starovoitov
2026-06-24 21:24 ` Jordan Rife
2 siblings, 1 reply; 14+ messages in thread
From: sashiko-bot @ 2026-06-24 19:19 UTC (permalink / raw)
To: Mahe Tardy; +Cc: bpf
Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
- [High] Missing `skb_valid_dst()` check for IPv6 allows type confusion with metadata dst entries, leading to an out-of-bounds read and kernel crash.
--
commit a59a53956c9b8f69a9dfa47bc54cc9fbcd9759ec
Author: Mahe Tardy <mahe.tardy@gmail.com>
bpf: add bpf_icmp_send kfunc
This commit introduces a new bpf_icmp_send kfunc to allow BPF programs to
generate ICMP error messages. It clones the original skb and invokes the
underlying IPv4 or IPv6 ICMP send routines.
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 2e96b4b847ce1..f3aa494ed1053 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
[ ... ]
> +__bpf_kfunc int bpf_icmp_send(struct __sk_buff *skb_ctx, int type, int code)
> +{
> + struct sk_buff *skb = (struct sk_buff *)skb_ctx;
> + struct sk_buff *nskb;
> + struct sock *sk;
[ ... ]
> + switch (skb->protocol) {
> +#if IS_ENABLED(CONFIG_INET)
> + case htons(ETH_P_IP): {
> + if (type != ICMP_DEST_UNREACH)
> + return -EOPNOTSUPP;
> + if (code < 0 || code > NR_ICMP_UNREACH ||
> + code == ICMP_FRAG_NEEDED) /* needs a valid next-hop MTU */
> + return -EINVAL;
> +
> + /* icmp_send requires a rtable; test-run synthetic skbs lack one. */
> + if (!skb_valid_dst(skb))
> + return -ENETUNREACH;
> +
> + nskb = skb_clone(skb, GFP_ATOMIC);
[ ... ]
> +#if IS_ENABLED(CONFIG_IPV6)
> + case htons(ETH_P_IPV6):
> + if (type != ICMPV6_DEST_UNREACH)
> + return -EOPNOTSUPP;
> + if (code < 0 || code > ICMPV6_REJECT_ROUTE)
> + return -EINVAL;
> +
[Severity: High]
Should there be a check for skb_valid_dst(skb) here before cloning and
sending the IPv6 ICMP message?
It looks like the IPv4 branch validates the dst to prevent routing table
lookups on synthetic or metadata-dst packets, but the IPv6 branch does not.
If an IPv6 packet with a metadata_dst (e.g., from LWT or a tunnel) is
passed to bpf_icmp_send(), could icmpv6_send() process the invalid dst and
cast it to a struct rt6_info in icmp6_dev()? Dereferencing rt6->rt6i_idev
might then read out of bounds from the metadata_dst struct and cause a
kernel crash.
> + nskb = skb_clone(skb, GFP_ATOMIC);
> + if (!nskb)
> + return -ENOMEM;
> +
> + memset(IP6CB(nskb), 0, sizeof(*IP6CB(nskb)));
> + icmpv6_send(nskb, type, code, 0);
> + consume_skb(nskb);
> + break;
> +#endif
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260624185554.362555-1-mahe.tardy@gmail.com?part=1
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH bpf-next v9 1/5] bpf: add bpf_icmp_send kfunc
2026-06-24 19:19 ` sashiko-bot
@ 2026-06-24 20:51 ` Alexei Starovoitov
0 siblings, 0 replies; 14+ messages in thread
From: Alexei Starovoitov @ 2026-06-24 20:51 UTC (permalink / raw)
To: sashiko-reviews, Mahe Tardy; +Cc: bpf
On Wed Jun 24, 2026 at 12:19 PM PDT, sashiko-bot wrote:
> Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
> - [High] Missing `skb_valid_dst()` check for IPv6 allows type confusion with metadata dst entries, leading to an out-of-bounds read and kernel crash.
> --
>
> commit a59a53956c9b8f69a9dfa47bc54cc9fbcd9759ec
> Author: Mahe Tardy <mahe.tardy@gmail.com>
>
> bpf: add bpf_icmp_send kfunc
>
> This commit introduces a new bpf_icmp_send kfunc to allow BPF programs to
> generate ICMP error messages. It clones the original skb and invokes the
> underlying IPv4 or IPv6 ICMP send routines.
>
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 2e96b4b847ce1..f3aa494ed1053 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>
> [ ... ]
>
>> +__bpf_kfunc int bpf_icmp_send(struct __sk_buff *skb_ctx, int type, int code)
>> +{
>> + struct sk_buff *skb = (struct sk_buff *)skb_ctx;
>> + struct sk_buff *nskb;
>> + struct sock *sk;
>
> [ ... ]
>
>> + switch (skb->protocol) {
>> +#if IS_ENABLED(CONFIG_INET)
>> + case htons(ETH_P_IP): {
>> + if (type != ICMP_DEST_UNREACH)
>> + return -EOPNOTSUPP;
>> + if (code < 0 || code > NR_ICMP_UNREACH ||
>> + code == ICMP_FRAG_NEEDED) /* needs a valid next-hop MTU */
>> + return -EINVAL;
>> +
>> + /* icmp_send requires a rtable; test-run synthetic skbs lack one. */
>> + if (!skb_valid_dst(skb))
>> + return -ENETUNREACH;
>> +
>> + nskb = skb_clone(skb, GFP_ATOMIC);
>
> [ ... ]
>
>> +#if IS_ENABLED(CONFIG_IPV6)
>> + case htons(ETH_P_IPV6):
>> + if (type != ICMPV6_DEST_UNREACH)
>> + return -EOPNOTSUPP;
>> + if (code < 0 || code > ICMPV6_REJECT_ROUTE)
>> + return -EINVAL;
>> +
>
> [Severity: High]
> Should there be a check for skb_valid_dst(skb) here before cloning and
> sending the IPv6 ICMP message?
>
> It looks like the IPv4 branch validates the dst to prevent routing table
> lookups on synthetic or metadata-dst packets, but the IPv6 branch does not.
>
> If an IPv6 packet with a metadata_dst (e.g., from LWT or a tunnel) is
> passed to bpf_icmp_send(), could icmpv6_send() process the invalid dst and
> cast it to a struct rt6_info in icmp6_dev()? Dereferencing rt6->rt6i_idev
> might then read out of bounds from the metadata_dst struct and cause a
> kernel crash.
Sounds like a real issue.
Mahe,
when you respin please cc netdev.
We need an ack from networking maintainers.
pw-bot: cr
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH bpf-next v9 1/5] bpf: add bpf_icmp_send kfunc
2026-06-24 18:55 ` [PATCH bpf-next v9 1/5] bpf: add bpf_icmp_send kfunc Mahe Tardy
2026-06-24 19:10 ` Emil Tsalapatis
2026-06-24 19:19 ` sashiko-bot
@ 2026-06-24 21:24 ` Jordan Rife
2 siblings, 0 replies; 14+ messages in thread
From: Jordan Rife @ 2026-06-24 21:24 UTC (permalink / raw)
To: Mahe Tardy
Cc: bpf, andrii, ast, daniel, john.fastabend, martin.lau,
yonghong.song, emil
On Wed, Jun 24, 2026 at 06:55:50PM +0000, Mahe Tardy wrote:
> This is needed in the context of Tetragon to provide improved feedback
> (in contrast to just dropping packets) to east-west traffic when blocked
> by policies using cgroup_skb programs.
>
> This reuses concepts from netfilter reject target codepath with the
> differences that:
> * Packets are cloned since the BPF user can still let the packet pass
> (SK_PASS from the cgroup_skb progs for example) and the current skb
> need to stay untouched (cgroup_skb hooks only allow read-only skb
> payload).
> * We protect against recursion since the kfunc, by generating an ICMP
> error message, could retrigger the BPF prog that invoked it.
>
> Only ICMP_DEST_UNREACH and ICMPV6_DEST_UNREACH are currently supported.
> The interface accepts a type parameter to facilitate future extension to
> other ICMP control message types.
>
> Signed-off-by: Mahe Tardy <mahe.tardy@gmail.com>
> ---
> net/core/filter.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 91 insertions(+)
>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 2e96b4b847ce..f3aa494ed105 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -84,6 +84,9 @@
> #include <linux/un.h>
> #include <net/xdp_sock_drv.h>
> #include <net/inet_dscp.h>
> +#include <linux/icmpv6.h>
> +#include <net/icmp.h>
> +#include <net/ip6_route.h>
>
> #include "dev.h"
>
> @@ -12546,6 +12549,84 @@ __bpf_kfunc int bpf_xdp_pull_data(struct xdp_md *x, u32 len)
> return 0;
> }
>
> +/**
> + * bpf_icmp_send - Send an ICMP control message
> + * @skb_ctx: Packet that triggered the control message
> + * @type: ICMP type (only ICMP_DEST_UNREACH/ICMPV6_DEST_UNREACH supported)
> + * @code: ICMP code (0-15 except ICMP_FRAG_NEEDED for IPv4, 0-6 for IPv6)
> + *
> + * Sends an ICMP control message in response to the packet. The original packet
> + * is cloned before sending the ICMP message, so the BPF program can still let
> + * the packet pass if desired.
> + *
> + * Currently only ICMP_DEST_UNREACH (IPv4) and ICMPV6_DEST_UNREACH (IPv6) are
> + * supported.
> + *
> + * Return: 0 on success (send attempt), negative error code on failure:
> + * -EBUSY: Recursion detected
> + * -EPROTONOSUPPORT: Non-IP protocol
> + * -EOPNOTSUPP: Unsupported ICMP type
> + * -EINVAL: Invalid code parameter
> + * -ENETUNREACH: Unusable IPv4 route/dst attached to the skb
> + * -ENOMEM: Memory allocation failed
> + */
> +__bpf_kfunc int bpf_icmp_send(struct __sk_buff *skb_ctx, int type, int code)
> +{
> + struct sk_buff *skb = (struct sk_buff *)skb_ctx;
> + struct sk_buff *nskb;
> + struct sock *sk;
> +
> + sk = skb_to_full_sk(skb);
> + if (sk && sk->sk_kern_sock &&
> + (sk->sk_protocol == IPPROTO_ICMP || sk->sk_protocol == IPPROTO_ICMPV6))
> + return -EBUSY;
> +
> + switch (skb->protocol) {
> +#if IS_ENABLED(CONFIG_INET)
> + case htons(ETH_P_IP): {
> + if (type != ICMP_DEST_UNREACH)
> + return -EOPNOTSUPP;
> + if (code < 0 || code > NR_ICMP_UNREACH ||
> + code == ICMP_FRAG_NEEDED) /* needs a valid next-hop MTU */
> + return -EINVAL;
> +
> + /* icmp_send requires a rtable; test-run synthetic skbs lack one. */
> + if (!skb_valid_dst(skb))
> + return -ENETUNREACH;
> +
> + nskb = skb_clone(skb, GFP_ATOMIC);
> + if (!nskb)
> + return -ENOMEM;
> +
> + memset(IPCB(nskb), 0, sizeof(*IPCB(nskb)));
> + icmp_send(nskb, type, code, 0);
> + consume_skb(nskb);
> + break;
> + }
> +#endif
> +#if IS_ENABLED(CONFIG_IPV6)
> + case htons(ETH_P_IPV6):
> + if (type != ICMPV6_DEST_UNREACH)
> + return -EOPNOTSUPP;
> + if (code < 0 || code > ICMPV6_REJECT_ROUTE)
> + return -EINVAL;
> +
> + nskb = skb_clone(skb, GFP_ATOMIC);
> + if (!nskb)
> + return -ENOMEM;
> +
> + memset(IP6CB(nskb), 0, sizeof(*IP6CB(nskb)));
> + icmpv6_send(nskb, type, code, 0);
> + consume_skb(nskb);
> + break;
> +#endif
> + default:
> + return -EPROTONOSUPPORT;
> + }
> +
> + return 0;
> +}
> +
> __bpf_kfunc_end_defs();
>
> int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
> @@ -12588,6 +12669,10 @@ BTF_KFUNCS_START(bpf_kfunc_check_set_sock_ops)
> BTF_ID_FLAGS(func, bpf_sock_ops_enable_tx_tstamp)
> BTF_KFUNCS_END(bpf_kfunc_check_set_sock_ops)
>
> +BTF_KFUNCS_START(bpf_kfunc_check_set_icmp_send)
> +BTF_ID_FLAGS(func, bpf_icmp_send)
> +BTF_KFUNCS_END(bpf_kfunc_check_set_icmp_send)
> +
> static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
> .owner = THIS_MODULE,
> .set = &bpf_kfunc_check_set_skb,
> @@ -12618,6 +12703,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_sock_ops = {
> .set = &bpf_kfunc_check_set_sock_ops,
> };
>
> +static const struct btf_kfunc_id_set bpf_kfunc_set_icmp_send = {
> + .owner = THIS_MODULE,
> + .set = &bpf_kfunc_check_set_icmp_send,
> +};
> +
> static int __init bpf_kfunc_init(void)
> {
> int ret;
> @@ -12639,6 +12729,7 @@ static int __init bpf_kfunc_init(void)
> ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
> &bpf_kfunc_set_sock_addr);
> ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_tcp_reqsk);
> + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &bpf_kfunc_set_icmp_send);
> return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCK_OPS, &bpf_kfunc_set_sock_ops);
> }
> late_initcall(bpf_kfunc_init);
> --
> 2.34.1
>
Reviewed-by: Jordan Rife <jordan@jrife.io>
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH bpf-next v9 2/5] selftests/bpf: add bpf_icmp_send kfunc cgroup_skb tests
2026-06-24 18:55 [PATCH bpf-next v9 0/5] bpf: add icmp_send kfunc Mahe Tardy
2026-06-24 18:55 ` [PATCH bpf-next v9 1/5] bpf: add bpf_icmp_send kfunc Mahe Tardy
@ 2026-06-24 18:55 ` Mahe Tardy
2026-06-24 18:55 ` [PATCH bpf-next v9 3/5] selftests/bpf: add bpf_icmp_send kfunc cgroup_skb IPv6 tests Mahe Tardy
` (2 subsequent siblings)
4 siblings, 0 replies; 14+ messages in thread
From: Mahe Tardy @ 2026-06-24 18:55 UTC (permalink / raw)
To: bpf
Cc: andrii, ast, daniel, john.fastabend, jordan, martin.lau,
yonghong.song, emil, Mahe Tardy
This test opens a server and client, enters a new cgroup, attaches a
cgroup_skb program on egress and calls the bpf_icmp_send function from
the client egress so that an ICMP unreach control message is sent back
to the client. It then fetches the message from the error queue to
confirm the correct ICMP unreach code has been sent.
Note that, for the client, we have to connect in non-blocking mode to
let the test execute faster. Otherwise, we need to wait for the TCP
three-way handshake to time out in the kernel before reading the errno.
Also note that we don't set IP_RECVERR on the socket in
connect_to_fd_nonblock since the error will be transferred anyway in our
test because the connection is rejected at the beginning of the TCP
handshake. See tcp_v4_err() in net/ipv4/tcp_ipv4.c for more details.
Reviewed-by: Jordan Rife <jordan@jrife.io>
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
Signed-off-by: Mahe Tardy <mahe.tardy@gmail.com>
---
.../bpf/prog_tests/icmp_send_kfunc.c | 164 ++++++++++++++++++
tools/testing/selftests/bpf/progs/icmp_send.c | 38 ++++
2 files changed, 202 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
create mode 100644 tools/testing/selftests/bpf/progs/icmp_send.c
diff --git a/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c b/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
new file mode 100644
index 000000000000..b8a98c90053e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <linux/errqueue.h>
+#include <poll.h>
+#include "icmp_send.skel.h"
+
+#define TIMEOUT_MS 1000
+
+#define ICMP_DEST_UNREACH 3
+
+#define ICMP_FRAG_NEEDED 4
+#define NR_ICMP_UNREACH 15
+
+#define KFUNC_RET_UNSET -1
+
+static int connect_to_fd_nonblock(int server_fd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int fd, err;
+
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &len))
+ return -1;
+
+ fd = socket(addr.ss_family, SOCK_STREAM | SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return -1;
+
+ err = connect(fd, (struct sockaddr *)&addr, len);
+ if (err < 0 && errno != EINPROGRESS) {
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+static void read_icmp_errqueue(int sockfd, int expected_code)
+{
+ struct sock_extended_err *sock_err;
+ char ctrl_buf[512];
+ struct msghdr msg = {
+ .msg_control = ctrl_buf,
+ .msg_controllen = sizeof(ctrl_buf),
+ };
+ struct pollfd pfd = {
+ .fd = sockfd,
+ .events = POLLERR,
+ };
+ struct cmsghdr *cm;
+ ssize_t n;
+
+ if (!ASSERT_GE(poll(&pfd, 1, TIMEOUT_MS), 1, "poll_errqueue"))
+ return;
+
+ n = recvmsg(sockfd, &msg, MSG_ERRQUEUE);
+ if (!ASSERT_GE(n, 0, "recvmsg_errqueue"))
+ return;
+
+ cm = CMSG_FIRSTHDR(&msg);
+ if (!ASSERT_NEQ(cm, NULL, "cm_firsthdr_null"))
+ return;
+
+ for (; cm; cm = CMSG_NXTHDR(&msg, cm)) {
+ if (cm->cmsg_level != IPPROTO_IP || cm->cmsg_type != IP_RECVERR)
+ continue;
+
+ sock_err = (struct sock_extended_err *)CMSG_DATA(cm);
+
+ if (!ASSERT_EQ(sock_err->ee_origin, SO_EE_ORIGIN_ICMP,
+ "sock_err_origin_icmp"))
+ return;
+ if (!ASSERT_EQ(sock_err->ee_type, ICMP_DEST_UNREACH,
+ "sock_err_type_dest_unreach"))
+ return;
+ ASSERT_EQ(sock_err->ee_code, expected_code, "sock_err_code");
+ return;
+ }
+
+ ASSERT_FAIL("no IP_RECVERR control message found");
+}
+
+static bool valid_unreach_code(int code)
+{
+ if (code < 0)
+ return false;
+
+ return code <= NR_ICMP_UNREACH && code != ICMP_FRAG_NEEDED;
+}
+
+static void trigger_prog_read_icmp_errqueue(struct icmp_send *skel, int code)
+{
+ int srv_fd = -1, client_fd = -1;
+ int port;
+
+ srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", 0, TIMEOUT_MS);
+ if (!ASSERT_OK_FD(srv_fd, "start_server"))
+ return;
+
+ port = get_socket_local_port(srv_fd);
+ if (!ASSERT_GE(port, 0, "get_socket_local_port")) {
+ close(srv_fd);
+ return;
+ }
+
+ skel->bss->server_port = ntohs(port);
+ skel->bss->unreach_code = code;
+ skel->data->kfunc_ret = KFUNC_RET_UNSET;
+
+ client_fd = connect_to_fd_nonblock(srv_fd);
+ if (!ASSERT_OK_FD(client_fd, "client_connect_nonblock")) {
+ close(srv_fd);
+ return;
+ }
+
+ if (valid_unreach_code(code))
+ read_icmp_errqueue(client_fd, code);
+
+ close(client_fd);
+ close(srv_fd);
+}
+
+void test_icmp_send_unreach_cgroup(void)
+{
+ struct icmp_send *skel;
+ int cgroup_fd = -1;
+
+ skel = icmp_send__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ cgroup_fd = test__join_cgroup("/icmp_send_unreach_cgroup");
+ if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup"))
+ goto cleanup;
+
+ skel->links.egress =
+ bpf_program__attach_cgroup(skel->progs.egress, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.egress, "prog_attach_cgroup"))
+ goto cleanup;
+
+ for (int code = 0; code <= NR_ICMP_UNREACH; code++) {
+ if (code == ICMP_FRAG_NEEDED)
+ continue;
+
+ trigger_prog_read_icmp_errqueue(skel, code);
+ ASSERT_EQ(skel->data->kfunc_ret, 0, "kfunc_ret");
+ }
+
+ /* Test invalid codes */
+ trigger_prog_read_icmp_errqueue(skel, -1);
+ ASSERT_EQ(skel->data->kfunc_ret, -EINVAL, "kfunc_ret");
+
+ trigger_prog_read_icmp_errqueue(skel, NR_ICMP_UNREACH + 1);
+ ASSERT_EQ(skel->data->kfunc_ret, -EINVAL, "kfunc_ret");
+
+ trigger_prog_read_icmp_errqueue(skel, ICMP_FRAG_NEEDED);
+ ASSERT_EQ(skel->data->kfunc_ret, -EINVAL, "kfunc_ret");
+
+cleanup:
+ icmp_send__destroy(skel);
+ if (cgroup_fd >= 0)
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/icmp_send.c b/tools/testing/selftests/bpf/progs/icmp_send.c
new file mode 100644
index 000000000000..6d0be0a9afe1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/icmp_send.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+/* 127.0.0.1 in host byte order */
+#define SERVER_IP 0x7F000001
+
+#define ICMP_DEST_UNREACH 3
+
+__u16 server_port = 0;
+int unreach_code = 0;
+int kfunc_ret = -1;
+
+SEC("cgroup_skb/egress")
+int egress(struct __sk_buff *skb)
+{
+ void *data = (void *)(long)skb->data;
+ void *data_end = (void *)(long)skb->data_end;
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+
+ iph = data;
+ if ((void *)(iph + 1) > data_end || iph->version != 4 ||
+ iph->protocol != IPPROTO_TCP || iph->daddr != bpf_htonl(SERVER_IP))
+ return SK_PASS;
+
+ tcph = (void *)iph + iph->ihl * 4;
+ if ((void *)(tcph + 1) > data_end ||
+ tcph->dest != bpf_htons(server_port))
+ return SK_PASS;
+
+ kfunc_ret = bpf_icmp_send(skb, ICMP_DEST_UNREACH, unreach_code);
+
+ return SK_DROP;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
--
2.34.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH bpf-next v9 3/5] selftests/bpf: add bpf_icmp_send kfunc cgroup_skb IPv6 tests
2026-06-24 18:55 [PATCH bpf-next v9 0/5] bpf: add icmp_send kfunc Mahe Tardy
2026-06-24 18:55 ` [PATCH bpf-next v9 1/5] bpf: add bpf_icmp_send kfunc Mahe Tardy
2026-06-24 18:55 ` [PATCH bpf-next v9 2/5] selftests/bpf: add bpf_icmp_send kfunc cgroup_skb tests Mahe Tardy
@ 2026-06-24 18:55 ` Mahe Tardy
2026-06-24 21:24 ` Jordan Rife
2026-06-24 18:55 ` [PATCH bpf-next v9 4/5] selftests/bpf: add bpf_icmp_send recursion test Mahe Tardy
2026-06-24 18:55 ` [PATCH bpf-next v9 5/5] selftests/bpf: add bpf_icmp_send no route test Mahe Tardy
4 siblings, 1 reply; 14+ messages in thread
From: Mahe Tardy @ 2026-06-24 18:55 UTC (permalink / raw)
To: bpf
Cc: andrii, ast, daniel, john.fastabend, jordan, martin.lau,
yonghong.song, emil, Mahe Tardy
This test extends the existing cgroup_skb tests with IPv6 support.
Note that we need to set IPV6_RECVERR on the socket for IPv6 in
connect_to_fd_nonblock(); otherwise, the error will be ignored even if we
are in the middle of the TCP handshake. See ipv6_icmp_error() in
net/ipv6/datagram.c for more details.
Signed-off-by: Mahe Tardy <mahe.tardy@gmail.com>
---
.../bpf/prog_tests/icmp_send_kfunc.c | 91 +++++++++++++------
tools/testing/selftests/bpf/progs/icmp_send.c | 48 ++++++++--
2 files changed, 101 insertions(+), 38 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c b/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
index b8a98c90053e..bbb3c3d4509c 100644
--- a/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
@@ -8,9 +8,11 @@
#define TIMEOUT_MS 1000
#define ICMP_DEST_UNREACH 3
+#define ICMPV6_DEST_UNREACH 1
#define ICMP_FRAG_NEEDED 4
#define NR_ICMP_UNREACH 15
+#define ICMPV6_REJECT_ROUTE 6
#define KFUNC_RET_UNSET -1
@@ -18,7 +20,7 @@ static int connect_to_fd_nonblock(int server_fd)
{
struct sockaddr_storage addr;
socklen_t len = sizeof(addr);
- int fd, err;
+ int fd, err, on = 1;
if (getsockname(server_fd, (struct sockaddr *)&addr, &len))
return -1;
@@ -27,6 +29,12 @@ static int connect_to_fd_nonblock(int server_fd)
if (fd < 0)
return -1;
+ if (addr.ss_family == AF_INET6 &&
+ setsockopt(fd, IPPROTO_IPV6, IPV6_RECVERR, &on, sizeof(on)) < 0) {
+ close(fd);
+ return -1;
+ }
+
err = connect(fd, (struct sockaddr *)&addr, len);
if (err < 0 && errno != EINPROGRESS) {
close(fd);
@@ -36,8 +44,14 @@ static int connect_to_fd_nonblock(int server_fd)
return fd;
}
-static void read_icmp_errqueue(int sockfd, int expected_code)
+static void read_icmp_errqueue(int sockfd, int expected_code, int af)
{
+ int expected_ee_type = (af == AF_INET) ? ICMP_DEST_UNREACH :
+ ICMPV6_DEST_UNREACH;
+ int expected_origin = (af == AF_INET) ? SO_EE_ORIGIN_ICMP :
+ SO_EE_ORIGIN_ICMP6;
+ int expected_level = (af == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6;
+ int expected_type = (af == AF_INET) ? IP_RECVERR : IPV6_RECVERR;
struct sock_extended_err *sock_err;
char ctrl_buf[512];
struct msghdr msg = {
@@ -63,38 +77,43 @@ static void read_icmp_errqueue(int sockfd, int expected_code)
return;
for (; cm; cm = CMSG_NXTHDR(&msg, cm)) {
- if (cm->cmsg_level != IPPROTO_IP || cm->cmsg_type != IP_RECVERR)
+ if (cm->cmsg_level != expected_level ||
+ cm->cmsg_type != expected_type)
continue;
sock_err = (struct sock_extended_err *)CMSG_DATA(cm);
- if (!ASSERT_EQ(sock_err->ee_origin, SO_EE_ORIGIN_ICMP,
- "sock_err_origin_icmp"))
+ if (!ASSERT_EQ(sock_err->ee_origin, expected_origin,
+ "sock_err_origin"))
return;
- if (!ASSERT_EQ(sock_err->ee_type, ICMP_DEST_UNREACH,
+ if (!ASSERT_EQ(sock_err->ee_type, expected_ee_type,
"sock_err_type_dest_unreach"))
return;
ASSERT_EQ(sock_err->ee_code, expected_code, "sock_err_code");
return;
}
- ASSERT_FAIL("no IP_RECVERR control message found");
+ ASSERT_FAIL("no IP_RECVERR/IPV6_RECVERR control message found");
}
-static bool valid_unreach_code(int code)
+static bool valid_unreach_code(int code, int af)
{
if (code < 0)
return false;
- return code <= NR_ICMP_UNREACH && code != ICMP_FRAG_NEEDED;
+ if (af == AF_INET)
+ return code <= NR_ICMP_UNREACH && code != ICMP_FRAG_NEEDED;
+
+ return code <= ICMPV6_REJECT_ROUTE;
}
-static void trigger_prog_read_icmp_errqueue(struct icmp_send *skel, int code)
+static void trigger_prog_read_icmp_errqueue(struct icmp_send *skel, int code,
+ int af, const char *ip)
{
int srv_fd = -1, client_fd = -1;
int port;
- srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", 0, TIMEOUT_MS);
+ srv_fd = start_server(af, SOCK_STREAM, ip, 0, TIMEOUT_MS);
if (!ASSERT_OK_FD(srv_fd, "start_server"))
return;
@@ -105,6 +124,8 @@ static void trigger_prog_read_icmp_errqueue(struct icmp_send *skel, int code)
}
skel->bss->server_port = ntohs(port);
+ skel->bss->unreach_type = (af == AF_INET) ? ICMP_DEST_UNREACH :
+ ICMPV6_DEST_UNREACH;
skel->bss->unreach_code = code;
skel->data->kfunc_ret = KFUNC_RET_UNSET;
@@ -114,13 +135,37 @@ static void trigger_prog_read_icmp_errqueue(struct icmp_send *skel, int code)
return;
}
- if (valid_unreach_code(code))
- read_icmp_errqueue(client_fd, code);
+ if (valid_unreach_code(code, af))
+ read_icmp_errqueue(client_fd, code, af);
close(client_fd);
close(srv_fd);
}
+static void run_icmp_test(struct icmp_send *skel, int af, const char *ip,
+ int max_code)
+{
+ for (int code = 0; code <= max_code; code++) {
+ if (af == AF_INET && code == ICMP_FRAG_NEEDED)
+ continue;
+
+ trigger_prog_read_icmp_errqueue(skel, code, af, ip);
+ ASSERT_EQ(skel->data->kfunc_ret, 0, "kfunc_ret");
+ }
+
+ /* Test invalid codes */
+ trigger_prog_read_icmp_errqueue(skel, -1, af, ip);
+ ASSERT_EQ(skel->data->kfunc_ret, -EINVAL, "kfunc_ret");
+
+ trigger_prog_read_icmp_errqueue(skel, max_code + 1, af, ip);
+ ASSERT_EQ(skel->data->kfunc_ret, -EINVAL, "kfunc_ret");
+
+ if (af == AF_INET) {
+ trigger_prog_read_icmp_errqueue(skel, ICMP_FRAG_NEEDED, af, ip);
+ ASSERT_EQ(skel->data->kfunc_ret, -EINVAL, "kfunc_ret");
+ }
+}
+
void test_icmp_send_unreach_cgroup(void)
{
struct icmp_send *skel;
@@ -139,23 +184,11 @@ void test_icmp_send_unreach_cgroup(void)
if (!ASSERT_OK_PTR(skel->links.egress, "prog_attach_cgroup"))
goto cleanup;
- for (int code = 0; code <= NR_ICMP_UNREACH; code++) {
- if (code == ICMP_FRAG_NEEDED)
- continue;
-
- trigger_prog_read_icmp_errqueue(skel, code);
- ASSERT_EQ(skel->data->kfunc_ret, 0, "kfunc_ret");
- }
-
- /* Test invalid codes */
- trigger_prog_read_icmp_errqueue(skel, -1);
- ASSERT_EQ(skel->data->kfunc_ret, -EINVAL, "kfunc_ret");
+ if (test__start_subtest("ipv4"))
+ run_icmp_test(skel, AF_INET, "127.0.0.1", NR_ICMP_UNREACH);
- trigger_prog_read_icmp_errqueue(skel, NR_ICMP_UNREACH + 1);
- ASSERT_EQ(skel->data->kfunc_ret, -EINVAL, "kfunc_ret");
-
- trigger_prog_read_icmp_errqueue(skel, ICMP_FRAG_NEEDED);
- ASSERT_EQ(skel->data->kfunc_ret, -EINVAL, "kfunc_ret");
+ if (test__start_subtest("ipv6"))
+ run_icmp_test(skel, AF_INET6, "::1", ICMPV6_REJECT_ROUTE);
cleanup:
icmp_send__destroy(skel);
diff --git a/tools/testing/selftests/bpf/progs/icmp_send.c b/tools/testing/selftests/bpf/progs/icmp_send.c
index 6d0be0a9afe1..6e1ba539eeb0 100644
--- a/tools/testing/selftests/bpf/progs/icmp_send.c
+++ b/tools/testing/selftests/bpf/progs/icmp_send.c
@@ -5,10 +5,11 @@
/* 127.0.0.1 in host byte order */
#define SERVER_IP 0x7F000001
-
-#define ICMP_DEST_UNREACH 3
+/* ::1 in host byte order (last 32-bit word) */
+#define SERVER_IP6_LO 0x00000001
__u16 server_port = 0;
+int unreach_type = 0;
int unreach_code = 0;
int kfunc_ret = -1;
@@ -18,19 +19,48 @@ int egress(struct __sk_buff *skb)
void *data = (void *)(long)skb->data;
void *data_end = (void *)(long)skb->data_end;
struct iphdr *iph;
+ struct ipv6hdr *ip6h;
struct tcphdr *tcph;
+ __u8 version;
- iph = data;
- if ((void *)(iph + 1) > data_end || iph->version != 4 ||
- iph->protocol != IPPROTO_TCP || iph->daddr != bpf_htonl(SERVER_IP))
+ if (data + 1 > data_end)
return SK_PASS;
- tcph = (void *)iph + iph->ihl * 4;
- if ((void *)(tcph + 1) > data_end ||
- tcph->dest != bpf_htons(server_port))
+ version = (*((__u8 *)data)) >> 4;
+
+ if (version == 4) {
+ iph = data;
+ if ((void *)(iph + 1) > data_end ||
+ iph->protocol != IPPROTO_TCP ||
+ iph->daddr != bpf_htonl(SERVER_IP))
+ return SK_PASS;
+
+ tcph = (void *)iph + iph->ihl * 4;
+ if ((void *)(tcph + 1) > data_end ||
+ tcph->dest != bpf_htons(server_port))
+ return SK_PASS;
+
+ } else if (version == 6) {
+ ip6h = data;
+ if ((void *)(ip6h + 1) > data_end ||
+ ip6h->nexthdr != IPPROTO_TCP)
+ return SK_PASS;
+
+ if (ip6h->daddr.in6_u.u6_addr32[0] != 0 ||
+ ip6h->daddr.in6_u.u6_addr32[1] != 0 ||
+ ip6h->daddr.in6_u.u6_addr32[2] != 0 ||
+ ip6h->daddr.in6_u.u6_addr32[3] != bpf_htonl(SERVER_IP6_LO))
+ return SK_PASS;
+
+ tcph = (void *)(ip6h + 1);
+ if ((void *)(tcph + 1) > data_end ||
+ tcph->dest != bpf_htons(server_port))
+ return SK_PASS;
+ } else {
return SK_PASS;
+ }
- kfunc_ret = bpf_icmp_send(skb, ICMP_DEST_UNREACH, unreach_code);
+ kfunc_ret = bpf_icmp_send(skb, unreach_type, unreach_code);
return SK_DROP;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH bpf-next v9 3/5] selftests/bpf: add bpf_icmp_send kfunc cgroup_skb IPv6 tests
2026-06-24 18:55 ` [PATCH bpf-next v9 3/5] selftests/bpf: add bpf_icmp_send kfunc cgroup_skb IPv6 tests Mahe Tardy
@ 2026-06-24 21:24 ` Jordan Rife
0 siblings, 0 replies; 14+ messages in thread
From: Jordan Rife @ 2026-06-24 21:24 UTC (permalink / raw)
To: Mahe Tardy
Cc: bpf, andrii, ast, daniel, john.fastabend, martin.lau,
yonghong.song, emil
On Wed, Jun 24, 2026 at 06:55:52PM +0000, Mahe Tardy wrote:
> This test extends the existing cgroup_skb tests with IPv6 support.
>
> Note that we need to set IPV6_RECVERR on the socket for IPv6 in
> connect_to_fd_nonblock otherwise the error will be ignored even if we
> are in the middle of the TCP handshake. See in
> net/ipv6/datagram.c:ipv6_icmp_error for more details.
>
> Signed-off-by: Mahe Tardy <mahe.tardy@gmail.com>
> ---
> .../bpf/prog_tests/icmp_send_kfunc.c | 91 +++++++++++++------
> tools/testing/selftests/bpf/progs/icmp_send.c | 48 ++++++++--
> 2 files changed, 101 insertions(+), 38 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c b/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
> index b8a98c90053e..bbb3c3d4509c 100644
> --- a/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
> +++ b/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
> @@ -8,9 +8,11 @@
> #define TIMEOUT_MS 1000
>
> #define ICMP_DEST_UNREACH 3
> +#define ICMPV6_DEST_UNREACH 1
>
> #define ICMP_FRAG_NEEDED 4
> #define NR_ICMP_UNREACH 15
> +#define ICMPV6_REJECT_ROUTE 6
>
> #define KFUNC_RET_UNSET -1
>
> @@ -18,7 +20,7 @@ static int connect_to_fd_nonblock(int server_fd)
> {
> struct sockaddr_storage addr;
> socklen_t len = sizeof(addr);
> - int fd, err;
> + int fd, err, on = 1;
>
> if (getsockname(server_fd, (struct sockaddr *)&addr, &len))
> return -1;
> @@ -27,6 +29,12 @@ static int connect_to_fd_nonblock(int server_fd)
> if (fd < 0)
> return -1;
>
> + if (addr.ss_family == AF_INET6 &&
> + setsockopt(fd, IPPROTO_IPV6, IPV6_RECVERR, &on, sizeof(on)) < 0) {
> + close(fd);
> + return -1;
> + }
> +
> err = connect(fd, (struct sockaddr *)&addr, len);
> if (err < 0 && errno != EINPROGRESS) {
> close(fd);
> @@ -36,8 +44,14 @@ static int connect_to_fd_nonblock(int server_fd)
> return fd;
> }
>
> -static void read_icmp_errqueue(int sockfd, int expected_code)
> +static void read_icmp_errqueue(int sockfd, int expected_code, int af)
> {
> + int expected_ee_type = (af == AF_INET) ? ICMP_DEST_UNREACH :
> + ICMPV6_DEST_UNREACH;
> + int expected_origin = (af == AF_INET) ? SO_EE_ORIGIN_ICMP :
> + SO_EE_ORIGIN_ICMP6;
> + int expected_level = (af == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6;
> + int expected_type = (af == AF_INET) ? IP_RECVERR : IPV6_RECVERR;
> struct sock_extended_err *sock_err;
> char ctrl_buf[512];
> struct msghdr msg = {
> @@ -63,38 +77,43 @@ static void read_icmp_errqueue(int sockfd, int expected_code)
> return;
>
> for (; cm; cm = CMSG_NXTHDR(&msg, cm)) {
> - if (cm->cmsg_level != IPPROTO_IP || cm->cmsg_type != IP_RECVERR)
> + if (cm->cmsg_level != expected_level ||
> + cm->cmsg_type != expected_type)
> continue;
>
> sock_err = (struct sock_extended_err *)CMSG_DATA(cm);
>
> - if (!ASSERT_EQ(sock_err->ee_origin, SO_EE_ORIGIN_ICMP,
> - "sock_err_origin_icmp"))
> + if (!ASSERT_EQ(sock_err->ee_origin, expected_origin,
> + "sock_err_origin"))
> return;
> - if (!ASSERT_EQ(sock_err->ee_type, ICMP_DEST_UNREACH,
> + if (!ASSERT_EQ(sock_err->ee_type, expected_ee_type,
> "sock_err_type_dest_unreach"))
> return;
> ASSERT_EQ(sock_err->ee_code, expected_code, "sock_err_code");
> return;
> }
>
> - ASSERT_FAIL("no IP_RECVERR control message found");
> + ASSERT_FAIL("no IP_RECVERR/IPV6_RECVERR control message found");
> }
>
> -static bool valid_unreach_code(int code)
> +static bool valid_unreach_code(int code, int af)
> {
> if (code < 0)
> return false;
>
> - return code <= NR_ICMP_UNREACH && code != ICMP_FRAG_NEEDED;
> + if (af == AF_INET)
> + return code <= NR_ICMP_UNREACH && code != ICMP_FRAG_NEEDED;
> +
> + return code <= ICMPV6_REJECT_ROUTE;
> }
>
> -static void trigger_prog_read_icmp_errqueue(struct icmp_send *skel, int code)
> +static void trigger_prog_read_icmp_errqueue(struct icmp_send *skel, int code,
> + int af, const char *ip)
> {
> int srv_fd = -1, client_fd = -1;
> int port;
>
> - srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", 0, TIMEOUT_MS);
> + srv_fd = start_server(af, SOCK_STREAM, ip, 0, TIMEOUT_MS);
> if (!ASSERT_OK_FD(srv_fd, "start_server"))
> return;
>
> @@ -105,6 +124,8 @@ static void trigger_prog_read_icmp_errqueue(struct icmp_send *skel, int code)
> }
>
> skel->bss->server_port = ntohs(port);
> + skel->bss->unreach_type = (af == AF_INET) ? ICMP_DEST_UNREACH :
> + ICMPV6_DEST_UNREACH;
> skel->bss->unreach_code = code;
> skel->data->kfunc_ret = KFUNC_RET_UNSET;
>
> @@ -114,13 +135,37 @@ static void trigger_prog_read_icmp_errqueue(struct icmp_send *skel, int code)
> return;
> }
>
> - if (valid_unreach_code(code))
> - read_icmp_errqueue(client_fd, code);
> + if (valid_unreach_code(code, af))
> + read_icmp_errqueue(client_fd, code, af);
>
> close(client_fd);
> close(srv_fd);
> }
>
> +static void run_icmp_test(struct icmp_send *skel, int af, const char *ip,
> + int max_code)
> +{
> + for (int code = 0; code <= max_code; code++) {
> + if (af == AF_INET && code == ICMP_FRAG_NEEDED)
> + continue;
> +
> + trigger_prog_read_icmp_errqueue(skel, code, af, ip);
> + ASSERT_EQ(skel->data->kfunc_ret, 0, "kfunc_ret");
> + }
> +
> + /* Test invalid codes */
> + trigger_prog_read_icmp_errqueue(skel, -1, af, ip);
> + ASSERT_EQ(skel->data->kfunc_ret, -EINVAL, "kfunc_ret");
> +
> + trigger_prog_read_icmp_errqueue(skel, max_code + 1, af, ip);
> + ASSERT_EQ(skel->data->kfunc_ret, -EINVAL, "kfunc_ret");
> +
> + if (af == AF_INET) {
> + trigger_prog_read_icmp_errqueue(skel, ICMP_FRAG_NEEDED, af, ip);
> + ASSERT_EQ(skel->data->kfunc_ret, -EINVAL, "kfunc_ret");
> + }
> +}
> +
> void test_icmp_send_unreach_cgroup(void)
> {
> struct icmp_send *skel;
> @@ -139,23 +184,11 @@ void test_icmp_send_unreach_cgroup(void)
> if (!ASSERT_OK_PTR(skel->links.egress, "prog_attach_cgroup"))
> goto cleanup;
>
> - for (int code = 0; code <= NR_ICMP_UNREACH; code++) {
> - if (code == ICMP_FRAG_NEEDED)
> - continue;
> -
> - trigger_prog_read_icmp_errqueue(skel, code);
> - ASSERT_EQ(skel->data->kfunc_ret, 0, "kfunc_ret");
> - }
> -
> - /* Test invalid codes */
> - trigger_prog_read_icmp_errqueue(skel, -1);
> - ASSERT_EQ(skel->data->kfunc_ret, -EINVAL, "kfunc_ret");
> + if (test__start_subtest("ipv4"))
> + run_icmp_test(skel, AF_INET, "127.0.0.1", NR_ICMP_UNREACH);
>
> - trigger_prog_read_icmp_errqueue(skel, NR_ICMP_UNREACH + 1);
> - ASSERT_EQ(skel->data->kfunc_ret, -EINVAL, "kfunc_ret");
> -
> - trigger_prog_read_icmp_errqueue(skel, ICMP_FRAG_NEEDED);
> - ASSERT_EQ(skel->data->kfunc_ret, -EINVAL, "kfunc_ret");
> + if (test__start_subtest("ipv6"))
> + run_icmp_test(skel, AF_INET6, "::1", ICMPV6_REJECT_ROUTE);
>
> cleanup:
> icmp_send__destroy(skel);
> diff --git a/tools/testing/selftests/bpf/progs/icmp_send.c b/tools/testing/selftests/bpf/progs/icmp_send.c
> index 6d0be0a9afe1..6e1ba539eeb0 100644
> --- a/tools/testing/selftests/bpf/progs/icmp_send.c
> +++ b/tools/testing/selftests/bpf/progs/icmp_send.c
> @@ -5,10 +5,11 @@
>
> /* 127.0.0.1 in host byte order */
> #define SERVER_IP 0x7F000001
> -
> -#define ICMP_DEST_UNREACH 3
> +/* ::1 in host byte order (last 32-bit word) */
> +#define SERVER_IP6_LO 0x00000001
>
> __u16 server_port = 0;
> +int unreach_type = 0;
> int unreach_code = 0;
> int kfunc_ret = -1;
>
> @@ -18,19 +19,48 @@ int egress(struct __sk_buff *skb)
> void *data = (void *)(long)skb->data;
> void *data_end = (void *)(long)skb->data_end;
> struct iphdr *iph;
> + struct ipv6hdr *ip6h;
> struct tcphdr *tcph;
> + __u8 version;
>
> - iph = data;
> - if ((void *)(iph + 1) > data_end || iph->version != 4 ||
> - iph->protocol != IPPROTO_TCP || iph->daddr != bpf_htonl(SERVER_IP))
> + if (data + 1 > data_end)
> return SK_PASS;
>
> - tcph = (void *)iph + iph->ihl * 4;
> - if ((void *)(tcph + 1) > data_end ||
> - tcph->dest != bpf_htons(server_port))
> + version = (*((__u8 *)data)) >> 4;
> +
> + if (version == 4) {
> + iph = data;
> + if ((void *)(iph + 1) > data_end ||
> + iph->protocol != IPPROTO_TCP ||
> + iph->daddr != bpf_htonl(SERVER_IP))
> + return SK_PASS;
> +
> + tcph = (void *)iph + iph->ihl * 4;
> + if ((void *)(tcph + 1) > data_end ||
> + tcph->dest != bpf_htons(server_port))
> + return SK_PASS;
> +
> + } else if (version == 6) {
> + ip6h = data;
> + if ((void *)(ip6h + 1) > data_end ||
> + ip6h->nexthdr != IPPROTO_TCP)
> + return SK_PASS;
> +
> + if (ip6h->daddr.in6_u.u6_addr32[0] != 0 ||
> + ip6h->daddr.in6_u.u6_addr32[1] != 0 ||
> + ip6h->daddr.in6_u.u6_addr32[2] != 0 ||
> + ip6h->daddr.in6_u.u6_addr32[3] != bpf_htonl(SERVER_IP6_LO))
> + return SK_PASS;
> +
> + tcph = (void *)(ip6h + 1);
> + if ((void *)(tcph + 1) > data_end ||
> + tcph->dest != bpf_htons(server_port))
> + return SK_PASS;
> + } else {
> return SK_PASS;
> + }
>
> - kfunc_ret = bpf_icmp_send(skb, ICMP_DEST_UNREACH, unreach_code);
> + kfunc_ret = bpf_icmp_send(skb, unreach_type, unreach_code);
>
> return SK_DROP;
> }
> --
> 2.34.1
>
Reviewed-by: Jordan Rife <jordan@jrife.io>
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH bpf-next v9 4/5] selftests/bpf: add bpf_icmp_send recursion test
2026-06-24 18:55 [PATCH bpf-next v9 0/5] bpf: add icmp_send kfunc Mahe Tardy
` (2 preceding siblings ...)
2026-06-24 18:55 ` [PATCH bpf-next v9 3/5] selftests/bpf: add bpf_icmp_send kfunc cgroup_skb IPv6 tests Mahe Tardy
@ 2026-06-24 18:55 ` Mahe Tardy
2026-06-24 21:24 ` Jordan Rife
2026-06-24 18:55 ` [PATCH bpf-next v9 5/5] selftests/bpf: add bpf_icmp_send no route test Mahe Tardy
4 siblings, 1 reply; 14+ messages in thread
From: Mahe Tardy @ 2026-06-24 18:55 UTC (permalink / raw)
To: bpf
Cc: andrii, ast, daniel, john.fastabend, jordan, martin.lau,
yonghong.song, emil, Mahe Tardy
This test is similar to test_icmp_send_unreach_cgroup but checks the
recursion case: when the BPF program calling the kfunc is re-triggered
by the ICMP message sent by the kfunc itself, the nested kfunc call
stops early and returns -EBUSY.
The test attaches to the root cgroup to ensure the ICMP packet generated
by the kfunc re-triggers the BPF program.
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
Signed-off-by: Mahe Tardy <mahe.tardy@gmail.com>
---
.../bpf/prog_tests/icmp_send_kfunc.c | 46 ++++++++++++++++
tools/testing/selftests/bpf/progs/icmp_send.c | 55 +++++++++++++++++++
2 files changed, 101 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c b/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
index bbb3c3d4509c..bb532aa0d158 100644
--- a/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <network_helpers.h>
+#include <cgroup_helpers.h>
#include <linux/errqueue.h>
#include <poll.h>
+#include <unistd.h>
#include "icmp_send.skel.h"
#define TIMEOUT_MS 1000
@@ -10,6 +12,7 @@
#define ICMP_DEST_UNREACH 3
#define ICMPV6_DEST_UNREACH 1
+#define ICMP_HOST_UNREACH 1
#define ICMP_FRAG_NEEDED 4
#define NR_ICMP_UNREACH 15
#define ICMPV6_REJECT_ROUTE 6
@@ -195,3 +198,46 @@ void test_icmp_send_unreach_cgroup(void)
if (cgroup_fd >= 0)
close(cgroup_fd);
}
+
+void test_icmp_send_unreach_recursion(void)
+{
+ struct icmp_send *skel;
+ int cgroup_fd = -1;
+ int err;
+
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "setup_cgroup_environment"))
+ return;
+
+ skel = icmp_send__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ cgroup_fd = get_root_cgroup();
+ if (!ASSERT_OK_FD(cgroup_fd, "get_root_cgroup"))
+ goto cleanup;
+
+ skel->data->target_pid = getpid();
+ skel->links.recursion =
+ bpf_program__attach_cgroup(skel->progs.recursion, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.recursion, "prog_attach_cgroup"))
+ goto cleanup;
+
+ trigger_prog_read_icmp_errqueue(skel, ICMP_HOST_UNREACH, AF_INET,
+ "127.0.0.1");
+
+ /*
+ * The nested invocation (triggered by the generated ICMP packet)
+ * finishes first and records -EBUSY at index 0; the outer invocation's
+ * kfunc call then returns and records 0 at index 1.
+ */
+ ASSERT_EQ(skel->bss->rec_count, 2, "rec_count");
+ ASSERT_EQ(skel->data->rec_kfunc_rets[0], -EBUSY, "kfunc_rets[0]");
+ ASSERT_EQ(skel->data->rec_kfunc_rets[1], 0, "kfunc_rets[1]");
+
+cleanup:
+ icmp_send__destroy(skel);
+ if (cgroup_fd >= 0)
+ close(cgroup_fd);
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/progs/icmp_send.c b/tools/testing/selftests/bpf/progs/icmp_send.c
index 6e1ba539eeb0..c642ccdf9fd5 100644
--- a/tools/testing/selftests/bpf/progs/icmp_send.c
+++ b/tools/testing/selftests/bpf/progs/icmp_send.c
@@ -12,6 +12,10 @@ __u16 server_port = 0;
int unreach_type = 0;
int unreach_code = 0;
int kfunc_ret = -1;
+int target_pid = -1;
+
+unsigned int rec_count = 0;
+int rec_kfunc_rets[] = { -1, -1 };
SEC("cgroup_skb/egress")
int egress(struct __sk_buff *skb)
@@ -65,4 +69,55 @@ int egress(struct __sk_buff *skb)
return SK_DROP;
}
+SEC("cgroup_skb/egress")
+int recursion(struct __sk_buff *skb)
+{
+ void *data = (void *)(long)skb->data;
+ void *data_end = (void *)(long)skb->data_end;
+ struct icmphdr *icmph;
+ struct tcphdr *tcph;
+ struct iphdr *iph;
+ int ret;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+ return SK_PASS;
+
+ iph = data;
+ if ((void *)(iph + 1) > data_end || iph->version != 4)
+ return SK_PASS;
+
+ if (iph->daddr != bpf_htonl(SERVER_IP))
+ return SK_PASS;
+
+ if (iph->protocol == IPPROTO_TCP) {
+ tcph = (void *)iph + iph->ihl * 4;
+ if ((void *)(tcph + 1) > data_end ||
+ tcph->dest != bpf_htons(server_port))
+ return SK_PASS;
+ } else if (iph->protocol == IPPROTO_ICMP) {
+ icmph = (void *)iph + iph->ihl * 4;
+ if ((void *)(icmph + 1) > data_end ||
+ icmph->type != unreach_type || icmph->code != unreach_code)
+ return SK_PASS;
+ } else {
+ return SK_PASS;
+ }
+
+ /*
+ * This call will provoke a recursion: the ICMP packet generated by the
+ * kfunc will re-trigger this program since we are attached to the root
+ * cgroup, to which the kernel ICMP socket belongs. However, when
+ * re-entering the kfunc, it should return -EBUSY.
+ */
+ ret = bpf_icmp_send(skb, unreach_type, unreach_code);
+ rec_kfunc_rets[rec_count & 1] = ret;
+ __sync_fetch_and_add(&rec_count, 1);
+
+ /* Let the first ICMP error message pass */
+ if (iph->protocol == IPPROTO_ICMP)
+ return SK_PASS;
+
+ return SK_DROP;
+}
+
char LICENSE[] SEC("license") = "Dual BSD/GPL";
--
2.34.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH bpf-next v9 4/5] selftests/bpf: add bpf_icmp_send recursion test
2026-06-24 18:55 ` [PATCH bpf-next v9 4/5] selftests/bpf: add bpf_icmp_send recursion test Mahe Tardy
@ 2026-06-24 21:24 ` Jordan Rife
0 siblings, 0 replies; 14+ messages in thread
From: Jordan Rife @ 2026-06-24 21:24 UTC (permalink / raw)
To: Mahe Tardy
Cc: bpf, andrii, ast, daniel, john.fastabend, martin.lau,
yonghong.song, emil
On Wed, Jun 24, 2026 at 06:55:53PM +0000, Mahe Tardy wrote:
> This test is similar to test_icmp_send_unreach_cgroup but checks that,
> in case of recursion, meaning that the BPF program calling the kfunc was
> re-triggered by the icmp_send done by the kfunc, the kfunc will stop
> early and return -EBUSY.
>
> The test attaches to the root cgroup to ensure the ICMP packet generated
> by the kfunc re-triggers the BPF program.
>
> Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
> Signed-off-by: Mahe Tardy <mahe.tardy@gmail.com>
> ---
> .../bpf/prog_tests/icmp_send_kfunc.c | 46 ++++++++++++++++
> tools/testing/selftests/bpf/progs/icmp_send.c | 55 +++++++++++++++++++
> 2 files changed, 101 insertions(+)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c b/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
> index bbb3c3d4509c..bb532aa0d158 100644
> --- a/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
> +++ b/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
> @@ -1,8 +1,10 @@
> // SPDX-License-Identifier: GPL-2.0
> #include <test_progs.h>
> #include <network_helpers.h>
> +#include <cgroup_helpers.h>
> #include <linux/errqueue.h>
> #include <poll.h>
> +#include <unistd.h>
> #include "icmp_send.skel.h"
>
> #define TIMEOUT_MS 1000
> @@ -10,6 +12,7 @@
> #define ICMP_DEST_UNREACH 3
> #define ICMPV6_DEST_UNREACH 1
>
> +#define ICMP_HOST_UNREACH 1
> #define ICMP_FRAG_NEEDED 4
> #define NR_ICMP_UNREACH 15
> #define ICMPV6_REJECT_ROUTE 6
> @@ -195,3 +198,46 @@ void test_icmp_send_unreach_cgroup(void)
> if (cgroup_fd >= 0)
> close(cgroup_fd);
> }
> +
> +void test_icmp_send_unreach_recursion(void)
> +{
> + struct icmp_send *skel;
> + int cgroup_fd = -1;
> + int err;
> +
> + err = setup_cgroup_environment();
> + if (!ASSERT_OK(err, "setup_cgroup_environment"))
> + return;
> +
> + skel = icmp_send__open_and_load();
> + if (!ASSERT_OK_PTR(skel, "skel_open"))
> + goto cleanup;
> +
> + cgroup_fd = get_root_cgroup();
> + if (!ASSERT_OK_FD(cgroup_fd, "get_root_cgroup"))
> + goto cleanup;
> +
> + skel->data->target_pid = getpid();
> + skel->links.recursion =
> + bpf_program__attach_cgroup(skel->progs.recursion, cgroup_fd);
> + if (!ASSERT_OK_PTR(skel->links.recursion, "prog_attach_cgroup"))
> + goto cleanup;
> +
> + trigger_prog_read_icmp_errqueue(skel, ICMP_HOST_UNREACH, AF_INET,
> + "127.0.0.1");
> +
> + /*
> + * Because there's recursion involved, the first call will return at
> + * index 1 since it will return the second, and the second call will
> + * return at index 0 since it will return the first.
> + */
> + ASSERT_EQ(skel->bss->rec_count, 2, "rec_count");
> + ASSERT_EQ(skel->data->rec_kfunc_rets[0], -EBUSY, "kfunc_rets[0]");
> + ASSERT_EQ(skel->data->rec_kfunc_rets[1], 0, "kfunc_rets[1]");
> +
> +cleanup:
> + icmp_send__destroy(skel);
> + if (cgroup_fd >= 0)
> + close(cgroup_fd);
> + cleanup_cgroup_environment();
> +}
> diff --git a/tools/testing/selftests/bpf/progs/icmp_send.c b/tools/testing/selftests/bpf/progs/icmp_send.c
> index 6e1ba539eeb0..c642ccdf9fd5 100644
> --- a/tools/testing/selftests/bpf/progs/icmp_send.c
> +++ b/tools/testing/selftests/bpf/progs/icmp_send.c
> @@ -12,6 +12,10 @@ __u16 server_port = 0;
> int unreach_type = 0;
> int unreach_code = 0;
> int kfunc_ret = -1;
> +int target_pid = -1;
> +
> +unsigned int rec_count = 0;
> +int rec_kfunc_rets[] = { -1, -1 };
>
> SEC("cgroup_skb/egress")
> int egress(struct __sk_buff *skb)
> @@ -65,4 +69,55 @@ int egress(struct __sk_buff *skb)
> return SK_DROP;
> }
>
> +SEC("cgroup_skb/egress")
> +int recursion(struct __sk_buff *skb)
> +{
> + void *data = (void *)(long)skb->data;
> + void *data_end = (void *)(long)skb->data_end;
> + struct icmphdr *icmph;
> + struct tcphdr *tcph;
> + struct iphdr *iph;
> + int ret;
> +
> + if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
> + return SK_PASS;
> +
> + iph = data;
> + if ((void *)(iph + 1) > data_end || iph->version != 4)
> + return SK_PASS;
> +
> + if (iph->daddr != bpf_htonl(SERVER_IP))
> + return SK_PASS;
> +
> + if (iph->protocol == IPPROTO_TCP) {
> + tcph = (void *)iph + iph->ihl * 4;
> + if ((void *)(tcph + 1) > data_end ||
> + tcph->dest != bpf_htons(server_port))
> + return SK_PASS;
> + } else if (iph->protocol == IPPROTO_ICMP) {
> + icmph = (void *)iph + iph->ihl * 4;
> + if ((void *)(icmph + 1) > data_end ||
> + icmph->type != unreach_type || icmph->code != unreach_code)
> + return SK_PASS;
> + } else {
> + return SK_PASS;
> + }
> +
> + /*
> + * This call will provoke a recursion: the ICMP packet generated by the
> + * kfunc will re-trigger this program since we are in the root cgroup in
> + * which the kernel ICMP socket belongs. However when re-entering the
> + * kfunc, it should return EBUSY.
> + */
> + ret = bpf_icmp_send(skb, unreach_type, unreach_code);
> + rec_kfunc_rets[rec_count & 1] = ret;
> + __sync_fetch_and_add(&rec_count, 1);
> +
> + /* Let the first ICMP error message pass */
> + if (iph->protocol == IPPROTO_ICMP)
> + return SK_PASS;
> +
> + return SK_DROP;
> +}
> +
> char LICENSE[] SEC("license") = "Dual BSD/GPL";
> --
> 2.34.1
>
Reviewed-by: Jordan Rife <jordan@jrife.io>
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH bpf-next v9 5/5] selftests/bpf: add bpf_icmp_send no route test
2026-06-24 18:55 [PATCH bpf-next v9 0/5] bpf: add icmp_send kfunc Mahe Tardy
` (3 preceding siblings ...)
2026-06-24 18:55 ` [PATCH bpf-next v9 4/5] selftests/bpf: add bpf_icmp_send recursion test Mahe Tardy
@ 2026-06-24 18:55 ` Mahe Tardy
2026-06-24 19:13 ` Emil Tsalapatis
2026-06-24 21:24 ` Jordan Rife
4 siblings, 2 replies; 14+ messages in thread
From: Mahe Tardy @ 2026-06-24 18:55 UTC (permalink / raw)
To: bpf
Cc: andrii, ast, daniel, john.fastabend, jordan, martin.lau,
yonghong.song, emil, Mahe Tardy
For normal live cgroup_skb paths, the skb should already be routed. The
exception is test runs via BPF_PROG_TEST_RUN, where packets are created
by bpf_prog_test_run_skb. Those lack a dst route, and thus icmp_send
would quietly fail by returning early.
This test exercises that case and makes sure the kfunc returns -ENETUNREACH.
Signed-off-by: Mahe Tardy <mahe.tardy@gmail.com>
---
.../bpf/prog_tests/icmp_send_kfunc.c | 26 +++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c b/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
index bb532aa0d158..ffaf0fe1880b 100644
--- a/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
@@ -169,6 +169,29 @@ static void run_icmp_test(struct icmp_send *skel, int af, const char *ip,
}
}
+static void run_icmp_no_route_test(struct icmp_send *skel)
+{
+ struct ipv4_packet pkt = pkt_v4;
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt,
+ .data_size_in = sizeof(pkt),
+ );
+ int err;
+
+ pkt.iph.version = 4;
+ pkt.iph.daddr = inet_addr("127.0.0.1");
+ pkt.tcp.dest = htons(80);
+ skel->bss->server_port = 80;
+ skel->bss->unreach_type = ICMP_DEST_UNREACH;
+ skel->bss->unreach_code = ICMP_HOST_UNREACH;
+ skel->data->kfunc_ret = KFUNC_RET_UNSET;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.egress), &opts);
+ if (!ASSERT_OK(err, "test_run"))
+ return;
+ ASSERT_EQ(skel->data->kfunc_ret, -ENETUNREACH, "kfunc_ret_no_route");
+}
+
void test_icmp_send_unreach_cgroup(void)
{
struct icmp_send *skel;
@@ -193,6 +216,9 @@ void test_icmp_send_unreach_cgroup(void)
if (test__start_subtest("ipv6"))
run_icmp_test(skel, AF_INET6, "::1", ICMPV6_REJECT_ROUTE);
+ if (test__start_subtest("no_route"))
+ run_icmp_no_route_test(skel);
+
cleanup:
icmp_send__destroy(skel);
if (cgroup_fd >= 0)
--
2.34.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH bpf-next v9 5/5] selftests/bpf: add bpf_icmp_send no route test
2026-06-24 18:55 ` [PATCH bpf-next v9 5/5] selftests/bpf: add bpf_icmp_send no route test Mahe Tardy
@ 2026-06-24 19:13 ` Emil Tsalapatis
2026-06-24 21:24 ` Jordan Rife
1 sibling, 0 replies; 14+ messages in thread
From: Emil Tsalapatis @ 2026-06-24 19:13 UTC (permalink / raw)
To: Mahe Tardy, bpf
Cc: andrii, ast, daniel, john.fastabend, jordan, martin.lau,
yonghong.song, emil
On Wed Jun 24, 2026 at 2:55 PM EDT, Mahe Tardy wrote:
> For normal live cgroup_skb paths, the skb should already be routed. The
> exception is for test run via BPF_PROG_TEST_RUN with packets created
> via bpf_prog_test_run_skb. Those lack dst route and thus the icmp_send
> would quietly fail by returning early.
>
> This test exercises this and makes sure the kfunc returns -ENETUNREACH.
>
> Signed-off-by: Mahe Tardy <mahe.tardy@gmail.com>
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
> ---
> .../bpf/prog_tests/icmp_send_kfunc.c | 26 +++++++++++++++++++
> 1 file changed, 26 insertions(+)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c b/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
> index bb532aa0d158..ffaf0fe1880b 100644
> --- a/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
> +++ b/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
> @@ -169,6 +169,29 @@ static void run_icmp_test(struct icmp_send *skel, int af, const char *ip,
> }
> }
>
> +static void run_icmp_no_route_test(struct icmp_send *skel)
> +{
> + struct ipv4_packet pkt = pkt_v4;
> + LIBBPF_OPTS(bpf_test_run_opts, opts,
> + .data_in = &pkt,
> + .data_size_in = sizeof(pkt),
> + );
> + int err;
> +
> + pkt.iph.version = 4;
> + pkt.iph.daddr = inet_addr("127.0.0.1");
> + pkt.tcp.dest = htons(80);
> + skel->bss->server_port = 80;
> + skel->bss->unreach_type = ICMP_DEST_UNREACH;
> + skel->bss->unreach_code = ICMP_HOST_UNREACH;
> + skel->data->kfunc_ret = KFUNC_RET_UNSET;
> +
> + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.egress), &opts);
> + if (!ASSERT_OK(err, "test_run"))
> + return;
> + ASSERT_EQ(skel->data->kfunc_ret, -ENETUNREACH, "kfunc_ret_no_route");
> +}
> +
> void test_icmp_send_unreach_cgroup(void)
> {
> struct icmp_send *skel;
> @@ -193,6 +216,9 @@ void test_icmp_send_unreach_cgroup(void)
> if (test__start_subtest("ipv6"))
> run_icmp_test(skel, AF_INET6, "::1", ICMPV6_REJECT_ROUTE);
>
> + if (test__start_subtest("no_route"))
> + run_icmp_no_route_test(skel);
> +
> cleanup:
> icmp_send__destroy(skel);
> if (cgroup_fd >= 0)
> --
> 2.34.1
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH bpf-next v9 5/5] selftests/bpf: add bpf_icmp_send no route test
2026-06-24 18:55 ` [PATCH bpf-next v9 5/5] selftests/bpf: add bpf_icmp_send no route test Mahe Tardy
2026-06-24 19:13 ` Emil Tsalapatis
@ 2026-06-24 21:24 ` Jordan Rife
1 sibling, 0 replies; 14+ messages in thread
From: Jordan Rife @ 2026-06-24 21:24 UTC (permalink / raw)
To: Mahe Tardy
Cc: bpf, andrii, ast, daniel, john.fastabend, martin.lau,
yonghong.song, emil
On Wed, Jun 24, 2026 at 06:55:54PM +0000, Mahe Tardy wrote:
> For normal live cgroup_skb paths, the skb should already be routed. The
> exception is a test run via BPF_PROG_TEST_RUN, where the packet is
> created by bpf_prog_test_run_skb. Such packets lack a dst route, and thus
> the icmp_send would quietly fail by returning early.
>
> This test exercises this and makes sure the kfunc returns -ENETUNREACH.
>
> Signed-off-by: Mahe Tardy <mahe.tardy@gmail.com>
> ---
> .../bpf/prog_tests/icmp_send_kfunc.c | 26 +++++++++++++++++++
> 1 file changed, 26 insertions(+)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c b/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
> index bb532aa0d158..ffaf0fe1880b 100644
> --- a/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
> +++ b/tools/testing/selftests/bpf/prog_tests/icmp_send_kfunc.c
> @@ -169,6 +169,29 @@ static void run_icmp_test(struct icmp_send *skel, int af, const char *ip,
> }
> }
>
> +static void run_icmp_no_route_test(struct icmp_send *skel)
> +{
> + struct ipv4_packet pkt = pkt_v4;
> + LIBBPF_OPTS(bpf_test_run_opts, opts,
> + .data_in = &pkt,
> + .data_size_in = sizeof(pkt),
> + );
> + int err;
> +
> + pkt.iph.version = 4;
> + pkt.iph.daddr = inet_addr("127.0.0.1");
> + pkt.tcp.dest = htons(80);
> + skel->bss->server_port = 80;
> + skel->bss->unreach_type = ICMP_DEST_UNREACH;
> + skel->bss->unreach_code = ICMP_HOST_UNREACH;
> + skel->data->kfunc_ret = KFUNC_RET_UNSET;
> +
> + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.egress), &opts);
> + if (!ASSERT_OK(err, "test_run"))
> + return;
> + ASSERT_EQ(skel->data->kfunc_ret, -ENETUNREACH, "kfunc_ret_no_route");
> +}
> +
> void test_icmp_send_unreach_cgroup(void)
> {
> struct icmp_send *skel;
> @@ -193,6 +216,9 @@ void test_icmp_send_unreach_cgroup(void)
> if (test__start_subtest("ipv6"))
> run_icmp_test(skel, AF_INET6, "::1", ICMPV6_REJECT_ROUTE);
>
> + if (test__start_subtest("no_route"))
> + run_icmp_no_route_test(skel);
> +
> cleanup:
> icmp_send__destroy(skel);
> if (cgroup_fd >= 0)
> --
> 2.34.1
>
Reviewed-by: Jordan Rife <jordan@jrife.io>
^ permalink raw reply [flat|nested] 14+ messages in thread