From: Nick Hudson <nhudson@akamai.com>
To: bpf@vger.kernel.org, netdev@vger.kernel.org,
Willem de Bruijn <willemb@google.com>,
Martin KaFai Lau <martin.lau@linux.dev>
Cc: Nick Hudson <nhudson@akamai.com>,
Andrii Nakryiko <andrii@kernel.org>,
Eduard Zingerman <eddyz87@gmail.com>,
Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Kumar Kartikeya Dwivedi <memxor@gmail.com>,
Shuah Khan <shuah@kernel.org>,
linux-kselftest@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH v6 6/6] selftests/bpf: tc_tunnel - validate decap GSO and encapsulation state
Date: Mon, 4 May 2026 11:17:59 +0100 [thread overview]
Message-ID: <20260504101759.3319427-7-nhudson@akamai.com> (raw)
In-Reply-To: <20260504101759.3319427-1-nhudson@akamai.com>
tc_tunnel only partially validated decap state and missed some tunnel
cases. In particular, IPXIP decap checks were not exercised for
IPIP/SIT paths, and non-GSO decap encapsulation state was not
verified.
Tighten the test by:
- setting the BPF_F_ADJ_ROOM_DECAP_IPXIP4/6 flag for IPIP/SIT/IP6 decap
paths based on the outer tunnel header family (IPXIP4 for an IPv4
outer header, IPXIP6 for an IPv6 outer header);
- requiring needed DECAP enum values via CO-RE enum existence checks
so missing kernel support fails fast;
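- validating post-decap tunnel state for both GSO and non-GSO packets:
for GSO packets, the gso_type bits matching the decap flags must be
cleared and skb->encapsulation must be set if and only if tunnel
gso_type bits remain; for non-GSO packets, skb->encapsulation must
be clear;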
- removing forced TSO disable in the test harness so GSO validation is
exercised.
This improves coverage for decap tunnel-state regressions and ensures
sit_none/ipip-style paths are checked correctly.
Signed-off-by: Nick Hudson <nhudson@akamai.com>
---
.../selftests/bpf/prog_tests/test_tc_tunnel.c | 1 -
.../selftests/bpf/progs/test_tc_tunnel.c | 91 +++++++++++++++++--
2 files changed, 84 insertions(+), 8 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
index 1aa7c9463980..67ba27d69347 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
@@ -438,7 +438,6 @@ static int setup(void)
SYS(fail_close_ns_client, "ip link add %s type veth peer name %s",
"veth1 mtu 1500 netns " CLIENT_NS " address " MAC_ADDR_VETH1,
"veth2 mtu 1500 netns " SERVER_NS " address " MAC_ADDR_VETH2);
- SYS(fail_close_ns_client, "ethtool -K veth1 tso off");
SYS(fail_close_ns_client, "ip link set veth1 up");
nstoken_server = open_netns(SERVER_NS);
if (!ASSERT_OK_PTR(nstoken_server, "open server ns"))
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 7376df405a6b..853bca962910 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -6,6 +6,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
#include "bpf_tracing_net.h"
#include "bpf_compiler.h"
@@ -37,6 +38,22 @@ struct vxlanhdr___local {
#define EXTPROTO_VXLAN 0x1
+#define SKB_GSO_UDP_TUNNEL_MASK (SKB_GSO_UDP_TUNNEL | \
+ SKB_GSO_UDP_TUNNEL_CSUM)
+
+#define SKB_GSO_TUNNEL_MASK (SKB_GSO_UDP_TUNNEL_MASK | \
+ SKB_GSO_GRE | \
+ SKB_GSO_GRE_CSUM | \
+ SKB_GSO_IPXIP4 | \
+ SKB_GSO_IPXIP6 | \
+ SKB_GSO_ESP)
+
+#define BPF_F_ADJ_ROOM_DECAP_L4_MASK (BPF_F_ADJ_ROOM_DECAP_L4_UDP | \
+ BPF_F_ADJ_ROOM_DECAP_L4_GRE)
+
+#define BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK (BPF_F_ADJ_ROOM_DECAP_IPXIP4 | \
+ BPF_F_ADJ_ROOM_DECAP_IPXIP6)
+
#define VXLAN_FLAGS bpf_htonl(1<<27)
#define VNI_ID 1
#define VXLAN_VNI bpf_htonl(VNI_ID << 8)
@@ -589,9 +606,12 @@ int __encap_ip6vxlan_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
-static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
+static int decap_internal(struct __sk_buff *skb, int off, int len, char proto,
+ __u64 ipxip_flag)
{
__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
+ struct sk_buff *kskb;
+ struct skb_shared_info *shinfo;
struct ipv6_opt_hdr ip6_opt_hdr;
struct gre_hdr greh;
struct udphdr udph;
@@ -599,10 +619,12 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
switch (proto) {
case IPPROTO_IPIP:
- flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
+ flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4 |
+ ipxip_flag;
break;
case IPPROTO_IPV6:
- flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
+ flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6 |
+ ipxip_flag;
break;
case NEXTHDR_DEST:
if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
@@ -610,10 +632,12 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
return TC_ACT_OK;
switch (ip6_opt_hdr.nexthdr) {
case IPPROTO_IPIP:
- flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
+ flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4 |
+ ipxip_flag;
break;
case IPPROTO_IPV6:
- flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
+ flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6 |
+ ipxip_flag;
break;
default:
return TC_ACT_OK;
@@ -621,6 +645,11 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
break;
case IPPROTO_GRE:
olen += sizeof(struct gre_hdr);
+ if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags,
+ BPF_F_ADJ_ROOM_DECAP_L4_GRE))
+ return TC_ACT_SHOT;
+ flags |= BPF_F_ADJ_ROOM_DECAP_L4_GRE;
+
if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
return TC_ACT_OK;
switch (bpf_ntohs(greh.protocol)) {
@@ -634,6 +663,10 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
break;
case IPPROTO_UDP:
olen += sizeof(struct udphdr);
+ if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags,
+ BPF_F_ADJ_ROOM_DECAP_L4_UDP))
+ return TC_ACT_SHOT;
+ flags |= BPF_F_ADJ_ROOM_DECAP_L4_UDP;
if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
return TC_ACT_OK;
switch (bpf_ntohs(udph.dest)) {
@@ -655,6 +688,40 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
return TC_ACT_SHOT;
+ kskb = bpf_cast_to_kern_ctx(skb);
+ shinfo = bpf_core_cast(kskb->head + kskb->end, struct skb_shared_info);
+ if (shinfo->gso_size) {
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP) &&
+ (shinfo->gso_type & SKB_GSO_UDP_TUNNEL_MASK))
+ return TC_ACT_SHOT;
+
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_GRE) &&
+ (shinfo->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM)))
+ return TC_ACT_SHOT;
+
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP4) &&
+ (shinfo->gso_type & SKB_GSO_IPXIP4))
+ return TC_ACT_SHOT;
+
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP6) &&
+ (shinfo->gso_type & SKB_GSO_IPXIP6))
+ return TC_ACT_SHOT;
+
+ if (flags & (BPF_F_ADJ_ROOM_DECAP_L4_MASK |
+ BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)) {
+ if ((shinfo->gso_type & SKB_GSO_TUNNEL_MASK) &&
+ !kskb->encapsulation)
+ return TC_ACT_SHOT;
+ if (!(shinfo->gso_type & SKB_GSO_TUNNEL_MASK) &&
+ kskb->encapsulation)
+ return TC_ACT_SHOT;
+ }
+ } else if ((flags & (BPF_F_ADJ_ROOM_DECAP_L4_MASK |
+ BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)) &&
+ kskb->encapsulation) {
+ return TC_ACT_SHOT;
+ }
+
return TC_ACT_OK;
}
@@ -662,6 +729,10 @@ static int decap_ipv4(struct __sk_buff *skb)
{
struct iphdr iph_outer;
+ if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags,
+ BPF_F_ADJ_ROOM_DECAP_IPXIP4))
+ return TC_ACT_SHOT;
+
if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
sizeof(iph_outer)) < 0)
return TC_ACT_OK;
@@ -670,19 +741,25 @@ static int decap_ipv4(struct __sk_buff *skb)
return TC_ACT_OK;
return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
- iph_outer.protocol);
+ iph_outer.protocol,
+ BPF_F_ADJ_ROOM_DECAP_IPXIP4);
}
static int decap_ipv6(struct __sk_buff *skb)
{
struct ipv6hdr iph_outer;
+ if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags,
+ BPF_F_ADJ_ROOM_DECAP_IPXIP6))
+ return TC_ACT_SHOT;
+
if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
sizeof(iph_outer)) < 0)
return TC_ACT_OK;
return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
- iph_outer.nexthdr);
+ iph_outer.nexthdr,
+ BPF_F_ADJ_ROOM_DECAP_IPXIP6);
}
SEC("tc")
--
2.34.1
prev parent reply other threads:[~2026-05-04 10:18 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-04 10:17 [PATCH bpf-next v6 0/6] bpf: decap flags and GSO state updates Nick Hudson
2026-05-04 10:17 ` [PATCH v6 1/6] bpf: name the enum for BPF_FUNC_skb_adjust_room flags Nick Hudson
2026-05-04 11:03 ` bot+bpf-ci
2026-05-04 10:17 ` [PATCH v6 2/6] bpf: refactor masks for ADJ_ROOM flags and encap validation Nick Hudson
2026-05-04 11:03 ` bot+bpf-ci
2026-05-04 17:14 ` Willem de Bruijn
2026-05-04 10:17 ` [PATCH v6 3/6] bpf: add BPF_F_ADJ_ROOM_DECAP_* flags for tunnel decapsulation Nick Hudson
2026-05-04 11:03 ` bot+bpf-ci
2026-05-04 10:17 ` [PATCH v6 4/6] bpf: allow new DECAP flags and add guard rails Nick Hudson
2026-05-04 10:17 ` [PATCH v6 5/6] bpf: clear decap state on skb_adjust_room shrink path Nick Hudson
2026-05-04 17:15 ` Willem de Bruijn
2026-05-04 10:17 ` Nick Hudson [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260504101759.3319427-7-nhudson@akamai.com \
--to=nhudson@akamai.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=eddyz87@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=martin.lau@linux.dev \
--cc=memxor@gmail.com \
--cc=netdev@vger.kernel.org \
--cc=shuah@kernel.org \
--cc=willemb@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.