public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Nick Hudson <nhudson@akamai.com>
To: bpf@vger.kernel.org, netdev@vger.kernel.org,
	Willem de Bruijn <willemb@google.com>,
	Martin KaFai Lau <martin.lau@linux.dev>
Cc: Nick Hudson <nhudson@akamai.com>,
	Andrii Nakryiko <andrii@kernel.org>,
	Eduard Zingerman <eddyz87@gmail.com>,
	Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Kumar Kartikeya Dwivedi <memxor@gmail.com>,
	Shuah Khan <shuah@kernel.org>,
	linux-kselftest@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH v6 6/6] selftests/bpf: tc_tunnel - validate decap GSO and encapsulation state
Date: Mon,  4 May 2026 11:17:59 +0100	[thread overview]
Message-ID: <20260504101759.3319427-7-nhudson@akamai.com> (raw)
In-Reply-To: <20260504101759.3319427-1-nhudson@akamai.com>

tc_tunnel only partially validated decap state and missed some tunnel
cases. In particular, IPXIP decap checks were not exercised for
IPIP/SIT paths, and non-GSO decap encapsulation state was not
verified.

Tighten the test by:

- setting DECAP_IPXIP4/6 flags for IPIP/SIT/IP6 decap paths based on
  the outer tunnel header family;
- requiring needed DECAP enum values via CO-RE enum existence checks
  so missing kernel support fails fast;
- validating post-decap tunnel state for both GSO and non-GSO packets:
  expected gso_type bits must be cleared and skb->encapsulation must
  match remaining tunnel flags;
- removing forced TSO disable in the test harness so GSO validation is
  exercised.

This improves coverage for decap tunnel-state regressions and ensures
sit_none/ipip-style paths are checked correctly.

Signed-off-by: Nick Hudson <nhudson@akamai.com>
---
 .../selftests/bpf/prog_tests/test_tc_tunnel.c |  1 -
 .../selftests/bpf/progs/test_tc_tunnel.c      | 91 +++++++++++++++++--
 2 files changed, 84 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
index 1aa7c9463980..67ba27d69347 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
@@ -438,7 +438,6 @@ static int setup(void)
 	SYS(fail_close_ns_client, "ip link add %s type veth peer name %s",
 	    "veth1 mtu 1500 netns " CLIENT_NS " address " MAC_ADDR_VETH1,
 	    "veth2 mtu 1500 netns " SERVER_NS " address " MAC_ADDR_VETH2);
-	SYS(fail_close_ns_client, "ethtool -K veth1 tso off");
 	SYS(fail_close_ns_client, "ip link set veth1 up");
 	nstoken_server = open_netns(SERVER_NS);
 	if (!ASSERT_OK_PTR(nstoken_server, "open server ns"))
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 7376df405a6b..853bca962910 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -6,6 +6,7 @@
 
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
 #include "bpf_tracing_net.h"
 #include "bpf_compiler.h"
 
@@ -37,6 +38,22 @@ struct vxlanhdr___local {
 
 #define	EXTPROTO_VXLAN	0x1
 
+#define SKB_GSO_UDP_TUNNEL_MASK	(SKB_GSO_UDP_TUNNEL |			\
+				 SKB_GSO_UDP_TUNNEL_CSUM)
+
+#define SKB_GSO_TUNNEL_MASK	(SKB_GSO_UDP_TUNNEL_MASK |		\
+				 SKB_GSO_GRE |				\
+				 SKB_GSO_GRE_CSUM |			\
+				 SKB_GSO_IPXIP4 |			\
+				 SKB_GSO_IPXIP6 |			\
+				 SKB_GSO_ESP)
+
+#define BPF_F_ADJ_ROOM_DECAP_L4_MASK	(BPF_F_ADJ_ROOM_DECAP_L4_UDP |	\
+					 BPF_F_ADJ_ROOM_DECAP_L4_GRE)
+
+#define BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK	(BPF_F_ADJ_ROOM_DECAP_IPXIP4 |	\
+					 BPF_F_ADJ_ROOM_DECAP_IPXIP6)
+
 #define	VXLAN_FLAGS     bpf_htonl(1<<27)
 #define	VNI_ID		1
 #define	VXLAN_VNI	bpf_htonl(VNI_ID << 8)
@@ -589,9 +606,12 @@ int __encap_ip6vxlan_eth(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
+static int decap_internal(struct __sk_buff *skb, int off, int len, char proto,
+			  __u64 ipxip_flag)
 {
 	__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
+	struct sk_buff *kskb;
+	struct skb_shared_info *shinfo;
 	struct ipv6_opt_hdr ip6_opt_hdr;
 	struct gre_hdr greh;
 	struct udphdr udph;
@@ -599,10 +619,12 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 
 	switch (proto) {
 	case IPPROTO_IPIP:
-		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
+		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4 |
+			 ipxip_flag;
 		break;
 	case IPPROTO_IPV6:
-		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
+		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6 |
+			 ipxip_flag;
 		break;
 	case NEXTHDR_DEST:
 		if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
@@ -610,10 +632,12 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 			return TC_ACT_OK;
 		switch (ip6_opt_hdr.nexthdr) {
 		case IPPROTO_IPIP:
-			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
+			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4 |
+				 ipxip_flag;
 			break;
 		case IPPROTO_IPV6:
-			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
+			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6 |
+				 ipxip_flag;
 			break;
 		default:
 			return TC_ACT_OK;
@@ -621,6 +645,11 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 		break;
 	case IPPROTO_GRE:
 		olen += sizeof(struct gre_hdr);
+		if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags,
+						BPF_F_ADJ_ROOM_DECAP_L4_GRE))
+			return TC_ACT_SHOT;
+		flags |= BPF_F_ADJ_ROOM_DECAP_L4_GRE;
+
 		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
 			return TC_ACT_OK;
 		switch (bpf_ntohs(greh.protocol)) {
@@ -634,6 +663,10 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 		break;
 	case IPPROTO_UDP:
 		olen += sizeof(struct udphdr);
+		if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags,
+						BPF_F_ADJ_ROOM_DECAP_L4_UDP))
+			return TC_ACT_SHOT;
+		flags |= BPF_F_ADJ_ROOM_DECAP_L4_UDP;
 		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
 			return TC_ACT_OK;
 		switch (bpf_ntohs(udph.dest)) {
@@ -655,6 +688,40 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
 		return TC_ACT_SHOT;
 
+	kskb = bpf_cast_to_kern_ctx(skb);
+	shinfo = bpf_core_cast(kskb->head + kskb->end, struct skb_shared_info);
+	if (shinfo->gso_size) {
+		if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP) &&
+		    (shinfo->gso_type & SKB_GSO_UDP_TUNNEL_MASK))
+			return TC_ACT_SHOT;
+
+		if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_GRE) &&
+		    (shinfo->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM)))
+			return TC_ACT_SHOT;
+
+		if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP4) &&
+		    (shinfo->gso_type & SKB_GSO_IPXIP4))
+			return TC_ACT_SHOT;
+
+		if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP6) &&
+		    (shinfo->gso_type & SKB_GSO_IPXIP6))
+			return TC_ACT_SHOT;
+
+		if (flags & (BPF_F_ADJ_ROOM_DECAP_L4_MASK |
+			     BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)) {
+			if ((shinfo->gso_type & SKB_GSO_TUNNEL_MASK) &&
+			    !kskb->encapsulation)
+				return TC_ACT_SHOT;
+			if (!(shinfo->gso_type & SKB_GSO_TUNNEL_MASK) &&
+			    kskb->encapsulation)
+				return TC_ACT_SHOT;
+		}
+	} else if ((flags & (BPF_F_ADJ_ROOM_DECAP_L4_MASK |
+			     BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)) &&
+		   kskb->encapsulation) {
+		return TC_ACT_SHOT;
+	}
+
 	return TC_ACT_OK;
 }
 
@@ -662,6 +729,10 @@ static int decap_ipv4(struct __sk_buff *skb)
 {
 	struct iphdr iph_outer;
 
+	if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags,
+					BPF_F_ADJ_ROOM_DECAP_IPXIP4))
+		return TC_ACT_SHOT;
+
 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
 			       sizeof(iph_outer)) < 0)
 		return TC_ACT_OK;
@@ -670,19 +741,25 @@ static int decap_ipv4(struct __sk_buff *skb)
 		return TC_ACT_OK;
 
 	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
-			      iph_outer.protocol);
+			      iph_outer.protocol,
+			      BPF_F_ADJ_ROOM_DECAP_IPXIP4);
 }
 
 static int decap_ipv6(struct __sk_buff *skb)
 {
 	struct ipv6hdr iph_outer;
 
+	if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags,
+					BPF_F_ADJ_ROOM_DECAP_IPXIP6))
+		return TC_ACT_SHOT;
+
 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
 			       sizeof(iph_outer)) < 0)
 		return TC_ACT_OK;
 
 	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
-			      iph_outer.nexthdr);
+			      iph_outer.nexthdr,
+			      BPF_F_ADJ_ROOM_DECAP_IPXIP6);
 }
 
 SEC("tc")
-- 
2.34.1


      parent reply	other threads:[~2026-05-04 10:18 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-04 10:17 [PATCH bpf-next v6 0/6] bpf: decap flags and GSO state updates Nick Hudson
2026-05-04 10:17 ` [PATCH v6 1/6] bpf: name the enum for BPF_FUNC_skb_adjust_room flags Nick Hudson
2026-05-04 11:03   ` bot+bpf-ci
2026-05-04 10:17 ` [PATCH v6 2/6] bpf: refactor masks for ADJ_ROOM flags and encap validation Nick Hudson
2026-05-04 11:03   ` bot+bpf-ci
2026-05-04 17:14   ` Willem de Bruijn
2026-05-04 10:17 ` [PATCH v6 3/6] bpf: add BPF_F_ADJ_ROOM_DECAP_* flags for tunnel decapsulation Nick Hudson
2026-05-04 11:03   ` bot+bpf-ci
2026-05-04 10:17 ` [PATCH v6 4/6] bpf: allow new DECAP flags and add guard rails Nick Hudson
2026-05-04 10:17 ` [PATCH v6 5/6] bpf: clear decap state on skb_adjust_room shrink path Nick Hudson
2026-05-04 17:15   ` Willem de Bruijn
2026-05-04 10:17 ` Nick Hudson [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260504101759.3319427-7-nhudson@akamai.com \
    --to=nhudson@akamai.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=eddyz87@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=martin.lau@linux.dev \
    --cc=memxor@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=shuah@kernel.org \
    --cc=willemb@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.