* [PATCH bpf-next v3 1/6] bpf: name the enum for BPF_FUNC_skb_adjust_room flags
[not found] <20260407105005.1639815-1-nhudson@akamai.com>
@ 2026-04-07 10:50 ` Nick Hudson
2026-04-07 10:50 ` [PATCH bpf-next v3 2/6] bpf: refactor masks for ADJ_ROOM flags and encap validation Nick Hudson
` (4 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Nick Hudson @ 2026-04-07 10:50 UTC (permalink / raw)
To: bpf, netdev, Willem de Bruijn, Martin KaFai Lau
Cc: Nick Hudson, Max Tottenham, Anna Glasgall, Alexei Starovoitov,
Daniel Borkmann, Andrii Nakryiko, Eduard Zingerman,
Kumar Kartikeya Dwivedi, linux-kernel
Give the existing anonymous enum for BPF_FUNC_skb_adjust_room flags the
name enum bpf_adj_room_flags, so that BPF programs can look up its values
via CO-RE (Compile Once - Run Everywhere).
Co-developed-by: Max Tottenham <mtottenh@akamai.com>
Signed-off-by: Max Tottenham <mtottenh@akamai.com>
Co-developed-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Nick Hudson <nhudson@akamai.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
---
include/uapi/linux/bpf.h | 2 +-
tools/include/uapi/linux/bpf.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 552bc5d9afbd..c021ed8d7b44 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6211,7 +6211,7 @@ enum {
};
/* BPF_FUNC_skb_adjust_room flags. */
-enum {
+enum bpf_adj_room_flags {
BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0),
BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1),
BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2),
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 677be9a47347..ca35ed622ed5 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6211,7 +6211,7 @@ enum {
};
/* BPF_FUNC_skb_adjust_room flags. */
-enum {
+enum bpf_adj_room_flags {
BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0),
BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1),
BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2),
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH bpf-next v3 2/6] bpf: refactor masks for ADJ_ROOM flags and encap validation
[not found] <20260407105005.1639815-1-nhudson@akamai.com>
2026-04-07 10:50 ` [PATCH bpf-next v3 1/6] bpf: name the enum for BPF_FUNC_skb_adjust_room flags Nick Hudson
@ 2026-04-07 10:50 ` Nick Hudson
2026-04-07 10:50 ` [PATCH bpf-next v3 3/6] bpf: add BPF_F_ADJ_ROOM_DECAP_* flags for tunnel decapsulation Nick Hudson
` (3 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Nick Hudson @ 2026-04-07 10:50 UTC (permalink / raw)
To: bpf, netdev, Willem de Bruijn, Martin KaFai Lau
Cc: Nick Hudson, Max Tottenham, Anna Glasgall, Alexei Starovoitov,
Daniel Borkmann, Andrii Nakryiko, Eduard Zingerman,
Kumar Kartikeya Dwivedi, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, linux-kernel
Refactor the helper masks for bpf_skb_adjust_room() flags to simplify
validation logic and introduce:
- BPF_F_ADJ_ROOM_ENCAP_MASK
- BPF_F_ADJ_ROOM_DECAP_MASK
Refactor existing validation checks in bpf_skb_net_shrink()
and bpf_skb_adjust_room() to use the new masks (no behavior change).
This is in preparation for supporting the new decap flags.
Co-developed-by: Max Tottenham <mtottenh@akamai.com>
Signed-off-by: Max Tottenham <mtottenh@akamai.com>
Co-developed-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Nick Hudson <nhudson@akamai.com>
---
---
net/core/filter.c | 38 +++++++++++++++++++++-----------------
1 file changed, 21 insertions(+), 17 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index 78b548158fb0..4e860da4381d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3490,14 +3490,19 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
#define BPF_F_ADJ_ROOM_DECAP_L3_MASK (BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | \
BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
-#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
- BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
+#define BPF_F_ADJ_ROOM_ENCAP_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
BPF_F_ADJ_ROOM_ENCAP_L2( \
- BPF_ADJ_ROOM_ENCAP_L2_MASK) | \
- BPF_F_ADJ_ROOM_DECAP_L3_MASK)
+ BPF_ADJ_ROOM_ENCAP_L2_MASK))
+
+#define BPF_F_ADJ_ROOM_DECAP_MASK (BPF_F_ADJ_ROOM_DECAP_L3_MASK)
+
+#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
+ BPF_F_ADJ_ROOM_ENCAP_MASK | \
+ BPF_F_ADJ_ROOM_DECAP_MASK | \
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET)
static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
u64 flags)
@@ -3618,8 +3623,8 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
{
int ret;
- if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
- BPF_F_ADJ_ROOM_DECAP_L3_MASK |
+ if (unlikely(flags & ~(BPF_F_ADJ_ROOM_DECAP_MASK |
+ BPF_F_ADJ_ROOM_FIXED_GSO |
BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
return -EINVAL;
@@ -3715,8 +3720,7 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
u32 off;
int ret;
- if (unlikely(flags & ~(BPF_F_ADJ_ROOM_MASK |
- BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
+ if (unlikely(flags & ~BPF_F_ADJ_ROOM_MASK))
return -EINVAL;
if (unlikely(len_diff_abs > 0xfffU))
return -EFAULT;
@@ -3735,20 +3739,20 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
return -ENOTSUPP;
}
- if (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
+ if (flags & BPF_F_ADJ_ROOM_DECAP_MASK) {
if (!shrink)
return -EINVAL;
- switch (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
- case BPF_F_ADJ_ROOM_DECAP_L3_IPV4:
+ /* Reject mutually exclusive decap flag pairs. */
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) ==
+ BPF_F_ADJ_ROOM_DECAP_L3_MASK)
+ return -EINVAL;
+
+ if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
len_min = sizeof(struct iphdr);
- break;
- case BPF_F_ADJ_ROOM_DECAP_L3_IPV6:
+
+ if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
len_min = sizeof(struct ipv6hdr);
- break;
- default:
- return -EINVAL;
- }
}
len_cur = skb->len - skb_network_offset(skb);
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH bpf-next v3 3/6] bpf: add BPF_F_ADJ_ROOM_DECAP_* flags for tunnel decapsulation
[not found] <20260407105005.1639815-1-nhudson@akamai.com>
2026-04-07 10:50 ` [PATCH bpf-next v3 1/6] bpf: name the enum for BPF_FUNC_skb_adjust_room flags Nick Hudson
2026-04-07 10:50 ` [PATCH bpf-next v3 2/6] bpf: refactor masks for ADJ_ROOM flags and encap validation Nick Hudson
@ 2026-04-07 10:50 ` Nick Hudson
2026-04-07 10:50 ` [PATCH bpf-next v3 4/6] bpf: allow new DECAP flags and add guard rails Nick Hudson
` (2 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Nick Hudson @ 2026-04-07 10:50 UTC (permalink / raw)
To: bpf, netdev, Willem de Bruijn, Martin KaFai Lau
Cc: Nick Hudson, Max Tottenham, Anna Glasgall, Alexei Starovoitov,
Daniel Borkmann, Andrii Nakryiko, Eduard Zingerman,
Kumar Kartikeya Dwivedi, linux-kernel
Add new bpf_skb_adjust_room() decapsulation flags:
- BPF_F_ADJ_ROOM_DECAP_L4_GRE
- BPF_F_ADJ_ROOM_DECAP_L4_UDP
- BPF_F_ADJ_ROOM_DECAP_IPXIP4
- BPF_F_ADJ_ROOM_DECAP_IPXIP6
These flags let BPF programs describe which tunnel layer is being
removed, so later changes can update tunnel-related GSO state
accordingly during decapsulation.
This patch only introduces the UAPI flag definitions and helper
documentation.
Co-developed-by: Max Tottenham <mtottenh@akamai.com>
Signed-off-by: Max Tottenham <mtottenh@akamai.com>
Co-developed-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Nick Hudson <nhudson@akamai.com>
---
include/uapi/linux/bpf.h | 34 ++++++++++++++++++++++++++++++++--
tools/include/uapi/linux/bpf.h | 34 ++++++++++++++++++++++++++++++++--
2 files changed, 64 insertions(+), 4 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c021ed8d7b44..4a53e731c554 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3010,8 +3010,34 @@ union bpf_attr {
*
* * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
* **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
- * Indicate the new IP header version after decapsulating the outer
- * IP header. Used when the inner and outer IP versions are different.
+ * Indicate the new IP header version after decapsulating the
+ * outer IP header. Used when the inner and outer IP versions
+ * are different. These flags only trigger a protocol change
+ * without clearing any tunnel-specific GSO flags.
+ *
+ * * **BPF_F_ADJ_ROOM_DECAP_L4_GRE**:
+ * Clear GRE tunnel GSO flags (SKB_GSO_GRE and SKB_GSO_GRE_CSUM)
+ * when decapsulating a GRE tunnel.
+ *
+ * * **BPF_F_ADJ_ROOM_DECAP_L4_UDP**:
+ * Clear UDP tunnel GSO flags (SKB_GSO_UDP_TUNNEL and
+ * SKB_GSO_UDP_TUNNEL_CSUM) when decapsulating a UDP tunnel.
+ *
+ * * **BPF_F_ADJ_ROOM_DECAP_IPXIP4**:
+ * Clear IPIP/SIT tunnel GSO flag (SKB_GSO_IPXIP4) when decapsulating
+ * a tunnel with an outer IPv4 header (IPv4-in-IPv4 or IPv6-in-IPv4).
+ *
+ * * **BPF_F_ADJ_ROOM_DECAP_IPXIP6**:
+ * Clear IPv6 encapsulation tunnel GSO flag (SKB_GSO_IPXIP6) when
+ * decapsulating a tunnel with an outer IPv6 header (IPv6-in-IPv6
+ * or IPv4-in-IPv6).
+ *
+ * When using the decapsulation flags above, the skb->encapsulation
+ * flag is automatically cleared if all tunnel-specific GSO flags
+ * (SKB_GSO_UDP_TUNNEL, SKB_GSO_UDP_TUNNEL_CSUM, SKB_GSO_GRE,
+ * SKB_GSO_GRE_CSUM, SKB_GSO_IPXIP4, SKB_GSO_IPXIP6) have been
+ * removed from the packet. This handles cases where all tunnel
+ * layers have been decapsulated.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
@@ -6221,6 +6247,10 @@ enum bpf_adj_room_flags {
BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
BPF_F_ADJ_ROOM_DECAP_L3_IPV4 = (1ULL << 7),
BPF_F_ADJ_ROOM_DECAP_L3_IPV6 = (1ULL << 8),
+ BPF_F_ADJ_ROOM_DECAP_L4_GRE = (1ULL << 9),
+ BPF_F_ADJ_ROOM_DECAP_L4_UDP = (1ULL << 10),
+ BPF_F_ADJ_ROOM_DECAP_IPXIP4 = (1ULL << 11),
+ BPF_F_ADJ_ROOM_DECAP_IPXIP6 = (1ULL << 12),
};
enum {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ca35ed622ed5..f4c2fbd8fe68 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3010,8 +3010,34 @@ union bpf_attr {
*
* * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
* **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
- * Indicate the new IP header version after decapsulating the outer
- * IP header. Used when the inner and outer IP versions are different.
+ * Indicate the new IP header version after decapsulating the
+ * outer IP header. Used when the inner and outer IP versions
+ * are different. These flags only trigger a protocol change
+ * without clearing any tunnel-specific GSO flags.
+ *
+ * * **BPF_F_ADJ_ROOM_DECAP_L4_GRE**:
+ * Clear GRE tunnel GSO flags (SKB_GSO_GRE and SKB_GSO_GRE_CSUM)
+ * when decapsulating a GRE tunnel.
+ *
+ * * **BPF_F_ADJ_ROOM_DECAP_L4_UDP**:
+ * Clear UDP tunnel GSO flags (SKB_GSO_UDP_TUNNEL and
+ * SKB_GSO_UDP_TUNNEL_CSUM) when decapsulating a UDP tunnel.
+ *
+ * * **BPF_F_ADJ_ROOM_DECAP_IPXIP4**:
+ * Clear IPIP/SIT tunnel GSO flag (SKB_GSO_IPXIP4) when decapsulating
+ * a tunnel with an outer IPv4 header (IPv4-in-IPv4 or IPv6-in-IPv4).
+ *
+ * * **BPF_F_ADJ_ROOM_DECAP_IPXIP6**:
+ * Clear IPv6 encapsulation tunnel GSO flag (SKB_GSO_IPXIP6) when
+ * decapsulating a tunnel with an outer IPv6 header (IPv6-in-IPv6
+ * or IPv4-in-IPv6).
+ *
+ * When using the decapsulation flags above, the skb->encapsulation
+ * flag is automatically cleared if all tunnel-specific GSO flags
+ * (SKB_GSO_UDP_TUNNEL, SKB_GSO_UDP_TUNNEL_CSUM, SKB_GSO_GRE,
+ * SKB_GSO_GRE_CSUM, SKB_GSO_IPXIP4, SKB_GSO_IPXIP6) have been
+ * removed from the packet. This handles cases where all tunnel
+ * layers have been decapsulated.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
@@ -6221,6 +6247,10 @@ enum bpf_adj_room_flags {
BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
BPF_F_ADJ_ROOM_DECAP_L3_IPV4 = (1ULL << 7),
BPF_F_ADJ_ROOM_DECAP_L3_IPV6 = (1ULL << 8),
+ BPF_F_ADJ_ROOM_DECAP_L4_GRE = (1ULL << 9),
+ BPF_F_ADJ_ROOM_DECAP_L4_UDP = (1ULL << 10),
+ BPF_F_ADJ_ROOM_DECAP_IPXIP4 = (1ULL << 11),
+ BPF_F_ADJ_ROOM_DECAP_IPXIP6 = (1ULL << 12),
};
enum {
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH bpf-next v3 4/6] bpf: allow new DECAP flags and add guard rails
[not found] <20260407105005.1639815-1-nhudson@akamai.com>
` (2 preceding siblings ...)
2026-04-07 10:50 ` [PATCH bpf-next v3 3/6] bpf: add BPF_F_ADJ_ROOM_DECAP_* flags for tunnel decapsulation Nick Hudson
@ 2026-04-07 10:50 ` Nick Hudson
2026-04-07 10:50 ` [PATCH bpf-next v3 5/6] bpf: clear decap tunnel GSO state in skb_adjust_room Nick Hudson
2026-04-07 10:50 ` [PATCH bpf-next v3 6/6] selftests/bpf: tc_tunnel validate decap GSO state Nick Hudson
5 siblings, 0 replies; 8+ messages in thread
From: Nick Hudson @ 2026-04-07 10:50 UTC (permalink / raw)
To: bpf, netdev, Willem de Bruijn, Martin KaFai Lau
Cc: Nick Hudson, Max Tottenham, Anna Glasgall, Alexei Starovoitov,
Daniel Borkmann, Andrii Nakryiko, Eduard Zingerman,
Kumar Kartikeya Dwivedi, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, linux-kernel
Add checks to require shrink-only decap, reject conflicting decap flag
combinations, and verify removed length is sufficient for claimed header
decapsulation.
Co-developed-by: Max Tottenham <mtottenh@akamai.com>
Signed-off-by: Max Tottenham <mtottenh@akamai.com>
Co-developed-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Nick Hudson <nhudson@akamai.com>
---
net/core/filter.c | 44 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 43 insertions(+), 1 deletion(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index 4e860da4381d..7f8d43420afb 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -56,6 +56,7 @@
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
#include <net/tcp.h>
+#include <net/gre.h>
#include <net/xfrm.h>
#include <net/udp.h>
#include <linux/bpf_trace.h>
@@ -3490,6 +3491,12 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
#define BPF_F_ADJ_ROOM_DECAP_L3_MASK (BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | \
BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
+#define BPF_F_ADJ_ROOM_DECAP_L4_MASK (BPF_F_ADJ_ROOM_DECAP_L4_UDP | \
+ BPF_F_ADJ_ROOM_DECAP_L4_GRE)
+
+#define BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK (BPF_F_ADJ_ROOM_DECAP_IPXIP4 | \
+ BPF_F_ADJ_ROOM_DECAP_IPXIP6)
+
#define BPF_F_ADJ_ROOM_ENCAP_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
@@ -3497,7 +3504,9 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
BPF_F_ADJ_ROOM_ENCAP_L2( \
BPF_ADJ_ROOM_ENCAP_L2_MASK))
-#define BPF_F_ADJ_ROOM_DECAP_MASK (BPF_F_ADJ_ROOM_DECAP_L3_MASK)
+#define BPF_F_ADJ_ROOM_DECAP_MASK (BPF_F_ADJ_ROOM_DECAP_L3_MASK | \
+ BPF_F_ADJ_ROOM_DECAP_L4_MASK | \
+ BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)
#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
BPF_F_ADJ_ROOM_ENCAP_MASK | \
@@ -3740,6 +3749,8 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
}
if (flags & BPF_F_ADJ_ROOM_DECAP_MASK) {
+ u32 len_decap_min = 0;
+
if (!shrink)
return -EINVAL;
@@ -3748,6 +3759,37 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
BPF_F_ADJ_ROOM_DECAP_L3_MASK)
return -EINVAL;
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_MASK) ==
+ BPF_F_ADJ_ROOM_DECAP_L4_MASK)
+ return -EINVAL;
+
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK) ==
+ BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)
+ return -EINVAL;
+
+ /* Reject mutually exclusive decap tunnel type flags. */
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_MASK) &&
+ (flags & BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK))
+ return -EINVAL;
+
+ if (flags & BPF_F_ADJ_ROOM_DECAP_L4_MASK)
+ len_decap_min += bpf_skb_net_base_len(skb);
+
+ if (flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP)
+ len_decap_min += sizeof(struct udphdr);
+
+ if (flags & BPF_F_ADJ_ROOM_DECAP_L4_GRE)
+ len_decap_min += sizeof(struct gre_base_hdr);
+
+ if (flags & BPF_F_ADJ_ROOM_DECAP_IPXIP4)
+ len_decap_min += sizeof(struct iphdr);
+
+ if (flags & BPF_F_ADJ_ROOM_DECAP_IPXIP6)
+ len_decap_min += sizeof(struct ipv6hdr);
+
+ if (len_diff_abs < len_decap_min)
+ return -EINVAL;
+
if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
len_min = sizeof(struct iphdr);
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH bpf-next v3 5/6] bpf: clear decap tunnel GSO state in skb_adjust_room
[not found] <20260407105005.1639815-1-nhudson@akamai.com>
` (3 preceding siblings ...)
2026-04-07 10:50 ` [PATCH bpf-next v3 4/6] bpf: allow new DECAP flags and add guard rails Nick Hudson
@ 2026-04-07 10:50 ` Nick Hudson
2026-04-07 11:52 ` bot+bpf-ci
2026-04-08 15:10 ` Willem de Bruijn
2026-04-07 10:50 ` [PATCH bpf-next v3 6/6] selftests/bpf: tc_tunnel validate decap GSO state Nick Hudson
5 siblings, 2 replies; 8+ messages in thread
From: Nick Hudson @ 2026-04-07 10:50 UTC (permalink / raw)
To: bpf, netdev, Willem de Bruijn, Martin KaFai Lau
Cc: Nick Hudson, Max Tottenham, Anna Glasgall, Alexei Starovoitov,
Daniel Borkmann, Andrii Nakryiko, Eduard Zingerman,
Kumar Kartikeya Dwivedi, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, linux-kernel
On shrink in bpf_skb_adjust_room(), clear tunnel-specific GSO flags
according to the decapsulation flags:
- BPF_F_ADJ_ROOM_DECAP_L4_UDP clears SKB_GSO_UDP_TUNNEL{,_CSUM} and
SKB_GSO_TUNNEL_REMCSUM
- BPF_F_ADJ_ROOM_DECAP_L4_GRE clears SKB_GSO_GRE{,_CSUM}
- BPF_F_ADJ_ROOM_DECAP_IPXIP4 clears SKB_GSO_IPXIP4
- BPF_F_ADJ_ROOM_DECAP_IPXIP6 clears SKB_GSO_IPXIP6
When all tunnel-related GSO bits are cleared, also clear
skb->encapsulation.
Handle the case of ESP inside a UDP tunnel, where skb->encapsulation
should remain set while SKB_GSO_ESP is still present.
If UDP decap is performed and the GSO state is removed, reset
encap_hdr_csum and remcsum_offload.
Co-developed-by: Max Tottenham <mtottenh@akamai.com>
Signed-off-by: Max Tottenham <mtottenh@akamai.com>
Co-developed-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Nick Hudson <nhudson@akamai.com>
---
net/core/filter.c | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)
diff --git a/net/core/filter.c b/net/core/filter.c
index 7f8d43420afb..04059d07d368 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3667,6 +3667,46 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
skb_increase_gso_size(shinfo, len_diff);
+ /* Selective GSO flag clearing based on decap type.
+ * Only clear the flags for the tunnel layer being removed.
+ */
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP) &&
+ (shinfo->gso_type & (SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_TUNNEL_REMCSUM)))
+ shinfo->gso_type &= ~(SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_TUNNEL_REMCSUM);
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_GRE) &&
+ (shinfo->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM)))
+ shinfo->gso_type &= ~(SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM);
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP4) &&
+ (shinfo->gso_type & SKB_GSO_IPXIP4))
+ shinfo->gso_type &= ~SKB_GSO_IPXIP4;
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP6) &&
+ (shinfo->gso_type & SKB_GSO_IPXIP6))
+ shinfo->gso_type &= ~SKB_GSO_IPXIP6;
+
+ /* Clear encapsulation flag only when no tunnel GSO flags remain */
+ if (flags & (BPF_F_ADJ_ROOM_DECAP_L4_MASK |
+ BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)) {
+ if (!(shinfo->gso_type & (SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
+ SKB_GSO_IPXIP4 |
+ SKB_GSO_IPXIP6 |
+ SKB_GSO_ESP)))
+ if (skb->encapsulation)
+ skb->encapsulation = 0;
+
+ if (flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP) {
+ skb->encap_hdr_csum = !!(shinfo->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
+ skb->remcsum_offload = !!(shinfo->gso_type & SKB_GSO_TUNNEL_REMCSUM);
+ }
+ }
+
/* Header must be checked, and gso_segs recomputed. */
shinfo->gso_type |= SKB_GSO_DODGY;
shinfo->gso_segs = 0;
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH bpf-next v3 6/6] selftests/bpf: tc_tunnel validate decap GSO state
[not found] <20260407105005.1639815-1-nhudson@akamai.com>
` (4 preceding siblings ...)
2026-04-07 10:50 ` [PATCH bpf-next v3 5/6] bpf: clear decap tunnel GSO state in skb_adjust_room Nick Hudson
@ 2026-04-07 10:50 ` Nick Hudson
5 siblings, 0 replies; 8+ messages in thread
From: Nick Hudson @ 2026-04-07 10:50 UTC (permalink / raw)
To: bpf, netdev, Willem de Bruijn, Martin KaFai Lau
Cc: Nick Hudson, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Eduard Zingerman, Kumar Kartikeya Dwivedi, Shuah Khan,
linux-kselftest, linux-kernel
Require BPF_F_ADJ_ROOM_DECAP_L4_UDP and BPF_F_ADJ_ROOM_DECAP_L4_GRE enum
values at runtime using CO-RE enum existence checks so missing kernel
support fails fast instead of silently proceeding.
After bpf_skb_adjust_room() decapsulation, inspect skb_shared_info and
sk_buff state for GSO packets and assert that the expected tunnel GSO
bits are cleared and encapsulation matches the remaining tunnel state.
Signed-off-by: Nick Hudson <nhudson@akamai.com>
---
.../selftests/bpf/progs/test_tc_tunnel.c | 58 +++++++++++++++++++
1 file changed, 58 insertions(+)
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 7376df405a6b..74dfb694a210 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -6,6 +6,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
#include "bpf_tracing_net.h"
#include "bpf_compiler.h"
@@ -37,6 +38,23 @@ struct vxlanhdr___local {
#define EXTPROTO_VXLAN 0x1
+#define SKB_GSO_UDP_TUNNEL_MASK (SKB_GSO_UDP_TUNNEL | \
+ SKB_GSO_UDP_TUNNEL_CSUM | \
+ SKB_GSO_TUNNEL_REMCSUM)
+
+#define SKB_GSO_TUNNEL_MASK (SKB_GSO_UDP_TUNNEL_MASK | \
+ SKB_GSO_GRE | \
+ SKB_GSO_GRE_CSUM | \
+ SKB_GSO_IPXIP4 | \
+ SKB_GSO_IPXIP6 | \
+ SKB_GSO_ESP)
+
+#define BPF_F_ADJ_ROOM_DECAP_L4_MASK (BPF_F_ADJ_ROOM_DECAP_L4_UDP | \
+ BPF_F_ADJ_ROOM_DECAP_L4_GRE)
+
+#define BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK (BPF_F_ADJ_ROOM_DECAP_IPXIP4 | \
+ BPF_F_ADJ_ROOM_DECAP_IPXIP6)
+
#define VXLAN_FLAGS bpf_htonl(1<<27)
#define VNI_ID 1
#define VXLAN_VNI bpf_htonl(VNI_ID << 8)
@@ -592,6 +610,8 @@ int __encap_ip6vxlan_eth(struct __sk_buff *skb)
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
+ struct sk_buff *kskb;
+ struct skb_shared_info *shinfo;
struct ipv6_opt_hdr ip6_opt_hdr;
struct gre_hdr greh;
struct udphdr udph;
@@ -621,6 +641,11 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
break;
case IPPROTO_GRE:
olen += sizeof(struct gre_hdr);
+ if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags,
+ BPF_F_ADJ_ROOM_DECAP_L4_GRE))
+ return TC_ACT_SHOT;
+ flags |= BPF_F_ADJ_ROOM_DECAP_L4_GRE;
+
if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
return TC_ACT_OK;
switch (bpf_ntohs(greh.protocol)) {
@@ -634,6 +659,10 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
break;
case IPPROTO_UDP:
olen += sizeof(struct udphdr);
+ if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags,
+ BPF_F_ADJ_ROOM_DECAP_L4_UDP))
+ return TC_ACT_SHOT;
+ flags |= BPF_F_ADJ_ROOM_DECAP_L4_UDP;
if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
return TC_ACT_OK;
switch (bpf_ntohs(udph.dest)) {
@@ -655,6 +684,35 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
return TC_ACT_SHOT;
+ kskb = bpf_cast_to_kern_ctx(skb);
+ shinfo = bpf_core_cast(kskb->head + kskb->end, struct skb_shared_info);
+ if (!shinfo->gso_size)
+ return TC_ACT_OK;
+
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP) &&
+ (shinfo->gso_type & SKB_GSO_UDP_TUNNEL_MASK))
+ return TC_ACT_SHOT;
+
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_GRE) &&
+ (shinfo->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM)))
+ return TC_ACT_SHOT;
+
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP4) &&
+ (shinfo->gso_type & SKB_GSO_IPXIP4))
+ return TC_ACT_SHOT;
+
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP6) &&
+ (shinfo->gso_type & SKB_GSO_IPXIP6))
+ return TC_ACT_SHOT;
+
+ if (flags & (BPF_F_ADJ_ROOM_DECAP_L4_MASK |
+ BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)) {
+ if ((shinfo->gso_type & SKB_GSO_TUNNEL_MASK) && !kskb->encapsulation)
+ return TC_ACT_SHOT;
+ if (!(shinfo->gso_type & SKB_GSO_TUNNEL_MASK) && kskb->encapsulation)
+ return TC_ACT_SHOT;
+ }
+
return TC_ACT_OK;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH bpf-next v3 5/6] bpf: clear decap tunnel GSO state in skb_adjust_room
2026-04-07 10:50 ` [PATCH bpf-next v3 5/6] bpf: clear decap tunnel GSO state in skb_adjust_room Nick Hudson
@ 2026-04-07 11:52 ` bot+bpf-ci
2026-04-08 15:10 ` Willem de Bruijn
1 sibling, 0 replies; 8+ messages in thread
From: bot+bpf-ci @ 2026-04-07 11:52 UTC (permalink / raw)
To: nhudson, bpf, netdev, willemb, martin.lau
Cc: nhudson, mtottenh, aglasgal, ast, daniel, andrii, eddyz87, memxor,
davem, edumazet, kuba, pabeni, linux-kernel, ast, andrii, daniel,
martin.lau, eddyz87, yonghong.song, clm, ihor.solodrai
[-- Attachment #1: Type: text/plain, Size: 2139 bytes --]
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 7f8d43420afb..04059d07d368 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3667,6 +3667,46 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
> if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
> skb_increase_gso_size(shinfo, len_diff);
>
> + /* Selective GSO flag clearing based on decap type.
> + * Only clear the flags for the tunnel layer being removed.
> + */
> + if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP) &&
> + (shinfo->gso_type & (SKB_GSO_UDP_TUNNEL |
> + SKB_GSO_UDP_TUNNEL_CSUM |
> + SKB_GSO_TUNNEL_REMCSUM)))
> + shinfo->gso_type &= ~(SKB_GSO_UDP_TUNNEL |
> + SKB_GSO_UDP_TUNNEL_CSUM |
> + SKB_GSO_TUNNEL_REMCSUM);
[ ... ]
> +
> + /* Clear encapsulation flag only when no tunnel GSO flags remain */
> + if (flags & (BPF_F_ADJ_ROOM_DECAP_L4_MASK |
> + BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)) {
[ ... ]
> +
> + if (flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP) {
> + skb->encap_hdr_csum = !!(shinfo->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
> + skb->remcsum_offload = !!(shinfo->gso_type & SKB_GSO_TUNNEL_REMCSUM);
The L4_UDP clearing block above unconditionally clears
SKB_GSO_UDP_TUNNEL_CSUM and SKB_GSO_TUNNEL_REMCSUM from
shinfo->gso_type. By the time these two assignments execute,
both flags are always zero, so these expressions always
evaluate to 0.
The kernel's own __skb_udp_tunnel_segment() in udp_offload.c
uses the same !!() pattern:
need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
skb->encap_hdr_csum = need_csum;
remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
skb->remcsum_offload = remcsum;
but there the flags are still live. Here they have already been
cleared, so the conditional form is misleading. Would it be
clearer to just assign 0 directly?
> + }
> + }
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/24079111128
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH bpf-next v3 5/6] bpf: clear decap tunnel GSO state in skb_adjust_room
2026-04-07 10:50 ` [PATCH bpf-next v3 5/6] bpf: clear decap tunnel GSO state in skb_adjust_room Nick Hudson
2026-04-07 11:52 ` bot+bpf-ci
@ 2026-04-08 15:10 ` Willem de Bruijn
1 sibling, 0 replies; 8+ messages in thread
From: Willem de Bruijn @ 2026-04-08 15:10 UTC (permalink / raw)
To: Nick Hudson, bpf, netdev, Willem de Bruijn, Martin KaFai Lau
Cc: Nick Hudson, Max Tottenham, Anna Glasgall, Alexei Starovoitov,
Daniel Borkmann, Andrii Nakryiko, Eduard Zingerman,
Kumar Kartikeya Dwivedi, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, linux-kernel
Nick Hudson wrote:
> On shrink in bpf_skb_adjust_room(), clear tunnel-specific GSO flags
> according to the decapsulation flags:
>
> - BPF_F_ADJ_ROOM_DECAP_L4_UDP clears SKB_GSO_UDP_TUNNEL{,_CSUM}, and
> SKB_GSO_TUNNEL_REMCSUM
> - BPF_F_ADJ_ROOM_DECAP_L4_GRE clears SKB_GSO_GRE{,_CSUM}
> - BPF_F_ADJ_ROOM_DECAP_IPXIP4 clears SKB_GSO_IPXIP4
> - BPF_F_ADJ_ROOM_DECAP_IPXIP6 clears SKB_GSO_IPXIP6
>
> When all tunnel-related GSO bits are cleared, also clear
> skb->encapsulation.
>
> Handle the ESP inside a UDP tunnel case where encapsulation should remain
> set.
>
> If UDP decap is performed and GSO state removed then reset encap_hdr_csum, and
> remcsum_offload.
>
> Co-developed-by: Max Tottenham <mtottenh@akamai.com>
> Signed-off-by: Max Tottenham <mtottenh@akamai.com>
> Co-developed-by: Anna Glasgall <aglasgal@akamai.com>
> Signed-off-by: Anna Glasgall <aglasgal@akamai.com>
> Signed-off-by: Nick Hudson <nhudson@akamai.com>
> ---
> net/core/filter.c | 40 ++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 40 insertions(+)
>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 7f8d43420afb..04059d07d368 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3667,6 +3667,46 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
> if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
> skb_increase_gso_size(shinfo, len_diff);
>
> + /* Selective GSO flag clearing based on decap type.
> + * Only clear the flags for the tunnel layer being removed.
> + */
> + if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP) &&
> + (shinfo->gso_type & (SKB_GSO_UDP_TUNNEL |
> + SKB_GSO_UDP_TUNNEL_CSUM |
> + SKB_GSO_TUNNEL_REMCSUM)))
> + shinfo->gso_type &= ~(SKB_GSO_UDP_TUNNEL |
> + SKB_GSO_UDP_TUNNEL_CSUM |
> + SKB_GSO_TUNNEL_REMCSUM);
REMCSUM was previously not included in the series.
It is a non-obvious and rare enough feature that I would exclude it,
or move it to a separate patch.
> + if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_GRE) &&
> + (shinfo->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM)))
> + shinfo->gso_type &= ~(SKB_GSO_GRE |
> + SKB_GSO_GRE_CSUM);
> + if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP4) &&
> + (shinfo->gso_type & SKB_GSO_IPXIP4))
> + shinfo->gso_type &= ~SKB_GSO_IPXIP4;
> + if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP6) &&
> + (shinfo->gso_type & SKB_GSO_IPXIP6))
> + shinfo->gso_type &= ~SKB_GSO_IPXIP6;
> +
> + /* Clear encapsulation flag only when no tunnel GSO flags remain */
> + if (flags & (BPF_F_ADJ_ROOM_DECAP_L4_MASK |
> + BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)) {
> + if (!(shinfo->gso_type & (SKB_GSO_UDP_TUNNEL |
> + SKB_GSO_UDP_TUNNEL_CSUM |
> + SKB_GSO_GRE |
> + SKB_GSO_GRE_CSUM |
> + SKB_GSO_IPXIP4 |
> + SKB_GSO_IPXIP6 |
> + SKB_GSO_ESP)))
> + if (skb->encapsulation)
> + skb->encapsulation = 0;
> +
> + if (flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP) {
> + skb->encap_hdr_csum = !!(shinfo->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
Since the flag is never set, only possibly cleared: just clear this field when clearing the flag?
It appears that this is only used for deprecated UFO anyway.
> + skb->remcsum_offload = !!(shinfo->gso_type & SKB_GSO_TUNNEL_REMCSUM);
Always zero?
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2026-04-08 15:10 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <20260407105005.1639815-1-nhudson@akamai.com>
2026-04-07 10:50 ` [PATCH bpf-next v3 1/6] bpf: name the enum for BPF_FUNC_skb_adjust_room flags Nick Hudson
2026-04-07 10:50 ` [PATCH bpf-next v3 2/6] bpf: refactor masks for ADJ_ROOM flags and encap validation Nick Hudson
2026-04-07 10:50 ` [PATCH bpf-next v3 3/6] bpf: add BPF_F_ADJ_ROOM_DECAP_* flags for tunnel decapsulation Nick Hudson
2026-04-07 10:50 ` [PATCH bpf-next v3 4/6] bpf: allow new DECAP flags and add guard rails Nick Hudson
2026-04-07 10:50 ` [PATCH bpf-next v3 5/6] bpf: clear decap tunnel GSO state in skb_adjust_room Nick Hudson
2026-04-07 11:52 ` bot+bpf-ci
2026-04-08 15:10 ` Willem de Bruijn
2026-04-07 10:50 ` [PATCH bpf-next v3 6/6] selftests/bpf: tc_tunnel validate decap GSO state Nick Hudson
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.