netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RFC nf-next 0/4] Add IP6IP6 flowtable SW acceleration
@ 2025-12-01 13:45 Lorenzo Bianconi
  2025-12-01 13:45 ` [PATCH RFC nf-next 1/4] netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct Lorenzo Bianconi
                   ` (4 more replies)
  0 siblings, 5 replies; 7+ messages in thread
From: Lorenzo Bianconi @ 2025-12-01 13:45 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	Phil Sutter, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, David Ahern, Shuah Khan
  Cc: netfilter-devel, coreteam, netdev, linux-kselftest,
	Lorenzo Bianconi

Introduce SW acceleration for IP6IP6 tunnels in the netfilter flowtable
infrastructure.

---
Lorenzo Bianconi (4):
      netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct
      netfilter: flowtable: Add IP6IP6 rx sw acceleration
      netfilter: flowtable: Add IP6IP6 tx sw acceleration
      selftests: netfilter: nft_flowtable.sh: Add IP6IP6 flowtable selftest

 net/ipv6/ip6_tunnel.c                              |  27 +++
 net/netfilter/nf_flow_table_ip.c                   | 239 ++++++++++++++++++---
 .../selftests/net/netfilter/nft_flowtable.sh       |  62 +++++-
 3 files changed, 285 insertions(+), 43 deletions(-)
---
base-commit: 0177f0f07886e54e12c6f18fa58f63e63ddd3c58
change-id: 20251201-flowtable-offload-ip6ip6-578b61067648

Best regards,
-- 
Lorenzo Bianconi <lorenzo@kernel.org>


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH RFC nf-next 1/4] netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct
  2025-12-01 13:45 [PATCH RFC nf-next 0/4] Add IP6IP6 flowtable SW acceleration Lorenzo Bianconi
@ 2025-12-01 13:45 ` Lorenzo Bianconi
  2025-12-01 13:45 ` [PATCH RFC nf-next 2/4] netfilter: flowtable: Add IP6IP6 rx sw acceleration Lorenzo Bianconi
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: Lorenzo Bianconi @ 2025-12-01 13:45 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	Phil Sutter, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, David Ahern, Shuah Khan
  Cc: netfilter-devel, coreteam, netdev, linux-kselftest,
	Lorenzo Bianconi

This is a preliminary patch to introduce IP6IP6 flowtable acceleration.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 net/netfilter/nf_flow_table_ip.c | 80 ++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 36 deletions(-)

diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 78883343e5d686014752ec4fe1a28319cbf08845..d28c256d33dc5a8d07490b765747b5c6c48aa67d 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -142,7 +142,18 @@ static bool ip_has_options(unsigned int thoff)
 	return thoff != sizeof(struct iphdr);
 }
 
-static void nf_flow_tuple_encap(struct sk_buff *skb,
+struct nf_flowtable_ctx {
+	const struct net_device	*in;
+	u32			offset;
+	u32			hdrsize;
+	struct {
+		u32 offset;
+		u8 proto;
+	} tun;
+};
+
+static void nf_flow_tuple_encap(struct nf_flowtable_ctx *ctx,
+				struct sk_buff *skb,
 				struct flow_offload_tuple *tuple)
 {
 	__be16 inner_proto = skb->protocol;
@@ -174,22 +185,15 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
 		break;
 	}
 
-	if (inner_proto == htons(ETH_P_IP)) {
+	if (inner_proto == htons(ETH_P_IP) &&
+	    ctx->tun.proto == IPPROTO_IPIP) {
 		iph = (struct iphdr *)(skb_network_header(skb) + offset);
-		if (iph->protocol == IPPROTO_IPIP) {
-			tuple->tun.dst_v4.s_addr = iph->daddr;
-			tuple->tun.src_v4.s_addr = iph->saddr;
-			tuple->tun.l3_proto = IPPROTO_IPIP;
-		}
+		tuple->tun.dst_v4.s_addr = iph->daddr;
+		tuple->tun.src_v4.s_addr = iph->saddr;
+		tuple->tun.l3_proto = IPPROTO_IPIP;
 	}
 }
 
-struct nf_flowtable_ctx {
-	const struct net_device	*in;
-	u32			offset;
-	u32			hdrsize;
-};
-
 static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
 			    struct flow_offload_tuple *tuple)
 {
@@ -257,7 +261,7 @@ static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
 	tuple->l3proto		= AF_INET;
 	tuple->l4proto		= ipproto;
 	tuple->iifidx		= ctx->in->ifindex;
-	nf_flow_tuple_encap(skb, tuple);
+	nf_flow_tuple_encap(ctx, skb, tuple);
 
 	return 0;
 }
@@ -293,15 +297,16 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
-static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
+static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
+				     struct sk_buff *skb)
 {
 	struct iphdr *iph;
 	u16 size;
 
-	if (!pskb_may_pull(skb, sizeof(*iph) + *psize))
+	if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
 		return false;
 
-	iph = (struct iphdr *)(skb_network_header(skb) + *psize);
+	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
 	size = iph->ihl << 2;
 
 	if (ip_is_fragment(iph) || unlikely(ip_has_options(size)))
@@ -310,25 +315,27 @@ static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
 	if (iph->ttl <= 1)
 		return false;
 
-	if (iph->protocol == IPPROTO_IPIP)
-		*psize += size;
+	if (iph->protocol == IPPROTO_IPIP) {
+		ctx->tun.proto = IPPROTO_IPIP;
+		ctx->tun.offset = size;
+		ctx->offset += size;
+	}
 
 	return true;
 }
 
-static void nf_flow_ip4_tunnel_pop(struct sk_buff *skb)
+static void nf_flow_ip4_tunnel_pop(struct nf_flowtable_ctx *ctx,
+				   struct sk_buff *skb)
 {
-	struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
-
-	if (iph->protocol != IPPROTO_IPIP)
+	if (ctx->tun.proto != IPPROTO_IPIP)
 		return;
 
-	skb_pull(skb, iph->ihl << 2);
+	skb_pull(skb, ctx->tun.offset);
 	skb_reset_network_header(skb);
 }
 
-static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
-				       u32 *offset)
+static bool nf_flow_skb_encap_protocol(struct nf_flowtable_ctx *ctx,
+				       struct sk_buff *skb, __be16 proto)
 {
 	__be16 inner_proto = skb->protocol;
 	struct vlan_ethhdr *veth;
@@ -341,7 +348,7 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
 
 		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
 		if (veth->h_vlan_encapsulated_proto == proto) {
-			*offset += VLAN_HLEN;
+			ctx->offset += VLAN_HLEN;
 			inner_proto = proto;
 			ret = true;
 		}
@@ -349,19 +356,20 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
 	case htons(ETH_P_PPP_SES):
 		if (nf_flow_pppoe_proto(skb, &inner_proto) &&
 		    inner_proto == proto) {
-			*offset += PPPOE_SES_HLEN;
+			ctx->offset += PPPOE_SES_HLEN;
 			ret = true;
 		}
 		break;
 	}
 
 	if (inner_proto == htons(ETH_P_IP))
-		ret = nf_flow_ip4_tunnel_proto(skb, offset);
+		ret = nf_flow_ip4_tunnel_proto(ctx, skb);
 
 	return ret;
 }
 
-static void nf_flow_encap_pop(struct sk_buff *skb,
+static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx,
+			      struct sk_buff *skb,
 			      struct flow_offload_tuple_rhash *tuplehash)
 {
 	struct vlan_hdr *vlan_hdr;
@@ -388,7 +396,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
 	}
 
 	if (skb->protocol == htons(ETH_P_IP))
-		nf_flow_ip4_tunnel_pop(skb);
+		nf_flow_ip4_tunnel_pop(ctx, skb);
 }
 
 struct nf_flow_xmit {
@@ -414,7 +422,7 @@ nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
 {
 	struct flow_offload_tuple tuple = {};
 
-	if (!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
+	if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IP)))
 		return NULL;
 
 	if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
@@ -458,7 +466,7 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
 
 	flow_offload_refresh(flow_table, flow, false);
 
-	nf_flow_encap_pop(skb, tuplehash);
+	nf_flow_encap_pop(ctx, skb, tuplehash);
 	thoff -= ctx->offset;
 
 	iph = ip_hdr(skb);
@@ -836,7 +844,7 @@ static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
 	tuple->l3proto		= AF_INET6;
 	tuple->l4proto		= nexthdr;
 	tuple->iifidx		= ctx->in->ifindex;
-	nf_flow_tuple_encap(skb, tuple);
+	nf_flow_tuple_encap(ctx, skb, tuple);
 
 	return 0;
 }
@@ -873,7 +881,7 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
 
 	flow_offload_refresh(flow_table, flow, false);
 
-	nf_flow_encap_pop(skb, tuplehash);
+	nf_flow_encap_pop(ctx, skb, tuplehash);
 
 	ip6h = ipv6_hdr(skb);
 	nf_flow_nat_ipv6(flow, skb, dir, ip6h);
@@ -895,7 +903,7 @@ nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
 	struct flow_offload_tuple tuple = {};
 
 	if (skb->protocol != htons(ETH_P_IPV6) &&
-	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset))
+	    !nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6)))
 		return NULL;
 
 	if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)

-- 
2.52.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH RFC nf-next 2/4] netfilter: flowtable: Add IP6IP6 rx sw acceleration
  2025-12-01 13:45 [PATCH RFC nf-next 0/4] Add IP6IP6 flowtable SW acceleration Lorenzo Bianconi
  2025-12-01 13:45 ` [PATCH RFC nf-next 1/4] netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct Lorenzo Bianconi
@ 2025-12-01 13:45 ` Lorenzo Bianconi
  2025-12-01 13:45 ` [PATCH RFC nf-next 3/4] netfilter: flowtable: Add IP6IP6 tx " Lorenzo Bianconi
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: Lorenzo Bianconi @ 2025-12-01 13:45 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	Phil Sutter, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, David Ahern, Shuah Khan
  Cc: netfilter-devel, coreteam, netdev, linux-kselftest,
	Lorenzo Bianconi

Introduce sw acceleration for the rx path of IP6IP6 tunnels, relying on
the netfilter flowtable infrastructure. A subsequent patch will add sw
acceleration for the IP6IP6 tunnel tx path.
IP6IP6 rx sw acceleration can be tested by running the following
scenario, where traffic is forwarded between two NICs (eth0 and eth1)
and an IP6IP6 tunnel is used to access a remote site (using eth1 as the
underlay device):

ETH0 -- TUN0 <==> ETH1 -- [IP network] -- TUN1 (2001:db8:3::2)

$ip addr show
6: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 00:00:22:33:11:55 brd ff:ff:ff:ff:ff:ff
    inet6 2001:db8:1::2/64 scope global nodad
       valid_lft forever preferred_lft forever
7: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 00:11:22:33:11:55 brd ff:ff:ff:ff:ff:ff
    inet6 2001:db8:2::1/64 scope global nodad
       valid_lft forever preferred_lft forever
8: tun0@NONE: <POINTOPOINT,NOARP,UP,LOWER_UP> mtu 1480 qdisc noqueue state UNKNOWN group default qlen 1000
    link/tunnel6 2001:db8:2::1 peer 2001:db8:2::2 permaddr ce9c:2940:7dcc::
    inet6 2002:db8:1::1/64 scope global nodad
       valid_lft forever preferred_lft forever

$ip -6 route show
2001:db8:1::/64 dev eth0 proto kernel metric 256 pref medium
2001:db8:2::/64 dev eth1 proto kernel metric 256 pref medium
2002:db8:1::/64 dev tun0 proto kernel metric 256 pref medium
default via 2002:db8:1::2 dev tun0 metric 1024 pref medium

$nft list ruleset
table inet filter {
        flowtable ft {
                hook ingress priority filter
                devices = { eth0, eth1 }
        }

        chain forward {
                type filter hook forward priority filter; policy accept;
                meta l4proto { tcp, udp } flow add @ft
        }
}

Reproducing the scenario described above using veths I got the following
results:
- TCP stream received from the IP6IP6 tunnel:
  - net-next: (baseline)                  ~ 79Gbps
  - net-next + IP6IP6 flowtable support:  ~106Gbps

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 net/ipv6/ip6_tunnel.c            | 27 +++++++++++
 net/netfilter/nf_flow_table_ip.c | 96 ++++++++++++++++++++++++++++++++++------
 2 files changed, 110 insertions(+), 13 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6405072050e0ef7521ca1fdddc4a0252e2159d2a..10341bfc16bd16a43290015952bd9a57658e6ae1 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1828,6 +1828,32 @@ int ip6_tnl_encap_setup(struct ip6_tnl *t,
 }
 EXPORT_SYMBOL_GPL(ip6_tnl_encap_setup);
 
+static int ip6_tnl_fill_forward_path(struct net_device_path_ctx *ctx,
+				     struct net_device_path *path)
+{
+	struct ip6_tnl *t = netdev_priv(ctx->dev);
+	struct flowi6 fl6 = {
+		.daddr = t->parms.raddr,
+	};
+	struct dst_entry *dst;
+	int err;
+
+	dst = ip6_route_output(dev_net(ctx->dev), NULL, &fl6);
+	if (!dst->error) {
+		path->type = DEV_PATH_TUN;
+		path->tun.src_v6 = t->parms.laddr;
+		path->tun.dst_v6 = t->parms.raddr;
+		path->tun.l3_proto = IPPROTO_IPV6;
+		path->dev = ctx->dev;
+		ctx->dev = dst->dev;
+	}
+
+	err = dst->error;
+	dst_release(dst);
+
+	return err;
+}
+
 static const struct net_device_ops ip6_tnl_netdev_ops = {
 	.ndo_init	= ip6_tnl_dev_init,
 	.ndo_uninit	= ip6_tnl_dev_uninit,
@@ -1836,6 +1862,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
 	.ndo_change_mtu = ip6_tnl_change_mtu,
 	.ndo_get_stats64 = dev_get_tstats64,
 	.ndo_get_iflink = ip6_tnl_get_iflink,
+	.ndo_fill_forward_path = ip6_tnl_fill_forward_path,
 };
 
 #define IPXIPX_FEATURES (NETIF_F_SG |		\
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index d28c256d33dc5a8d07490b765747b5c6c48aa67d..ca5a5d58eb2edbd61fe826eb892eaaf2d5b9e537 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -15,6 +15,7 @@
 #include <net/neighbour.h>
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netfilter/nf_conntrack_acct.h>
+#include <net/protocol.h>
 /* For layer 4 checksum field offset. */
 #include <linux/tcp.h>
 #include <linux/udp.h>
@@ -159,6 +160,7 @@ static void nf_flow_tuple_encap(struct nf_flowtable_ctx *ctx,
 	__be16 inner_proto = skb->protocol;
 	struct vlan_ethhdr *veth;
 	struct pppoe_hdr *phdr;
+	struct ipv6hdr *ip6h;
 	struct iphdr *iph;
 	u16 offset = 0;
 	int i = 0;
@@ -185,12 +187,25 @@ static void nf_flow_tuple_encap(struct nf_flowtable_ctx *ctx,
 		break;
 	}
 
-	if (inner_proto == htons(ETH_P_IP) &&
-	    ctx->tun.proto == IPPROTO_IPIP) {
+	switch (inner_proto) {
+	case htons(ETH_P_IP):
 		iph = (struct iphdr *)(skb_network_header(skb) + offset);
-		tuple->tun.dst_v4.s_addr = iph->daddr;
-		tuple->tun.src_v4.s_addr = iph->saddr;
-		tuple->tun.l3_proto = IPPROTO_IPIP;
+		if (ctx->tun.proto == IPPROTO_IPIP) {
+			tuple->tun.dst_v4.s_addr = iph->daddr;
+			tuple->tun.src_v4.s_addr = iph->saddr;
+			tuple->tun.l3_proto = IPPROTO_IPIP;
+		}
+		break;
+	case htons(ETH_P_IPV6):
+		ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+		if (ctx->tun.proto == IPPROTO_IPV6) {
+			tuple->tun.dst_v6 = ip6h->daddr;
+			tuple->tun.src_v6 = ip6h->saddr;
+			tuple->tun.l3_proto = IPPROTO_IPV6;
+		}
+		break;
+	default:
+		break;
 	}
 }
 
@@ -324,10 +339,57 @@ static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
 	return true;
 }
 
-static void nf_flow_ip4_tunnel_pop(struct nf_flowtable_ctx *ctx,
-				   struct sk_buff *skb)
+static bool nf_flow_ip6_tunnel_proto(struct nf_flowtable_ctx *ctx,
+				     struct sk_buff *skb)
+{
+	const struct inet6_protocol *ipprot;
+	struct ipv6hdr *ip6h;
+
+	if (!pskb_may_pull(skb, sizeof(*ip6h) + ctx->offset))
+		return false;
+
+	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
+	if (ip6h->hop_limit <= 1)
+		return false;
+
+	/* Initialize default values for extension headers parsing */
+	skb->transport_header = skb->network_header + sizeof(*ip6h) +
+				ctx->offset;
+	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr) + ctx->offset;
+
+	do {
+		unsigned int nhoff = IP6CB(skb)->nhoff;
+		u8 nexthdr;
+
+		if (!pskb_pull(skb, skb_transport_offset(skb)))
+			return false;
+
+		nexthdr = skb_network_header(skb)[nhoff];
+		ipprot = rcu_dereference(inet6_protos[nexthdr]);
+		if (!ipprot)
+			break;
+
+		if (ipprot->flags & INET6_PROTO_FINAL) {
+			if (nexthdr == IPPROTO_IPV6) {
+				ctx->tun.offset = skb->transport_header -
+						  skb->network_header - ctx->offset;
+				ctx->tun.proto = IPPROTO_IPV6;
+			}
+			break;
+		}
+	} while (ipprot->handler(skb) > 0);
+
+	skb_push(skb, skb->transport_header - skb->network_header);
+	ctx->offset += ctx->tun.offset;
+
+	return true;
+}
+
+static void nf_flow_ip_tunnel_pop(struct nf_flowtable_ctx *ctx,
+				  struct sk_buff *skb)
 {
-	if (ctx->tun.proto != IPPROTO_IPIP)
+	if (ctx->tun.proto != IPPROTO_IPIP &&
+	    ctx->tun.proto != IPPROTO_IPV6)
 		return;
 
 	skb_pull(skb, ctx->tun.offset);
@@ -362,8 +424,16 @@ static bool nf_flow_skb_encap_protocol(struct nf_flowtable_ctx *ctx,
 		break;
 	}
 
-	if (inner_proto == htons(ETH_P_IP))
+	switch (inner_proto) {
+	case htons(ETH_P_IP):
 		ret = nf_flow_ip4_tunnel_proto(ctx, skb);
+		break;
+	case htons(ETH_P_IPV6):
+		ret = nf_flow_ip6_tunnel_proto(ctx, skb);
+		break;
+	default:
+		break;
+	}
 
 	return ret;
 }
@@ -395,8 +465,9 @@ static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx,
 		}
 	}
 
-	if (skb->protocol == htons(ETH_P_IP))
-		nf_flow_ip4_tunnel_pop(ctx, skb);
+	if (skb->protocol == htons(ETH_P_IP) ||
+	    skb->protocol == htons(ETH_P_IPV6))
+		nf_flow_ip_tunnel_pop(ctx, skb);
 }
 
 struct nf_flow_xmit {
@@ -902,8 +973,7 @@ nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
 {
 	struct flow_offload_tuple tuple = {};
 
-	if (skb->protocol != htons(ETH_P_IPV6) &&
-	    !nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6)))
+	if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6)))
 		return NULL;
 
 	if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)

-- 
2.52.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH RFC nf-next 3/4] netfilter: flowtable: Add IP6IP6 tx sw acceleration
  2025-12-01 13:45 [PATCH RFC nf-next 0/4] Add IP6IP6 flowtable SW acceleration Lorenzo Bianconi
  2025-12-01 13:45 ` [PATCH RFC nf-next 1/4] netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct Lorenzo Bianconi
  2025-12-01 13:45 ` [PATCH RFC nf-next 2/4] netfilter: flowtable: Add IP6IP6 rx sw acceleration Lorenzo Bianconi
@ 2025-12-01 13:45 ` Lorenzo Bianconi
  2025-12-01 13:45 ` [PATCH RFC nf-next 4/4] selftests: netfilter: nft_flowtable.sh: Add IP6IP6 flowtable selftest Lorenzo Bianconi
  2025-12-01 17:54 ` [syzbot ci] Re: Add IP6IP6 flowtable SW acceleration syzbot ci
  4 siblings, 0 replies; 7+ messages in thread
From: Lorenzo Bianconi @ 2025-12-01 13:45 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	Phil Sutter, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, David Ahern, Shuah Khan
  Cc: netfilter-devel, coreteam, netdev, linux-kselftest,
	Lorenzo Bianconi

Introduce sw acceleration for the tx path of IP6IP6 tunnels, relying on
the netfilter flowtable infrastructure.
IP6IP6 tx sw acceleration can be tested by running the following
scenario, where traffic is forwarded between two NICs (eth0 and eth1)
and an IP6IP6 tunnel is used to access a remote site (using eth1 as the
underlay device):

ETH0 -- TUN0 <==> ETH1 -- [IP network] -- TUN1 (2001:db8:3::2)

$ip addr show
6: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 00:00:22:33:11:55 brd ff:ff:ff:ff:ff:ff
    inet6 2001:db8:1::2/64 scope global nodad
       valid_lft forever preferred_lft forever
7: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 00:11:22:33:11:55 brd ff:ff:ff:ff:ff:ff
    inet6 2001:db8:2::1/64 scope global nodad
       valid_lft forever preferred_lft forever
8: tun0@NONE: <POINTOPOINT,NOARP,UP,LOWER_UP> mtu 1480 qdisc noqueue state UNKNOWN group default qlen 1000
    link/tunnel6 2001:db8:2::1 peer 2001:db8:2::2 permaddr ce9c:2940:7dcc::
    inet6 2002:db8:1::1/64 scope global nodad
       valid_lft forever preferred_lft forever

$ip -6 route show
2001:db8:1::/64 dev eth0 proto kernel metric 256 pref medium
2001:db8:2::/64 dev eth1 proto kernel metric 256 pref medium
2002:db8:1::/64 dev tun0 proto kernel metric 256 pref medium
default via 2002:db8:1::2 dev tun0 metric 1024 pref medium

$nft list ruleset
table inet filter {
        flowtable ft {
                hook ingress priority filter
                devices = { eth0, eth1 }
        }

        chain forward {
                type filter hook forward priority filter; policy accept;
                meta l4proto { tcp, udp } flow add @ft
        }
}

Reproducing the scenario described above using veths I got the following
results:
- TCP stream received from the IP6IP6 tunnel:
  - net-next: (baseline)                  ~93Gbps
  - net-next + IP6IP6 flowtable support:  ~98Gbps

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 net/netfilter/nf_flow_table_ip.c | 93 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)

diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index ca5a5d58eb2edbd61fe826eb892eaaf2d5b9e537..e5182630a83e3e959c7bde293ddc3beef295cd51 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -12,6 +12,7 @@
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
+#include <net/ip6_tunnel.h>
 #include <net/neighbour.h>
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netfilter/nf_conntrack_acct.h>
@@ -646,6 +647,91 @@ static int nf_flow_tunnel_v4_push(struct net *net, struct sk_buff *skb,
 	return 0;
 }
 
+struct ipv6_tel_txoption {
+	struct ipv6_txoptions ops;
+	__u8 dst_opt[8];
+};
+
+static int nf_flow_tunnel_ip6ip6_push(struct net *net, struct sk_buff *skb,
+				      struct flow_offload_tuple *tuple,
+				      struct in6_addr **ip6_daddr)
+{
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)skb_network_header(skb);
+	int err, mtu, encap_limit = IPV6_DEFAULT_TNL_ENCAP_LIMIT;
+	u8 hop_limit = ip6h->hop_limit, proto = IPPROTO_IPV6;
+	struct rtable *rt = dst_rtable(tuple->dst_cache);
+	__u8 dsfield = ipv6_get_dsfield(ip6h);
+	struct flowi6 fl6 = {
+		.daddr = tuple->tun.src_v6,
+		.saddr = tuple->tun.dst_v6,
+		.flowi6_proto = proto,
+	};
+	u32 headroom;
+
+	err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
+	if (err)
+		return err;
+
+	skb_set_inner_ipproto(skb, proto);
+	headroom = sizeof(*ip6h) + LL_RESERVED_SPACE(rt->dst.dev) +
+		   rt->dst.header_len;
+	if (encap_limit)
+		headroom += 8;
+	err = skb_cow_head(skb, headroom);
+	if (err)
+		return err;
+
+	skb_scrub_packet(skb, true);
+	mtu = dst_mtu(&rt->dst) - sizeof(*ip6h);
+	if (encap_limit)
+		mtu -= 8;
+	mtu = max(mtu, IPV6_MIN_MTU);
+	skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+	if (encap_limit > 0) {
+		struct ipv6_tel_txoption opt = {
+			.dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT,
+			.dst_opt[3] = 1,
+			.dst_opt[4] = encap_limit,
+			.dst_opt[5] = IPV6_TLV_PADN,
+			.dst_opt[6] = 1,
+		};
+
+		opt.ops.dst1opt = (struct ipv6_opt_hdr *)opt.dst_opt;
+		opt.ops.opt_nflen = 8;
+		ipv6_push_frag_opts(skb, &opt.ops, &proto);
+	}
+
+	skb_push(skb, sizeof(*ip6h));
+	skb_reset_network_header(skb);
+
+	ip6h = ipv6_hdr(skb);
+	ip6_flow_hdr(ip6h, dsfield,
+		     ip6_make_flowlabel(net, skb, fl6.flowlabel, true, &fl6));
+	ip6h->hop_limit = hop_limit;
+	ip6h->nexthdr = proto;
+	ip6h->daddr = tuple->tun.src_v6;
+	ip6h->saddr = tuple->tun.dst_v6;
+	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(*ip6h));
+	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+	IP6CB(skb)->flags = 0;
+	memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+
+	*ip6_daddr = &tuple->tun.src_v6;
+
+	return 0;
+}
+
+static int nf_flow_tunnel_v6_push(struct net *net, struct sk_buff *skb,
+				  struct flow_offload_tuple *tuple,
+				  struct in6_addr **ip6_daddr)
+{
+	if (tuple->tun_num)
+		return nf_flow_tunnel_ip6ip6_push(net, skb, tuple, ip6_daddr);
+
+	return 0;
+}
+
 static int nf_flow_encap_push(struct sk_buff *skb,
 			      struct flow_offload_tuple *tuple)
 {
@@ -934,6 +1020,9 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
 
 	mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
+	if (flow->tuplehash[!dir].tuple.tun_num)
+		mtu -= sizeof(*ip6h);
+
 	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
 		return 0;
 
@@ -1023,6 +1112,10 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	other_tuple = &flow->tuplehash[!dir].tuple;
 	ip6_daddr = &other_tuple->src_v6;
 
+	if (nf_flow_tunnel_v6_push(state->net, skb, other_tuple,
+				   &ip6_daddr) < 0)
+		return NF_DROP;
+
 	if (nf_flow_encap_push(skb, other_tuple) < 0)
 		return NF_DROP;
 

-- 
2.52.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH RFC nf-next 4/4] selftests: netfilter: nft_flowtable.sh: Add IP6IP6 flowtable selftest
  2025-12-01 13:45 [PATCH RFC nf-next 0/4] Add IP6IP6 flowtable SW acceleration Lorenzo Bianconi
                   ` (2 preceding siblings ...)
  2025-12-01 13:45 ` [PATCH RFC nf-next 3/4] netfilter: flowtable: Add IP6IP6 tx " Lorenzo Bianconi
@ 2025-12-01 13:45 ` Lorenzo Bianconi
  2025-12-01 17:54 ` [syzbot ci] Re: Add IP6IP6 flowtable SW acceleration syzbot ci
  4 siblings, 0 replies; 7+ messages in thread
From: Lorenzo Bianconi @ 2025-12-01 13:45 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	Phil Sutter, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, David Ahern, Shuah Khan
  Cc: netfilter-devel, coreteam, netdev, linux-kselftest,
	Lorenzo Bianconi

Similar to IPIP, introduce a specific selftest for IP6IP6 flowtable SW
acceleration in nft_flowtable.sh.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 .../selftests/net/netfilter/nft_flowtable.sh       | 62 ++++++++++++++++++----
 1 file changed, 53 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index a68bc882fa4ec0175d8db3df8337bb5fe6c42e26..14d7f67715edc17ee1917913a3d0f417215200ba 100755
--- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -592,16 +592,28 @@ ip -net "$nsr1" link set tun0 up
 ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
 ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
 
+ip -net "$nsr1" link add name tun6 type ip6tnl local fee1:2::1 remote fee1:2::2
+ip -net "$nsr1" link set tun6 up
+ip -net "$nsr1" addr add fee1:3::1/64 dev tun6 nodad
+
 ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
 ip -net "$nsr2" link set tun0 up
 ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
 ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
 
+ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1
+ip -net "$nsr2" link set tun6 up
+ip -net "$nsr2" addr add fee1:3::2/64 dev tun6 nodad
+
 ip -net "$nsr1" route change default via 192.168.100.2
 ip -net "$nsr2" route change default via 192.168.100.1
+ip -6 -net "$nsr1" route change default via fee1:3::2
+ip -6 -net "$nsr2" route change default via fee1:3::1
 ip -net "$ns2" route add default via 10.0.2.1
+ip -6 -net "$ns2" route add default via dead:2::1
 
 ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept'
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6 accept'
 ip netns exec "$nsr1" nft -a insert rule inet filter forward \
 	'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'
 
@@ -611,28 +623,51 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
 	ret=1
 fi
 
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+	echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel"
+else
+	echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
 # Create vlan tagged devices for IPIP traffic.
 ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
 ip -net "$nsr1" link set veth1.10 up
 ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
+ip -net "$nsr1" addr add fee1:4::1/64 dev veth1.10 nodad
 ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
 ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'
-ip -net "$nsr1" link add name tun1 type ipip local 192.168.20.1 remote 192.168.20.2
-ip -net "$nsr1" link set tun1 up
-ip -net "$nsr1" addr add 192.168.200.1/24 dev tun1
+
+ip -net "$nsr1" link add name tun0.10 type ipip local 192.168.20.1 remote 192.168.20.2
+ip -net "$nsr1" link set tun0.10 up
+ip -net "$nsr1" addr add 192.168.200.1/24 dev tun0.10
 ip -net "$nsr1" route change default via 192.168.200.2
-ip netns exec "$nsr1" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
-ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun1 accept'
+ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0.10 accept'
+
+ip -net "$nsr1" link add name tun6.10 type ip6tnl local fee1:4::1 remote fee1:4::2
+ip -net "$nsr1" link set tun6.10 up
+ip -net "$nsr1" addr add fee1:5::1/64 dev tun6.10 nodad
+ip -6 -net "$nsr1" route change default via fee1:5::2
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6.10 accept'
 
 ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
 ip -net "$nsr2" link set veth0.10 up
 ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
+ip -net "$nsr2" addr add fee1:4::2/64 dev veth0.10 nodad
 ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null
-ip -net "$nsr2" link add name tun1 type ipip local 192.168.20.2 remote 192.168.20.1
-ip -net "$nsr2" link set tun1 up
-ip -net "$nsr2" addr add 192.168.200.2/24 dev tun1
+
+ip -net "$nsr2" link add name tun0.10 type ipip local 192.168.20.2 remote 192.168.20.1
+ip -net "$nsr2" link set tun0.10 up
+ip -net "$nsr2" addr add 192.168.200.2/24 dev tun0.10
 ip -net "$nsr2" route change default via 192.168.200.1
-ip netns exec "$nsr2" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
+ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
+
+ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1
+ip -net "$nsr2" link set tun6.10 up
+ip -net "$nsr2" addr add fee1:5::2/64 dev tun6.10 nodad
+ip -6 -net "$nsr2" route change default via fee1:5::1
 
 if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
 	echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
@@ -640,10 +675,19 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
 	ret=1
 fi
 
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+	echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel over vlan"
+else
+	echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel over vlan" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
 # Restore the previous configuration
 ip -net "$nsr1" route change default via 192.168.10.2
 ip -net "$nsr2" route change default via 192.168.10.1
 ip -net "$ns2" route del default via 10.0.2.1
+ip -6 -net "$ns2" route del default via dead:2::1
 }
 
 # Another test:

-- 
2.52.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [syzbot ci] Re: Add IP6IP6 flowtable SW acceleration
  2025-12-01 13:45 [PATCH RFC nf-next 0/4] Add IP6IP6 flowtable SW acceleration Lorenzo Bianconi
                   ` (3 preceding siblings ...)
  2025-12-01 13:45 ` [PATCH RFC nf-next 4/4] selftests: netfilter: nft_flowtable.sh: Add IP6IP6 flowtable selftest Lorenzo Bianconi
@ 2025-12-01 17:54 ` syzbot ci
  4 siblings, 0 replies; 7+ messages in thread
From: syzbot ci @ 2025-12-01 17:54 UTC (permalink / raw)
  To: coreteam, davem, dsahern, edumazet, fw, horms, kadlec, kuba,
	linux-kselftest, lorenzo, netdev, netfilter-devel, pabeni, pablo,
	phil, shuah
  Cc: syzbot, syzkaller-bugs

syzbot ci has tested the following series

[v1] Add IP6IP6 flowtable SW acceleration
https://lore.kernel.org/all/20251201-flowtable-offload-ip6ip6-v1-0-1dabf534c074@kernel.org
* [PATCH RFC nf-next 1/4] netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct
* [PATCH RFC nf-next 2/4] netfilter: flowtable: Add IP6IP6 rx sw acceleration
* [PATCH RFC nf-next 3/4] netfilter: flowtable: Add IP6IP6 tx sw acceleration
* [PATCH RFC nf-next 4/4] selftests: netfilter: nft_flowtable.sh: Add IP6IP6 flowtable selftest

and found the following issue:
KASAN: slab-use-after-free Read in nf_flow_skb_encap_protocol

Full report is available here:
https://ci.syzbot.org/series/df395c22-6768-4a9f-9a96-56b5307acbc6

***

KASAN: slab-use-after-free Read in nf_flow_skb_encap_protocol

tree:      nf-next
URL:       https://kernel.googlesource.com/pub/scm/linux/kernel/git/netfilter/nf-next.git
base:      ff736a286116d462a4067ba258fa351bc0b4ed80
arch:      amd64
compiler:  Debian clang version 20.1.8 (++20250708063551+0c9f909b7976-1~exp1~20250708183702.136), Debian LLD 20.1.8
config:    https://ci.syzbot.org/builds/018f7259-7abc-4b77-b40c-e8e520e1e793/config
C repro:   https://ci.syzbot.org/findings/5cb5bc43-721b-4a9e-a093-4d52aab5ba9f/c_repro
syz repro: https://ci.syzbot.org/findings/5cb5bc43-721b-4a9e-a093-4d52aab5ba9f/syz_repro

==================================================================
BUG: KASAN: slab-use-after-free in nf_flow_ip6_tunnel_proto net/netfilter/nf_flow_table_ip.c:383 [inline]
BUG: KASAN: slab-use-after-free in nf_flow_skb_encap_protocol+0x1336/0x14e0 net/netfilter/nf_flow_table_ip.c:433
Read of size 2 at addr ffff888115de92b6 by task syz.0.20/5970

CPU: 0 UID: 0 PID: 5970 Comm: syz.0.20 Not tainted syzkaller #0 PREEMPT(full) 
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
Call Trace:
 <TASK>
 dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120
 print_address_description mm/kasan/report.c:378 [inline]
 print_report+0xca/0x240 mm/kasan/report.c:482
 kasan_report+0x118/0x150 mm/kasan/report.c:595
 nf_flow_ip6_tunnel_proto net/netfilter/nf_flow_table_ip.c:383 [inline]
 nf_flow_skb_encap_protocol+0x1336/0x14e0 net/netfilter/nf_flow_table_ip.c:433
 nf_flow_offload_ipv6_lookup net/netfilter/nf_flow_table_ip.c:1065 [inline]
 nf_flow_offload_ipv6_hook+0x131/0x3380 net/netfilter/nf_flow_table_ip.c:1092
 nf_hook_entry_hookfn include/linux/netfilter.h:158 [inline]
 nf_hook_slow+0xc5/0x220 net/netfilter/core.c:623
 nf_hook_ingress include/linux/netfilter_netdev.h:34 [inline]
 nf_ingress net/core/dev.c:5900 [inline]
 __netif_receive_skb_core+0x241c/0x2f90 net/core/dev.c:5996
 __netif_receive_skb_one_core net/core/dev.c:6135 [inline]
 __netif_receive_skb+0x72/0x380 net/core/dev.c:6250
 netif_receive_skb_internal net/core/dev.c:6336 [inline]
 netif_receive_skb+0x1cb/0x790 net/core/dev.c:6395
 tun_rx_batched+0x1b9/0x730 drivers/net/tun.c:1485
 tun_get_user+0x2b65/0x3e90 drivers/net/tun.c:1953
 tun_chr_write_iter+0x113/0x200 drivers/net/tun.c:1999
 new_sync_write fs/read_write.c:593 [inline]
 vfs_write+0x5c9/0xb30 fs/read_write.c:686
 ksys_write+0x145/0x250 fs/read_write.c:738
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f8b24f8f7c9
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f8b25e26038 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 00007f8b251e6090 RCX: 00007f8b24f8f7c9
RDX: 000000000000fdef RSI: 0000200000000440 RDI: 0000000000000003
RBP: 00007f8b24ff297f R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007f8b251e6128 R14: 00007f8b251e6090 R15: 00007ffe014b8a38
 </TASK>

Allocated by task 5970:
 kasan_save_stack mm/kasan/common.c:56 [inline]
 kasan_save_track+0x3e/0x80 mm/kasan/common.c:77
 unpoison_slab_object mm/kasan/common.c:342 [inline]
 __kasan_slab_alloc+0x6c/0x80 mm/kasan/common.c:368
 kasan_slab_alloc include/linux/kasan.h:252 [inline]
 slab_post_alloc_hook mm/slub.c:4978 [inline]
 slab_alloc_node mm/slub.c:5288 [inline]
 kmem_cache_alloc_node_noprof+0x433/0x710 mm/slub.c:5340
 __alloc_skb+0x255/0x430 net/core/skbuff.c:679
 alloc_skb include/linux/skbuff.h:1383 [inline]
 alloc_skb_with_frags+0xca/0x890 net/core/skbuff.c:6712
 sock_alloc_send_pskb+0x84d/0x980 net/core/sock.c:2995
 tun_alloc_skb drivers/net/tun.c:1461 [inline]
 tun_get_user+0xa43/0x3e90 drivers/net/tun.c:1794
 tun_chr_write_iter+0x113/0x200 drivers/net/tun.c:1999
 new_sync_write fs/read_write.c:593 [inline]
 vfs_write+0x5c9/0xb30 fs/read_write.c:686
 ksys_write+0x145/0x250 fs/read_write.c:738
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

Freed by task 5970:
 kasan_save_stack mm/kasan/common.c:56 [inline]
 kasan_save_track+0x3e/0x80 mm/kasan/common.c:77
 __kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:587
 kasan_save_free_info mm/kasan/kasan.h:406 [inline]
 poison_slab_object mm/kasan/common.c:252 [inline]
 __kasan_slab_free+0x5c/0x80 mm/kasan/common.c:284
 kasan_slab_free include/linux/kasan.h:234 [inline]
 slab_free_hook mm/slub.c:2543 [inline]
 slab_free mm/slub.c:6642 [inline]
 kmem_cache_free+0x19b/0x690 mm/slub.c:6752
 icmpv6_param_prob include/linux/icmpv6.h:95 [inline]
 ipv6_rthdr_rcv+0x150e/0x2020 net/ipv6/exthdrs.c:828
 nf_flow_ip6_tunnel_proto net/netfilter/nf_flow_table_ip.c:381 [inline]
 nf_flow_skb_encap_protocol+0x9b5/0x14e0 net/netfilter/nf_flow_table_ip.c:433
 nf_flow_offload_ipv6_lookup net/netfilter/nf_flow_table_ip.c:1065 [inline]
 nf_flow_offload_ipv6_hook+0x131/0x3380 net/netfilter/nf_flow_table_ip.c:1092
 nf_hook_entry_hookfn include/linux/netfilter.h:158 [inline]
 nf_hook_slow+0xc5/0x220 net/netfilter/core.c:623
 nf_hook_ingress include/linux/netfilter_netdev.h:34 [inline]
 nf_ingress net/core/dev.c:5900 [inline]
 __netif_receive_skb_core+0x241c/0x2f90 net/core/dev.c:5996
 __netif_receive_skb_one_core net/core/dev.c:6135 [inline]
 __netif_receive_skb+0x72/0x380 net/core/dev.c:6250
 netif_receive_skb_internal net/core/dev.c:6336 [inline]
 netif_receive_skb+0x1cb/0x790 net/core/dev.c:6395
 tun_rx_batched+0x1b9/0x730 drivers/net/tun.c:1485
 tun_get_user+0x2b65/0x3e90 drivers/net/tun.c:1953
 tun_chr_write_iter+0x113/0x200 drivers/net/tun.c:1999
 new_sync_write fs/read_write.c:593 [inline]
 vfs_write+0x5c9/0xb30 fs/read_write.c:686
 ksys_write+0x145/0x250 fs/read_write.c:738
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

The buggy address belongs to the object at ffff888115de9200
 which belongs to the cache skbuff_head_cache of size 240
The buggy address is located 182 bytes inside of
 freed 240-byte region [ffff888115de9200, ffff888115de92f0)

The buggy address belongs to the physical page:
page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x115de8
head: order:1 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0
flags: 0x17ff00000000040(head|node=0|zone=2|lastcpupid=0x7ff)
page_type: f5(slab)
raw: 017ff00000000040 ffff8881036ba8c0 ffffea0004280a80 dead000000000002
raw: 0000000000000000 0000000080150015 00000000f5000000 0000000000000000
head: 017ff00000000040 ffff8881036ba8c0 ffffea0004280a80 dead000000000002
head: 0000000000000000 0000000080150015 00000000f5000000 0000000000000000
head: 017ff00000000001 ffffea0004577a01 00000000ffffffff 00000000ffffffff
head: ffffffffffffffff 0000000000000000 00000000ffffffff 0000000000000002
page dumped because: kasan: bad access detected
page_owner tracks the page as allocated
page last allocated via order 1, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 5555, tgid 5555 (dhcpcd), ts 36767395595, free_ts 35122573404
 set_page_owner include/linux/page_owner.h:32 [inline]
 post_alloc_hook+0x234/0x290 mm/page_alloc.c:1845
 prep_new_page mm/page_alloc.c:1853 [inline]
 get_page_from_freelist+0x2365/0x2440 mm/page_alloc.c:3879
 __alloc_frozen_pages_noprof+0x181/0x370 mm/page_alloc.c:5178
 alloc_pages_mpol+0x232/0x4a0 mm/mempolicy.c:2416
 alloc_slab_page mm/slub.c:3059 [inline]
 allocate_slab+0x96/0x350 mm/slub.c:3232
 new_slab mm/slub.c:3286 [inline]
 ___slab_alloc+0xf56/0x1990 mm/slub.c:4655
 __slab_alloc+0x65/0x100 mm/slub.c:4778
 __slab_alloc_node mm/slub.c:4854 [inline]
 slab_alloc_node mm/slub.c:5276 [inline]
 kmem_cache_alloc_node_noprof+0x4c5/0x710 mm/slub.c:5340
 __alloc_skb+0x255/0x430 net/core/skbuff.c:679
 alloc_skb include/linux/skbuff.h:1383 [inline]
 alloc_skb_with_frags+0xca/0x890 net/core/skbuff.c:6712
 sock_alloc_send_pskb+0x84d/0x980 net/core/sock.c:2995
 unix_dgram_sendmsg+0x454/0x1840 net/unix/af_unix.c:2139
 sock_sendmsg_nosec net/socket.c:727 [inline]
 __sock_sendmsg+0x21c/0x270 net/socket.c:742
 sock_write_iter+0x279/0x360 net/socket.c:1195
 do_iter_readv_writev+0x623/0x8c0 fs/read_write.c:-1
 vfs_writev+0x31a/0x960 fs/read_write.c:1057
page last free pid 5262 tgid 5262 stack trace:
 reset_page_owner include/linux/page_owner.h:25 [inline]
 free_pages_prepare mm/page_alloc.c:1394 [inline]
 __free_frozen_pages+0xbc4/0xd30 mm/page_alloc.c:2901
 __slab_free+0x2e7/0x390 mm/slub.c:5970
 qlink_free mm/kasan/quarantine.c:163 [inline]
 qlist_free_all+0x97/0x140 mm/kasan/quarantine.c:179
 kasan_quarantine_reduce+0x148/0x160 mm/kasan/quarantine.c:286
 __kasan_slab_alloc+0x22/0x80 mm/kasan/common.c:352
 kasan_slab_alloc include/linux/kasan.h:252 [inline]
 slab_post_alloc_hook mm/slub.c:4978 [inline]
 slab_alloc_node mm/slub.c:5288 [inline]
 kmem_cache_alloc_noprof+0x367/0x6e0 mm/slub.c:5295
 getname_flags+0xb8/0x540 fs/namei.c:146
 getname include/linux/fs.h:2924 [inline]
 do_sys_openat2+0xbc/0x1c0 fs/open.c:1431
 do_sys_open fs/open.c:1452 [inline]
 __do_sys_openat fs/open.c:1468 [inline]
 __se_sys_openat fs/open.c:1463 [inline]
 __x64_sys_openat+0x138/0x170 fs/open.c:1463
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

Memory state around the buggy address:
 ffff888115de9180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff888115de9200: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>ffff888115de9280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc
                                     ^
 ffff888115de9300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff888115de9380: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
==================================================================


***

If these findings have caused you to resend the series or submit a
separate fix, please add the following tag to your commit message:
  Tested-by: syzbot@syzkaller.appspotmail.com

---
This report is generated by a bot. It may contain errors.
syzbot ci engineers can be reached at syzkaller@googlegroups.com.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [syzbot ci] Re: Add IP6IP6 flowtable SW acceleration
  2025-12-07 16:06 [PATCH nf-next 0/5] " Lorenzo Bianconi
@ 2025-12-07 23:54 ` syzbot ci
  0 siblings, 0 replies; 7+ messages in thread
From: syzbot ci @ 2025-12-07 23:54 UTC (permalink / raw)
  To: coreteam, davem, dsahern, edumazet, fw, horms, kadlec, kuba,
	linux-kselftest, lorenzo, netdev, netfilter-devel, pabeni, pablo,
	phil, shuah
  Cc: syzbot, syzkaller-bugs

syzbot ci has tested the following series

[v1] Add IP6IP6 flowtable SW acceleration
https://lore.kernel.org/all/20251207-b4-flowtable-offload-ip6ip6-v1-0-18e3ab7f748c@kernel.org
* [PATCH nf-next 1/5] netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct
* [PATCH nf-next 2/5] netfilter: Modify nf_flow_skb_encap_protocol() to return int instead of bool
* [PATCH nf-next 3/5] netfilter: flowtable: Add IP6IP6 rx sw acceleration
* [PATCH nf-next 4/5] netfilter: flowtable: Add IP6IP6 tx sw acceleration
* [PATCH nf-next 5/5] selftests: netfilter: nft_flowtable.sh: Add IP6IP6 flowtable selftest

and found the following issues:
* KASAN: slab-use-after-free Read in nf_flow_skb_encap_protocol
* general protection fault in ipv6_frag_rcv
* general protection fault in nf_flow_offload_ipv6_hook

Full report is available here:
https://ci.syzbot.org/series/24632ec9-06ab-4e09-8015-19822d83c6a9

***

KASAN: slab-use-after-free Read in nf_flow_skb_encap_protocol

tree:      nf-next
URL:       https://kernel.googlesource.com/pub/scm/linux/kernel/git/netfilter/nf-next.git
base:      ff736a286116d462a4067ba258fa351bc0b4ed80
arch:      amd64
compiler:  Debian clang version 20.1.8 (++20250708063551+0c9f909b7976-1~exp1~20250708183702.136), Debian LLD 20.1.8
config:    https://ci.syzbot.org/builds/833219cc-e7e2-437d-b31c-1eb3666c1e93/config
C repro:   https://ci.syzbot.org/findings/afcab565-801f-4cde-a1c5-b1d327537871/c_repro
syz repro: https://ci.syzbot.org/findings/afcab565-801f-4cde-a1c5-b1d327537871/syz_repro

==================================================================
BUG: KASAN: slab-use-after-free in nf_flow_ip6_tunnel_proto net/netfilter/nf_flow_table_ip.c:388 [inline]
BUG: KASAN: slab-use-after-free in nf_flow_skb_encap_protocol+0x13a2/0x1540 net/netfilter/nf_flow_table_ip.c:438
Read of size 2 at addr ffff888175605eb6 by task syz.1.20/6010

CPU: 1 UID: 0 PID: 6010 Comm: syz.1.20 Not tainted syzkaller #0 PREEMPT(full) 
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
Call Trace:
 <TASK>
 dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120
 print_address_description mm/kasan/report.c:378 [inline]
 print_report+0xca/0x240 mm/kasan/report.c:482
 kasan_report+0x118/0x150 mm/kasan/report.c:595
 nf_flow_ip6_tunnel_proto net/netfilter/nf_flow_table_ip.c:388 [inline]
 nf_flow_skb_encap_protocol+0x13a2/0x1540 net/netfilter/nf_flow_table_ip.c:438
 nf_flow_offload_ipv6_lookup net/netfilter/nf_flow_table_ip.c:1074 [inline]
 nf_flow_offload_ipv6_hook+0x13c/0x32a0 net/netfilter/nf_flow_table_ip.c:1102
 nf_hook_entry_hookfn include/linux/netfilter.h:158 [inline]
 nf_hook_slow+0xc5/0x220 net/netfilter/core.c:623
 nf_hook_ingress include/linux/netfilter_netdev.h:34 [inline]
 nf_ingress net/core/dev.c:5900 [inline]
 __netif_receive_skb_core+0x241c/0x2f90 net/core/dev.c:5996
 __netif_receive_skb_one_core net/core/dev.c:6135 [inline]
 __netif_receive_skb+0x72/0x380 net/core/dev.c:6250
 netif_receive_skb_internal net/core/dev.c:6336 [inline]
 netif_receive_skb+0x1cb/0x790 net/core/dev.c:6395
 tun_rx_batched+0x1b9/0x730 drivers/net/tun.c:1485
 tun_get_user+0x2b65/0x3e90 drivers/net/tun.c:1953
 tun_chr_write_iter+0x113/0x200 drivers/net/tun.c:1999
 new_sync_write fs/read_write.c:593 [inline]
 vfs_write+0x5c9/0xb30 fs/read_write.c:686
 ksys_write+0x145/0x250 fs/read_write.c:738
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f53b9d8f7c9
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f53babe6038 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 00007f53b9fe6090 RCX: 00007f53b9d8f7c9
RDX: 000000000000fdef RSI: 0000200000000340 RDI: 0000000000000003
RBP: 00007f53b9df297f R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007f53b9fe6128 R14: 00007f53b9fe6090 R15: 00007ffc1b120a08
 </TASK>

Allocated by task 6010:
 kasan_save_stack mm/kasan/common.c:56 [inline]
 kasan_save_track+0x3e/0x80 mm/kasan/common.c:77
 unpoison_slab_object mm/kasan/common.c:342 [inline]
 __kasan_slab_alloc+0x6c/0x80 mm/kasan/common.c:368
 kasan_slab_alloc include/linux/kasan.h:252 [inline]
 slab_post_alloc_hook mm/slub.c:4978 [inline]
 slab_alloc_node mm/slub.c:5288 [inline]
 kmem_cache_alloc_node_noprof+0x433/0x710 mm/slub.c:5340
 __alloc_skb+0x255/0x430 net/core/skbuff.c:679
 alloc_skb include/linux/skbuff.h:1383 [inline]
 alloc_skb_with_frags+0xca/0x890 net/core/skbuff.c:6712
 sock_alloc_send_pskb+0x84d/0x980 net/core/sock.c:2995
 tun_alloc_skb drivers/net/tun.c:1461 [inline]
 tun_get_user+0xa43/0x3e90 drivers/net/tun.c:1794
 tun_chr_write_iter+0x113/0x200 drivers/net/tun.c:1999
 new_sync_write fs/read_write.c:593 [inline]
 vfs_write+0x5c9/0xb30 fs/read_write.c:686
 ksys_write+0x145/0x250 fs/read_write.c:738
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

Freed by task 6010:
 kasan_save_stack mm/kasan/common.c:56 [inline]
 kasan_save_track+0x3e/0x80 mm/kasan/common.c:77
 __kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:587
 kasan_save_free_info mm/kasan/kasan.h:406 [inline]
 poison_slab_object mm/kasan/common.c:252 [inline]
 __kasan_slab_free+0x5c/0x80 mm/kasan/common.c:284
 kasan_slab_free include/linux/kasan.h:234 [inline]
 slab_free_hook mm/slub.c:2543 [inline]
 slab_free mm/slub.c:6642 [inline]
 kmem_cache_free+0x19b/0x690 mm/slub.c:6752
 kfree_skb_reason include/linux/skbuff.h:1322 [inline]
 kfree_skb include/linux/skbuff.h:1331 [inline]
 dst_discard_out+0x1c/0x30 net/core/dst.c:32
 nf_flow_ip6_tunnel_proto net/netfilter/nf_flow_table_ip.c:383 [inline]
 nf_flow_skb_encap_protocol+0x9c2/0x1540 net/netfilter/nf_flow_table_ip.c:438
 nf_flow_offload_ipv6_lookup net/netfilter/nf_flow_table_ip.c:1074 [inline]
 nf_flow_offload_ipv6_hook+0x13c/0x32a0 net/netfilter/nf_flow_table_ip.c:1102
 nf_hook_entry_hookfn include/linux/netfilter.h:158 [inline]
 nf_hook_slow+0xc5/0x220 net/netfilter/core.c:623
 nf_hook_ingress include/linux/netfilter_netdev.h:34 [inline]
 nf_ingress net/core/dev.c:5900 [inline]
 __netif_receive_skb_core+0x241c/0x2f90 net/core/dev.c:5996
 __netif_receive_skb_one_core net/core/dev.c:6135 [inline]
 __netif_receive_skb+0x72/0x380 net/core/dev.c:6250
 netif_receive_skb_internal net/core/dev.c:6336 [inline]
 netif_receive_skb+0x1cb/0x790 net/core/dev.c:6395
 tun_rx_batched+0x1b9/0x730 drivers/net/tun.c:1485
 tun_get_user+0x2b65/0x3e90 drivers/net/tun.c:1953
 tun_chr_write_iter+0x113/0x200 drivers/net/tun.c:1999
 new_sync_write fs/read_write.c:593 [inline]
 vfs_write+0x5c9/0xb30 fs/read_write.c:686
 ksys_write+0x145/0x250 fs/read_write.c:738
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

The buggy address belongs to the object at ffff888175605e00
 which belongs to the cache skbuff_head_cache of size 240
The buggy address is located 182 bytes inside of
 freed 240-byte region [ffff888175605e00, ffff888175605ef0)

The buggy address belongs to the physical page:
page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x175604
head: order:1 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0
flags: 0x57ff00000000040(head|node=1|zone=2|lastcpupid=0x7ff)
page_type: f5(slab)
raw: 057ff00000000040 ffff8881036c4a00 dead000000000122 0000000000000000
raw: 0000000000000000 0000000080150015 00000000f5000000 0000000000000000
head: 057ff00000000040 ffff8881036c4a00 dead000000000122 0000000000000000
head: 0000000000000000 0000000080150015 00000000f5000000 0000000000000000
head: 057ff00000000001 ffffea0005d58101 00000000ffffffff 00000000ffffffff
head: ffffffffffffffff 0000000000000000 00000000ffffffff 0000000000000002
page dumped because: kasan: bad access detected
page_owner tracks the page as allocated
page last allocated via order 1, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 5922, tgid 5922 (syz-executor), ts 68256102142, free_ts 61484167308
 set_page_owner include/linux/page_owner.h:32 [inline]
 post_alloc_hook+0x234/0x290 mm/page_alloc.c:1845
 prep_new_page mm/page_alloc.c:1853 [inline]
 get_page_from_freelist+0x2365/0x2440 mm/page_alloc.c:3879
 __alloc_frozen_pages_noprof+0x181/0x370 mm/page_alloc.c:5178
 alloc_pages_mpol+0x232/0x4a0 mm/mempolicy.c:2416
 alloc_slab_page mm/slub.c:3059 [inline]
 allocate_slab+0x96/0x350 mm/slub.c:3232
 new_slab mm/slub.c:3286 [inline]
 ___slab_alloc+0xf56/0x1990 mm/slub.c:4655
 __slab_alloc+0x65/0x100 mm/slub.c:4778
 __slab_alloc_node mm/slub.c:4854 [inline]
 slab_alloc_node mm/slub.c:5276 [inline]
 kmem_cache_alloc_node_noprof+0x4c5/0x710 mm/slub.c:5340
 __alloc_skb+0x255/0x430 net/core/skbuff.c:679
 alloc_skb include/linux/skbuff.h:1383 [inline]
 nlmsg_new include/net/netlink.h:1055 [inline]
 inet_ifmcaddr_notify+0x7e/0x150 net/ipv4/igmp.c:1481
 ____ip_mc_inc_group+0x9b8/0xde0 net/ipv4/igmp.c:1564
 __ip_mc_inc_group net/ipv4/igmp.c:1573 [inline]
 ip_mc_inc_group net/ipv4/igmp.c:1579 [inline]
 ip_mc_up+0x125/0x300 net/ipv4/igmp.c:1880
 inetdev_event+0xfb3/0x15b0 net/ipv4/devinet.c:1630
 notifier_call_chain+0x1b6/0x3e0 kernel/notifier.c:85
 call_netdevice_notifiers_extack net/core/dev.c:2268 [inline]
 call_netdevice_notifiers net/core/dev.c:2282 [inline]
 __dev_notify_flags+0x18d/0x2e0 net/core/dev.c:-1
 netif_change_flags+0xe8/0x1a0 net/core/dev.c:9802
page last free pid 5811 tgid 5811 stack trace:
 reset_page_owner include/linux/page_owner.h:25 [inline]
 free_pages_prepare mm/page_alloc.c:1394 [inline]
 __free_frozen_pages+0xbc4/0xd30 mm/page_alloc.c:2901
 __slab_free+0x2e7/0x390 mm/slub.c:5970
 qlink_free mm/kasan/quarantine.c:163 [inline]
 qlist_free_all+0x97/0x140 mm/kasan/quarantine.c:179
 kasan_quarantine_reduce+0x148/0x160 mm/kasan/quarantine.c:286
 __kasan_slab_alloc+0x22/0x80 mm/kasan/common.c:352
 kasan_slab_alloc include/linux/kasan.h:252 [inline]
 slab_post_alloc_hook mm/slub.c:4978 [inline]
 slab_alloc_node mm/slub.c:5288 [inline]
 kmem_cache_alloc_noprof+0x367/0x6e0 mm/slub.c:5295
 ptlock_alloc+0x20/0x70 mm/memory.c:7302
 ptlock_init include/linux/mm.h:3059 [inline]
 pagetable_pte_ctor include/linux/mm.h:3113 [inline]
 __pte_alloc_one_noprof include/asm-generic/pgalloc.h:78 [inline]
 pte_alloc_one+0x7a/0x310 arch/x86/mm/pgtable.c:18
 __do_fault+0xd1/0x390 mm/memory.c:5276
 do_shared_fault mm/memory.c:5780 [inline]
 do_fault mm/memory.c:5854 [inline]
 do_pte_missing mm/memory.c:4362 [inline]
 handle_pte_fault mm/memory.c:6195 [inline]
 __handle_mm_fault+0x1847/0x5400 mm/memory.c:6336
 handle_mm_fault+0x40a/0x8e0 mm/memory.c:6505
 do_user_addr_fault+0xa7c/0x1380 arch/x86/mm/fault.c:1336
 handle_page_fault arch/x86/mm/fault.c:1476 [inline]
 exc_page_fault+0x82/0x100 arch/x86/mm/fault.c:1532
 asm_exc_page_fault+0x26/0x30 arch/x86/include/asm/idtentry.h:618

Memory state around the buggy address:
 ffff888175605d80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff888175605e00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>ffff888175605e80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc
                                     ^
 ffff888175605f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff888175605f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
==================================================================


***

general protection fault in ipv6_frag_rcv

tree:      nf-next
URL:       https://kernel.googlesource.com/pub/scm/linux/kernel/git/netfilter/nf-next.git
base:      ff736a286116d462a4067ba258fa351bc0b4ed80
arch:      amd64
compiler:  Debian clang version 20.1.8 (++20250708063551+0c9f909b7976-1~exp1~20250708183702.136), Debian LLD 20.1.8
config:    https://ci.syzbot.org/builds/833219cc-e7e2-437d-b31c-1eb3666c1e93/config
C repro:   https://ci.syzbot.org/findings/bac8a298-33d8-4b3a-9f63-a0a2373aaab2/c_repro
syz repro: https://ci.syzbot.org/findings/bac8a298-33d8-4b3a-9f63-a0a2373aaab2/syz_repro

Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN PTI
KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
CPU: 1 UID: 0 PID: 5982 Comm: syz.1.20 Not tainted syzkaller #0 PREEMPT(full) 
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
RIP: 0010:dst_dev include/net/dst.h:571 [inline]
RIP: 0010:skb_dst_dev include/net/dst.h:586 [inline]
RIP: 0010:skb_dst_dev_net include/net/dst.h:596 [inline]
RIP: 0010:ipv6_frag_rcv+0x19c/0x2a20 net/ipv6/reassembly.c:328
Code: 5c 24 08 4c 89 74 24 10 48 8b 44 24 60 42 80 3c 28 00 74 08 48 89 df e8 42 15 21 f8 48 8b 03 49 89 c6 49 83 e6 fe 48 c1 e8 03 <42> 80 3c 28 00 74 08 4c 89 f7 e8 25 15 21 f8 bb 08 01 00 00 49 03
RSP: 0018:ffffc90003776d20 EFLAGS: 00010246
RAX: 0000000000000000 RBX: ffff88816d87a1d8 RCX: ffff88816bf65700
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
RBP: ffffc90003776f08 R08: ffffea0006d29400 R09: 0000000b00007ff5
R10: ffffea0006d29400 R11: ffffffff8a04b090 R12: 0000000000000040
R13: dffffc0000000000 R14: 0000000000000000 R15: ffff888173607480
FS:  00007fa70abdd6c0(0000) GS:ffff8882a9f2e000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000200000010000 CR3: 0000000112fa8000 CR4: 00000000000006f0
Call Trace:
 <TASK>
 nf_flow_ip6_tunnel_proto net/netfilter/nf_flow_table_ip.c:383 [inline]
 nf_flow_skb_encap_protocol+0x9c2/0x1540 net/netfilter/nf_flow_table_ip.c:438
 nf_flow_offload_ipv6_lookup net/netfilter/nf_flow_table_ip.c:1074 [inline]
 nf_flow_offload_ipv6_hook+0x13c/0x32a0 net/netfilter/nf_flow_table_ip.c:1102
 nf_hook_entry_hookfn include/linux/netfilter.h:158 [inline]
 nf_hook_slow+0xc5/0x220 net/netfilter/core.c:623
 nf_hook_ingress include/linux/netfilter_netdev.h:34 [inline]
 nf_ingress net/core/dev.c:5900 [inline]
 __netif_receive_skb_core+0x241c/0x2f90 net/core/dev.c:5996
 __netif_receive_skb_one_core net/core/dev.c:6135 [inline]
 __netif_receive_skb+0x72/0x380 net/core/dev.c:6250
 netif_receive_skb_internal net/core/dev.c:6336 [inline]
 netif_receive_skb+0x1cb/0x790 net/core/dev.c:6395
 tun_rx_batched+0x1b9/0x730 drivers/net/tun.c:1485
 tun_get_user+0x2b65/0x3e90 drivers/net/tun.c:1953
 tun_chr_write_iter+0x113/0x200 drivers/net/tun.c:1999
 new_sync_write fs/read_write.c:593 [inline]
 vfs_write+0x5c9/0xb30 fs/read_write.c:686
 ksys_write+0x145/0x250 fs/read_write.c:738
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7fa70b58f7c9
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007fa70abdd038 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 00007fa70b7e6090 RCX: 00007fa70b58f7c9
RDX: 000000000000fdef RSI: 0000200000000340 RDI: 0000000000000003
RBP: 00007fa70b5f297f R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007fa70b7e6128 R14: 00007fa70b7e6090 R15: 00007ffcd9538b88
 </TASK>
Modules linked in:
---[ end trace 0000000000000000 ]---
RIP: 0010:dst_dev include/net/dst.h:571 [inline]
RIP: 0010:skb_dst_dev include/net/dst.h:586 [inline]
RIP: 0010:skb_dst_dev_net include/net/dst.h:596 [inline]
RIP: 0010:ipv6_frag_rcv+0x19c/0x2a20 net/ipv6/reassembly.c:328
Code: 5c 24 08 4c 89 74 24 10 48 8b 44 24 60 42 80 3c 28 00 74 08 48 89 df e8 42 15 21 f8 48 8b 03 49 89 c6 49 83 e6 fe 48 c1 e8 03 <42> 80 3c 28 00 74 08 4c 89 f7 e8 25 15 21 f8 bb 08 01 00 00 49 03
RSP: 0018:ffffc90003776d20 EFLAGS: 00010246
RAX: 0000000000000000 RBX: ffff88816d87a1d8 RCX: ffff88816bf65700
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
RBP: ffffc90003776f08 R08: ffffea0006d29400 R09: 0000000b00007ff5
R10: ffffea0006d29400 R11: ffffffff8a04b090 R12: 0000000000000040
R13: dffffc0000000000 R14: 0000000000000000 R15: ffff888173607480
FS:  00007fa70abdd6c0(0000) GS:ffff8882a9f2e000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000200000010000 CR3: 0000000112fa8000 CR4: 00000000000006f0
----------------
Code disassembly (best guess):
   0:	5c                   	pop    %rsp
   1:	24 08                	and    $0x8,%al
   3:	4c 89 74 24 10       	mov    %r14,0x10(%rsp)
   8:	48 8b 44 24 60       	mov    0x60(%rsp),%rax
   d:	42 80 3c 28 00       	cmpb   $0x0,(%rax,%r13,1)
  12:	74 08                	je     0x1c
  14:	48 89 df             	mov    %rbx,%rdi
  17:	e8 42 15 21 f8       	call   0xf821155e
  1c:	48 8b 03             	mov    (%rbx),%rax
  1f:	49 89 c6             	mov    %rax,%r14
  22:	49 83 e6 fe          	and    $0xfffffffffffffffe,%r14
  26:	48 c1 e8 03          	shr    $0x3,%rax
* 2a:	42 80 3c 28 00       	cmpb   $0x0,(%rax,%r13,1) <-- trapping instruction
  2f:	74 08                	je     0x39
  31:	4c 89 f7             	mov    %r14,%rdi
  34:	e8 25 15 21 f8       	call   0xf821155e
  39:	bb 08 01 00 00       	mov    $0x108,%ebx
  3e:	49                   	rex.WB
  3f:	03                   	.byte 0x3


***

general protection fault in nf_flow_offload_ipv6_hook

tree:      nf-next
URL:       https://kernel.googlesource.com/pub/scm/linux/kernel/git/netfilter/nf-next.git
base:      ff736a286116d462a4067ba258fa351bc0b4ed80
arch:      amd64
compiler:  Debian clang version 20.1.8 (++20250708063551+0c9f909b7976-1~exp1~20250708183702.136), Debian LLD 20.1.8
config:    https://ci.syzbot.org/builds/833219cc-e7e2-437d-b31c-1eb3666c1e93/config
C repro:   https://ci.syzbot.org/findings/bee4a66a-8f79-4936-9f6e-7f9e229693c9/c_repro
syz repro: https://ci.syzbot.org/findings/bee4a66a-8f79-4936-9f6e-7f9e229693c9/syz_repro

Oops: general protection fault, probably for non-canonical address 0xdffffc000000000c: 0000 [#1] SMP KASAN PTI
KASAN: null-ptr-deref in range [0x0000000000000060-0x0000000000000067]
CPU: 1 UID: 0 PID: 5978 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full) 
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
RIP: 0010:nf_flow_offload_ipv6_forward net/netfilter/nf_flow_table_ip.c:1027 [inline]
RIP: 0010:nf_flow_offload_ipv6_hook+0x2f3/0x32a0 net/netfilter/nf_flow_table_ip.c:1106
Code: 85 c6 03 00 00 e8 6d f0 31 f8 41 bc 14 00 00 00 e9 af 02 00 00 48 8d 4b 60 48 89 4c 24 18 48 c1 e9 03 48 89 8c 24 90 00 00 00 <42> 0f b6 04 21 84 c0 0f 85 de 23 00 00 44 0f b7 73 60 41 83 e6 03
RSP: 0018:ffffc900031d6fe0 EFLAGS: 00010206
RAX: ffffffff898e1101 RBX: 0000000000000002 RCX: 000000000000000c
RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000
RBP: ffffc900031d73e0 R08: ffffffff8f7d1e77 R09: 1ffffffff1efa3ce
R10: dffffc0000000000 R11: fffffbfff1efa3cf R12: dffffc0000000000
R13: 1ffff9200063ae1c R14: ffff88816cfc5680 R15: ffffc900031d72e0
FS:  00007fd7653a06c0(0000) GS:ffff8882a9f2e000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000200000010000 CR3: 000000011342a000 CR4: 00000000000006f0
Call Trace:
 <TASK>
 nf_hook_entry_hookfn include/linux/netfilter.h:158 [inline]
 nf_hook_slow+0xc5/0x220 net/netfilter/core.c:623
 nf_hook_ingress include/linux/netfilter_netdev.h:34 [inline]
 nf_ingress net/core/dev.c:5900 [inline]
 __netif_receive_skb_core+0x241c/0x2f90 net/core/dev.c:5996
 __netif_receive_skb_one_core net/core/dev.c:6135 [inline]
 __netif_receive_skb+0x72/0x380 net/core/dev.c:6250
 netif_receive_skb_internal net/core/dev.c:6336 [inline]
 netif_receive_skb+0x1cb/0x790 net/core/dev.c:6395
 tun_rx_batched+0x1b9/0x730 drivers/net/tun.c:1485
 tun_get_user+0x2b65/0x3e90 drivers/net/tun.c:1953
 tun_chr_write_iter+0x113/0x200 drivers/net/tun.c:1999
 new_sync_write fs/read_write.c:593 [inline]
 vfs_write+0x5c9/0xb30 fs/read_write.c:686
 ksys_write+0x145/0x250 fs/read_write.c:738
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7fd76458f7c9
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007fd7653a0038 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 00007fd7647e6090 RCX: 00007fd76458f7c9
RDX: 000000000000fdef RSI: 0000200000000380 RDI: 0000000000000003
RBP: 00007fd7645f297f R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007fd7647e6128 R14: 00007fd7647e6090 R15: 00007ffde1f0b2b8
 </TASK>
Modules linked in:
---[ end trace 0000000000000000 ]---
RIP: 0010:nf_flow_offload_ipv6_forward net/netfilter/nf_flow_table_ip.c:1027 [inline]
RIP: 0010:nf_flow_offload_ipv6_hook+0x2f3/0x32a0 net/netfilter/nf_flow_table_ip.c:1106
Code: 85 c6 03 00 00 e8 6d f0 31 f8 41 bc 14 00 00 00 e9 af 02 00 00 48 8d 4b 60 48 89 4c 24 18 48 c1 e9 03 48 89 8c 24 90 00 00 00 <42> 0f b6 04 21 84 c0 0f 85 de 23 00 00 44 0f b7 73 60 41 83 e6 03
RSP: 0018:ffffc900031d6fe0 EFLAGS: 00010206
RAX: ffffffff898e1101 RBX: 0000000000000002 RCX: 000000000000000c
RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000
RBP: ffffc900031d73e0 R08: ffffffff8f7d1e77 R09: 1ffffffff1efa3ce
R10: dffffc0000000000 R11: fffffbfff1efa3cf R12: dffffc0000000000
R13: 1ffff9200063ae1c R14: ffff88816cfc5680 R15: ffffc900031d72e0
FS:  00007fd7653a06c0(0000) GS:ffff8882a9f2e000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000200000010000 CR3: 000000011342a000 CR4: 00000000000006f0
----------------
Code disassembly (best guess):
   0:	85 c6                	test   %eax,%esi
   2:	03 00                	add    (%rax),%eax
   4:	00 e8                	add    %ch,%al
   6:	6d                   	insl   (%dx),%es:(%rdi)
   7:	f0 31 f8             	lock xor %edi,%eax
   a:	41 bc 14 00 00 00    	mov    $0x14,%r12d
  10:	e9 af 02 00 00       	jmp    0x2c4
  15:	48 8d 4b 60          	lea    0x60(%rbx),%rcx
  19:	48 89 4c 24 18       	mov    %rcx,0x18(%rsp)
  1e:	48 c1 e9 03          	shr    $0x3,%rcx
  22:	48 89 8c 24 90 00 00 	mov    %rcx,0x90(%rsp)
  29:	00
* 2a:	42 0f b6 04 21       	movzbl (%rcx,%r12,1),%eax <-- trapping instruction
  2f:	84 c0                	test   %al,%al
  31:	0f 85 de 23 00 00    	jne    0x2415
  37:	44 0f b7 73 60       	movzwl 0x60(%rbx),%r14d
  3c:	41 83 e6 03          	and    $0x3,%r14d


***

If these findings have caused you to resend the series or submit a
separate fix, please add the following tag to your commit message:
  Tested-by: syzbot@syzkaller.appspotmail.com

---
This report is generated by a bot. It may contain errors.
syzbot ci engineers can be reached at syzkaller@googlegroups.com.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2025-12-07 23:54 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-12-01 13:45 [PATCH RFC nf-next 0/4] Add IP6IP6 flowtable SW acceleration Lorenzo Bianconi
2025-12-01 13:45 ` [PATCH RFC nf-next 1/4] netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct Lorenzo Bianconi
2025-12-01 13:45 ` [PATCH RFC nf-next 2/4] netfilter: flowtable: Add IP6IP6 rx sw acceleration Lorenzo Bianconi
2025-12-01 13:45 ` [PATCH RFC nf-next 3/4] netfilter: flowtable: Add IP6IP6 tx " Lorenzo Bianconi
2025-12-01 13:45 ` [PATCH RFC nf-next 4/4] selftests: netfilter: nft_flowtable.sh: Add IP6IP6 flowtable selftest Lorenzo Bianconi
2025-12-01 17:54 ` [syzbot ci] Re: Add IP6IP6 flowtable SW acceleration syzbot ci
  -- strict thread matches above, loose matches on Subject: below --
2025-12-07 16:06 [PATCH nf-next 0/5] " Lorenzo Bianconi
2025-12-07 23:54 ` [syzbot ci] " syzbot ci

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).