netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH nf-next 0/5] Add IP6IP6 flowtable SW acceleration
@ 2025-12-07 16:06 Lorenzo Bianconi
  2025-12-07 16:06 ` [PATCH nf-next 1/5] netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct Lorenzo Bianconi
                   ` (6 more replies)
  0 siblings, 7 replies; 10+ messages in thread
From: Lorenzo Bianconi @ 2025-12-07 16:06 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	Phil Sutter, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, David Ahern, Shuah Khan
  Cc: netfilter-devel, coreteam, netdev, linux-kselftest,
	Lorenzo Bianconi

Introduce SW acceleration for IP6IP6 tunnels in the netfilter flowtable
infrastructure.

---
Lorenzo Bianconi (5):
      netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct
      netfilter: Modify nf_flow_skb_encap_protocol() to return int instead of bool
      netfilter: flowtable: Add IP6IP6 rx sw acceleration
      netfilter: flowtable: Add IP6IP6 tx sw acceleration
      selftests: netfilter: nft_flowtable.sh: Add IP6IP6 flowtable selftest

 net/ipv6/ip6_tunnel.c                              |  27 ++
 net/netfilter/nf_flow_table_ip.c                   | 271 +++++++++++++++++----
 .../selftests/net/netfilter/nft_flowtable.sh       |  62 ++++-
 3 files changed, 306 insertions(+), 54 deletions(-)
---
base-commit: f8156ef0fd8232055396ebf1e044fa06fb8bc388
change-id: 20251207-b4-flowtable-offload-ip6ip6-8e9a2c6f3a77

Best regards,
-- 
Lorenzo Bianconi <lorenzo@kernel.org>


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH nf-next 1/5] netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct
  2025-12-07 16:06 [PATCH nf-next 0/5] Add IP6IP6 flowtable SW acceleration Lorenzo Bianconi
@ 2025-12-07 16:06 ` Lorenzo Bianconi
  2025-12-07 16:06 ` [PATCH nf-next 2/5] netfilter: Modify nf_flow_skb_encap_protocol() to return int instead of bool Lorenzo Bianconi
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Lorenzo Bianconi @ 2025-12-07 16:06 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	Phil Sutter, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, David Ahern, Shuah Khan
  Cc: netfilter-devel, coreteam, netdev, linux-kselftest,
	Lorenzo Bianconi

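This is a preliminary patch to introduce IP6IP6 flowtable acceleration.
Move the nf_flowtable_ctx definition ahead of nf_flow_tuple_encap() and
add a tun sub-struct (offset, proto) to it, so that the tunnel header
detected in nf_flow_ip4_tunnel_proto() is recorded in the context and
reused by nf_flow_tuple_encap() and nf_flow_ip4_tunnel_pop() instead of
re-reading the outer IP header.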

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 net/netfilter/nf_flow_table_ip.c | 80 ++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 36 deletions(-)

diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index e128b0fe9a7bf50b458df9940d629ea08c521871..14c01b59f76569170057d2465ee5953efb557bcc 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -142,7 +142,18 @@ static bool ip_has_options(unsigned int thoff)
 	return thoff != sizeof(struct iphdr);
 }
 
-static void nf_flow_tuple_encap(struct sk_buff *skb,
+struct nf_flowtable_ctx {
+	const struct net_device	*in;
+	u32			offset;
+	u32			hdrsize;
+	struct {
+		u32 offset;
+		u8 proto;
+	} tun;
+};
+
+static void nf_flow_tuple_encap(struct nf_flowtable_ctx *ctx,
+				struct sk_buff *skb,
 				struct flow_offload_tuple *tuple)
 {
 	__be16 inner_proto = skb->protocol;
@@ -174,22 +185,15 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
 		break;
 	}
 
-	if (inner_proto == htons(ETH_P_IP)) {
+	if (inner_proto == htons(ETH_P_IP) &&
+	    ctx->tun.proto == IPPROTO_IPIP) {
 		iph = (struct iphdr *)(skb_network_header(skb) + offset);
-		if (iph->protocol == IPPROTO_IPIP) {
-			tuple->tun.dst_v4.s_addr = iph->daddr;
-			tuple->tun.src_v4.s_addr = iph->saddr;
-			tuple->tun.l3_proto = IPPROTO_IPIP;
-		}
+		tuple->tun.dst_v4.s_addr = iph->daddr;
+		tuple->tun.src_v4.s_addr = iph->saddr;
+		tuple->tun.l3_proto = IPPROTO_IPIP;
 	}
 }
 
-struct nf_flowtable_ctx {
-	const struct net_device	*in;
-	u32			offset;
-	u32			hdrsize;
-};
-
 static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
 			    struct flow_offload_tuple *tuple)
 {
@@ -257,7 +261,7 @@ static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
 	tuple->l3proto		= AF_INET;
 	tuple->l4proto		= ipproto;
 	tuple->iifidx		= ctx->in->ifindex;
-	nf_flow_tuple_encap(skb, tuple);
+	nf_flow_tuple_encap(ctx, skb, tuple);
 
 	return 0;
 }
@@ -293,15 +297,16 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
-static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
+static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
+				     struct sk_buff *skb)
 {
 	struct iphdr *iph;
 	u16 size;
 
-	if (!pskb_may_pull(skb, sizeof(*iph) + *psize))
+	if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
 		return false;
 
-	iph = (struct iphdr *)(skb_network_header(skb) + *psize);
+	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
 	size = iph->ihl << 2;
 
 	if (ip_is_fragment(iph) || unlikely(ip_has_options(size)))
@@ -310,25 +315,27 @@ static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
 	if (iph->ttl <= 1)
 		return false;
 
-	if (iph->protocol == IPPROTO_IPIP)
-		*psize += size;
+	if (iph->protocol == IPPROTO_IPIP) {
+		ctx->tun.proto = IPPROTO_IPIP;
+		ctx->tun.offset = size;
+		ctx->offset += size;
+	}
 
 	return true;
 }
 
-static void nf_flow_ip4_tunnel_pop(struct sk_buff *skb)
+static void nf_flow_ip4_tunnel_pop(struct nf_flowtable_ctx *ctx,
+				   struct sk_buff *skb)
 {
-	struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
-
-	if (iph->protocol != IPPROTO_IPIP)
+	if (ctx->tun.proto != IPPROTO_IPIP)
 		return;
 
-	skb_pull(skb, iph->ihl << 2);
+	skb_pull(skb, ctx->tun.offset);
 	skb_reset_network_header(skb);
 }
 
-static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
-				       u32 *offset)
+static bool nf_flow_skb_encap_protocol(struct nf_flowtable_ctx *ctx,
+				       struct sk_buff *skb, __be16 proto)
 {
 	__be16 inner_proto = skb->protocol;
 	struct vlan_ethhdr *veth;
@@ -341,7 +348,7 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
 
 		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
 		if (veth->h_vlan_encapsulated_proto == proto) {
-			*offset += VLAN_HLEN;
+			ctx->offset += VLAN_HLEN;
 			inner_proto = proto;
 			ret = true;
 		}
@@ -349,19 +356,20 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
 	case htons(ETH_P_PPP_SES):
 		if (nf_flow_pppoe_proto(skb, &inner_proto) &&
 		    inner_proto == proto) {
-			*offset += PPPOE_SES_HLEN;
+			ctx->offset += PPPOE_SES_HLEN;
 			ret = true;
 		}
 		break;
 	}
 
 	if (inner_proto == htons(ETH_P_IP))
-		ret = nf_flow_ip4_tunnel_proto(skb, offset);
+		ret = nf_flow_ip4_tunnel_proto(ctx, skb);
 
 	return ret;
 }
 
-static void nf_flow_encap_pop(struct sk_buff *skb,
+static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx,
+			      struct sk_buff *skb,
 			      struct flow_offload_tuple_rhash *tuplehash)
 {
 	struct vlan_hdr *vlan_hdr;
@@ -388,7 +396,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
 	}
 
 	if (skb->protocol == htons(ETH_P_IP))
-		nf_flow_ip4_tunnel_pop(skb);
+		nf_flow_ip4_tunnel_pop(ctx, skb);
 }
 
 struct nf_flow_xmit {
@@ -414,7 +422,7 @@ nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
 {
 	struct flow_offload_tuple tuple = {};
 
-	if (!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
+	if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IP)))
 		return NULL;
 
 	if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
@@ -458,7 +466,7 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
 
 	flow_offload_refresh(flow_table, flow, false);
 
-	nf_flow_encap_pop(skb, tuplehash);
+	nf_flow_encap_pop(ctx, skb, tuplehash);
 	thoff -= ctx->offset;
 
 	iph = ip_hdr(skb);
@@ -836,7 +844,7 @@ static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
 	tuple->l3proto		= AF_INET6;
 	tuple->l4proto		= nexthdr;
 	tuple->iifidx		= ctx->in->ifindex;
-	nf_flow_tuple_encap(skb, tuple);
+	nf_flow_tuple_encap(ctx, skb, tuple);
 
 	return 0;
 }
@@ -873,7 +881,7 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
 
 	flow_offload_refresh(flow_table, flow, false);
 
-	nf_flow_encap_pop(skb, tuplehash);
+	nf_flow_encap_pop(ctx, skb, tuplehash);
 
 	ip6h = ipv6_hdr(skb);
 	nf_flow_nat_ipv6(flow, skb, dir, ip6h);
@@ -895,7 +903,7 @@ nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
 	struct flow_offload_tuple tuple = {};
 
 	if (skb->protocol != htons(ETH_P_IPV6) &&
-	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset))
+	    !nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6)))
 		return NULL;
 
 	if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)

-- 
2.52.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH nf-next 2/5] netfilter: Modify nf_flow_skb_encap_protocol() to return int instead of bool
  2025-12-07 16:06 [PATCH nf-next 0/5] Add IP6IP6 flowtable SW acceleration Lorenzo Bianconi
  2025-12-07 16:06 ` [PATCH nf-next 1/5] netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct Lorenzo Bianconi
@ 2025-12-07 16:06 ` Lorenzo Bianconi
  2025-12-07 16:06 ` [PATCH nf-next 3/5] netfilter: flowtable: Add IP6IP6 rx sw acceleration Lorenzo Bianconi
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Lorenzo Bianconi @ 2025-12-07 16:06 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	Phil Sutter, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, David Ahern, Shuah Khan
  Cc: netfilter-devel, coreteam, netdev, linux-kselftest,
	Lorenzo Bianconi

Change the nf_flow_skb_encap_protocol() and nf_flow_ip4_tunnel_proto()
signatures to return an int error code (0 on success, negative on
failure) instead of a boolean value. This is a preliminary patch to
introduce IP6IP6 flowtable acceleration.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 net/netfilter/nf_flow_table_ip.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 14c01b59f76569170057d2465ee5953efb557bcc..9dbb4eed7724edbd68d386bb48bc237527ea7368 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -297,23 +297,23 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
-static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
-				     struct sk_buff *skb)
+static int nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
+				    struct sk_buff *skb)
 {
 	struct iphdr *iph;
 	u16 size;
 
 	if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
-		return false;
+		return -1;
 
 	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
 	size = iph->ihl << 2;
 
 	if (ip_is_fragment(iph) || unlikely(ip_has_options(size)))
-		return false;
+		return -1;
 
 	if (iph->ttl <= 1)
-		return false;
+		return -1;
 
 	if (iph->protocol == IPPROTO_IPIP) {
 		ctx->tun.proto = IPPROTO_IPIP;
@@ -321,7 +321,7 @@ static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
 		ctx->offset += size;
 	}
 
-	return true;
+	return 0;
 }
 
 static void nf_flow_ip4_tunnel_pop(struct nf_flowtable_ctx *ctx,
@@ -334,30 +334,30 @@ static void nf_flow_ip4_tunnel_pop(struct nf_flowtable_ctx *ctx,
 	skb_reset_network_header(skb);
 }
 
-static bool nf_flow_skb_encap_protocol(struct nf_flowtable_ctx *ctx,
-				       struct sk_buff *skb, __be16 proto)
+static int nf_flow_skb_encap_protocol(struct nf_flowtable_ctx *ctx,
+				      struct sk_buff *skb, __be16 proto)
 {
 	__be16 inner_proto = skb->protocol;
 	struct vlan_ethhdr *veth;
-	bool ret = false;
+	int ret = -1;
 
 	switch (skb->protocol) {
 	case htons(ETH_P_8021Q):
 		if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth)))
-			return false;
+			return -1;
 
 		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
 		if (veth->h_vlan_encapsulated_proto == proto) {
 			ctx->offset += VLAN_HLEN;
 			inner_proto = proto;
-			ret = true;
+			ret = 0;
 		}
 		break;
 	case htons(ETH_P_PPP_SES):
 		if (nf_flow_pppoe_proto(skb, &inner_proto) &&
 		    inner_proto == proto) {
 			ctx->offset += PPPOE_SES_HLEN;
-			ret = true;
+			ret = 0;
 		}
 		break;
 	}
@@ -422,7 +422,7 @@ nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
 {
 	struct flow_offload_tuple tuple = {};
 
-	if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IP)))
+	if (nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IP)) < 0)
 		return NULL;
 
 	if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
@@ -903,7 +903,7 @@ nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
 	struct flow_offload_tuple tuple = {};
 
 	if (skb->protocol != htons(ETH_P_IPV6) &&
-	    !nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6)))
+	    nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6)) < 0)
 		return NULL;
 
 	if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)

-- 
2.52.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH nf-next 3/5] netfilter: flowtable: Add IP6IP6 rx sw acceleration
  2025-12-07 16:06 [PATCH nf-next 0/5] Add IP6IP6 flowtable SW acceleration Lorenzo Bianconi
  2025-12-07 16:06 ` [PATCH nf-next 1/5] netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct Lorenzo Bianconi
  2025-12-07 16:06 ` [PATCH nf-next 2/5] netfilter: Modify nf_flow_skb_encap_protocol() to return int instead of bool Lorenzo Bianconi
@ 2025-12-07 16:06 ` Lorenzo Bianconi
  2025-12-07 16:06 ` [PATCH nf-next 4/5] netfilter: flowtable: Add IP6IP6 tx " Lorenzo Bianconi
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Lorenzo Bianconi @ 2025-12-07 16:06 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	Phil Sutter, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, David Ahern, Shuah Khan
  Cc: netfilter-devel, coreteam, netdev, linux-kselftest,
	Lorenzo Bianconi

Introduce sw acceleration for rx path of IP6IP6 tunnels relying on the
netfilter flowtable infrastructure. Subsequent patches will add sw
acceleration for IP6IP6 tunnels tx path.
IP6IP6 rx sw acceleration can be tested by running the following scenario
where the traffic is forwarded between two NICs (eth0 and eth1) and an
IP6IP6 tunnel is used to access a remote site (using eth1 as the underlay
device):

ETH0 -- TUN0 <==> ETH1 -- [IP network] -- TUN1 (2001:db8:3::2)

$ip addr show
6: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 00:00:22:33:11:55 brd ff:ff:ff:ff:ff:ff
    inet6 2001:db8:1::2/64 scope global nodad
       valid_lft forever preferred_lft forever
7: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 00:11:22:33:11:55 brd ff:ff:ff:ff:ff:ff
    inet6 2001:db8:2::1/64 scope global nodad
       valid_lft forever preferred_lft forever
8: tun0@NONE: <POINTOPOINT,NOARP,UP,LOWER_UP> mtu 1480 qdisc noqueue state UNKNOWN group default qlen 1000
    link/tunnel6 2001:db8:2::1 peer 2001:db8:2::2 permaddr ce9c:2940:7dcc::
    inet6 2002:db8:1::1/64 scope global nodad
       valid_lft forever preferred_lft forever

$ip -6 route show
2001:db8:1::/64 dev eth0 proto kernel metric 256 pref medium
2001:db8:2::/64 dev eth1 proto kernel metric 256 pref medium
2002:db8:1::/64 dev tun0 proto kernel metric 256 pref medium
default via 2002:db8:1::2 dev tun0 metric 1024 pref medium

$nft list ruleset
table inet filter {
        flowtable ft {
                hook ingress priority filter
                devices = { eth0, eth1 }
        }

        chain forward {
                type filter hook forward priority filter; policy accept;
                meta l4proto { tcp, udp } flow add @ft
        }
}

Reproducing the scenario described above using veths I got the following
results:
- TCP stream received from the IP6IP6 tunnel:
  - net-next: (baseline)                  ~ 79Gbps
  - net-next + IP6IP6 flowtable support:  ~106Gbps

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 net/ipv6/ip6_tunnel.c            |  27 ++++++++++
 net/netfilter/nf_flow_table_ip.c | 109 +++++++++++++++++++++++++++++++++------
 2 files changed, 120 insertions(+), 16 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6405072050e0ef7521ca1fdddc4a0252e2159d2a..10341bfc16bd16a43290015952bd9a57658e6ae1 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1828,6 +1828,32 @@ int ip6_tnl_encap_setup(struct ip6_tnl *t,
 }
 EXPORT_SYMBOL_GPL(ip6_tnl_encap_setup);
 
+static int ip6_tnl_fill_forward_path(struct net_device_path_ctx *ctx,
+				     struct net_device_path *path)
+{
+	struct ip6_tnl *t = netdev_priv(ctx->dev);
+	struct flowi6 fl6 = {
+		.daddr = t->parms.raddr,
+	};
+	struct dst_entry *dst;
+	int err;
+
+	dst = ip6_route_output(dev_net(ctx->dev), NULL, &fl6);
+	if (!dst->error) {
+		path->type = DEV_PATH_TUN;
+		path->tun.src_v6 = t->parms.laddr;
+		path->tun.dst_v6 = t->parms.raddr;
+		path->tun.l3_proto = IPPROTO_IPV6;
+		path->dev = ctx->dev;
+		ctx->dev = dst->dev;
+	}
+
+	err = dst->error;
+	dst_release(dst);
+
+	return err;
+}
+
 static const struct net_device_ops ip6_tnl_netdev_ops = {
 	.ndo_init	= ip6_tnl_dev_init,
 	.ndo_uninit	= ip6_tnl_dev_uninit,
@@ -1836,6 +1862,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
 	.ndo_change_mtu = ip6_tnl_change_mtu,
 	.ndo_get_stats64 = dev_get_tstats64,
 	.ndo_get_iflink = ip6_tnl_get_iflink,
+	.ndo_fill_forward_path = ip6_tnl_fill_forward_path,
 };
 
 #define IPXIPX_FEATURES (NETIF_F_SG |		\
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 9dbb4eed7724edbd68d386bb48bc237527ea7368..f24e2c063ab8835d3e4a02439020ace79f70dd70 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -15,6 +15,7 @@
 #include <net/neighbour.h>
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netfilter/nf_conntrack_acct.h>
+#include <net/protocol.h>
 /* For layer 4 checksum field offset. */
 #include <linux/tcp.h>
 #include <linux/udp.h>
@@ -159,6 +160,7 @@ static void nf_flow_tuple_encap(struct nf_flowtable_ctx *ctx,
 	__be16 inner_proto = skb->protocol;
 	struct vlan_ethhdr *veth;
 	struct pppoe_hdr *phdr;
+	struct ipv6hdr *ip6h;
 	struct iphdr *iph;
 	u16 offset = 0;
 	int i = 0;
@@ -185,12 +187,25 @@ static void nf_flow_tuple_encap(struct nf_flowtable_ctx *ctx,
 		break;
 	}
 
-	if (inner_proto == htons(ETH_P_IP) &&
-	    ctx->tun.proto == IPPROTO_IPIP) {
+	switch (inner_proto) {
+	case htons(ETH_P_IP):
 		iph = (struct iphdr *)(skb_network_header(skb) + offset);
-		tuple->tun.dst_v4.s_addr = iph->daddr;
-		tuple->tun.src_v4.s_addr = iph->saddr;
-		tuple->tun.l3_proto = IPPROTO_IPIP;
+		if (ctx->tun.proto == IPPROTO_IPIP) {
+			tuple->tun.dst_v4.s_addr = iph->daddr;
+			tuple->tun.src_v4.s_addr = iph->saddr;
+			tuple->tun.l3_proto = IPPROTO_IPIP;
+		}
+		break;
+	case htons(ETH_P_IPV6):
+		ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+		if (ctx->tun.proto == IPPROTO_IPV6) {
+			tuple->tun.dst_v6 = ip6h->daddr;
+			tuple->tun.src_v6 = ip6h->saddr;
+			tuple->tun.l3_proto = IPPROTO_IPV6;
+		}
+		break;
+	default:
+		break;
 	}
 }
 
@@ -324,10 +339,62 @@ static int nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
 	return 0;
 }
 
-static void nf_flow_ip4_tunnel_pop(struct nf_flowtable_ctx *ctx,
-				   struct sk_buff *skb)
+static int nf_flow_ip6_tunnel_proto(struct nf_flowtable_ctx *ctx,
+				    struct sk_buff *skb)
+{
+	const struct inet6_protocol *ipprot;
+	struct ipv6hdr *ip6h;
+	int ret;
+
+	if (!pskb_may_pull(skb, sizeof(*ip6h) + ctx->offset))
+		return -1;
+
+	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
+	if (ip6h->hop_limit <= 1)
+		return -1;
+
+	/* Initialize default values for extension headers parsing */
+	skb->transport_header = skb->network_header + sizeof(*ip6h) +
+				ctx->offset;
+	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr) + ctx->offset;
+
+	do {
+		unsigned int nhoff = IP6CB(skb)->nhoff;
+		u8 nexthdr;
+
+		if (!pskb_pull(skb, skb_transport_offset(skb)))
+			return -1;
+
+		nexthdr = skb_network_header(skb)[nhoff];
+		ipprot = rcu_dereference(inet6_protos[nexthdr]);
+		if (!ipprot)
+			break;
+
+		if (ipprot->flags & INET6_PROTO_FINAL) {
+			if (nexthdr == IPPROTO_IPV6) {
+				ctx->tun.offset = skb->transport_header -
+						  skb->network_header - ctx->offset;
+				ctx->tun.proto = IPPROTO_IPV6;
+			}
+			break;
+		}
+
+		ret = ipprot->handler(skb);
+		if (ret < 0)
+			return NF_STOLEN;
+	} while (ret > 0);
+
+	skb_push(skb, skb->transport_header - skb->network_header);
+	ctx->offset += ctx->tun.offset;
+
+	return 0;
+}
+
+static void nf_flow_ip_tunnel_pop(struct nf_flowtable_ctx *ctx,
+				  struct sk_buff *skb)
 {
-	if (ctx->tun.proto != IPPROTO_IPIP)
+	if (ctx->tun.proto != IPPROTO_IPIP &&
+	    ctx->tun.proto != IPPROTO_IPV6)
 		return;
 
 	skb_pull(skb, ctx->tun.offset);
@@ -362,8 +429,16 @@ static int nf_flow_skb_encap_protocol(struct nf_flowtable_ctx *ctx,
 		break;
 	}
 
-	if (inner_proto == htons(ETH_P_IP))
+	switch (inner_proto) {
+	case htons(ETH_P_IP):
 		ret = nf_flow_ip4_tunnel_proto(ctx, skb);
+		break;
+	case htons(ETH_P_IPV6):
+		ret = nf_flow_ip6_tunnel_proto(ctx, skb);
+		break;
+	default:
+		break;
+	}
 
 	return ret;
 }
@@ -395,8 +470,9 @@ static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx,
 		}
 	}
 
-	if (skb->protocol == htons(ETH_P_IP))
-		nf_flow_ip4_tunnel_pop(ctx, skb);
+	if (skb->protocol == htons(ETH_P_IP) ||
+	    skb->protocol == htons(ETH_P_IPV6))
+		nf_flow_ip_tunnel_pop(ctx, skb);
 }
 
 struct nf_flow_xmit {
@@ -901,10 +977,11 @@ nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
 			    struct sk_buff *skb)
 {
 	struct flow_offload_tuple tuple = {};
+	int err;
 
-	if (skb->protocol != htons(ETH_P_IPV6) &&
-	    nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6)) < 0)
-		return NULL;
+	err = nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6));
+	if (err)
+		return ERR_PTR(err);
 
 	if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)
 		return NULL;
@@ -931,8 +1008,8 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	int ret;
 
 	tuplehash = nf_flow_offload_ipv6_lookup(&ctx, flow_table, skb);
-	if (tuplehash == NULL)
-		return NF_ACCEPT;
+	if (IS_ERR_OR_NULL(tuplehash))
+		return tuplehash == ERR_PTR(NF_STOLEN) ? NF_STOLEN : NF_ACCEPT;
 
 	ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb);
 	if (ret < 0)

-- 
2.52.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH nf-next 4/5] netfilter: flowtable: Add IP6IP6 tx sw acceleration
  2025-12-07 16:06 [PATCH nf-next 0/5] Add IP6IP6 flowtable SW acceleration Lorenzo Bianconi
                   ` (2 preceding siblings ...)
  2025-12-07 16:06 ` [PATCH nf-next 3/5] netfilter: flowtable: Add IP6IP6 rx sw acceleration Lorenzo Bianconi
@ 2025-12-07 16:06 ` Lorenzo Bianconi
  2025-12-07 16:06 ` [PATCH nf-next 5/5] selftests: netfilter: nft_flowtable.sh: Add IP6IP6 flowtable selftest Lorenzo Bianconi
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Lorenzo Bianconi @ 2025-12-07 16:06 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	Phil Sutter, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, David Ahern, Shuah Khan
  Cc: netfilter-devel, coreteam, netdev, linux-kselftest,
	Lorenzo Bianconi

Introduce sw acceleration for tx path of IP6IP6 tunnels relying on the
netfilter flowtable infrastructure.
IP6IP6 tx sw acceleration can be tested by running the following scenario
where the traffic is forwarded between two NICs (eth0 and eth1) and an
IP6IP6 tunnel is used to access a remote site (using eth1 as the underlay
device):

ETH0 -- TUN0 <==> ETH1 -- [IP network] -- TUN1 (2001:db8:3::2)

$ip addr show
6: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 00:00:22:33:11:55 brd ff:ff:ff:ff:ff:ff
    inet6 2001:db8:1::2/64 scope global nodad
       valid_lft forever preferred_lft forever
7: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 00:11:22:33:11:55 brd ff:ff:ff:ff:ff:ff
    inet6 2001:db8:2::1/64 scope global nodad
       valid_lft forever preferred_lft forever
8: tun0@NONE: <POINTOPOINT,NOARP,UP,LOWER_UP> mtu 1480 qdisc noqueue state UNKNOWN group default qlen 1000
    link/tunnel6 2001:db8:2::1 peer 2001:db8:2::2 permaddr ce9c:2940:7dcc::
    inet6 2002:db8:1::1/64 scope global nodad
       valid_lft forever preferred_lft forever

$ip -6 route show
2001:db8:1::/64 dev eth0 proto kernel metric 256 pref medium
2001:db8:2::/64 dev eth1 proto kernel metric 256 pref medium
2002:db8:1::/64 dev tun0 proto kernel metric 256 pref medium
default via 2002:db8:1::2 dev tun0 metric 1024 pref medium

$nft list ruleset
table inet filter {
        flowtable ft {
                hook ingress priority filter
                devices = { eth0, eth1 }
        }

        chain forward {
                type filter hook forward priority filter; policy accept;
                meta l4proto { tcp, udp } flow add @ft
        }
}

Reproducing the scenario described above using veths I got the following
results:
- TCP stream received from the IP6IP6 tunnel:
  - net-next: (baseline)                  ~93Gbps
  - net-next + IP6IP6 flowtable support:  ~98Gbps

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 net/netfilter/nf_flow_table_ip.c | 96 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)

diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index f24e2c063ab8835d3e4a02439020ace79f70dd70..2c252aacbd44dfb2c0463a11c6f2a3a9c2b036bf 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -12,6 +12,7 @@
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
+#include <net/ip6_tunnel.h>
 #include <net/neighbour.h>
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netfilter/nf_conntrack_acct.h>
@@ -651,6 +652,94 @@ static int nf_flow_tunnel_v4_push(struct net *net, struct sk_buff *skb,
 	return 0;
 }
 
+struct ipv6_tel_txoption {
+	struct ipv6_txoptions ops;
+	__u8 dst_opt[8];
+};
+
+static int nf_flow_tunnel_ip6ip6_push(struct net *net, struct sk_buff *skb,
+				      struct flow_offload_tuple *tuple,
+				      struct in6_addr **ip6_daddr)
+{
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)skb_network_header(skb);
+	int err, mtu, encap_limit = IPV6_DEFAULT_TNL_ENCAP_LIMIT;
+	u8 hop_limit = ip6h->hop_limit, proto = IPPROTO_IPV6;
+	struct rtable *rt = dst_rtable(tuple->dst_cache);
+	__u8 dsfield = ipv6_get_dsfield(ip6h);
+	struct flowi6 fl6 = {
+		.daddr = tuple->tun.src_v6,
+		.saddr = tuple->tun.dst_v6,
+		.flowi6_proto = proto,
+	};
+	u32 headroom;
+
+	err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
+	if (err)
+		return err;
+
+	skb_set_inner_ipproto(skb, proto);
+	headroom = sizeof(*ip6h) + LL_RESERVED_SPACE(rt->dst.dev) +
+		   rt->dst.header_len;
+	if (encap_limit)
+		headroom += 8;
+	err = skb_cow_head(skb, headroom);
+	if (err)
+		return err;
+
+	skb_scrub_packet(skb, true);
+	mtu = dst_mtu(&rt->dst) - sizeof(*ip6h);
+	if (encap_limit)
+		mtu -= 8;
+	mtu = max(mtu, IPV6_MIN_MTU);
+	skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+	if (encap_limit > 0) {
+		struct ipv6_tel_txoption opt = {
+			.dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT,
+			.dst_opt[3] = 1,
+			.dst_opt[4] = encap_limit,
+			.dst_opt[5] = IPV6_TLV_PADN,
+			.dst_opt[6] = 1,
+		};
+		struct ipv6_opt_hdr *hopt;
+
+		opt.ops.dst1opt = (struct ipv6_opt_hdr *)opt.dst_opt;
+		opt.ops.opt_nflen = 8;
+
+		hopt = skb_push(skb, ipv6_optlen(opt.ops.dst1opt));
+		memcpy(hopt, opt.ops.dst1opt, ipv6_optlen(opt.ops.dst1opt));
+		hopt->nexthdr = IPPROTO_IPV6;
+		proto = NEXTHDR_DEST;
+	}
+
+	skb_push(skb, sizeof(*ip6h));
+	skb_reset_network_header(skb);
+
+	ip6h = ipv6_hdr(skb);
+	ip6_flow_hdr(ip6h, dsfield,
+		     ip6_make_flowlabel(net, skb, fl6.flowlabel, true, &fl6));
+	ip6h->hop_limit = hop_limit;
+	ip6h->nexthdr = proto;
+	ip6h->daddr = tuple->tun.src_v6;
+	ip6h->saddr = tuple->tun.dst_v6;
+	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(*ip6h));
+	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+
+	*ip6_daddr = &tuple->tun.src_v6;
+
+	return 0;
+}
+
+static int nf_flow_tunnel_v6_push(struct net *net, struct sk_buff *skb,
+				  struct flow_offload_tuple *tuple,
+				  struct in6_addr **ip6_daddr)
+{
+	if (tuple->tun_num)
+		return nf_flow_tunnel_ip6ip6_push(net, skb, tuple, ip6_daddr);
+
+	return 0;
+}
+
 static int nf_flow_encap_push(struct sk_buff *skb,
 			      struct flow_offload_tuple *tuple)
 {
@@ -939,6 +1028,9 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
 
 	mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
+	if (flow->tuplehash[!dir].tuple.tun_num)
+		mtu -= sizeof(*ip6h);
+
 	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
 		return 0;
 
@@ -1030,6 +1122,10 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	other_tuple = &flow->tuplehash[!dir].tuple;
 	ip6_daddr = &other_tuple->src_v6;
 
+	if (nf_flow_tunnel_v6_push(state->net, skb, other_tuple,
+				   &ip6_daddr) < 0)
+		return NF_DROP;
+
 	if (nf_flow_encap_push(skb, other_tuple) < 0)
 		return NF_DROP;
 

-- 
2.52.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH nf-next 5/5] selftests: netfilter: nft_flowtable.sh: Add IP6IP6 flowtable selftest
  2025-12-07 16:06 [PATCH nf-next 0/5] Add IP6IP6 flowtable SW acceleration Lorenzo Bianconi
                   ` (3 preceding siblings ...)
  2025-12-07 16:06 ` [PATCH nf-next 4/5] netfilter: flowtable: Add IP6IP6 tx " Lorenzo Bianconi
@ 2025-12-07 16:06 ` Lorenzo Bianconi
  2025-12-07 21:06 ` [PATCH nf-next 0/5] Add IP6IP6 flowtable SW acceleration Jakub Kicinski
  2025-12-07 23:54 ` [syzbot ci] " syzbot ci
  6 siblings, 0 replies; 10+ messages in thread
From: Lorenzo Bianconi @ 2025-12-07 16:06 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	Phil Sutter, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, David Ahern, Shuah Khan
  Cc: netfilter-devel, coreteam, netdev, linux-kselftest,
	Lorenzo Bianconi

Similar to IPIP, introduce a specific selftest for IP6IP6 flowtable SW
acceleration in nft_flowtable.sh.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 .../selftests/net/netfilter/nft_flowtable.sh       | 62 ++++++++++++++++++----
 1 file changed, 53 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index 24b4e60b91451e7ea7f6a041b0335233047c6242..bc98baba56c638cad35478109a3776d6d93c34a8 100755
--- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -590,16 +590,28 @@ ip -net "$nsr1" link set tun0 up
 ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
 ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
 
+ip -net "$nsr1" link add name tun6 type ip6tnl local fee1:2::1 remote fee1:2::2
+ip -net "$nsr1" link set tun6 up
+ip -net "$nsr1" addr add fee1:3::1/64 dev tun6 nodad
+
 ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
 ip -net "$nsr2" link set tun0 up
 ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
 ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
 
+ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1
+ip -net "$nsr2" link set tun6 up
+ip -net "$nsr2" addr add fee1:3::2/64 dev tun6 nodad
+
 ip -net "$nsr1" route change default via 192.168.100.2
 ip -net "$nsr2" route change default via 192.168.100.1
+ip -6 -net "$nsr1" route change default via fee1:3::2
+ip -6 -net "$nsr2" route change default via fee1:3::1
 ip -net "$ns2" route add default via 10.0.2.1
+ip -6 -net "$ns2" route add default via dead:2::1
 
 ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept'
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6 accept'
 ip netns exec "$nsr1" nft -a insert rule inet filter forward \
 	'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'
 
@@ -609,28 +621,51 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
 	ret=1
 fi
 
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+	echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel"
+else
+	echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
 # Create vlan tagged devices for IPIP traffic.
 ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
 ip -net "$nsr1" link set veth1.10 up
 ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
+ip -net "$nsr1" addr add fee1:4::1/64 dev veth1.10 nodad
 ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
 ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'
-ip -net "$nsr1" link add name tun1 type ipip local 192.168.20.1 remote 192.168.20.2
-ip -net "$nsr1" link set tun1 up
-ip -net "$nsr1" addr add 192.168.200.1/24 dev tun1
+
+ip -net "$nsr1" link add name tun0.10 type ipip local 192.168.20.1 remote 192.168.20.2
+ip -net "$nsr1" link set tun0.10 up
+ip -net "$nsr1" addr add 192.168.200.1/24 dev tun0.10
 ip -net "$nsr1" route change default via 192.168.200.2
-ip netns exec "$nsr1" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
-ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun1 accept'
+ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0.10 accept'
+
+ip -net "$nsr1" link add name tun6.10 type ip6tnl local fee1:4::1 remote fee1:4::2
+ip -net "$nsr1" link set tun6.10 up
+ip -net "$nsr1" addr add fee1:5::1/64 dev tun6.10 nodad
+ip -6 -net "$nsr1" route change default via fee1:5::2
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6.10 accept'
 
 ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
 ip -net "$nsr2" link set veth0.10 up
 ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
+ip -net "$nsr2" addr add fee1:4::2/64 dev veth0.10 nodad
 ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null
-ip -net "$nsr2" link add name tun1 type ipip local 192.168.20.2 remote 192.168.20.1
-ip -net "$nsr2" link set tun1 up
-ip -net "$nsr2" addr add 192.168.200.2/24 dev tun1
+
+ip -net "$nsr2" link add name tun0.10 type ipip local 192.168.20.2 remote 192.168.20.1
+ip -net "$nsr2" link set tun0.10 up
+ip -net "$nsr2" addr add 192.168.200.2/24 dev tun0.10
 ip -net "$nsr2" route change default via 192.168.200.1
-ip netns exec "$nsr2" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
+ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
+
+ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1
+ip -net "$nsr2" link set tun6.10 up
+ip -net "$nsr2" addr add fee1:5::2/64 dev tun6.10 nodad
+ip -6 -net "$nsr2" route change default via fee1:5::1
 
 if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
 	echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
@@ -638,10 +673,19 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
 	ret=1
 fi
 
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+	echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel over vlan"
+else
+	echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel over vlan" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
 # Restore the previous configuration
 ip -net "$nsr1" route change default via 192.168.10.2
 ip -net "$nsr2" route change default via 192.168.10.1
 ip -net "$ns2" route del default via 10.0.2.1
+ip -6 -net "$ns2" route del default via dead:2::1
 }
 
 # Another test:

-- 
2.52.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH nf-next 0/5] Add IP6IP6 flowtable SW acceleration
  2025-12-07 16:06 [PATCH nf-next 0/5] Add IP6IP6 flowtable SW acceleration Lorenzo Bianconi
                   ` (4 preceding siblings ...)
  2025-12-07 16:06 ` [PATCH nf-next 5/5] selftests: netfilter: nft_flowtable.sh: Add IP6IP6 flowtable selftest Lorenzo Bianconi
@ 2025-12-07 21:06 ` Jakub Kicinski
  2025-12-07 21:55   ` Lorenzo Bianconi
  2025-12-07 23:54 ` [syzbot ci] " syzbot ci
  6 siblings, 1 reply; 10+ messages in thread
From: Jakub Kicinski @ 2025-12-07 21:06 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	Phil Sutter, David S. Miller, Eric Dumazet, Paolo Abeni,
	Simon Horman, David Ahern, Shuah Khan, netfilter-devel, coreteam,
	netdev, linux-kselftest

On Sun, 07 Dec 2025 17:06:40 +0100 Lorenzo Bianconi wrote:
> Introduce SW acceleration for IP6IP6 tunnels in the netfilter flowtable
> infrastructure.

tc-testing build fails with this:

net/netfilter/nf_flow_table_ip.c: In function ‘nf_flow_ip6_tunnel_proto’:
net/netfilter/nf_flow_table_ip.c:370:42: error: ‘inet6_protos’ undeclared (first use in this function); did you mean ‘inet_protos’?
  370 |                 ipprot = rcu_dereference(inet6_protos[nexthdr]);
      |                                          ^~~~~~~~~~~~
./include/linux/rcupdate.h:532:17: note: in definition of macro ‘__rcu_dereference_check’
  532 |         typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \
      |                 ^
./include/linux/rcupdate.h:770:28: note: in expansion of macro ‘rcu_dereference_check’
  770 | #define rcu_dereference(p) rcu_dereference_check(p, 0)
      |                            ^~~~~~~~~~~~~~~~~~~~~
net/netfilter/nf_flow_table_ip.c:370:26: note: in expansion of macro ‘rcu_dereference’
  370 |                 ipprot = rcu_dereference(inet6_protos[nexthdr]);
      |                          ^~~~~~~~~~~~~~~
net/netfilter/nf_flow_table_ip.c:370:42: note: each undeclared identifier is reported only once for each function it appears in
  370 |                 ipprot = rcu_dereference(inet6_protos[nexthdr]);
      |                                          ^~~~~~~~~~~~
./include/linux/rcupdate.h:532:17: note: in definition of macro ‘__rcu_dereference_check’
  532 |         typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \
      |                 ^
./include/linux/rcupdate.h:770:28: note: in expansion of macro ‘rcu_dereference_check’
  770 | #define rcu_dereference(p) rcu_dereference_check(p, 0)
      |                            ^~~~~~~~~~~~~~~~~~~~~
net/netfilter/nf_flow_table_ip.c:370:26: note: in expansion of macro ‘rcu_dereference’
  370 |                 ipprot = rcu_dereference(inet6_protos[nexthdr]);
      |                          ^~~~~~~~~~~~~~~
net/netfilter/nf_flow_table_ip.c:374:27: error: invalid use of undefined type ‘const struct inet6_protocol’
  374 |                 if (ipprot->flags & INET6_PROTO_FINAL) {
      |                           ^~
net/netfilter/nf_flow_table_ip.c:374:37: error: ‘INET6_PROTO_FINAL’ undeclared (first use in this function)
  374 |                 if (ipprot->flags & INET6_PROTO_FINAL) {
      |                                     ^~~~~~~~~~~~~~~~~
net/netfilter/nf_flow_table_ip.c:383:29: error: invalid use of undefined type ‘const struct inet6_protocol’
  383 |                 ret = ipprot->handler(skb);
      |                             ^~

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH nf-next 0/5] Add IP6IP6 flowtable SW acceleration
  2025-12-07 21:06 ` [PATCH nf-next 0/5] Add IP6IP6 flowtable SW acceleration Jakub Kicinski
@ 2025-12-07 21:55   ` Lorenzo Bianconi
  2025-12-07 23:09     ` Jakub Kicinski
  0 siblings, 1 reply; 10+ messages in thread
From: Lorenzo Bianconi @ 2025-12-07 21:55 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	Phil Sutter, David S. Miller, Eric Dumazet, Paolo Abeni,
	Simon Horman, David Ahern, Shuah Khan, netfilter-devel, coreteam,
	netdev, linux-kselftest

[-- Attachment #1: Type: text/plain, Size: 2908 bytes --]

> On Sun, 07 Dec 2025 17:06:40 +0100 Lorenzo Bianconi wrote:
> > Introduce SW acceleration for IP6IP6 tunnels in the netfilter flowtable
> > infrastructure.
> 
> tc-testing build fails with this:
> 
> net/netfilter/nf_flow_table_ip.c: In function ‘nf_flow_ip6_tunnel_proto’:
> net/netfilter/nf_flow_table_ip.c:370:42: error: ‘inet6_protos’ undeclared (first use in this function); did you mean ‘inet_protos’?
>   370 |                 ipprot = rcu_dereference(inet6_protos[nexthdr]);
>       |                                          ^~~~~~~~~~~~
> ./include/linux/rcupdate.h:532:17: note: in definition of macro ‘__rcu_dereference_check’
>   532 |         typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \
>       |                 ^
> ./include/linux/rcupdate.h:770:28: note: in expansion of macro ‘rcu_dereference_check’
>   770 | #define rcu_dereference(p) rcu_dereference_check(p, 0)
>       |                            ^~~~~~~~~~~~~~~~~~~~~
> net/netfilter/nf_flow_table_ip.c:370:26: note: in expansion of macro ‘rcu_dereference’
>   370 |                 ipprot = rcu_dereference(inet6_protos[nexthdr]);
>       |                          ^~~~~~~~~~~~~~~
> net/netfilter/nf_flow_table_ip.c:370:42: note: each undeclared identifier is reported only once for each function it appears in
>   370 |                 ipprot = rcu_dereference(inet6_protos[nexthdr]);
>       |                                          ^~~~~~~~~~~~
> ./include/linux/rcupdate.h:532:17: note: in definition of macro ‘__rcu_dereference_check’
>   532 |         typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \
>       |                 ^
> ./include/linux/rcupdate.h:770:28: note: in expansion of macro ‘rcu_dereference_check’
>   770 | #define rcu_dereference(p) rcu_dereference_check(p, 0)
>       |                            ^~~~~~~~~~~~~~~~~~~~~
> net/netfilter/nf_flow_table_ip.c:370:26: note: in expansion of macro ‘rcu_dereference’
>   370 |                 ipprot = rcu_dereference(inet6_protos[nexthdr]);
>       |                          ^~~~~~~~~~~~~~~
> net/netfilter/nf_flow_table_ip.c:374:27: error: invalid use of undefined type ‘const struct inet6_protocol’
>   374 |                 if (ipprot->flags & INET6_PROTO_FINAL) {
>       |                           ^~
> net/netfilter/nf_flow_table_ip.c:374:37: error: ‘INET6_PROTO_FINAL’ undeclared (first use in this function)
>   374 |                 if (ipprot->flags & INET6_PROTO_FINAL) {
>       |                                     ^~~~~~~~~~~~~~~~~
> net/netfilter/nf_flow_table_ip.c:383:29: error: invalid use of undefined type ‘const struct inet6_protocol’
>   383 |                 ret = ipprot->handler(skb);
>       |                             ^~

Hi Jakub,

I guess CONFIG_IPV6 is not enabled, right? I will fix it.

Regards,
Lorenzo

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH nf-next 0/5] Add IP6IP6 flowtable SW acceleration
  2025-12-07 21:55   ` Lorenzo Bianconi
@ 2025-12-07 23:09     ` Jakub Kicinski
  0 siblings, 0 replies; 10+ messages in thread
From: Jakub Kicinski @ 2025-12-07 23:09 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	Phil Sutter, David S. Miller, Eric Dumazet, Paolo Abeni,
	Simon Horman, David Ahern, Shuah Khan, netfilter-devel, coreteam,
	netdev, linux-kselftest

On Sun, 7 Dec 2025 22:55:02 +0100 Lorenzo Bianconi wrote:
> > net/netfilter/nf_flow_table_ip.c:383:29: error: invalid use of undefined type ‘const struct inet6_protocol’
> >   383 |                 ret = ipprot->handler(skb);
> >       |                             ^~  
> 
> Hi Jakub,
> 
> I guess CONFIG_IPV6 is not enabled, right? I will fix it.

IDK, it's run by the TC folks; I don't see the artifact for the config
in the logs.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [syzbot ci] Re: Add IP6IP6 flowtable SW acceleration
  2025-12-07 16:06 [PATCH nf-next 0/5] Add IP6IP6 flowtable SW acceleration Lorenzo Bianconi
                   ` (5 preceding siblings ...)
  2025-12-07 21:06 ` [PATCH nf-next 0/5] Add IP6IP6 flowtable SW acceleration Jakub Kicinski
@ 2025-12-07 23:54 ` syzbot ci
  6 siblings, 0 replies; 10+ messages in thread
From: syzbot ci @ 2025-12-07 23:54 UTC (permalink / raw)
  To: coreteam, davem, dsahern, edumazet, fw, horms, kadlec, kuba,
	linux-kselftest, lorenzo, netdev, netfilter-devel, pabeni, pablo,
	phil, shuah
  Cc: syzbot, syzkaller-bugs

syzbot ci has tested the following series

[v1] Add IP6IP6 flowtable SW acceleration
https://lore.kernel.org/all/20251207-b4-flowtable-offload-ip6ip6-v1-0-18e3ab7f748c@kernel.org
* [PATCH nf-next 1/5] netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct
* [PATCH nf-next 2/5] netfilter: Modify nf_flow_skb_encap_protocol() to return int intead of bool
* [PATCH nf-next 3/5] netfilter: flowtable: Add IP6IP6 rx sw acceleration
* [PATCH nf-next 4/5] netfilter: flowtable: Add IP6IP6 tx sw acceleration
* [PATCH nf-next 5/5] selftests: netfilter: nft_flowtable.sh: Add IP6IP6 flowtable selftest

and found the following issues:
* KASAN: slab-use-after-free Read in nf_flow_skb_encap_protocol
* general protection fault in ipv6_frag_rcv
* general protection fault in nf_flow_offload_ipv6_hook

Full report is available here:
https://ci.syzbot.org/series/24632ec9-06ab-4e09-8015-19822d83c6a9

***

KASAN: slab-use-after-free Read in nf_flow_skb_encap_protocol

tree:      nf-next
URL:       https://kernel.googlesource.com/pub/scm/linux/kernel/git/netfilter/nf-next.git
base:      ff736a286116d462a4067ba258fa351bc0b4ed80
arch:      amd64
compiler:  Debian clang version 20.1.8 (++20250708063551+0c9f909b7976-1~exp1~20250708183702.136), Debian LLD 20.1.8
config:    https://ci.syzbot.org/builds/833219cc-e7e2-437d-b31c-1eb3666c1e93/config
C repro:   https://ci.syzbot.org/findings/afcab565-801f-4cde-a1c5-b1d327537871/c_repro
syz repro: https://ci.syzbot.org/findings/afcab565-801f-4cde-a1c5-b1d327537871/syz_repro

==================================================================
BUG: KASAN: slab-use-after-free in nf_flow_ip6_tunnel_proto net/netfilter/nf_flow_table_ip.c:388 [inline]
BUG: KASAN: slab-use-after-free in nf_flow_skb_encap_protocol+0x13a2/0x1540 net/netfilter/nf_flow_table_ip.c:438
Read of size 2 at addr ffff888175605eb6 by task syz.1.20/6010

CPU: 1 UID: 0 PID: 6010 Comm: syz.1.20 Not tainted syzkaller #0 PREEMPT(full) 
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
Call Trace:
 <TASK>
 dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120
 print_address_description mm/kasan/report.c:378 [inline]
 print_report+0xca/0x240 mm/kasan/report.c:482
 kasan_report+0x118/0x150 mm/kasan/report.c:595
 nf_flow_ip6_tunnel_proto net/netfilter/nf_flow_table_ip.c:388 [inline]
 nf_flow_skb_encap_protocol+0x13a2/0x1540 net/netfilter/nf_flow_table_ip.c:438
 nf_flow_offload_ipv6_lookup net/netfilter/nf_flow_table_ip.c:1074 [inline]
 nf_flow_offload_ipv6_hook+0x13c/0x32a0 net/netfilter/nf_flow_table_ip.c:1102
 nf_hook_entry_hookfn include/linux/netfilter.h:158 [inline]
 nf_hook_slow+0xc5/0x220 net/netfilter/core.c:623
 nf_hook_ingress include/linux/netfilter_netdev.h:34 [inline]
 nf_ingress net/core/dev.c:5900 [inline]
 __netif_receive_skb_core+0x241c/0x2f90 net/core/dev.c:5996
 __netif_receive_skb_one_core net/core/dev.c:6135 [inline]
 __netif_receive_skb+0x72/0x380 net/core/dev.c:6250
 netif_receive_skb_internal net/core/dev.c:6336 [inline]
 netif_receive_skb+0x1cb/0x790 net/core/dev.c:6395
 tun_rx_batched+0x1b9/0x730 drivers/net/tun.c:1485
 tun_get_user+0x2b65/0x3e90 drivers/net/tun.c:1953
 tun_chr_write_iter+0x113/0x200 drivers/net/tun.c:1999
 new_sync_write fs/read_write.c:593 [inline]
 vfs_write+0x5c9/0xb30 fs/read_write.c:686
 ksys_write+0x145/0x250 fs/read_write.c:738
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f53b9d8f7c9
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f53babe6038 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 00007f53b9fe6090 RCX: 00007f53b9d8f7c9
RDX: 000000000000fdef RSI: 0000200000000340 RDI: 0000000000000003
RBP: 00007f53b9df297f R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007f53b9fe6128 R14: 00007f53b9fe6090 R15: 00007ffc1b120a08
 </TASK>

Allocated by task 6010:
 kasan_save_stack mm/kasan/common.c:56 [inline]
 kasan_save_track+0x3e/0x80 mm/kasan/common.c:77
 unpoison_slab_object mm/kasan/common.c:342 [inline]
 __kasan_slab_alloc+0x6c/0x80 mm/kasan/common.c:368
 kasan_slab_alloc include/linux/kasan.h:252 [inline]
 slab_post_alloc_hook mm/slub.c:4978 [inline]
 slab_alloc_node mm/slub.c:5288 [inline]
 kmem_cache_alloc_node_noprof+0x433/0x710 mm/slub.c:5340
 __alloc_skb+0x255/0x430 net/core/skbuff.c:679
 alloc_skb include/linux/skbuff.h:1383 [inline]
 alloc_skb_with_frags+0xca/0x890 net/core/skbuff.c:6712
 sock_alloc_send_pskb+0x84d/0x980 net/core/sock.c:2995
 tun_alloc_skb drivers/net/tun.c:1461 [inline]
 tun_get_user+0xa43/0x3e90 drivers/net/tun.c:1794
 tun_chr_write_iter+0x113/0x200 drivers/net/tun.c:1999
 new_sync_write fs/read_write.c:593 [inline]
 vfs_write+0x5c9/0xb30 fs/read_write.c:686
 ksys_write+0x145/0x250 fs/read_write.c:738
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

Freed by task 6010:
 kasan_save_stack mm/kasan/common.c:56 [inline]
 kasan_save_track+0x3e/0x80 mm/kasan/common.c:77
 __kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:587
 kasan_save_free_info mm/kasan/kasan.h:406 [inline]
 poison_slab_object mm/kasan/common.c:252 [inline]
 __kasan_slab_free+0x5c/0x80 mm/kasan/common.c:284
 kasan_slab_free include/linux/kasan.h:234 [inline]
 slab_free_hook mm/slub.c:2543 [inline]
 slab_free mm/slub.c:6642 [inline]
 kmem_cache_free+0x19b/0x690 mm/slub.c:6752
 kfree_skb_reason include/linux/skbuff.h:1322 [inline]
 kfree_skb include/linux/skbuff.h:1331 [inline]
 dst_discard_out+0x1c/0x30 net/core/dst.c:32
 nf_flow_ip6_tunnel_proto net/netfilter/nf_flow_table_ip.c:383 [inline]
 nf_flow_skb_encap_protocol+0x9c2/0x1540 net/netfilter/nf_flow_table_ip.c:438
 nf_flow_offload_ipv6_lookup net/netfilter/nf_flow_table_ip.c:1074 [inline]
 nf_flow_offload_ipv6_hook+0x13c/0x32a0 net/netfilter/nf_flow_table_ip.c:1102
 nf_hook_entry_hookfn include/linux/netfilter.h:158 [inline]
 nf_hook_slow+0xc5/0x220 net/netfilter/core.c:623
 nf_hook_ingress include/linux/netfilter_netdev.h:34 [inline]
 nf_ingress net/core/dev.c:5900 [inline]
 __netif_receive_skb_core+0x241c/0x2f90 net/core/dev.c:5996
 __netif_receive_skb_one_core net/core/dev.c:6135 [inline]
 __netif_receive_skb+0x72/0x380 net/core/dev.c:6250
 netif_receive_skb_internal net/core/dev.c:6336 [inline]
 netif_receive_skb+0x1cb/0x790 net/core/dev.c:6395
 tun_rx_batched+0x1b9/0x730 drivers/net/tun.c:1485
 tun_get_user+0x2b65/0x3e90 drivers/net/tun.c:1953
 tun_chr_write_iter+0x113/0x200 drivers/net/tun.c:1999
 new_sync_write fs/read_write.c:593 [inline]
 vfs_write+0x5c9/0xb30 fs/read_write.c:686
 ksys_write+0x145/0x250 fs/read_write.c:738
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

The buggy address belongs to the object at ffff888175605e00
 which belongs to the cache skbuff_head_cache of size 240
The buggy address is located 182 bytes inside of
 freed 240-byte region [ffff888175605e00, ffff888175605ef0)

The buggy address belongs to the physical page:
page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x175604
head: order:1 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0
flags: 0x57ff00000000040(head|node=1|zone=2|lastcpupid=0x7ff)
page_type: f5(slab)
raw: 057ff00000000040 ffff8881036c4a00 dead000000000122 0000000000000000
raw: 0000000000000000 0000000080150015 00000000f5000000 0000000000000000
head: 057ff00000000040 ffff8881036c4a00 dead000000000122 0000000000000000
head: 0000000000000000 0000000080150015 00000000f5000000 0000000000000000
head: 057ff00000000001 ffffea0005d58101 00000000ffffffff 00000000ffffffff
head: ffffffffffffffff 0000000000000000 00000000ffffffff 0000000000000002
page dumped because: kasan: bad access detected
page_owner tracks the page as allocated
page last allocated via order 1, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 5922, tgid 5922 (syz-executor), ts 68256102142, free_ts 61484167308
 set_page_owner include/linux/page_owner.h:32 [inline]
 post_alloc_hook+0x234/0x290 mm/page_alloc.c:1845
 prep_new_page mm/page_alloc.c:1853 [inline]
 get_page_from_freelist+0x2365/0x2440 mm/page_alloc.c:3879
 __alloc_frozen_pages_noprof+0x181/0x370 mm/page_alloc.c:5178
 alloc_pages_mpol+0x232/0x4a0 mm/mempolicy.c:2416
 alloc_slab_page mm/slub.c:3059 [inline]
 allocate_slab+0x96/0x350 mm/slub.c:3232
 new_slab mm/slub.c:3286 [inline]
 ___slab_alloc+0xf56/0x1990 mm/slub.c:4655
 __slab_alloc+0x65/0x100 mm/slub.c:4778
 __slab_alloc_node mm/slub.c:4854 [inline]
 slab_alloc_node mm/slub.c:5276 [inline]
 kmem_cache_alloc_node_noprof+0x4c5/0x710 mm/slub.c:5340
 __alloc_skb+0x255/0x430 net/core/skbuff.c:679
 alloc_skb include/linux/skbuff.h:1383 [inline]
 nlmsg_new include/net/netlink.h:1055 [inline]
 inet_ifmcaddr_notify+0x7e/0x150 net/ipv4/igmp.c:1481
 ____ip_mc_inc_group+0x9b8/0xde0 net/ipv4/igmp.c:1564
 __ip_mc_inc_group net/ipv4/igmp.c:1573 [inline]
 ip_mc_inc_group net/ipv4/igmp.c:1579 [inline]
 ip_mc_up+0x125/0x300 net/ipv4/igmp.c:1880
 inetdev_event+0xfb3/0x15b0 net/ipv4/devinet.c:1630
 notifier_call_chain+0x1b6/0x3e0 kernel/notifier.c:85
 call_netdevice_notifiers_extack net/core/dev.c:2268 [inline]
 call_netdevice_notifiers net/core/dev.c:2282 [inline]
 __dev_notify_flags+0x18d/0x2e0 net/core/dev.c:-1
 netif_change_flags+0xe8/0x1a0 net/core/dev.c:9802
page last free pid 5811 tgid 5811 stack trace:
 reset_page_owner include/linux/page_owner.h:25 [inline]
 free_pages_prepare mm/page_alloc.c:1394 [inline]
 __free_frozen_pages+0xbc4/0xd30 mm/page_alloc.c:2901
 __slab_free+0x2e7/0x390 mm/slub.c:5970
 qlink_free mm/kasan/quarantine.c:163 [inline]
 qlist_free_all+0x97/0x140 mm/kasan/quarantine.c:179
 kasan_quarantine_reduce+0x148/0x160 mm/kasan/quarantine.c:286
 __kasan_slab_alloc+0x22/0x80 mm/kasan/common.c:352
 kasan_slab_alloc include/linux/kasan.h:252 [inline]
 slab_post_alloc_hook mm/slub.c:4978 [inline]
 slab_alloc_node mm/slub.c:5288 [inline]
 kmem_cache_alloc_noprof+0x367/0x6e0 mm/slub.c:5295
 ptlock_alloc+0x20/0x70 mm/memory.c:7302
 ptlock_init include/linux/mm.h:3059 [inline]
 pagetable_pte_ctor include/linux/mm.h:3113 [inline]
 __pte_alloc_one_noprof include/asm-generic/pgalloc.h:78 [inline]
 pte_alloc_one+0x7a/0x310 arch/x86/mm/pgtable.c:18
 __do_fault+0xd1/0x390 mm/memory.c:5276
 do_shared_fault mm/memory.c:5780 [inline]
 do_fault mm/memory.c:5854 [inline]
 do_pte_missing mm/memory.c:4362 [inline]
 handle_pte_fault mm/memory.c:6195 [inline]
 __handle_mm_fault+0x1847/0x5400 mm/memory.c:6336
 handle_mm_fault+0x40a/0x8e0 mm/memory.c:6505
 do_user_addr_fault+0xa7c/0x1380 arch/x86/mm/fault.c:1336
 handle_page_fault arch/x86/mm/fault.c:1476 [inline]
 exc_page_fault+0x82/0x100 arch/x86/mm/fault.c:1532
 asm_exc_page_fault+0x26/0x30 arch/x86/include/asm/idtentry.h:618

Memory state around the buggy address:
 ffff888175605d80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff888175605e00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>ffff888175605e80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc
                                     ^
 ffff888175605f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff888175605f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
==================================================================


***

general protection fault in ipv6_frag_rcv

tree:      nf-next
URL:       https://kernel.googlesource.com/pub/scm/linux/kernel/git/netfilter/nf-next.git
base:      ff736a286116d462a4067ba258fa351bc0b4ed80
arch:      amd64
compiler:  Debian clang version 20.1.8 (++20250708063551+0c9f909b7976-1~exp1~20250708183702.136), Debian LLD 20.1.8
config:    https://ci.syzbot.org/builds/833219cc-e7e2-437d-b31c-1eb3666c1e93/config
C repro:   https://ci.syzbot.org/findings/bac8a298-33d8-4b3a-9f63-a0a2373aaab2/c_repro
syz repro: https://ci.syzbot.org/findings/bac8a298-33d8-4b3a-9f63-a0a2373aaab2/syz_repro

Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN PTI
KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
CPU: 1 UID: 0 PID: 5982 Comm: syz.1.20 Not tainted syzkaller #0 PREEMPT(full) 
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
RIP: 0010:dst_dev include/net/dst.h:571 [inline]
RIP: 0010:skb_dst_dev include/net/dst.h:586 [inline]
RIP: 0010:skb_dst_dev_net include/net/dst.h:596 [inline]
RIP: 0010:ipv6_frag_rcv+0x19c/0x2a20 net/ipv6/reassembly.c:328
Code: 5c 24 08 4c 89 74 24 10 48 8b 44 24 60 42 80 3c 28 00 74 08 48 89 df e8 42 15 21 f8 48 8b 03 49 89 c6 49 83 e6 fe 48 c1 e8 03 <42> 80 3c 28 00 74 08 4c 89 f7 e8 25 15 21 f8 bb 08 01 00 00 49 03
RSP: 0018:ffffc90003776d20 EFLAGS: 00010246
RAX: 0000000000000000 RBX: ffff88816d87a1d8 RCX: ffff88816bf65700
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
RBP: ffffc90003776f08 R08: ffffea0006d29400 R09: 0000000b00007ff5
R10: ffffea0006d29400 R11: ffffffff8a04b090 R12: 0000000000000040
R13: dffffc0000000000 R14: 0000000000000000 R15: ffff888173607480
FS:  00007fa70abdd6c0(0000) GS:ffff8882a9f2e000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000200000010000 CR3: 0000000112fa8000 CR4: 00000000000006f0
Call Trace:
 <TASK>
 nf_flow_ip6_tunnel_proto net/netfilter/nf_flow_table_ip.c:383 [inline]
 nf_flow_skb_encap_protocol+0x9c2/0x1540 net/netfilter/nf_flow_table_ip.c:438
 nf_flow_offload_ipv6_lookup net/netfilter/nf_flow_table_ip.c:1074 [inline]
 nf_flow_offload_ipv6_hook+0x13c/0x32a0 net/netfilter/nf_flow_table_ip.c:1102
 nf_hook_entry_hookfn include/linux/netfilter.h:158 [inline]
 nf_hook_slow+0xc5/0x220 net/netfilter/core.c:623
 nf_hook_ingress include/linux/netfilter_netdev.h:34 [inline]
 nf_ingress net/core/dev.c:5900 [inline]
 __netif_receive_skb_core+0x241c/0x2f90 net/core/dev.c:5996
 __netif_receive_skb_one_core net/core/dev.c:6135 [inline]
 __netif_receive_skb+0x72/0x380 net/core/dev.c:6250
 netif_receive_skb_internal net/core/dev.c:6336 [inline]
 netif_receive_skb+0x1cb/0x790 net/core/dev.c:6395
 tun_rx_batched+0x1b9/0x730 drivers/net/tun.c:1485
 tun_get_user+0x2b65/0x3e90 drivers/net/tun.c:1953
 tun_chr_write_iter+0x113/0x200 drivers/net/tun.c:1999
 new_sync_write fs/read_write.c:593 [inline]
 vfs_write+0x5c9/0xb30 fs/read_write.c:686
 ksys_write+0x145/0x250 fs/read_write.c:738
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7fa70b58f7c9
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007fa70abdd038 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 00007fa70b7e6090 RCX: 00007fa70b58f7c9
RDX: 000000000000fdef RSI: 0000200000000340 RDI: 0000000000000003
RBP: 00007fa70b5f297f R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007fa70b7e6128 R14: 00007fa70b7e6090 R15: 00007ffcd9538b88
 </TASK>
Modules linked in:
---[ end trace 0000000000000000 ]---
RIP: 0010:dst_dev include/net/dst.h:571 [inline]
RIP: 0010:skb_dst_dev include/net/dst.h:586 [inline]
RIP: 0010:skb_dst_dev_net include/net/dst.h:596 [inline]
RIP: 0010:ipv6_frag_rcv+0x19c/0x2a20 net/ipv6/reassembly.c:328
Code: 5c 24 08 4c 89 74 24 10 48 8b 44 24 60 42 80 3c 28 00 74 08 48 89 df e8 42 15 21 f8 48 8b 03 49 89 c6 49 83 e6 fe 48 c1 e8 03 <42> 80 3c 28 00 74 08 4c 89 f7 e8 25 15 21 f8 bb 08 01 00 00 49 03
RSP: 0018:ffffc90003776d20 EFLAGS: 00010246
RAX: 0000000000000000 RBX: ffff88816d87a1d8 RCX: ffff88816bf65700
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
RBP: ffffc90003776f08 R08: ffffea0006d29400 R09: 0000000b00007ff5
R10: ffffea0006d29400 R11: ffffffff8a04b090 R12: 0000000000000040
R13: dffffc0000000000 R14: 0000000000000000 R15: ffff888173607480
FS:  00007fa70abdd6c0(0000) GS:ffff8882a9f2e000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000200000010000 CR3: 0000000112fa8000 CR4: 00000000000006f0
----------------
Code disassembly (best guess):
   0:	5c                   	pop    %rsp
   1:	24 08                	and    $0x8,%al
   3:	4c 89 74 24 10       	mov    %r14,0x10(%rsp)
   8:	48 8b 44 24 60       	mov    0x60(%rsp),%rax
   d:	42 80 3c 28 00       	cmpb   $0x0,(%rax,%r13,1)
  12:	74 08                	je     0x1c
  14:	48 89 df             	mov    %rbx,%rdi
  17:	e8 42 15 21 f8       	call   0xf821155e
  1c:	48 8b 03             	mov    (%rbx),%rax
  1f:	49 89 c6             	mov    %rax,%r14
  22:	49 83 e6 fe          	and    $0xfffffffffffffffe,%r14
  26:	48 c1 e8 03          	shr    $0x3,%rax
* 2a:	42 80 3c 28 00       	cmpb   $0x0,(%rax,%r13,1) <-- trapping instruction
  2f:	74 08                	je     0x39
  31:	4c 89 f7             	mov    %r14,%rdi
  34:	e8 25 15 21 f8       	call   0xf821155e
  39:	bb 08 01 00 00       	mov    $0x108,%ebx
  3e:	49                   	rex.WB
  3f:	03                   	.byte 0x3


***

general protection fault in nf_flow_offload_ipv6_hook

tree:      nf-next
URL:       https://kernel.googlesource.com/pub/scm/linux/kernel/git/netfilter/nf-next.git
base:      ff736a286116d462a4067ba258fa351bc0b4ed80
arch:      amd64
compiler:  Debian clang version 20.1.8 (++20250708063551+0c9f909b7976-1~exp1~20250708183702.136), Debian LLD 20.1.8
config:    https://ci.syzbot.org/builds/833219cc-e7e2-437d-b31c-1eb3666c1e93/config
C repro:   https://ci.syzbot.org/findings/bee4a66a-8f79-4936-9f6e-7f9e229693c9/c_repro
syz repro: https://ci.syzbot.org/findings/bee4a66a-8f79-4936-9f6e-7f9e229693c9/syz_repro

Oops: general protection fault, probably for non-canonical address 0xdffffc000000000c: 0000 [#1] SMP KASAN PTI
KASAN: null-ptr-deref in range [0x0000000000000060-0x0000000000000067]
CPU: 1 UID: 0 PID: 5978 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full) 
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
RIP: 0010:nf_flow_offload_ipv6_forward net/netfilter/nf_flow_table_ip.c:1027 [inline]
RIP: 0010:nf_flow_offload_ipv6_hook+0x2f3/0x32a0 net/netfilter/nf_flow_table_ip.c:1106
Code: 85 c6 03 00 00 e8 6d f0 31 f8 41 bc 14 00 00 00 e9 af 02 00 00 48 8d 4b 60 48 89 4c 24 18 48 c1 e9 03 48 89 8c 24 90 00 00 00 <42> 0f b6 04 21 84 c0 0f 85 de 23 00 00 44 0f b7 73 60 41 83 e6 03
RSP: 0018:ffffc900031d6fe0 EFLAGS: 00010206
RAX: ffffffff898e1101 RBX: 0000000000000002 RCX: 000000000000000c
RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000
RBP: ffffc900031d73e0 R08: ffffffff8f7d1e77 R09: 1ffffffff1efa3ce
R10: dffffc0000000000 R11: fffffbfff1efa3cf R12: dffffc0000000000
R13: 1ffff9200063ae1c R14: ffff88816cfc5680 R15: ffffc900031d72e0
FS:  00007fd7653a06c0(0000) GS:ffff8882a9f2e000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000200000010000 CR3: 000000011342a000 CR4: 00000000000006f0
Call Trace:
 <TASK>
 nf_hook_entry_hookfn include/linux/netfilter.h:158 [inline]
 nf_hook_slow+0xc5/0x220 net/netfilter/core.c:623
 nf_hook_ingress include/linux/netfilter_netdev.h:34 [inline]
 nf_ingress net/core/dev.c:5900 [inline]
 __netif_receive_skb_core+0x241c/0x2f90 net/core/dev.c:5996
 __netif_receive_skb_one_core net/core/dev.c:6135 [inline]
 __netif_receive_skb+0x72/0x380 net/core/dev.c:6250
 netif_receive_skb_internal net/core/dev.c:6336 [inline]
 netif_receive_skb+0x1cb/0x790 net/core/dev.c:6395
 tun_rx_batched+0x1b9/0x730 drivers/net/tun.c:1485
 tun_get_user+0x2b65/0x3e90 drivers/net/tun.c:1953
 tun_chr_write_iter+0x113/0x200 drivers/net/tun.c:1999
 new_sync_write fs/read_write.c:593 [inline]
 vfs_write+0x5c9/0xb30 fs/read_write.c:686
 ksys_write+0x145/0x250 fs/read_write.c:738
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7fd76458f7c9
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007fd7653a0038 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 00007fd7647e6090 RCX: 00007fd76458f7c9
RDX: 000000000000fdef RSI: 0000200000000380 RDI: 0000000000000003
RBP: 00007fd7645f297f R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007fd7647e6128 R14: 00007fd7647e6090 R15: 00007ffde1f0b2b8
 </TASK>
Modules linked in:
---[ end trace 0000000000000000 ]---
RIP: 0010:nf_flow_offload_ipv6_forward net/netfilter/nf_flow_table_ip.c:1027 [inline]
RIP: 0010:nf_flow_offload_ipv6_hook+0x2f3/0x32a0 net/netfilter/nf_flow_table_ip.c:1106
Code: 85 c6 03 00 00 e8 6d f0 31 f8 41 bc 14 00 00 00 e9 af 02 00 00 48 8d 4b 60 48 89 4c 24 18 48 c1 e9 03 48 89 8c 24 90 00 00 00 <42> 0f b6 04 21 84 c0 0f 85 de 23 00 00 44 0f b7 73 60 41 83 e6 03
RSP: 0018:ffffc900031d6fe0 EFLAGS: 00010206
RAX: ffffffff898e1101 RBX: 0000000000000002 RCX: 000000000000000c
RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000
RBP: ffffc900031d73e0 R08: ffffffff8f7d1e77 R09: 1ffffffff1efa3ce
R10: dffffc0000000000 R11: fffffbfff1efa3cf R12: dffffc0000000000
R13: 1ffff9200063ae1c R14: ffff88816cfc5680 R15: ffffc900031d72e0
FS:  00007fd7653a06c0(0000) GS:ffff8882a9f2e000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000200000010000 CR3: 000000011342a000 CR4: 00000000000006f0
----------------
Code disassembly (best guess):
   0:	85 c6                	test   %eax,%esi
   2:	03 00                	add    (%rax),%eax
   4:	00 e8                	add    %ch,%al
   6:	6d                   	insl   (%dx),%es:(%rdi)
   7:	f0 31 f8             	lock xor %edi,%eax
   a:	41 bc 14 00 00 00    	mov    $0x14,%r12d
  10:	e9 af 02 00 00       	jmp    0x2c4
  15:	48 8d 4b 60          	lea    0x60(%rbx),%rcx
  19:	48 89 4c 24 18       	mov    %rcx,0x18(%rsp)
  1e:	48 c1 e9 03          	shr    $0x3,%rcx
  22:	48 89 8c 24 90 00 00 	mov    %rcx,0x90(%rsp)
  29:	00
* 2a:	42 0f b6 04 21       	movzbl (%rcx,%r12,1),%eax <-- trapping instruction
  2f:	84 c0                	test   %al,%al
  31:	0f 85 de 23 00 00    	jne    0x2415
  37:	44 0f b7 73 60       	movzwl 0x60(%rbx),%r14d
  3c:	41 83 e6 03          	and    $0x3,%r14d


***

If these findings have caused you to resend the series or submit a
separate fix, please add the following tag to your commit message:
  Tested-by: syzbot@syzkaller.appspotmail.com

---
This report is generated by a bot. It may contain errors.
syzbot ci engineers can be reached at syzkaller@googlegroups.com.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2025-12-07 23:54 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-12-07 16:06 [PATCH nf-next 0/5] Add IP6IP6 flowtable SW acceleration Lorenzo Bianconi
2025-12-07 16:06 ` [PATCH nf-next 1/5] netfilter: Introduce tunnel metadata info in nf_flowtable_ctx struct Lorenzo Bianconi
2025-12-07 16:06 ` [PATCH nf-next 2/5] netfilter: Modify nf_flow_skb_encap_protocol() to return int intead of bool Lorenzo Bianconi
2025-12-07 16:06 ` [PATCH nf-next 3/5] netfilter: flowtable: Add IP6IP6 rx sw acceleration Lorenzo Bianconi
2025-12-07 16:06 ` [PATCH nf-next 4/5] netfilter: flowtable: Add IP6IP6 tx " Lorenzo Bianconi
2025-12-07 16:06 ` [PATCH nf-next 5/5] selftests: netfilter: nft_flowtable.sh: Add IP6IP6 flowtable selftest Lorenzo Bianconi
2025-12-07 21:06 ` [PATCH nf-next 0/5] Add IP6IP6 flowtable SW acceleration Jakub Kicinski
2025-12-07 21:55   ` Lorenzo Bianconi
2025-12-07 23:09     ` Jakub Kicinski
2025-12-07 23:54 ` [syzbot ci] " syzbot ci

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).