public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org,
	pabeni@redhat.com, edumazet@google.com, fw@strlen.de,
	horms@kernel.org, steffen.klassert@secunet.com,
	antony.antony@secunet.com
Subject: [PATCH net-next,RFC 1/8] netfilter: flowtable: Add basic bulking infrastructure for early ingress hook
Date: Tue, 17 Mar 2026 12:29:10 +0100	[thread overview]
Message-ID: <20260317112917.4170466-2-pablo@netfilter.org> (raw)
In-Reply-To: <20260317112917.4170466-1-pablo@netfilter.org>

Add support for registering an early_ingress hook for the flowtable to
deal with the skb list. Split this initial list into bulks according to
ethertype, output device, next hop and tos.

Then, send each skb bulk through the neighbour layer. The xmit path is not
yet listified, i.e. the bulk is split into individual skbuffs that are
sent to the xmit path, one by one, at this stage.

This only implements the flowtable RX bulking. The TX side comes as a
follow up patch in this series.

Co-developed-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h |  11 +-
 net/netfilter/nf_flow_table_inet.c    |  79 ++++++++++
 net/netfilter/nf_flow_table_ip.c      | 209 ++++++++++++++++++++++++++
 3 files changed, 298 insertions(+), 1 deletion(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index b09c11c048d5..ee98da9edc1b 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -18,6 +18,13 @@ struct nf_flow_rule;
 struct flow_offload;
 enum flow_offload_tuple_dir;
 
+struct nft_bulk_cb {
+	struct sk_buff *last;
+	struct flow_offload_tuple_rhash *tuplehash;
+};
+
+#define NFT_BULK_CB(skb) ((struct nft_bulk_cb *)(skb)->cb)
+
 struct nf_flow_key {
 	struct flow_dissector_key_meta			meta;
 	struct flow_dissector_key_control		control;
@@ -65,6 +72,7 @@ struct nf_flowtable_type {
 	void				(*get)(struct nf_flowtable *ft);
 	void				(*put)(struct nf_flowtable *ft);
 	nf_hookfn			*hook;
+	nf_hookfn			*hook_list;
 	struct module			*owner;
 };
 
@@ -77,7 +85,6 @@ struct nf_flowtable {
 	unsigned int			flags;		/* readonly in datapath */
 	int				priority;	/* control path (padding hole) */
 	struct rhashtable		rhashtable;	/* datapath, read-mostly members come first */
-
 	struct list_head		list;		/* slowpath parts */
 	const struct nf_flowtable_type	*type;
 	struct delayed_work		gc_work;
@@ -339,6 +346,8 @@ unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 				     const struct nf_hook_state *state);
 unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 				       const struct nf_hook_state *state);
+void __nf_flow_offload_ip_hook_list(void *priv, struct list_head *head,
+				    const struct nf_hook_state *state);
 
 #if (IS_BUILTIN(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
     (IS_MODULE(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index b0f199171932..d0e7860c9d08 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -42,6 +42,82 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
 	return NF_ACCEPT;
 }
 
+static unsigned int
+__nf_flow_offload_hook_list(void *priv, struct sk_buff *unused,
+			    const struct nf_hook_state *state, u32 flags)
+{
+	struct list_head *skb_list = state->skb_list;
+	struct sk_buff *skb, *next;
+	struct vlan_ethhdr *veth;
+	LIST_HEAD(skb_ipv4_list);
+	LIST_HEAD(skb_ipv6_list);
+	__be16 proto;
+
+	list_for_each_entry_safe(skb, next, skb_list, list) {
+		skb_reset_network_header(skb);
+		if (!skb_transport_header_was_set(skb))
+			skb_reset_transport_header(skb);
+		skb_reset_mac_len(skb);
+
+		switch (skb->protocol) {
+		case htons(ETH_P_8021Q):
+			veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+			proto = veth->h_vlan_encapsulated_proto;
+			break;
+		case htons(ETH_P_PPP_SES):
+			nf_flow_pppoe_proto(skb, &proto);
+			break;
+		default:
+			proto = skb->protocol;
+			break;
+		}
+
+		switch (proto) {
+		case htons(ETH_P_IP):
+			list_move_tail(&skb->list, &skb_ipv4_list);
+			break;
+		case htons(ETH_P_IPV6):
+			list_move_tail(&skb->list, &skb_ipv6_list);
+			break;
+		}
+	}
+
+	if (flags & (1 << NFPROTO_IPV4) && !list_empty(&skb_ipv4_list))
+		__nf_flow_offload_ip_hook_list(priv, &skb_ipv4_list, state);
+
+	list_splice_tail(&skb_ipv4_list, skb_list);
+	list_splice_tail(&skb_ipv6_list, skb_list);
+
+	if (!list_empty(skb_list))
+		return NF_ACCEPT;
+
+	return NF_STOLEN;
+}
+
+static unsigned int
+nf_flow_offload_ip_hook_list(void *priv, struct sk_buff *unused,
+			     const struct nf_hook_state *state)
+{
+	return __nf_flow_offload_hook_list(priv, unused, state,
+					   1 << NFPROTO_IPV4);
+}
+
+static unsigned int
+nf_flow_offload_ipv6_hook_list(void *priv, struct sk_buff *unused,
+				 const struct nf_hook_state *state)
+{
+	return __nf_flow_offload_hook_list(priv, unused, state,
+					   1 << NFPROTO_IPV6);
+}
+
+static unsigned int
+nf_flow_offload_inet_hook_list(void *priv, struct sk_buff *unused,
+			       const struct nf_hook_state *state)
+{
+	return __nf_flow_offload_hook_list(priv, unused, state,
+					   (1 << NFPROTO_IPV4) | (1 << NFPROTO_IPV6));
+}
+
 static int nf_flow_rule_route_inet(struct net *net,
 				   struct flow_offload *flow,
 				   enum flow_offload_tuple_dir dir,
@@ -72,6 +148,7 @@ static struct nf_flowtable_type flowtable_inet = {
 	.action		= nf_flow_rule_route_inet,
 	.free		= nf_flow_table_free,
 	.hook		= nf_flow_offload_inet_hook,
+	.hook_list	= nf_flow_offload_inet_hook_list,
 	.owner		= THIS_MODULE,
 };
 
@@ -82,6 +159,7 @@ static struct nf_flowtable_type flowtable_ipv4 = {
 	.action		= nf_flow_rule_route_ipv4,
 	.free		= nf_flow_table_free,
 	.hook		= nf_flow_offload_ip_hook,
+	.hook_list	= nf_flow_offload_ip_hook_list,
 	.owner		= THIS_MODULE,
 };
 
@@ -92,6 +170,7 @@ static struct nf_flowtable_type flowtable_ipv6 = {
 	.action		= nf_flow_rule_route_ipv6,
 	.free		= nf_flow_table_free,
 	.hook		= nf_flow_offload_ipv6_hook,
+	.hook_list	= nf_flow_offload_ipv6_hook_list,
 	.owner		= THIS_MODULE,
 };
 
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 3fdb10d9bf7f..41f4768ce715 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -752,6 +752,215 @@ static int nf_flow_encap_push(struct sk_buff *skb,
 	return 0;
 }
 
+static void nft_flow_v4_push_hdrs_list(struct net *net, struct sk_buff *first,
+				       struct flow_offload_tuple *other_tuple,
+				       __be32 *ip_daddr)
+{
+	struct sk_buff *skb, *nskb;
+
+	skb_list_walk_safe(first, skb, nskb) {
+		if (nf_flow_tunnel_v4_push(net, skb, other_tuple, ip_daddr) < 0) {
+			skb_mark_not_on_list(skb);
+			kfree_skb(skb);
+			continue;
+		}
+		if (nf_flow_encap_push(skb, other_tuple) < 0) {
+			skb_mark_not_on_list(skb);
+			kfree_skb(skb);
+			continue;
+		}
+	}
+}
+
+static void nft_bulk_receive(struct list_head *head, struct sk_buff *skb)
+{
+	const struct iphdr *iph;
+	struct dst_entry *dst;
+	struct xfrm_state *x;
+	struct sk_buff *p;
+	struct rtable *rt;
+	__be32 daddr;
+	int proto;
+	__u8 tos;
+
+	iph = ip_hdr(skb);
+	dst = skb_dst(skb);
+	BUG_ON(!dst);
+
+	rt = (struct rtable *)dst;
+	daddr = rt_nexthop(rt, iph->daddr);
+	x = dst_xfrm(dst);
+	proto = iph->protocol;
+	tos = iph->tos;
+
+	list_for_each_entry(p, head, list) {
+		struct dst_entry *dst2;
+		struct rtable *rt2;
+		struct iphdr *iph2;
+		__be32 daddr2;
+
+		if (p->protocol != htons(ETH_P_IP))
+			continue;
+
+		dst2 = skb_dst(p);
+		rt2 = (struct rtable *)dst2;
+		if (dst->dev != dst2->dev)
+			continue;
+
+		iph2 = ip_hdr(p);
+		daddr2 = rt_nexthop(rt2, iph2->daddr);
+		if (daddr != daddr2)
+			continue;
+
+		if (tos != iph2->tos)
+			continue;
+
+		if (x != dst_xfrm(dst2))
+			continue;
+
+		goto found;
+	}
+
+	goto out;
+
+found:
+	if (NFT_BULK_CB(p)->last == p)
+		skb_shinfo(p)->frag_list = skb;
+	else
+		NFT_BULK_CB(p)->last->next = skb;
+
+	NFT_BULK_CB(p)->last = skb;
+
+	return;
+out:
+	/* First skb */
+	NFT_BULK_CB(skb)->last = skb;
+	list_add_tail(&skb->list, head);
+	skb->priority = rt_tos2priority(iph->tos);
+
+	return;
+}
+
+static void nf_flow_neigh_xmit_list(struct sk_buff *skb, struct net_device *outdev, const void *daddr)
+{
+	struct sk_buff *iter = skb->next;
+	int hlen;
+
+	skb->dev = outdev;
+	hlen = dev_hard_header(skb, outdev, ntohs(skb->protocol), daddr, NULL, skb->len);
+	if (hlen < 0) {
+		kfree_skb_list(skb);
+		return;
+	}
+
+	skb_reset_mac_header(skb);
+
+	while (iter) {
+		iter->dev = outdev;
+		skb_push(iter, hlen);
+		skb_copy_to_linear_data(iter, skb->data, hlen);
+		skb_reset_mac_header(iter);
+		iter = iter->next;
+	}
+
+	iter = skb;
+	while (iter) {
+		struct sk_buff *next;
+
+		next = iter->next;
+		iter->next = NULL;
+		dev_queue_xmit(iter);
+		iter = next;
+	}
+}
+
+void __nf_flow_offload_ip_hook_list(void *priv, struct list_head *head,
+				    const struct nf_hook_state *state)
+{
+	struct flow_offload_tuple_rhash *tuplehash;
+	struct nf_flowtable *flow_table = priv;
+	struct flow_offload_tuple *other_tuple;
+	enum flow_offload_tuple_dir dir;
+	struct nf_flowtable_ctx ctx = {
+		.in	= state->in,
+	};
+	struct flow_offload *flow;
+	struct sk_buff *skb, *n;
+	struct neighbour *neigh;
+	LIST_HEAD(bulk_head);
+	LIST_HEAD(bulk_list);
+	LIST_HEAD(acc_list);
+	struct rtable *rt;
+	__be32 ip_daddr;
+	int ret;
+
+	list_for_each_entry_safe(skb, n, head, list) {
+		skb_list_del_init(skb);
+
+		ctx.hdrsize = 0;
+		ctx.offset = 0;
+
+		tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb);
+		if (!tuplehash) {
+			list_add_tail(&skb->list, &acc_list);
+			continue;
+		}
+
+		ret = nf_flow_offload_forward(&ctx, flow_table, tuplehash, skb);
+		if (ret < 0) {
+			kfree_skb(skb);
+			continue;
+		} else if (ret == 0) {
+			list_add_tail(&skb->list, &acc_list);
+			continue;
+		}
+
+		skb_dst_set_noref(skb, tuplehash->tuple.dst_cache);
+		memset(skb->cb, 0, sizeof(struct nft_bulk_cb));
+		NFT_BULK_CB(skb)->tuplehash = tuplehash;
+
+		list_add_tail(&skb->list, &bulk_list);
+	}
+
+	list_splice_init(&acc_list, head);
+
+	list_for_each_entry_safe(skb, n, &bulk_list, list) {
+		skb_list_del_init(skb);
+		nft_bulk_receive(&bulk_head, skb);
+	}
+
+	list_for_each_entry_safe(skb, n, &bulk_head, list) {
+
+		list_del_init(&skb->list);
+
+		skb->next = skb_shinfo(skb)->frag_list;
+		skb_shinfo(skb)->frag_list = NULL;
+
+		tuplehash = NFT_BULK_CB(skb)->tuplehash;
+		skb_dst_set_noref(skb, tuplehash->tuple.dst_cache);
+		rt = (struct rtable *)skb_dst(skb);
+
+		dir = tuplehash->tuple.dir;
+		flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+		other_tuple = &flow->tuplehash[!dir].tuple;
+		ip_daddr = other_tuple->src_v4.s_addr;
+
+		if (other_tuple->tun_num || other_tuple->encap_num)
+			nft_flow_v4_push_hdrs_list(state->net, skb, other_tuple, &ip_daddr);
+
+		neigh = ip_neigh_gw4(rt->dst.dev, rt_nexthop(rt, ip_daddr));
+		if (IS_ERR(neigh)) {
+			kfree_skb_list(skb);
+			continue;
+		}
+
+		nf_flow_neigh_xmit_list(skb, rt->dst.dev, neigh->ha);
+	}
+
+	BUG_ON(!list_empty(&bulk_head));
+}
+EXPORT_SYMBOL_GPL(__nf_flow_offload_ip_hook_list);
+
 unsigned int
 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 			const struct nf_hook_state *state)
-- 
2.47.3


  reply	other threads:[~2026-03-17 11:29 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-17 11:29 [PATCH net-next,RFC 0/8] netfilter: flowtable bulking Pablo Neira Ayuso
2026-03-17 11:29 ` Pablo Neira Ayuso [this message]
2026-03-17 11:29 ` [PATCH net-next,RFC 2/8] netfilter: flowtable: Add IPv6 bulking infrastructure for early ingress hook Pablo Neira Ayuso
2026-03-17 11:29 ` [PATCH net-next,RFC 3/8] netfilter: nf_tables: add flowtable early_ingress support Pablo Neira Ayuso
2026-03-17 11:29 ` [PATCH net-next,RFC 4/8] netfilter: nf_tables: add nft_set_pktinfo_ingress() Pablo Neira Ayuso
2026-03-17 11:29 ` [PATCH net-next,RFC 5/8] netfilter: nf_tables: add early ingress chain Pablo Neira Ayuso
2026-03-17 11:29 ` [PATCH net-next,RFC 6/8] net: add dev_dst_drop() helper function Pablo Neira Ayuso
2026-03-17 11:29 ` [PATCH net-next,RFC 7/8] net: add dev_noqueue_xmit_list() " Pablo Neira Ayuso
2026-03-17 11:29 ` [PATCH net-next,RFC 8/8] net: add dev_queue_xmit_list() and use it Pablo Neira Ayuso
2026-03-17 11:39 ` [PATCH net-next,RFC 0/8] netfilter: flowtable bulking Pablo Neira Ayuso
2026-03-19  6:15 ` Qingfang Deng
2026-03-19 11:28   ` Steffen Klassert
2026-03-19 12:18     ` Felix Fietkau
2026-03-20  6:49       ` Steffen Klassert
2026-03-20  8:50         ` Felix Fietkau
2026-03-20  9:00           ` Steffen Klassert

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260317112917.4170466-2-pablo@netfilter.org \
    --to=pablo@netfilter.org \
    --cc=antony.antony@secunet.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=fw@strlen.de \
    --cc=horms@kernel.org \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=steffen.klassert@secunet.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox