From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org,
pabeni@redhat.com, edumazet@google.com, fw@strlen.de,
horms@kernel.org
Subject: [PATCH net 12/14] netfilter: flowtable: fix inline vlan encapsulation in xmit path
Date: Fri, 1 May 2026 14:22:35 +0200 [thread overview]
Message-ID: <20260501122237.296262-13-pablo@netfilter.org> (raw)
In-Reply-To: <20260501122237.296262-1-pablo@netfilter.org>
Several issues in the inline vlan support:
- The layer 2 encapsulation representation in the tuple takes encap[0] as
the outer header and encap[1] as the inner header as seen from the ingress
path. Reverse the encap loop to push first the inner then the outer vlan
header.
- Postpone pushing the layer 2 header once destination device is known.
This allows to calculate the needed hearoom via LL_RESERVED_SPACE to
accommodate the layer 2 headers.
- Add and use nf_flow_vlan_push() as suggested by Eric Woudstra, this
is a simplified version of skb_vlan_push() for egress path only.
Fixes: c653d5a78f34 ("netfilter: flowtable: inline vlan encapsulation in xmit path")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_flow_table_ip.c | 110 ++++++++++++++++++++-----------
1 file changed, 73 insertions(+), 37 deletions(-)
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 8d5fb7e940a1..0ce3c209050c 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -462,32 +462,6 @@ static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx,
nf_flow_ip_tunnel_pop(ctx, skb);
}
-struct nf_flow_xmit {
- const void *dest;
- const void *source;
- struct net_device *outdev;
-};
-
-static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
- struct nf_flow_xmit *xmit)
-{
- struct net_device *dev = xmit->outdev;
- unsigned int hh_len = LL_RESERVED_SPACE(dev);
-
- if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
- skb = skb_expand_head(skb, hh_len);
- if (!skb)
- return NF_STOLEN;
- }
-
- skb->dev = dev;
- dev_hard_header(skb, dev, ntohs(skb->protocol),
- xmit->dest, xmit->source, skb->len);
- dev_queue_xmit(skb);
-
- return NF_STOLEN;
-}
-
static struct flow_offload_tuple_rhash *
nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
struct nf_flowtable *flow_table, struct sk_buff *skb)
@@ -553,6 +527,32 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
return 1;
}
+/* Similar to skb_vlan_push. */
+static int nf_flow_vlan_push(struct sk_buff *skb, __be16 proto, u16 id,
+ u32 needed_headroom)
+{
+ if (skb_vlan_tag_present(skb)) {
+ struct vlan_hdr *vhdr;
+
+ if (skb_cow_head(skb, needed_headroom + VLAN_HLEN))
+ return -1;
+
+ __skb_push(skb, VLAN_HLEN);
+ if (skb_mac_header_was_set(skb))
+ skb->mac_header -= VLAN_HLEN;
+
+ vhdr = (struct vlan_hdr *)skb->data;
+ skb->network_header -= VLAN_HLEN;
+ vhdr->h_vlan_TCI = htons(skb_vlan_tag_get(skb));
+ vhdr->h_vlan_encapsulated_proto = skb->protocol;
+ skb->protocol = skb->vlan_proto;
+ skb_postpush_rcsum(skb, skb->data, VLAN_HLEN);
+ }
+ __vlan_hwaccel_put_tag(skb, proto, id);
+
+ return 0;
+}
+
static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id)
{
int data_len = skb->len + sizeof(__be16);
@@ -739,17 +739,19 @@ static int nf_flow_tunnel_v6_push(struct net *net, struct sk_buff *skb,
}
static int nf_flow_encap_push(struct sk_buff *skb,
- struct flow_offload_tuple *tuple)
+ struct flow_offload_tuple *tuple,
+ struct net_device *outdev)
{
+ u32 needed_headroom = LL_RESERVED_SPACE(outdev);
int i;
- for (i = 0; i < tuple->encap_num; i++) {
+ for (i = tuple->encap_num - 1; i >= 0; i--) {
switch (tuple->encap[i].proto) {
case htons(ETH_P_8021Q):
case htons(ETH_P_8021AD):
- skb_reset_mac_header(skb);
- if (skb_vlan_push(skb, tuple->encap[i].proto,
- tuple->encap[i].id) < 0)
+ if (nf_flow_vlan_push(skb, tuple->encap[i].proto,
+ tuple->encap[i].id,
+ needed_headroom) < 0)
return -1;
break;
case htons(ETH_P_PPP_SES):
@@ -762,6 +764,44 @@ static int nf_flow_encap_push(struct sk_buff *skb,
return 0;
}
+struct nf_flow_xmit {
+ const void *dest;
+ const void *source;
+ struct net_device *outdev;
+ struct flow_offload_tuple *tuple;
+};
+
+static void __nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
+ struct nf_flow_xmit *xmit)
+{
+ struct net_device *dev = xmit->outdev;
+ unsigned int hh_len = LL_RESERVED_SPACE(dev);
+
+ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
+ skb = skb_expand_head(skb, hh_len);
+ if (!skb)
+ return;
+ }
+
+ skb->dev = dev;
+ dev_hard_header(skb, dev, ntohs(skb->protocol),
+ xmit->dest, xmit->source, skb->len);
+ dev_queue_xmit(skb);
+}
+
+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
+ struct nf_flow_xmit *xmit)
+{
+ if (xmit->tuple->encap_num) {
+ if (nf_flow_encap_push(skb, xmit->tuple, xmit->outdev) < 0)
+ return NF_DROP;
+ }
+
+ __nf_flow_queue_xmit(net, skb, xmit);
+
+ return NF_STOLEN;
+}
+
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -806,9 +846,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (nf_flow_tunnel_v4_push(state->net, skb, other_tuple, &ip_daddr) < 0)
return NF_DROP;
- if (nf_flow_encap_push(skb, other_tuple) < 0)
- return NF_DROP;
-
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
rt = dst_rtable(tuplehash->tuple.dst_cache);
@@ -838,6 +875,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
WARN_ON_ONCE(1);
return NF_DROP;
}
+ xmit.tuple = other_tuple;
return nf_flow_queue_xmit(state->net, skb, &xmit);
}
@@ -1128,9 +1166,6 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
&ip6_daddr, encap_limit) < 0)
return NF_DROP;
- if (nf_flow_encap_push(skb, other_tuple) < 0)
- return NF_DROP;
-
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
rt = dst_rt6_info(tuplehash->tuple.dst_cache);
@@ -1160,6 +1195,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
WARN_ON_ONCE(1);
return NF_DROP;
}
+ xmit.tuple = other_tuple;
return nf_flow_queue_xmit(state->net, skb, &xmit);
}
--
2.47.3
next prev parent reply other threads:[~2026-05-01 12:23 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-01 12:22 [PATCH net 00/14] Netfilter fixes for net Pablo Neira Ayuso
2026-05-01 12:22 ` [PATCH net 01/14] netfilter: replace skb_try_make_writable() by skb_ensure_writable() Pablo Neira Ayuso
2026-05-01 23:50 ` patchwork-bot+netdevbpf
2026-05-01 12:22 ` [PATCH net 02/14] netfilter: nft_fwd_netdev: add device and headroom validate with neigh forwarding Pablo Neira Ayuso
2026-05-01 12:22 ` [PATCH net 03/14] netfilter: nft_fwd_netdev: use recursion counter in neigh egress path Pablo Neira Ayuso
2026-05-01 12:22 ` [PATCH net 04/14] netfilter: x_tables: add .check_hooks to matches and targets Pablo Neira Ayuso
2026-05-01 12:22 ` [PATCH net 05/14] netfilter: nft_compat: run xt_check_hooks_{match,target}() from .validate Pablo Neira Ayuso
2026-05-01 12:22 ` [PATCH net 06/14] netfilter: xt_CT: fix usersize for v1 and v2 revision Pablo Neira Ayuso
2026-05-01 12:22 ` [PATCH net 07/14] netfilter: nf_tables: fix netdev hook allocation memleak with dormant tables Pablo Neira Ayuso
2026-05-01 12:22 ` [PATCH net 08/14] netfilter: nf_socket: skip socket lookup for non-first fragments Pablo Neira Ayuso
2026-05-01 12:22 ` [PATCH net 09/14] netfilter: nf_tables: skip L4 header parsing " Pablo Neira Ayuso
2026-05-01 12:22 ` [PATCH net 10/14] netfilter: xtables: fix " Pablo Neira Ayuso
2026-05-01 12:22 ` [PATCH net 11/14] netfilter: flowtable: ensure sufficient headroom in xmit path Pablo Neira Ayuso
2026-05-01 12:22 ` Pablo Neira Ayuso [this message]
2026-05-01 12:22 ` [PATCH net 13/14] netfilter: flowtable: fix inline pppoe encapsulation " Pablo Neira Ayuso
2026-05-01 12:22 ` [PATCH net 14/14] netfilter: flowtable: use skb_pull_rcsum() to pop vlan/pppoe header Pablo Neira Ayuso
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260501122237.296262-13-pablo@netfilter.org \
--to=pablo@netfilter.org \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=fw@strlen.de \
--cc=horms@kernel.org \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.