* [PATCH nf 1/2] netfilter: flowtable: fix inline vlan encapsulation in xmit path
@ 2026-04-24 10:07 Pablo Neira Ayuso
2026-04-24 10:07 ` [PATCH nf 2/2] netfilter: flowtable: fix inline pppoe " Pablo Neira Ayuso
0 siblings, 1 reply; 3+ messages in thread
From: Pablo Neira Ayuso @ 2026-04-24 10:07 UTC (permalink / raw)
To: netfilter-devel
Several issues in the inline vlan support:
- The layer 2 encapsulation representation in the tuple takes encap[0] as
the outer header and encap[1] as the inner header as seen from the ingress
path. Reverse the encap loop to push first the inner then the outer vlan
header.
- Postpone pushing the layer 2 header once destination device is known.
This allows to calculate the needed hearoom via LL_RESERVED_SPACE to
accommodate the layer 2 headers.
- Add and use nf_flow_vlan_push() as suggested by Eric Woudstra, this
is a simplified version of skb_vlan_push() for egress path only.
Fixes: c653d5a78f34 ("netfilter: flowtable: inline vlan encapsulation in xmit path")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
Tested with ip netns enviroment both with single vlan and vlan-in-vlan, it
needs a bit of tidy up before submission for selftests.
I originally tested this submission with a script that I discover it was wrong:
netns configuration with vlan-in-vlan devices but that only used a single vlan
device.
net/netfilter/nf_flow_table_ip.c | 91 ++++++++++++++++++++++----------
1 file changed, 63 insertions(+), 28 deletions(-)
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index dbd7644fdbeb..1573189a23a1 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -462,23 +462,6 @@ static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx,
nf_flow_ip_tunnel_pop(ctx, skb);
}
-struct nf_flow_xmit {
- const void *dest;
- const void *source;
- struct net_device *outdev;
-};
-
-static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
- struct nf_flow_xmit *xmit)
-{
- skb->dev = xmit->outdev;
- dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
- xmit->dest, xmit->source, skb->len);
- dev_queue_xmit(skb);
-
- return NF_STOLEN;
-}
-
static struct flow_offload_tuple_rhash *
nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
struct nf_flowtable *flow_table, struct sk_buff *skb)
@@ -544,6 +527,31 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
return 1;
}
+/* Similar to skb_vlan_push. */
+static int nf_flow_vlan_push(struct sk_buff *skb, __be16 proto, u16 id,
+ u32 needed_headroom)
+{
+ if (skb_vlan_tag_present(skb)) {
+ struct vlan_hdr *vhdr;
+
+ if (skb_cow_head(skb, needed_headroom + VLAN_HLEN))
+ return -1;
+
+ skb_push(skb, VLAN_HLEN);
+ if (skb_mac_header_was_set(skb))
+ skb->mac_header -= VLAN_HLEN;
+
+ vhdr = (struct vlan_hdr *)skb->data;
+ skb->network_header -= VLAN_HLEN;
+ vhdr->h_vlan_TCI = htons(skb_vlan_tag_get(skb));
+ vhdr->h_vlan_encapsulated_proto = skb->protocol;
+ skb->protocol = skb->vlan_proto;
+ }
+ __vlan_hwaccel_put_tag(skb, proto, id);
+
+ return 0;
+}
+
static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id)
{
int data_len = skb->len + sizeof(__be16);
@@ -730,17 +738,19 @@ static int nf_flow_tunnel_v6_push(struct net *net, struct sk_buff *skb,
}
static int nf_flow_encap_push(struct sk_buff *skb,
- struct flow_offload_tuple *tuple)
+ struct flow_offload_tuple *tuple,
+ struct net_device *outdev)
{
+ u32 needed_headroom = LL_RESERVED_SPACE(outdev);
int i;
- for (i = 0; i < tuple->encap_num; i++) {
+ for (i = tuple->encap_num - 1; i >= 0; i--) {
switch (tuple->encap[i].proto) {
case htons(ETH_P_8021Q):
case htons(ETH_P_8021AD):
- skb_reset_mac_header(skb);
- if (skb_vlan_push(skb, tuple->encap[i].proto,
- tuple->encap[i].id) < 0)
+ if (nf_flow_vlan_push(skb, tuple->encap[i].proto,
+ tuple->encap[i].id,
+ needed_headroom) < 0)
return -1;
break;
case htons(ETH_P_PPP_SES):
@@ -753,6 +763,35 @@ static int nf_flow_encap_push(struct sk_buff *skb,
return 0;
}
+struct nf_flow_xmit {
+ const void *dest;
+ const void *source;
+ struct net_device *outdev;
+ struct flow_offload_tuple *tuple;
+};
+
+static void __nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
+ struct nf_flow_xmit *xmit)
+{
+ skb->dev = xmit->outdev;
+ dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
+ xmit->dest, xmit->source, skb->len);
+ dev_queue_xmit(skb);
+}
+
+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
+ struct nf_flow_xmit *xmit)
+{
+ if (xmit->tuple->encap_num) {
+ if (nf_flow_encap_push(skb, xmit->tuple, xmit->outdev) < 0)
+ return NF_DROP;
+ }
+
+ __nf_flow_queue_xmit(net, skb, xmit);
+
+ return NF_STOLEN;
+}
+
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -797,9 +836,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (nf_flow_tunnel_v4_push(state->net, skb, other_tuple, &ip_daddr) < 0)
return NF_DROP;
- if (nf_flow_encap_push(skb, other_tuple) < 0)
- return NF_DROP;
-
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
rt = dst_rtable(tuplehash->tuple.dst_cache);
@@ -829,6 +865,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
WARN_ON_ONCE(1);
return NF_DROP;
}
+ xmit.tuple = other_tuple;
return nf_flow_queue_xmit(state->net, skb, &xmit);
}
@@ -1119,9 +1156,6 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
&ip6_daddr, encap_limit) < 0)
return NF_DROP;
- if (nf_flow_encap_push(skb, other_tuple) < 0)
- return NF_DROP;
-
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
rt = dst_rt6_info(tuplehash->tuple.dst_cache);
@@ -1151,6 +1185,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
WARN_ON_ONCE(1);
return NF_DROP;
}
+ xmit.tuple = other_tuple;
return nf_flow_queue_xmit(state->net, skb, &xmit);
}
--
2.47.3
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH nf 2/2] netfilter: flowtable: fix inline pppoe encapsulation in xmit path
2026-04-24 10:07 [PATCH nf 1/2] netfilter: flowtable: fix inline vlan encapsulation in xmit path Pablo Neira Ayuso
@ 2026-04-24 10:07 ` Pablo Neira Ayuso
2026-04-27 8:11 ` Qingfang Deng
0 siblings, 1 reply; 3+ messages in thread
From: Pablo Neira Ayuso @ 2026-04-24 10:07 UTC (permalink / raw)
To: netfilter-devel
Address two issues in the inline pppoe encapsulation:
- Add needs_gso_segment flag to segment PPPoE packets in software
given that there is no GSO support for this.
- Use FLOW_OFFLOAD_XMIT_DIRECT since neighbour cache is not available
in point-to-point device, use the hardware address that is obtained
via flowtable path discovery (ie. fill_forward_path).
Fixes: 18d27bed0880 ("netfilter: flowtable: inline pppoe encapsulation in xmit path")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
Tested locally with a pppoe server in my network and pppoe client in a netns inside VM,
it shows an improvement of 70% in bandwith with iperf3 when forwarding pppoe traffic
through the flowtable here compared to classic forwarding.
include/net/netfilter/nf_flow_table.h | 4 +++-
net/netfilter/nf_flow_table_core.c | 1 +
net/netfilter/nf_flow_table_ip.c | 34 ++++++++++++++++++++++++---
net/netfilter/nf_flow_table_path.c | 7 +++++-
4 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index b09c11c048d5..7b23b245a5a8 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -148,9 +148,10 @@ struct flow_offload_tuple {
/* All members above are keys for lookups, see flow_offload_hash(). */
struct { } __hash;
- u8 dir:2,
+ u16 dir:2,
xmit_type:3,
encap_num:2,
+ needs_gso_segment:1,
tun_num:2,
in_vlan_ingress:2;
u16 mtu;
@@ -232,6 +233,7 @@ struct nf_flow_route {
u32 hw_ifindex;
u8 h_source[ETH_ALEN];
u8 h_dest[ETH_ALEN];
+ u8 needs_gso_segment:1;
} out;
enum flow_offload_xmit_type xmit_type;
} tuple[FLOW_OFFLOAD_DIR_MAX];
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 2c4140e6f53c..785d8c244a77 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -122,6 +122,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
flow_tuple->tun = route->tuple[dir].in.tun;
flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
+ flow_tuple->needs_gso_segment = route->tuple[dir].out.needs_gso_segment;
flow_tuple->tun_num = route->tuple[dir].in.num_tuns;
switch (route->tuple[dir].xmit_type) {
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 1573189a23a1..c3aa5df9b724 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -552,7 +552,8 @@ static int nf_flow_vlan_push(struct sk_buff *skb, __be16 proto, u16 id,
return 0;
}
-static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id)
+static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id,
+ u32 needed_headroom)
{
int data_len = skb->len + sizeof(__be16);
struct ppp_hdr {
@@ -561,7 +562,7 @@ static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id)
} *ph;
__be16 proto;
- if (skb_cow_head(skb, PPPOE_SES_HLEN))
+ if (skb_cow_head(skb, needed_headroom + PPPOE_SES_HLEN))
return -1;
switch (skb->protocol) {
@@ -754,7 +755,8 @@ static int nf_flow_encap_push(struct sk_buff *skb,
return -1;
break;
case htons(ETH_P_PPP_SES):
- if (nf_flow_pppoe_push(skb, tuple->encap[i].id) < 0)
+ if (nf_flow_pppoe_push(skb, tuple->encap[i].id,
+ needed_headroom) < 0)
return -1;
break;
}
@@ -779,10 +781,36 @@ static void __nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
dev_queue_xmit(skb);
}
+static unsigned int nf_flow_encap_gso_xmit(struct net *net, struct sk_buff *skb,
+ struct nf_flow_xmit *xmit)
+{
+ struct sk_buff *segs, *nskb;
+
+ segs = skb_gso_segment(skb, 0);
+ if (IS_ERR(segs) || !segs)
+ return NF_DROP;
+
+ skb_list_walk_safe(segs, segs, nskb) {
+ skb_mark_not_on_list(segs);
+
+ if (nf_flow_encap_push(segs, xmit->tuple, xmit->outdev) < 0) {
+ kfree_skb_list(nskb);
+ return NF_DROP;
+ }
+ __nf_flow_queue_xmit(net, segs, xmit);
+ }
+ consume_skb(skb);
+
+ return NF_STOLEN;
+}
+
static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
struct nf_flow_xmit *xmit)
{
if (xmit->tuple->encap_num) {
+ if (skb_is_gso(skb) && xmit->tuple->needs_gso_segment)
+ return nf_flow_encap_gso_xmit(net, skb, xmit);
+
if (nf_flow_encap_push(skb, xmit->tuple, xmit->outdev) < 0)
return NF_DROP;
}
diff --git a/net/netfilter/nf_flow_table_path.c b/net/netfilter/nf_flow_table_path.c
index 6bb9579dcc2a..37f7b590c531 100644
--- a/net/netfilter/nf_flow_table_path.c
+++ b/net/netfilter/nf_flow_table_path.c
@@ -86,6 +86,7 @@ struct nft_forward_info {
u8 ingress_vlans;
u8 h_source[ETH_ALEN];
u8 h_dest[ETH_ALEN];
+ bool needs_gso_segment;
enum flow_offload_xmit_type xmit_type;
};
@@ -138,8 +139,11 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
path->encap.proto;
info->num_encaps++;
}
- if (path->type == DEV_PATH_PPPOE)
+ if (path->type == DEV_PATH_PPPOE) {
memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
+ info->needs_gso_segment = 1;
+ }
break;
case DEV_PATH_BRIDGE:
if (is_zero_ether_addr(info->h_source))
@@ -279,6 +283,7 @@ static void nft_dev_forward_path(const struct nft_pktinfo *pkt,
memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
route->tuple[dir].xmit_type = info.xmit_type;
}
+ route->tuple[!dir].out.needs_gso_segment = info.needs_gso_segment;
}
int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct,
--
2.47.3
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH nf 2/2] netfilter: flowtable: fix inline pppoe encapsulation in xmit path
2026-04-24 10:07 ` [PATCH nf 2/2] netfilter: flowtable: fix inline pppoe " Pablo Neira Ayuso
@ 2026-04-27 8:11 ` Qingfang Deng
0 siblings, 0 replies; 3+ messages in thread
From: Qingfang Deng @ 2026-04-27 8:11 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel, netdev, Felix Fietkau
On Fri, 24 Apr 2026 12:07:59 +0200, Pablo Neira Ayuso wrote:
> Address two issues in the inline pppoe encapsulation:
>
> - Add needs_gso_segment flag to segment PPPoE packets in software
> given that there is no GSO support for this.
FYI, there is a pending PPPoE GSO patch:
https://lore.kernel.org/netdev/20260326081127.61229-1-dqfext@gmail.com/
Regards,
Qingfang
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-04-27 8:11 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-24 10:07 [PATCH nf 1/2] netfilter: flowtable: fix inline vlan encapsulation in xmit path Pablo Neira Ayuso
2026-04-24 10:07 ` [PATCH nf 2/2] netfilter: flowtable: fix inline pppoe " Pablo Neira Ayuso
2026-04-27 8:11 ` Qingfang Deng
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.