From: Eric Woudstra <ericwouds@gmail.com>
To: Michal Ostrowski <mostrows@earthlink.net>,
Andrew Lunn <andrew+netdev@lunn.ch>,
"David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Pablo Neira Ayuso <pablo@netfilter.org>,
Jozsef Kadlecsik <kadlec@netfilter.org>,
Simon Horman <horms@kernel.org>,
Nikolay Aleksandrov <razor@blackwall.org>
Cc: netdev@vger.kernel.org, netfilter-devel@vger.kernel.org,
linux-hardening@vger.kernel.org,
Eric Woudstra <ericwouds@gmail.com>
Subject: [PATCH v11 nf-next 2/2] netfilter: nf_flow_table_offload: Add nf_flow_encap_push() for xmit direct
Date: Tue, 8 Apr 2025 16:24:25 +0200 [thread overview]
Message-ID: <20250408142425.95437-3-ericwouds@gmail.com> (raw)
In-Reply-To: <20250408142425.95437-1-ericwouds@gmail.com>
Loosely based on wenxu's patches:
"nf_flow_table_offload: offload the vlan/PPPoE encap in the flowtable".
Fixed double vlan and pppoe packets, almost entirely rewriting the patch.
When there is no extra vlan-device or pppoe-device added to the fastpath,
it may still be possible that the other tuple has encaps.
This is the case when there is only a bridge in the forward-fastpath,
without a vlan-device. When the bridge is tagging at ingress and keeping
at egress, the other tuple will have an encap.
It will also be the case in the future bridge-fastpath.
In these cases it is necessary to push these encaps.
This patch adds nf_flow_encap_push() and alters nf_flow_queue_xmit()
to call it, only when (tuple.out.ifidx == tuple.out.hw_ifidx).
Signed-off-by: Eric Woudstra <ericwouds@gmail.com>
---
net/netfilter/nf_flow_table_ip.c | 97 +++++++++++++++++++++++++++++++-
1 file changed, 95 insertions(+), 2 deletions(-)
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 8cd4cf7ae211..64a12b9668e7 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -306,6 +306,92 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
return false;
}
+/*
+ * Write a VLAN header into the packet data, directly in front of the
+ * current skb->data.
+ *
+ * @skb:   packet to modify
+ * @proto: value stored in skb->protocol once the header is in place
+ * @id:    host-order value written (as big endian) into the TCI field
+ *
+ * The previous skb->protocol becomes the encapsulated protocol of the new
+ * header, and the network header offset is reset to the new start of data.
+ *
+ * Returns 0 on success, -1 when skb_cow_head() reports failure.
+ */
+static int nf_flow_vlan_inner_push(struct sk_buff *skb, __be16 proto, u16 id)
+{
+	struct vlan_hdr *tag;
+
+	if (skb_cow_head(skb, VLAN_HLEN) != 0)
+		return -1;
+
+	/* __skb_push() hands back the new skb->data, i.e. the header slot. */
+	tag = (struct vlan_hdr *)__skb_push(skb, VLAN_HLEN);
+	skb_reset_network_header(skb);
+
+	tag->h_vlan_encapsulated_proto = skb->protocol;
+	tag->h_vlan_TCI = htons(id);
+
+	skb->protocol = proto;
+	return 0;
+}
+
+/*
+ * Prepend a PPPoE session header, followed by the 16-bit PPP protocol
+ * field, to the packet data.
+ *
+ * @skb: packet to modify; skb->protocol must be ETH_P_IP or ETH_P_IPV6
+ * @id:  host-order value written (as big endian) into the session id field
+ *
+ * On success skb->protocol is set to ETH_P_PPP_SES and the network header
+ * offset is reset to the new start of data.
+ *
+ * Returns 0 on success, -1 when skb_cow_head() reports failure or when
+ * skb->protocol is neither IPv4 nor IPv6.
+ */
+static int nf_flow_ppoe_push(struct sk_buff *skb, u16 id)
+{
+	struct ppp_hdr {
+		struct pppoe_hdr hdr;
+		__be16 proto;
+	} *ph;
+	/*
+	 * Sampled before the push: the length field covers the current
+	 * packet data plus the PPP protocol field that follows the header.
+	 */
+	int payload_len = skb->len + sizeof(__be16);
+	__be16 ppp_proto;
+
+	if (skb_cow_head(skb, PPPOE_SES_HLEN) != 0)
+		return -1;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		ppp_proto = htons(PPP_IP);
+		break;
+	case htons(ETH_P_IPV6):
+		ppp_proto = htons(PPP_IPV6);
+		break;
+	default:
+		return -1;
+	}
+
+	ph = (struct ppp_hdr *)__skb_push(skb, PPPOE_SES_HLEN);
+	skb_reset_network_header(skb);
+
+	ph->hdr.ver = 1;
+	ph->hdr.type = 1;
+	ph->hdr.code = 0;
+	ph->hdr.sid = htons(id);
+	ph->hdr.length = htons(payload_len);
+	ph->proto = ppp_proto;
+
+	skb->protocol = htons(ETH_P_PPP_SES);
+	return 0;
+}
+
+/*
+ * Apply the encapsulations listed in @tuplehash->tuple.encap[] to @skb.
+ *
+ * @skb:       packet to encapsulate
+ * @tuplehash: tuple whose encap[] / encap_num describe what to push
+ * @type:      in/out ethertype; overwritten with ETH_P_8021Q or
+ *             ETH_P_PPP_SES when a header is written into the packet data
+ *
+ * A leading 802.1Q/802.1ad entry is handed to __vlan_hwaccel_put_tag()
+ * and leaves *type untouched. The entry after it (or the first entry, if
+ * that one is not a VLAN) is written into the packet data when it is
+ * 802.1Q or PPPoE; any other protocol at that position is left alone.
+ *
+ * Returns 0 when nothing fails (including when nothing had to be pushed),
+ * otherwise the negative result of the helper that failed.
+ */
+static int nf_flow_encap_push(struct sk_buff *skb,
+			      struct flow_offload_tuple_rhash *tuplehash,
+			      unsigned short *type)
+{
+	int idx = 0;
+
+	if (tuplehash->tuple.encap_num == 0)
+		return 0;
+
+	switch (tuplehash->tuple.encap[idx].proto) {
+	case htons(ETH_P_8021Q):
+	case htons(ETH_P_8021AD):
+		__vlan_hwaccel_put_tag(skb, tuplehash->tuple.encap[idx].proto,
+				       tuplehash->tuple.encap[idx].id);
+		/* Done if the hwaccel tag was the only encapsulation. */
+		if (++idx >= tuplehash->tuple.encap_num)
+			return 0;
+		break;
+	default:
+		break;
+	}
+
+	if (tuplehash->tuple.encap[idx].proto == htons(ETH_P_8021Q)) {
+		*type = ETH_P_8021Q;
+		return nf_flow_vlan_inner_push(skb,
+					       tuplehash->tuple.encap[idx].proto,
+					       tuplehash->tuple.encap[idx].id);
+	}
+
+	if (tuplehash->tuple.encap[idx].proto == htons(ETH_P_PPP_SES)) {
+		*type = ETH_P_PPP_SES;
+		return nf_flow_ppoe_push(skb, tuplehash->tuple.encap[idx].id);
+	}
+
+	/* No in-packet header for any other protocol. */
+	return 0;
+}
+
static void nf_flow_encap_pop(struct sk_buff *skb,
struct flow_offload_tuple_rhash *tuplehash)
{
@@ -335,6 +421,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
const struct flow_offload_tuple_rhash *tuplehash,
+ struct flow_offload_tuple_rhash *other_tuplehash,
unsigned short type)
{
struct net_device *outdev;
@@ -343,6 +430,10 @@ static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
if (!outdev)
return NF_DROP;
+ if (tuplehash->tuple.out.ifidx == tuplehash->tuple.out.hw_ifidx &&
+ (nf_flow_encap_push(skb, other_tuplehash, &type) < 0))
+ return NF_DROP;
+
skb->dev = outdev;
dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
tuplehash->tuple.out.h_source, skb->len);
@@ -462,7 +553,8 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
ret = NF_STOLEN;
break;
case FLOW_OFFLOAD_XMIT_DIRECT:
- ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash,
+ &flow->tuplehash[!dir], ETH_P_IP);
if (ret == NF_DROP)
flow_offload_teardown(flow);
break;
@@ -757,7 +849,8 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
ret = NF_STOLEN;
break;
case FLOW_OFFLOAD_XMIT_DIRECT:
- ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash,
+ &flow->tuplehash[!dir], ETH_P_IPV6);
if (ret == NF_DROP)
flow_offload_teardown(flow);
break;
--
2.47.1
next prev parent reply other threads:[~2025-04-08 14:24 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-08 14:24 [PATCH v11 nf-next 0/2] Add nf_flow_encap_push() for xmit direct Eric Woudstra
2025-04-08 14:24 ` [PATCH v11 nf-next 1/2] net: pppoe: avoid zero-length arrays in struct pppoe_hdr Eric Woudstra
2025-04-09 16:25 ` Kees Cook
2025-04-08 14:24 ` Eric Woudstra [this message]
2025-04-08 16:02 ` [PATCH v11 nf-next 0/2] Add nf_flow_encap_push() for xmit direct Pablo Neira Ayuso
2025-04-08 18:19 ` Eric Woudstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250408142425.95437-3-ericwouds@gmail.com \
--to=ericwouds@gmail.com \
--cc=andrew+netdev@lunn.ch \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=kadlec@netfilter.org \
--cc=kuba@kernel.org \
--cc=linux-hardening@vger.kernel.org \
--cc=mostrows@earthlink.net \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=pablo@netfilter.org \
--cc=razor@blackwall.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).