From: Eric Woudstra <ericwouds@gmail.com>
To: Michal Ostrowski <mostrows@earthlink.net>,
Andrew Lunn <andrew+netdev@lunn.ch>,
"David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Pablo Neira Ayuso <pablo@netfilter.org>,
Jozsef Kadlecsik <kadlec@netfilter.org>,
Simon Horman <horms@kernel.org>,
Nikolay Aleksandrov <razor@blackwall.org>
Cc: netdev@vger.kernel.org, netfilter-devel@vger.kernel.org,
linux-hardening@vger.kernel.org,
Eric Woudstra <ericwouds@gmail.com>
Subject: [PATCH v11 nf-next 2/2] netfilter: nf_flow_table_offload: Add nf_flow_encap_push() for xmit direct
Date: Tue, 8 Apr 2025 16:24:25 +0200 [thread overview]
Message-ID: <20250408142425.95437-3-ericwouds@gmail.com> (raw)
In-Reply-To: <20250408142425.95437-1-ericwouds@gmail.com>
Loosely based on wenxu's patches:
"nf_flow_table_offload: offload the vlan/PPPoE encap in the flowtable".
Fixed double vlan and pppoe packets, almost entirely rewriting the patch.
When there is no extra vlan-device or pppoe-device added to the fastpath,
it may still be possible that the other tuple has encaps.
This is the case when there is only a bridge in the forward-fastpath,
without a vlan-device. When the bridge is tagging at ingress and keeping
at egress, the other tuple will have an encap.
It will also be the case in the future bridge-fastpath.
In these cases it is necessary to push these encaps.
This patch adds nf_flow_encap_push() and alters nf_flow_queue_xmit()
to call it, only when (tuple.out.ifidx == tuple.out.hw_ifidx).
Signed-off-by: Eric Woudstra <ericwouds@gmail.com>
---
net/netfilter/nf_flow_table_ip.c | 97 +++++++++++++++++++++++++++++++-
1 file changed, 95 insertions(+), 2 deletions(-)
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 8cd4cf7ae211..64a12b9668e7 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -306,6 +306,92 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
return false;
}
+/*
+ * Insert an in-band VLAN header in front of the current packet data.
+ *
+ * @skb:   packet to modify; its current skb->protocol is stored as the
+ *         encapsulated protocol and then replaced by @proto
+ * @proto: ethertype to place in skb->protocol (network byte order)
+ * @id:    value written to the TCI field after htons() conversion
+ *
+ * Returns 0 on success, -1 when skb_cow_head() reports failure.
+ */
+static int nf_flow_vlan_inner_push(struct sk_buff *skb, __be16 proto, u16 id)
+{
+	struct vlan_hdr *tag;
+
+	if (skb_cow_head(skb, VLAN_HLEN) != 0)
+		return -1;
+
+	/* __skb_push() hands back the new start of data, where the tag lives. */
+	tag = (struct vlan_hdr *)__skb_push(skb, VLAN_HLEN);
+	skb_reset_network_header(skb);
+
+	/* Save the old protocol inside the tag before overwriting it. */
+	tag->h_vlan_encapsulated_proto = skb->protocol;
+	tag->h_vlan_TCI = htons(id);
+	skb->protocol = proto;
+
+	return 0;
+}
+
+/*
+ * Prepend a PPPoE session header plus PPP protocol field to the packet.
+ *
+ * @skb: packet to modify; skb->protocol must be ETH_P_IP or ETH_P_IPV6
+ *       and is set to ETH_P_PPP_SES on success
+ * @id:  session id, written to the header after htons() conversion
+ *
+ * Returns 0 on success, -1 when skb_cow_head() fails or when
+ * skb->protocol is neither IPv4 nor IPv6.
+ */
+static int nf_flow_ppoe_push(struct sk_buff *skb, u16 id)
+{
+	struct ppp_hdr {
+		struct pppoe_hdr hdr;
+		__be16 proto;
+	} *pppoe;
+	/* Length field value: packet length before the push plus 2. */
+	int payload_len = skb->len + 2;
+	__be16 ppp_proto;
+
+	if (skb_cow_head(skb, PPPOE_SES_HLEN) != 0)
+		return -1;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		ppp_proto = htons(PPP_IP);
+		break;
+	case htons(ETH_P_IPV6):
+		ppp_proto = htons(PPP_IPV6);
+		break;
+	default:
+		return -1;
+	}
+
+	__skb_push(skb, PPPOE_SES_HLEN);
+	skb_reset_network_header(skb);
+
+	pppoe = (struct ppp_hdr *)(skb->data);
+	pppoe->hdr.ver = 1;
+	pppoe->hdr.type = 1;
+	pppoe->hdr.code = 0;
+	pppoe->hdr.sid = htons(id);
+	pppoe->hdr.length = htons(payload_len);
+	pppoe->proto = ppp_proto;
+
+	skb->protocol = htons(ETH_P_PPP_SES);
+
+	return 0;
+}
+
+/*
+ * Apply the encapsulations recorded in @tuplehash to @skb.
+ *
+ * If the first encap entry is 802.1Q or 802.1AD it is attached with
+ * __vlan_hwaccel_put_tag(). The following entry (or the first one, if
+ * it was not a VLAN) is then pushed into the packet data when it is
+ * 802.1Q or a PPPoE session; in that case *@type is updated to the
+ * matching ethertype.
+ *
+ * @skb:       packet to encapsulate
+ * @tuplehash: tuple whose encap[] / encap_num describe what to push
+ * @type:      in/out ethertype, rewritten only when an in-band header
+ *             is pushed
+ *
+ * Returns 0 when there is nothing (more) to push or the entry's
+ * protocol is not handled here, otherwise the result of the push helper.
+ */
+static int nf_flow_encap_push(struct sk_buff *skb,
+			      struct flow_offload_tuple_rhash *tuplehash,
+			      unsigned short *type)
+{
+	int idx = 0;
+
+	if (tuplehash->tuple.encap_num == 0)
+		return 0;
+
+	switch (tuplehash->tuple.encap[idx].proto) {
+	case htons(ETH_P_8021Q):
+	case htons(ETH_P_8021AD):
+		__vlan_hwaccel_put_tag(skb, tuplehash->tuple.encap[idx].proto,
+				       tuplehash->tuple.encap[idx].id);
+		idx++;
+		if (idx >= tuplehash->tuple.encap_num)
+			return 0;
+		break;
+	default:
+		break;
+	}
+
+	if (tuplehash->tuple.encap[idx].proto == htons(ETH_P_8021Q)) {
+		*type = ETH_P_8021Q;
+		return nf_flow_vlan_inner_push(skb,
+					       tuplehash->tuple.encap[idx].proto,
+					       tuplehash->tuple.encap[idx].id);
+	}
+
+	if (tuplehash->tuple.encap[idx].proto == htons(ETH_P_PPP_SES)) {
+		*type = ETH_P_PPP_SES;
+		return nf_flow_ppoe_push(skb, tuplehash->tuple.encap[idx].id);
+	}
+
+	/* Any other protocol is left untouched, as before. */
+	return 0;
+}
+
static void nf_flow_encap_pop(struct sk_buff *skb,
struct flow_offload_tuple_rhash *tuplehash)
{
@@ -335,6 +421,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
const struct flow_offload_tuple_rhash *tuplehash,
+ struct flow_offload_tuple_rhash *other_tuplehash,
unsigned short type)
{
struct net_device *outdev;
@@ -343,6 +430,10 @@ static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
if (!outdev)
return NF_DROP;
+ if (tuplehash->tuple.out.ifidx == tuplehash->tuple.out.hw_ifidx &&
+ (nf_flow_encap_push(skb, other_tuplehash, &type) < 0))
+ return NF_DROP;
+
skb->dev = outdev;
dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
tuplehash->tuple.out.h_source, skb->len);
@@ -462,7 +553,8 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
ret = NF_STOLEN;
break;
case FLOW_OFFLOAD_XMIT_DIRECT:
- ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash,
+ &flow->tuplehash[!dir], ETH_P_IP);
if (ret == NF_DROP)
flow_offload_teardown(flow);
break;
@@ -757,7 +849,8 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
ret = NF_STOLEN;
break;
case FLOW_OFFLOAD_XMIT_DIRECT:
- ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash,
+ &flow->tuplehash[!dir], ETH_P_IPV6);
if (ret == NF_DROP)
flow_offload_teardown(flow);
break;
--
2.47.1
next prev parent reply other threads:[~2025-04-08 14:24 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-08 14:24 [PATCH v11 nf-next 0/2] Add nf_flow_encap_push() for xmit direct Eric Woudstra
2025-04-08 14:24 ` [PATCH v11 nf-next 1/2] net: pppoe: avoid zero-length arrays in struct pppoe_hdr Eric Woudstra
2025-04-09 16:25 ` Kees Cook
2025-04-08 14:24 ` Eric Woudstra [this message]
2025-04-08 16:02 ` [PATCH v11 nf-next 0/2] Add nf_flow_encap_push() for xmit direct Pablo Neira Ayuso
2025-04-08 18:19 ` Eric Woudstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250408142425.95437-3-ericwouds@gmail.com \
--to=ericwouds@gmail.com \
--cc=andrew+netdev@lunn.ch \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=kadlec@netfilter.org \
--cc=kuba@kernel.org \
--cc=linux-hardening@vger.kernel.org \
--cc=mostrows@earthlink.net \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=pablo@netfilter.org \
--cc=razor@blackwall.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.