From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org,
Felix Fietkau <nbd@nbd.name>
Subject: [PATCH net-next 09/23] netfilter: flowtable: use dev_fill_forward_path() to obtain egress device
Date: Thu, 11 Mar 2021 01:35:50 +0100 [thread overview]
Message-ID: <20210311003604.22199-10-pablo@netfilter.org> (raw)
In-Reply-To: <20210311003604.22199-1-pablo@netfilter.org>
The egress device in the tuple is obtained from the route. Use
dev_fill_forward_path() instead to provide the real egress device for
this flow whenever it is available.
The new FLOW_OFFLOAD_XMIT_DIRECT type uses dev_queue_xmit() to transmit
Ethernet frames. Cache the source and destination hardware addresses so
that dev_queue_xmit() can be used to transmit packets.
The FLOW_OFFLOAD_XMIT_DIRECT replaces FLOW_OFFLOAD_XMIT_NEIGH if
dev_fill_forward_path() finds a direct transmit path.
In case of topology updates, if the peer is moved to a different bridge
port, the connection will time out; reconnecting will result in a new
entry with the correct path. Snooping fdb updates would allow for cleaning
up stale flowtable entries.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_flow_table.h | 16 +++++-
net/netfilter/nf_flow_table_core.c | 35 ++++++++++---
net/netfilter/nf_flow_table_ip.c | 72 +++++++++++++++++++++------
net/netfilter/nft_flow_offload.c | 35 ++++++++++---
4 files changed, 127 insertions(+), 31 deletions(-)
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 963f99fb1c06..83110e4705c0 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -92,6 +92,7 @@ enum flow_offload_tuple_dir {
enum flow_offload_xmit_type {
FLOW_OFFLOAD_XMIT_NEIGH = 0,
FLOW_OFFLOAD_XMIT_XFRM,
+ FLOW_OFFLOAD_XMIT_DIRECT,
};
struct flow_offload_tuple {
@@ -120,8 +121,14 @@ struct flow_offload_tuple {
xmit_type:2;
u16 mtu;
-
- struct dst_entry *dst_cache;
+ union {
+ struct dst_entry *dst_cache;
+ struct {
+ u32 ifidx;
+ u8 h_source[ETH_ALEN];
+ u8 h_dest[ETH_ALEN];
+ } out;
+ };
};
struct flow_offload_tuple_rhash {
@@ -168,6 +175,11 @@ struct nf_flow_route {
struct {
u32 ifindex;
} in;
+ struct {
+ u32 ifindex;
+ u8 h_source[ETH_ALEN];
+ u8 h_dest[ETH_ALEN];
+ } out;
enum flow_offload_xmit_type xmit_type;
} tuple[FLOW_OFFLOAD_DIR_MAX];
};
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index a3cb9c8420e4..a4cfbefbb6da 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -81,9 +81,6 @@ static int flow_offload_fill_route(struct flow_offload *flow,
struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
struct dst_entry *dst = route->tuple[dir].dst;
- if (!dst_hold_safe(route->tuple[dir].dst))
- return -1;
-
switch (flow_tuple->l3proto) {
case NFPROTO_IPV4:
flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
@@ -94,12 +91,36 @@ static int flow_offload_fill_route(struct flow_offload *flow,
}
flow_tuple->iifidx = route->tuple[dir].in.ifindex;
+
+ switch (route->tuple[dir].xmit_type) {
+ case FLOW_OFFLOAD_XMIT_DIRECT:
+ memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
+ ETH_ALEN);
+ memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
+ ETH_ALEN);
+ flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
+ break;
+ case FLOW_OFFLOAD_XMIT_XFRM:
+ case FLOW_OFFLOAD_XMIT_NEIGH:
+ if (!dst_hold_safe(route->tuple[dir].dst))
+ return -1;
+
+ flow_tuple->dst_cache = dst;
+ break;
+ }
flow_tuple->xmit_type = route->tuple[dir].xmit_type;
- flow_tuple->dst_cache = dst;
return 0;
}
+static void nft_flow_dst_release(struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir)
+{
+ if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+ flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
+ dst_release(flow->tuplehash[dir].tuple.dst_cache);
+}
+
int flow_offload_route_init(struct flow_offload *flow,
const struct nf_flow_route *route)
{
@@ -118,7 +139,7 @@ int flow_offload_route_init(struct flow_offload *flow,
return 0;
err_route_reply:
- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
return err;
}
@@ -169,8 +190,8 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
static void flow_offload_route_release(struct flow_offload *flow)
{
- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
}
void flow_offload_free(struct flow_offload *flow)
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index af49672bd38d..ae0b008c639a 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -248,6 +248,24 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
return NF_STOLEN;
}
+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
+ const struct flow_offload_tuple_rhash *tuplehash,
+ unsigned short type)
+{
+ struct net_device *outdev;
+
+ outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
+ if (!outdev)
+ return NF_DROP;
+
+ skb->dev = outdev;
+ dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
+ tuplehash->tuple.out.h_source, skb->len);
+ dev_queue_xmit(skb);
+
+ return NF_STOLEN;
+}
+
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -262,6 +280,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
unsigned int thoff;
struct iphdr *iph;
__be32 nexthop;
+ int ret;
if (skb->protocol != htons(ETH_P_IP))
return NF_ACCEPT;
@@ -303,22 +322,32 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
- rt = (struct rtable *)tuplehash->tuple.dst_cache;
-
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
IPCB(skb)->iif = skb->dev->ifindex;
IPCB(skb)->flags = IPSKB_FORWARDED;
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
}
- outdev = rt->dst.dev;
- skb->dev = outdev;
- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
- skb_dst_set_noref(skb, &rt->dst);
- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+ switch (tuplehash->tuple.xmit_type) {
+ case FLOW_OFFLOAD_XMIT_NEIGH:
+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ outdev = rt->dst.dev;
+ skb->dev = outdev;
+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+ skb_dst_set_noref(skb, &rt->dst);
+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+ ret = NF_STOLEN;
+ break;
+ case FLOW_OFFLOAD_XMIT_DIRECT:
+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
+ if (ret == NF_DROP)
+ flow_offload_teardown(flow);
+ break;
+ }
- return NF_STOLEN;
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
@@ -504,6 +533,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
struct net_device *outdev;
struct ipv6hdr *ip6h;
struct rt6_info *rt;
+ int ret;
if (skb->protocol != htons(ETH_P_IPV6))
return NF_ACCEPT;
@@ -545,21 +575,31 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
- rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
-
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
IP6CB(skb)->iif = skb->dev->ifindex;
IP6CB(skb)->flags = IP6SKB_FORWARDED;
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
}
- outdev = rt->dst.dev;
- skb->dev = outdev;
- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
- skb_dst_set_noref(skb, &rt->dst);
- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+ switch (tuplehash->tuple.xmit_type) {
+ case FLOW_OFFLOAD_XMIT_NEIGH:
+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+ outdev = rt->dst.dev;
+ skb->dev = outdev;
+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+ skb_dst_set_noref(skb, &rt->dst);
+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+ ret = NF_STOLEN;
+ break;
+ case FLOW_OFFLOAD_XMIT_DIRECT:
+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
+ if (ret == NF_DROP)
+ flow_offload_teardown(flow);
+ break;
+ }
- return NF_STOLEN;
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 15f90c31feb0..a6595dca1b1f 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -39,12 +39,11 @@ static void nft_default_forward_path(struct nf_flow_route *route,
static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
const struct dst_entry *dst_cache,
const struct nf_conn *ct,
- enum ip_conntrack_dir dir,
+ enum ip_conntrack_dir dir, u8 *ha,
struct net_device_path_stack *stack)
{
const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
struct net_device *dev = dst_cache->dev;
- unsigned char ha[ETH_ALEN];
struct neighbour *n;
u8 nud_state;
@@ -66,27 +65,43 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
struct nft_forward_info {
const struct net_device *indev;
+ const struct net_device *outdev;
+ u8 h_source[ETH_ALEN];
+ u8 h_dest[ETH_ALEN];
+ enum flow_offload_xmit_type xmit_type;
};
static void nft_dev_path_info(const struct net_device_path_stack *stack,
- struct nft_forward_info *info)
+ struct nft_forward_info *info,
+ unsigned char *ha)
{
const struct net_device_path *path;
int i;
+ memcpy(info->h_dest, ha, ETH_ALEN);
+
for (i = 0; i < stack->num_paths; i++) {
path = &stack->path[i];
switch (path->type) {
case DEV_PATH_ETHERNET:
info->indev = path->dev;
+ if (is_zero_ether_addr(info->h_source))
+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
break;
- case DEV_PATH_VLAN:
case DEV_PATH_BRIDGE:
+ if (is_zero_ether_addr(info->h_source))
+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
+
+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
+ break;
+ case DEV_PATH_VLAN:
default:
info->indev = NULL;
break;
}
}
+ if (!info->outdev)
+ info->outdev = info->indev;
}
static bool nft_flowtable_find_dev(const struct net_device *dev,
@@ -114,14 +129,22 @@ static void nft_dev_forward_path(struct nf_flow_route *route,
const struct dst_entry *dst = route->tuple[dir].dst;
struct net_device_path_stack stack;
struct nft_forward_info info = {};
+ unsigned char ha[ETH_ALEN];
- if (nft_dev_fill_forward_path(route, dst, ct, dir, &stack) >= 0)
- nft_dev_path_info(&stack, &info);
+ if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
+ nft_dev_path_info(&stack, &info, ha);
if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
return;
route->tuple[!dir].in.ifindex = info.indev->ifindex;
+
+ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
+ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
+ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
+ route->tuple[dir].xmit_type = info.xmit_type;
+ }
}
static int nft_flow_route(const struct nft_pktinfo *pkt,
--
2.20.1
next prev parent reply other threads:[~2021-03-11 0:37 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-11 0:35 [PATCH net-next 00/23] netfilter: flowtable enhancements Pablo Neira Ayuso
2021-03-11 0:35 ` [PATCH net-next 01/23] net: resolve forwarding path from virtual netdevice and HW destination address Pablo Neira Ayuso
2021-03-11 0:35 ` [PATCH net-next 02/23] net: 8021q: resolve forwarding path for vlan devices Pablo Neira Ayuso
2021-03-11 0:35 ` [PATCH net-next 03/23] net: bridge: resolve forwarding path for bridge devices Pablo Neira Ayuso
2021-03-11 0:35 ` [PATCH net-next 04/23] net: bridge: resolve forwarding path for VLAN tag actions in " Pablo Neira Ayuso
2021-03-11 0:35 ` [PATCH net-next 05/23] net: ppp: resolve forwarding path for bridge pppoe devices Pablo Neira Ayuso
2021-03-11 0:35 ` [PATCH net-next 06/23] net: dsa: resolve forwarding path for dsa slave ports Pablo Neira Ayuso
2021-03-11 0:35 ` [PATCH net-next 07/23] netfilter: flowtable: add xmit path types Pablo Neira Ayuso
2021-03-11 0:35 ` [PATCH net-next 08/23] netfilter: flowtable: use dev_fill_forward_path() to obtain ingress device Pablo Neira Ayuso
2021-03-11 0:35 ` Pablo Neira Ayuso [this message]
2021-03-11 0:35 ` [PATCH net-next 10/23] netfilter: flowtable: add vlan support Pablo Neira Ayuso
2021-03-11 0:35 ` [PATCH net-next 11/23] netfilter: flowtable: add bridge vlan filtering support Pablo Neira Ayuso
2021-03-11 0:35 ` [PATCH net-next 12/23] netfilter: flowtable: add pppoe support Pablo Neira Ayuso
2021-03-11 0:35 ` [PATCH net-next 13/23] netfilter: flowtable: add dsa support Pablo Neira Ayuso
2021-03-11 0:35 ` [PATCH net-next 14/23] selftests: netfilter: flowtable bridge and vlan support Pablo Neira Ayuso
2021-03-11 0:35 ` [PATCH net-next 15/23] netfilter: flowtable: add offload support for xmit path types Pablo Neira Ayuso
2021-03-11 0:35 ` [PATCH net-next 16/23] netfilter: nft_flow_offload: use direct xmit if hardware offload is enabled Pablo Neira Ayuso
2021-03-11 0:35 ` [PATCH net-next 17/23] netfilter: flowtable: bridge vlan hardware offload and switchdev Pablo Neira Ayuso
2021-03-11 0:35 ` [PATCH net-next 18/23] net: flow_offload: add FLOW_ACTION_PPPOE_PUSH Pablo Neira Ayuso
2021-03-11 0:36 ` [PATCH net-next 19/23] netfilter: flowtable: support for FLOW_ACTION_PPPOE_PUSH Pablo Neira Ayuso
2021-03-11 0:36 ` [PATCH net-next 20/23] dsa: slave: add support for TC_SETUP_FT Pablo Neira Ayuso
2021-03-11 0:36 ` [PATCH net-next 21/23] net: ethernet: mtk_eth_soc: add support for initializing the PPE Pablo Neira Ayuso
2021-03-11 0:36 ` [PATCH net-next 22/23] net: ethernet: mtk_eth_soc: add flow offloading support Pablo Neira Ayuso
2021-03-11 0:36 ` [PATCH net-next 23/23] net: ethernet: mtk_eth_soc: fix parsing packets in GDM Pablo Neira Ayuso
2021-03-12 7:36 ` Felix Fietkau
2021-03-11 20:47 ` [PATCH net-next 00/23] netfilter: flowtable enhancements Jakub Kicinski
2021-03-11 21:45 ` Pablo Neira Ayuso
2021-03-11 22:31 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210311003604.22199-10-pablo@netfilter.org \
--to=pablo@netfilter.org \
--cc=davem@davemloft.net \
--cc=kuba@kernel.org \
--cc=nbd@nbd.name \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.