netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org,
	razor@blackwall.org, jeremy@azazel.net
Subject: [PATCH net-next,v3 7/9] netfilter: flowtable: use dev_fill_forward_path() to obtain egress device
Date: Wed, 11 Nov 2020 20:37:35 +0100	[thread overview]
Message-ID: <20201111193737.1793-8-pablo@netfilter.org> (raw)
In-Reply-To: <20201111193737.1793-1-pablo@netfilter.org>

The egress device in the tuple is obtained from route. Use
dev_fill_forward_path() instead to provide the real egress device for
this flow whenever this is available.

The new FLOW_OFFLOAD_XMIT_DIRECT type uses dev_queue_xmit() to transmit
ethernet frames. Cache the source and destination hardware address to
use dev_queue_xmit() to transfer packets.

The FLOW_OFFLOAD_XMIT_DIRECT replaces FLOW_OFFLOAD_XMIT_NEIGH if
dev_fill_forward_path() finds a direct transmit path.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h | 16 +++++-
 net/netfilter/nf_flow_table_core.c    | 35 ++++++++++---
 net/netfilter/nf_flow_table_ip.c      | 72 +++++++++++++++++++++------
 net/netfilter/nft_flow_offload.c      | 25 ++++++++--
 4 files changed, 118 insertions(+), 30 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 963f99fb1c06..83110e4705c0 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -92,6 +92,7 @@ enum flow_offload_tuple_dir {
 enum flow_offload_xmit_type {
 	FLOW_OFFLOAD_XMIT_NEIGH		= 0,
 	FLOW_OFFLOAD_XMIT_XFRM,
+	FLOW_OFFLOAD_XMIT_DIRECT,
 };
 
 struct flow_offload_tuple {
@@ -120,8 +121,14 @@ struct flow_offload_tuple {
 					xmit_type:2;
 
 	u16				mtu;
-
-	struct dst_entry		*dst_cache;
+	union {
+		struct dst_entry	*dst_cache;
+		struct {
+			u32		ifidx;
+			u8		h_source[ETH_ALEN];
+			u8		h_dest[ETH_ALEN];
+		} out;
+	};
 };
 
 struct flow_offload_tuple_rhash {
@@ -168,6 +175,11 @@ struct nf_flow_route {
 		struct {
 			u32			ifindex;
 		} in;
+		struct {
+			u32			ifindex;
+			u8			h_source[ETH_ALEN];
+			u8			h_dest[ETH_ALEN];
+		} out;
 		enum flow_offload_xmit_type	xmit_type;
 	} tuple[FLOW_OFFLOAD_DIR_MAX];
 };
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 27b4315d7b96..751bc1c27c16 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -81,9 +81,6 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 	struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
 	struct dst_entry *dst = route->tuple[dir].dst;
 
-	if (!dst_hold_safe(route->tuple[dir].dst))
-		return -1;
-
 	switch (flow_tuple->l3proto) {
 	case NFPROTO_IPV4:
 		flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
@@ -94,12 +91,36 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 	}
 
 	flow_tuple->iifidx = route->tuple[dir].in.ifindex;
+
+	switch (route->tuple[dir].xmit_type) {
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
+		       ETH_ALEN);
+		memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
+		       ETH_ALEN);
+		flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
+		break;
+	case FLOW_OFFLOAD_XMIT_XFRM:
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		if (!dst_hold_safe(route->tuple[dir].dst))
+			return -1;
+
+		flow_tuple->dst_cache = dst;
+		break;
+	}
 	flow_tuple->xmit_type = route->tuple[dir].xmit_type;
-	flow_tuple->dst_cache = dst;
 
 	return 0;
 }
 
+static void nft_flow_dst_release(struct flow_offload *flow,
+				 enum flow_offload_tuple_dir dir)
+{
+	if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+	    flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
+		dst_release(flow->tuplehash[dir].tuple.dst_cache);
+}
+
 int flow_offload_route_init(struct flow_offload *flow,
 			    const struct nf_flow_route *route)
 {
@@ -118,7 +139,7 @@ int flow_offload_route_init(struct flow_offload *flow,
 	return 0;
 
 err_route_reply:
-	dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
+	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
 
 	return err;
 }
@@ -169,8 +190,8 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
 
 static void flow_offload_route_release(struct flow_offload *flow)
 {
-	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
-	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
+	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
+	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
 }
 
 void flow_offload_free(struct flow_offload *flow)
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 03314cc06e04..d10ba09a41b5 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -248,6 +248,24 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
+				       const struct flow_offload_tuple_rhash *tuplehash,
+				       unsigned short type)
+{
+	struct net_device *outdev;
+
+	outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
+	if (!outdev)
+		return NF_DROP;
+
+	skb->dev = outdev;
+	dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
+			tuplehash->tuple.out.h_source, skb->len);
+	dev_queue_xmit(skb);
+
+	return NF_STOLEN;
+}
+
 unsigned int
 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 			const struct nf_hook_state *state)
@@ -262,6 +280,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	unsigned int thoff;
 	struct iphdr *iph;
 	__be32 nexthop;
+	int ret;
 
 	if (skb->protocol != htons(ETH_P_IP))
 		return NF_ACCEPT;
@@ -303,22 +322,32 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
 		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
 
-	rt = (struct rtable *)tuplehash->tuple.dst_cache;
-
 	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
+		rt = (struct rtable *)tuplehash->tuple.dst_cache;
 		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
 		IPCB(skb)->iif = skb->dev->ifindex;
 		IPCB(skb)->flags = IPSKB_FORWARDED;
 		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
 	}
 
-	outdev = rt->dst.dev;
-	skb->dev = outdev;
-	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
-	skb_dst_set_noref(skb, &rt->dst);
-	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+	switch (tuplehash->tuple.xmit_type) {
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		rt = (struct rtable *)tuplehash->tuple.dst_cache;
+		outdev = rt->dst.dev;
+		skb->dev = outdev;
+		nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+		skb_dst_set_noref(skb, &rt->dst);
+		neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+		ret = NF_STOLEN;
+		break;
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
+		if (ret == NF_DROP)
+			flow_offload_teardown(flow);
+		break;
+	}
 
-	return NF_STOLEN;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
 
@@ -504,6 +533,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	struct net_device *outdev;
 	struct ipv6hdr *ip6h;
 	struct rt6_info *rt;
+	int ret;
 
 	if (skb->protocol != htons(ETH_P_IPV6))
 		return NF_ACCEPT;
@@ -545,21 +575,31 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
 		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
 
-	rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
-
 	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)) {
+		rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
 		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
 		IP6CB(skb)->iif = skb->dev->ifindex;
 		IP6CB(skb)->flags = IP6SKB_FORWARDED;
 		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
 	}
 
-	outdev = rt->dst.dev;
-	skb->dev = outdev;
-	nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
-	skb_dst_set_noref(skb, &rt->dst);
-	neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+	switch (tuplehash->tuple.xmit_type) {
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+		outdev = rt->dst.dev;
+		skb->dev = outdev;
+		nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+		skb_dst_set_noref(skb, &rt->dst);
+		neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+		ret = NF_STOLEN;
+		break;
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
+		if (ret == NF_DROP)
+			flow_offload_teardown(flow);
+		break;
+	}
 
-	return NF_STOLEN;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index cae53c56f957..849fc40786cf 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -39,12 +39,11 @@ static void nft_default_forward_path(struct nf_flow_route *route,
 static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
 				     const struct dst_entry *dst_cache,
 				     const struct nf_conn *ct,
-				     enum ip_conntrack_dir dir,
+				     enum ip_conntrack_dir dir, u8 *ha,
 				     struct net_device_path_stack *stack)
 {
 	const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
 	struct net_device *dev = dst_cache->dev;
-	unsigned char ha[ETH_ALEN];
 	struct neighbour *n;
 	u8 nud_state;
 
@@ -66,10 +65,14 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
 
 struct nft_forward_info {
 	const struct net_device *indev;
+	u8 h_source[ETH_ALEN];
+	u8 h_dest[ETH_ALEN];
+	enum flow_offload_xmit_type xmit_type;
 };
 
 static void nft_dev_path_info(const struct net_device_path_stack *stack,
-			      struct nft_forward_info *info)
+			      struct nft_forward_info *info,
+			      unsigned char *ha)
 {
 	const struct net_device_path *path;
 	int i;
@@ -79,10 +82,14 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
 		switch (path->type) {
 		case DEV_PATH_ETHERNET:
 			info->indev = path->dev;
+			memcpy(info->h_dest, path->dev->dev_addr, ETH_ALEN);
+			memcpy(info->h_source, ha, ETH_ALEN);
 			break;
 		case DEV_PATH_VLAN:
 			break;
 		case DEV_PATH_BRIDGE:
+			memcpy(info->h_dest, path->dev->dev_addr, ETH_ALEN);
+			info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
 			break;
 		}
 	}
@@ -113,14 +120,22 @@ static void nft_dev_forward_path(struct nf_flow_route *route,
 	const struct dst_entry *dst = route->tuple[dir].dst;
 	struct net_device_path_stack stack = {};
 	struct nft_forward_info info = {};
+	unsigned char ha[ETH_ALEN];
 
-	if (nft_dev_fill_forward_path(route, dst, ct, dir, &stack) >= 0)
-		nft_dev_path_info(&stack, &info);
+	if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
+		nft_dev_path_info(&stack, &info, ha);
 
 	if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
 		return;
 
 	route->tuple[!dir].in.ifindex = info.indev->ifindex;
+
+	if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
+		memcpy(route->tuple[dir].out.h_dest, info.h_source, ETH_ALEN);
+		memcpy(route->tuple[dir].out.h_source, info.h_dest, ETH_ALEN);
+		route->tuple[dir].out.ifindex = info.indev->ifindex;
+		route->tuple[dir].xmit_type = info.xmit_type;
+	}
 }
 
 static int nft_flow_route(const struct nft_pktinfo *pkt,
-- 
2.20.1


  parent reply	other threads:[~2020-11-11 19:38 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-11 19:37 [PATCH net-next,v3 0/9] netfilter: flowtable bridge and vlan enhancements Pablo Neira Ayuso
2020-11-11 19:37 ` [PATCH net-next,v3 1/9] netfilter: flowtable: add hash offset field to tuple Pablo Neira Ayuso
2020-11-11 19:37 ` [PATCH net-next,v3 2/9] netfilter: flowtable: add xmit path types Pablo Neira Ayuso
2020-11-11 19:37 ` [PATCH net-next,v3 3/9] net: resolve forwarding path from virtual netdevice and HW destination address Pablo Neira Ayuso
2020-11-14  1:42   ` Jakub Kicinski
2020-11-14 12:00     ` Pablo Neira Ayuso
2020-11-11 19:37 ` [PATCH net-next,v3 4/9] net: 8021q: resolve forwarding path for vlan devices Pablo Neira Ayuso
2020-11-11 19:37 ` [PATCH net-next,v3 5/9] bridge: resolve forwarding path for bridge devices Pablo Neira Ayuso
2020-11-12  0:53   ` Nikolay Aleksandrov
2020-11-13 15:40     ` Nikolay Aleksandrov
2020-11-11 19:37 ` [PATCH net-next,v3 6/9] netfilter: flowtable: use dev_fill_forward_path() to obtain ingress device Pablo Neira Ayuso
2020-11-11 19:37 ` Pablo Neira Ayuso [this message]
2020-11-11 19:37 ` [PATCH net-next,v3 8/9] netfilter: flowtable: add vlan support Pablo Neira Ayuso
2020-11-11 19:37 ` [PATCH net-next,v3 9/9] selftests: netfilter: flowtable bridge and VLAN support Pablo Neira Ayuso
2020-11-14  1:55 ` [PATCH net-next,v3 0/9] netfilter: flowtable bridge and vlan enhancements Jakub Kicinski
2020-11-14 11:59   ` Pablo Neira Ayuso
2020-11-14 14:00     ` Tobias Waldekranz
2020-11-14 17:03       ` Jakub Kicinski
2020-11-16 22:18         ` Pablo Neira Ayuso
2020-11-16 22:28           ` Jakub Kicinski
2020-11-16 22:36             ` Pablo Neira Ayuso
2020-11-16 22:45               ` Jakub Kicinski
2020-11-16 22:56                 ` Pablo Neira Ayuso
2020-11-21 12:31                   ` Pablo Neira Ayuso
2020-11-21 18:15                     ` Jakub Kicinski
2020-11-21 18:56                       ` Pablo Neira Ayuso
2020-11-21 19:23                         ` Jakub Kicinski
2020-11-22 11:33                           ` Pablo Neira Ayuso
2020-11-16 22:31       ` Pablo Neira Ayuso

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201111193737.1793-8-pablo@netfilter.org \
    --to=pablo@netfilter.org \
    --cc=davem@davemloft.net \
    --cc=jeremy@azazel.net \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=razor@blackwall.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).