From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org
Subject: [PATCH 47/52] netfilter: flow table support for IPv6
Date: Mon, 8 Jan 2018 21:19:55 +0100 [thread overview]
Message-ID: <20180108202000.12989-48-pablo@netfilter.org> (raw)
In-Reply-To: <20180108202000.12989-1-pablo@netfilter.org>
This patch adds the IPv6 flow table type, that implements the datapath
flow table to forward IPv6 traffic.
This patch exports ip6_dst_mtu_forward() that is required to check for
mtu to pass up packets that need PMTUD handling to the classic
forwarding path.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/ipv6.h | 2 +
net/ipv6/ip6_output.c | 3 +-
net/ipv6/netfilter/Kconfig | 8 +
net/ipv6/netfilter/Makefile | 3 +
net/ipv6/netfilter/nf_flow_table_ipv6.c | 277 ++++++++++++++++++++++++++++++++
5 files changed, 292 insertions(+), 1 deletion(-)
create mode 100644 net/ipv6/netfilter/nf_flow_table_ipv6.c
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 25be4715578c..9dc1230d789c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -969,6 +969,8 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
&inet6_sk(sk)->cork);
}
+unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst);
+
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
struct flowi6 *fl6);
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index bcdb615aed6e..19adad6d90bc 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -378,7 +378,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
return dst_output(net, sk, skb);
}
-static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
unsigned int mtu;
struct inet6_dev *idev;
@@ -398,6 +398,7 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
return mtu;
}
+EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 6acb2eecd986..806e95375ec8 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -71,6 +71,14 @@ config NFT_FIB_IPV6
endif # NF_TABLES_IPV6
endif # NF_TABLES
+config NF_FLOW_TABLE_IPV6
+ select NF_FLOW_TABLE
+ tristate "Netfilter flow table IPv6 module"
+ help
+ This option adds the flow table IPv6 support.
+
+ To compile it as a module, choose M here.
+
config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index c6ee0cdd0ba9..95611c4b39b0 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -45,6 +45,9 @@ obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
+# flow table support
+obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o
+
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
new file mode 100644
index 000000000000..d7d073bb19ee
--- /dev/null
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -0,0 +1,277 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/ipv6.h>
+#include <linux/netdevice.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/neighbour.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+/* For layer 4 checksum field offset. */
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+ return -1;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
+ new_addr->s6_addr32, true);
+
+ return 0;
+}
+
+static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ struct udphdr *udph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
+ return -1;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
+ new_addr->s6_addr32, true);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+
+ return 0;
+}
+
+static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff, struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ switch (ip6h->nexthdr) {
+ case IPPROTO_TCP:
+ if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ case IPPROTO_UDP:
+ if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ }
+
+ return 0;
+}
+
+static int nf_flow_snat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ struct in6_addr addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = ip6h->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
+ ip6h->saddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = ip6h->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
+ ip6h->daddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ struct in6_addr addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = ip6h->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
+ ip6h->daddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = ip6h->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
+ ip6h->saddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static int nf_flow_nat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb,
+ enum flow_offload_tuple_dir dir)
+{
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ unsigned int thoff = sizeof(*ip6h);
+
+ if (flow->flags & FLOW_OFFLOAD_SNAT &&
+ (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ return -1;
+ if (flow->flags & FLOW_OFFLOAD_DNAT &&
+ (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ return -1;
+
+ return 0;
+}
+
+static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
+ struct flow_offload_tuple *tuple)
+{
+ struct flow_ports *ports;
+ struct ipv6hdr *ip6h;
+ unsigned int thoff;
+
+ if (!pskb_may_pull(skb, sizeof(*ip6h)))
+ return -1;
+
+ ip6h = ipv6_hdr(skb);
+
+ if (ip6h->nexthdr != IPPROTO_TCP &&
+ ip6h->nexthdr != IPPROTO_UDP)
+ return -1;
+
+ thoff = sizeof(*ip6h);
+ if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+ return -1;
+
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+ tuple->src_v6 = ip6h->saddr;
+ tuple->dst_v6 = ip6h->daddr;
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ tuple->l3proto = AF_INET6;
+ tuple->l4proto = ip6h->nexthdr;
+ tuple->iifidx = dev->ifindex;
+
+ return 0;
+}
+
+/* Based on ip_exceeds_mtu(). */
+static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ return false;
+
+ return true;
+}
+
+static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
+{
+ u32 mtu;
+
+ mtu = ip6_dst_mtu_forward(&rt->dst);
+ if (__nf_flow_exceeds_mtu(skb, mtu))
+ return true;
+
+ return false;
+}
+
+static unsigned int
+nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ struct flow_offload_tuple tuple = {};
+ enum flow_offload_tuple_dir dir;
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ struct in6_addr *nexthop;
+ struct ipv6hdr *ip6h;
+ struct rt6_info *rt;
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ return NF_ACCEPT;
+
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
+ return NF_ACCEPT;
+
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
+ if (tuplehash == NULL)
+ return NF_ACCEPT;
+
+ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+ if (!outdev)
+ return NF_ACCEPT;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
+ rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+ if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
+ return NF_ACCEPT;
+
+ if (skb_try_make_writable(skb, sizeof(*ip6h)))
+ return NF_DROP;
+
+ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+ nf_flow_nat_ipv6(flow, skb, dir) < 0)
+ return NF_DROP;
+
+ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ ip6h = ipv6_hdr(skb);
+ ip6h->hop_limit--;
+
+ skb->dev = outdev;
+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+
+ return NF_STOLEN;
+}
+
+static struct nf_flowtable_type flowtable_ipv6 = {
+ .family = NFPROTO_IPV6,
+ .params = &nf_flow_offload_rhash_params,
+ .gc = nf_flow_offload_work_gc,
+ .hook = nf_flow_offload_ipv6_hook,
+ .owner = THIS_MODULE,
+};
+
+static int __init nf_flow_ipv6_module_init(void)
+{
+ nft_register_flowtable_type(&flowtable_ipv6);
+
+ return 0;
+}
+
+static void __exit nf_flow_ipv6_module_exit(void)
+{
+ nft_unregister_flowtable_type(&flowtable_ipv6);
+}
+
+module_init(nf_flow_ipv6_module_init);
+module_exit(nf_flow_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);
--
2.11.0
next prev parent reply other threads:[~2018-01-08 20:19 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-01-08 20:19 [PATCH 00/52] Netfilter/IPVS updates for net-next Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 01/52] netfilter: conntrack: remove nlattr_size pointer from l4proto trackers Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 02/52] netfilter: conntrack: constify list of builtin trackers Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 03/52] netfilter: conntrack: l4 protocol trackers can be const Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 04/52] netfilter: mark expected switch fall-throughs Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 05/52] netfilter: conntrack: timeouts can be const Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 06/52] netfilter: ipvs: Remove useless ipvsh param of frag_safe_skb_hp Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 07/52] netfilter: ipset: use nfnl_mutex_is_locked Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 08/52] netfilter: ipset: add resched points during set listing Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 09/52] netfilter: nf_conntrack_h323: Remove unwanted comments Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 10/52] netfilter: core: make nf_unregister_net_hooks simple wrapper again Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 11/52] netfilter: core: remove synchronize_net call if nfqueue is used Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 12/52] netfilter: core: free hooks with call_rcu Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 13/52] netfilter: reduce size of hook entry point locations Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 14/52] netfilter: add defines for arp/decnet max hooks Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 15/52] netfilter: reduce hook array sizes to what is needed Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 16/52] netfilter: don't allocate space for decnet hooks unless needed Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 17/52] netfilter: don't allocate space for arp/bridge " Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 18/52] netfilter: reduce NF_MAX_HOOKS define Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 19/52] netfilter: xtables: add and use xt_request_find_table_lock Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 20/52] netfilter: core: only allow one nat hook per hook point Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 21/52] netfilter: nf_tables: reject nat hook registration if prio is before conntrack Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 22/52] netfilter: nf_tables_arp: don't set forward chain Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 23/52] netfilter: nf_tables: explicit nft_set_pktinfo() call from hook path Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 24/52] netfilter: nf_tables: add nft_set_is_anonymous() helper Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 25/52] netfilter: core: add nf_remove_net_hook Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 26/52] netfilter: core: pass hook number, family and device to nf_find_hook_list() Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 27/52] netfilter: core: pass family as parameter to nf_remove_net_hook() Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 28/52] netfilter: core: support for NFPROTO_INET hook registration Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 29/52] netfilter: nf_tables_inet: don't use multihook infrastructure anymore Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 30/52] netfilter: nf_tables: remove multihook chains and families Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 31/52] netfilter: nf_tables: remove hooks from family definition Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 32/52] netfilter: connlimit: split xt_connlimit into front and backend Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 33/52] netfilter: move checksum indirection to struct nf_ipv6_ops Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 34/52] netfilter: move checksum_partial " Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 35/52] netfilter: remove saveroute indirection in struct nf_afinfo Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 36/52] netfilter: move route indirection to struct nf_ipv6_ops Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 37/52] netfilter: move reroute " Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 38/52] netfilter: remove route_key_size field in struct nf_afinfo Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 39/52] netfilter: remove struct nf_afinfo and its helper functions Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 40/52] netfilter: meta: secpath support Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 41/52] netfilter: remove defensive check on malformed packets from raw sockets Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 42/52] netfilter: nf_tables: remove nft_dereference() Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 43/52] netfilter: nf_conntrack: add IPS_OFFLOAD status bit Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 44/52] netfilter: nf_tables: add flow table netlink frontend Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 45/52] netfilter: add generic flow table infrastructure Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 46/52] netfilter: flow table support for IPv4 Pablo Neira Ayuso
2018-01-08 20:19 ` Pablo Neira Ayuso [this message]
2018-01-08 20:19 ` [PATCH 48/52] netfilter: flow table support for the mixed IPv4/IPv6 family Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 49/52] netfilter: nf_tables: flow offload expression Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 50/52] netfilter: ipset: use swap macro instead of _manually_ swapping values Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 51/52] netfilter: ipset: Fix "don't update counters" mode when counters used at the matching Pablo Neira Ayuso
2018-01-08 20:20 ` [PATCH 52/52] netfilter: ipset: Missing nfnl_lock()/nfnl_unlock() is added to ip_set_net_exit() Pablo Neira Ayuso
2018-01-09 1:55 ` [PATCH 00/52] Netfilter/IPVS updates for net-next David Miller
2018-01-09 15:43 ` Pablo Neira Ayuso
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180108202000.12989-48-pablo@netfilter.org \
--to=pablo@netfilter.org \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).