From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org
Subject: [PATCH 47/52] netfilter: flow table support for IPv6
Date: Mon, 8 Jan 2018 21:19:55 +0100 [thread overview]
Message-ID: <20180108202000.12989-48-pablo@netfilter.org> (raw)
In-Reply-To: <20180108202000.12989-1-pablo@netfilter.org>
This patch adds the IPv6 flow table type, which implements the datapath
flow table used to forward IPv6 traffic.
This patch also exports ip6_dst_mtu_forward(), which is required to check
the MTU so that packets needing PMTUD handling are passed up to the
classic forwarding path.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/ipv6.h | 2 +
net/ipv6/ip6_output.c | 3 +-
net/ipv6/netfilter/Kconfig | 8 +
net/ipv6/netfilter/Makefile | 3 +
net/ipv6/netfilter/nf_flow_table_ipv6.c | 277 ++++++++++++++++++++++++++++++++
5 files changed, 292 insertions(+), 1 deletion(-)
create mode 100644 net/ipv6/netfilter/nf_flow_table_ipv6.c
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 25be4715578c..9dc1230d789c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -969,6 +969,8 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
&inet6_sk(sk)->cork);
}
+unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst);
+
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
struct flowi6 *fl6);
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index bcdb615aed6e..19adad6d90bc 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -378,7 +378,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
return dst_output(net, sk, skb);
}
-static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
unsigned int mtu;
struct inet6_dev *idev;
@@ -398,6 +398,7 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
return mtu;
}
+EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 6acb2eecd986..806e95375ec8 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -71,6 +71,14 @@ config NFT_FIB_IPV6
endif # NF_TABLES_IPV6
endif # NF_TABLES
+# IPv6 flow table datapath; selecting it also enables NF_FLOW_TABLE.
+config NF_FLOW_TABLE_IPV6
+	tristate "Netfilter flow table IPv6 module"
+	select NF_FLOW_TABLE
+	help
+	  This option adds the flow table IPv6 support.
+
+	  To compile it as a module, choose M here.
+
config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index c6ee0cdd0ba9..95611c4b39b0 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -45,6 +45,9 @@ obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
+# flow table support
+obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o
+
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
new file mode 100644
index 000000000000..d7d073bb19ee
--- /dev/null
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -0,0 +1,277 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/ipv6.h>
+#include <linux/netdevice.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/neighbour.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+/* For layer 4 checksum field offset. */
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+/*
+ * Adjust the TCP checksum of @skb after an IPv6 address in its header was
+ * rewritten from @addr to @new_addr.
+ *
+ * @thoff is the offset of the TCP header relative to the network header.
+ *
+ * Returns 0 on success, or -1 when pskb_may_pull() or
+ * skb_try_make_writable() fails for the TCP header.
+ */
+static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+				struct in6_addr *addr,
+				struct in6_addr *new_addr)
+{
+	unsigned int hdr_end = thoff + sizeof(struct tcphdr);
+	struct tcphdr *th;
+
+	if (!pskb_may_pull(skb, hdr_end))
+		return -1;
+	if (skb_try_make_writable(skb, hdr_end))
+		return -1;
+
+	/* Only look the header up once the skb can no longer change under us. */
+	th = (void *)(skb_network_header(skb) + thoff);
+	inet_proto_csum_replace16(&th->check, skb, addr->s6_addr32,
+				  new_addr->s6_addr32, true);
+
+	return 0;
+}
+
+/*
+ * Adjust the UDP checksum of @skb after an IPv6 address in its header was
+ * rewritten from @addr to @new_addr.
+ *
+ * @thoff is the offset of the UDP header relative to the network header.
+ * A zero checksum field is left untouched unless skb->ip_summed is
+ * CHECKSUM_PARTIAL; an updated checksum that comes out as zero is stored
+ * as CSUM_MANGLED_0 instead.
+ *
+ * Returns 0 on success, or -1 when pskb_may_pull() or
+ * skb_try_make_writable() fails for the UDP header.
+ */
+static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+				struct in6_addr *addr,
+				struct in6_addr *new_addr)
+{
+	unsigned int hdr_end = thoff + sizeof(struct udphdr);
+	struct udphdr *uh;
+
+	if (!pskb_may_pull(skb, hdr_end))
+		return -1;
+	if (skb_try_make_writable(skb, hdr_end))
+		return -1;
+
+	uh = (void *)(skb_network_header(skb) + thoff);
+
+	/* Nothing to fix up: no checksum present and none pending. */
+	if (!uh->check && skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	inet_proto_csum_replace16(&uh->check, skb, addr->s6_addr32,
+				  new_addr->s6_addr32, true);
+	if (!uh->check)
+		uh->check = CSUM_MANGLED_0;
+
+	return 0;
+}
+
+/*
+ * Dispatch the layer 4 checksum fix-up for an IPv6 address rewrite
+ * (@addr -> @new_addr) according to @ip6h->nexthdr.
+ *
+ * TCP and UDP are handled; any other next-header value is left alone.
+ *
+ * Returns 0 on success and -1 on failure.  The failure value must be
+ * negative: callers test the result with "< 0", and the netfilter verdict
+ * NF_DROP is numerically 0, so returning it here would be taken as success.
+ */
+static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+				    unsigned int thoff, struct in6_addr *addr,
+				    struct in6_addr *new_addr)
+{
+	switch (ip6h->nexthdr) {
+	case IPPROTO_TCP:
+		if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
+			return -1;
+		break;
+	case IPPROTO_UDP:
+		if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
+			return -1;
+		break;
+	default:
+		/* No layer 4 checksum to adjust for other protocols. */
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Apply source NAT to the IPv6 header @ip6h of @skb for a packet travelling
+ * in direction @dir of @flow.
+ *
+ * In the original direction the source address is replaced with the reply
+ * tuple's destination address; in the reply direction the destination
+ * address is replaced with the original tuple's source address.  The layer 4
+ * checksum is then adjusted through nf_flow_nat_ipv6_l4proto().
+ *
+ * Returns -1 for an unknown direction, otherwise the result of
+ * nf_flow_nat_ipv6_l4proto().
+ */
+static int nf_flow_snat_ipv6(const struct flow_offload *flow,
+			     struct sk_buff *skb, struct ipv6hdr *ip6h,
+			     unsigned int thoff,
+			     enum flow_offload_tuple_dir dir)
+{
+	struct in6_addr old_ip, new_ip;
+	struct in6_addr *field;
+
+	if (dir == FLOW_OFFLOAD_DIR_ORIGINAL) {
+		field = &ip6h->saddr;
+		new_ip = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
+	} else if (dir == FLOW_OFFLOAD_DIR_REPLY) {
+		field = &ip6h->daddr;
+		new_ip = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
+	} else {
+		return -1;
+	}
+
+	/* Remember the old address for the checksum update, then rewrite. */
+	old_ip = *field;
+	*field = new_ip;
+
+	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &old_ip, &new_ip);
+}
+
+/*
+ * Apply destination NAT to the IPv6 header @ip6h of @skb for a packet
+ * travelling in direction @dir of @flow.
+ *
+ * In the original direction the destination address is replaced with the
+ * reply tuple's source address; in the reply direction the source address is
+ * replaced with the original tuple's destination address.  The layer 4
+ * checksum is then adjusted through nf_flow_nat_ipv6_l4proto().
+ *
+ * Returns -1 for an unknown direction, otherwise the result of
+ * nf_flow_nat_ipv6_l4proto().
+ */
+static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
+			     struct sk_buff *skb, struct ipv6hdr *ip6h,
+			     unsigned int thoff,
+			     enum flow_offload_tuple_dir dir)
+{
+	struct in6_addr old_ip, new_ip;
+	struct in6_addr *field;
+
+	if (dir == FLOW_OFFLOAD_DIR_ORIGINAL) {
+		field = &ip6h->daddr;
+		new_ip = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
+	} else if (dir == FLOW_OFFLOAD_DIR_REPLY) {
+		field = &ip6h->saddr;
+		new_ip = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
+	} else {
+		return -1;
+	}
+
+	/* Remember the old address for the checksum update, then rewrite. */
+	old_ip = *field;
+	*field = new_ip;
+
+	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &old_ip, &new_ip);
+}
+
+/*
+ * Apply the NAT transformations recorded in @flow->flags (FLOW_OFFLOAD_SNAT
+ * and/or FLOW_OFFLOAD_DNAT) to @skb for direction @dir: first the port
+ * rewrite, then the address rewrite, for each enabled NAT type.
+ *
+ * Returns 0 on success and -1 as soon as any step fails.
+ */
+static int nf_flow_nat_ipv6(const struct flow_offload *flow,
+			    struct sk_buff *skb,
+			    enum flow_offload_tuple_dir dir)
+{
+	unsigned int thoff = sizeof(struct ipv6hdr);
+	/*
+	 * The next-header value is not altered by NAT, so read it once up
+	 * front instead of through a header pointer that may go stale.
+	 */
+	u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+
+	/*
+	 * The layer 4 helpers reached from the address rewrite call
+	 * pskb_may_pull() and skb_try_make_writable(), which may relocate the
+	 * packet header (the port helpers presumably do the same - confirm
+	 * against nf_flow_table core).  Always fetch the IPv6 header afresh
+	 * with ipv6_hdr() rather than reusing a pointer taken earlier.
+	 */
+	if (flow->flags & FLOW_OFFLOAD_SNAT &&
+	    (nf_flow_snat_port(flow, skb, thoff, nexthdr, dir) < 0 ||
+	     nf_flow_snat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
+		return -1;
+	if (flow->flags & FLOW_OFFLOAD_DNAT &&
+	    (nf_flow_dnat_port(flow, skb, thoff, nexthdr, dir) < 0 ||
+	     nf_flow_dnat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Build the flow table lookup key @tuple from the IPv6 packet in @skb that
+ * arrived on @dev.
+ *
+ * Only packets whose IPv6 next header is directly TCP or UDP are handled;
+ * the transport header is assumed to start right after the fixed IPv6
+ * header.
+ *
+ * Returns 0 when @tuple has been filled in, or -1 when the packet must be
+ * left to the classic forwarding path: headers cannot be pulled, the
+ * protocol is unsupported, or the hop limit is about to expire.
+ */
+static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
+			      struct flow_offload_tuple *tuple)
+{
+	struct flow_ports *ports;
+	struct ipv6hdr *ip6h;
+	unsigned int thoff;
+
+	if (!pskb_may_pull(skb, sizeof(*ip6h)))
+		return -1;
+
+	ip6h = ipv6_hdr(skb);
+
+	if (ip6h->nexthdr != IPPROTO_TCP &&
+	    ip6h->nexthdr != IPPROTO_UDP)
+		return -1;
+
+	/*
+	 * The fast path decrements hop_limit unconditionally.  A packet that
+	 * arrives with hop_limit <= 1 must not be forwarded (and 0 would wrap
+	 * to 255), so hand it to the classic path, which can generate the
+	 * ICMPv6 Time Exceeded error.
+	 */
+	if (ip6h->hop_limit <= 1)
+		return -1;
+
+	thoff = sizeof(*ip6h);
+	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+		return -1;
+
+	/*
+	 * pskb_may_pull() may have relocated the header data, so refresh the
+	 * IPv6 header pointer before dereferencing it again.
+	 */
+	ip6h = ipv6_hdr(skb);
+	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+	tuple->src_v6 = ip6h->saddr;
+	tuple->dst_v6 = ip6h->daddr;
+	tuple->src_port = ports->source;
+	tuple->dst_port = ports->dest;
+	tuple->l3proto = AF_INET6;
+	tuple->l4proto = ip6h->nexthdr;
+	tuple->iifidx = dev->ifindex;
+
+	return 0;
+}
+
+/*
+ * Based on ip_exceeds_mtu().
+ *
+ * Returns true when @skb is longer than @mtu, unless it is a GSO packet for
+ * which skb_gso_validate_mtu() succeeds against @mtu.
+ */
+static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+	bool gso_fits;
+
+	if (skb->len <= mtu)
+		return false;
+
+	gso_fits = skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu);
+
+	return !gso_fits;
+}
+
+/*
+ * Check @skb against the forwarding MTU of route @rt, as reported by
+ * ip6_dst_mtu_forward().  Returns true when the packet exceeds it.
+ */
+static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
+{
+	u32 fwd_mtu = ip6_dst_mtu_forward(&rt->dst);
+
+	return __nf_flow_exceeds_mtu(skb, fwd_mtu);
+}
+
+/*
+ * Netfilter hook implementing the IPv6 flow table fast path.
+ *
+ * @priv:  the struct nf_flowtable this hook looks flows up in
+ * @skb:   packet being processed
+ * @state: hook state; supplies the input device and the network namespace
+ *
+ * Returns NF_ACCEPT to leave the packet to the classic forwarding path
+ * (non-IPv6 packet, no tuple, no matching flow, missing output device, or
+ * packet exceeding the route MTU), NF_DROP when the header cannot be made
+ * writable or NAT fails, and NF_STOLEN once the packet has been handed to
+ * neigh_xmit().
+ */
+static unsigned int
+nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ struct flow_offload_tuple tuple = {};
+ enum flow_offload_tuple_dir dir;
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ struct in6_addr *nexthop;
+ struct ipv6hdr *ip6h;
+ struct rt6_info *rt;
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ return NF_ACCEPT;
+
+ /* Build the lookup key; packets we cannot describe take the slow path. */
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
+ return NF_ACCEPT;
+
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
+ if (tuplehash == NULL)
+ return NF_ACCEPT;
+
+ /* Resolve the output device recorded in the matching tuple. */
+ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+ if (!outdev)
+ return NF_ACCEPT;
+
+ /* Recover the flow that embeds this tuple hash entry. */
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
+ /* Oversized packets go to the classic path (see nf_flow_exceeds_mtu()). */
+ rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+ if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
+ return NF_ACCEPT;
+
+ if (skb_try_make_writable(skb, sizeof(*ip6h)))
+ return NF_DROP;
+
+ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+ nf_flow_nat_ipv6(flow, skb, dir) < 0)
+ return NF_DROP;
+
+ /* Refresh the flow timeout, then fetch the header again after mangling. */
+ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ ip6h = ipv6_hdr(skb);
+ ip6h->hop_limit--;
+
+ /*
+ * Transmit directly on the output device; the next hop is derived from
+ * the route and the source address of the opposite direction's tuple.
+ */
+ skb->dev = outdev;
+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+
+ return NF_STOLEN;
+}
+
+/*
+ * Flow table type descriptor for the IPv6 family: it pairs the rhashtable
+ * parameters and garbage-collection work function named below with the
+ * IPv6 packet hook defined in this file.
+ */
+static struct nf_flowtable_type flowtable_ipv6 = {
+ .family = NFPROTO_IPV6,
+ .params = &nf_flow_offload_rhash_params,
+ .gc = nf_flow_offload_work_gc,
+ .hook = nf_flow_offload_ipv6_hook,
+ .owner = THIS_MODULE,
+};
+
+/* Module entry point: register the IPv6 flow table type. Always returns 0. */
+static int __init nf_flow_ipv6_module_init(void)
+{
+ nft_register_flowtable_type(&flowtable_ipv6);
+
+ return 0;
+}
+
+/* Module exit point: unregister the IPv6 flow table type. */
+static void __exit nf_flow_ipv6_module_exit(void)
+{
+ nft_unregister_flowtable_type(&flowtable_ipv6);
+}
+
+module_init(nf_flow_ipv6_module_init);
+module_exit(nf_flow_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);
--
2.11.0
next prev parent reply other threads:[~2018-01-08 20:19 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-01-08 20:19 [PATCH 00/52] Netfilter/IPVS updates for net-next Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 01/52] netfilter: conntrack: remove nlattr_size pointer from l4proto trackers Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 02/52] netfilter: conntrack: constify list of builtin trackers Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 03/52] netfilter: conntrack: l4 protocol trackers can be const Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 04/52] netfilter: mark expected switch fall-throughs Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 05/52] netfilter: conntrack: timeouts can be const Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 06/52] netfilter: ipvs: Remove useless ipvsh param of frag_safe_skb_hp Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 07/52] netfilter: ipset: use nfnl_mutex_is_locked Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 08/52] netfilter: ipset: add resched points during set listing Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 09/52] netfilter: nf_conntrack_h323: Remove unwanted comments Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 10/52] netfilter: core: make nf_unregister_net_hooks simple wrapper again Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 11/52] netfilter: core: remove synchronize_net call if nfqueue is used Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 12/52] netfilter: core: free hooks with call_rcu Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 13/52] netfilter: reduce size of hook entry point locations Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 14/52] netfilter: add defines for arp/decnet max hooks Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 15/52] netfilter: reduce hook array sizes to what is needed Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 16/52] netfilter: don't allocate space for decnet hooks unless needed Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 17/52] netfilter: don't allocate space for arp/bridge " Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 18/52] netfilter: reduce NF_MAX_HOOKS define Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 19/52] netfilter: xtables: add and use xt_request_find_table_lock Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 20/52] netfilter: core: only allow one nat hook per hook point Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 21/52] netfilter: nf_tables: reject nat hook registration if prio is before conntrack Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 22/52] netfilter: nf_tables_arp: don't set forward chain Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 23/52] netfilter: nf_tables: explicit nft_set_pktinfo() call from hook path Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 24/52] netfilter: nf_tables: add nft_set_is_anonymous() helper Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 25/52] netfilter: core: add nf_remove_net_hook Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 26/52] netfilter: core: pass hook number, family and device to nf_find_hook_list() Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 27/52] netfilter: core: pass family as parameter to nf_remove_net_hook() Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 28/52] netfilter: core: support for NFPROTO_INET hook registration Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 29/52] netfilter: nf_tables_inet: don't use multihook infrastructure anymore Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 30/52] netfilter: nf_tables: remove multihook chains and families Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 31/52] netfilter: nf_tables: remove hooks from family definition Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 32/52] netfilter: connlimit: split xt_connlimit into front and backend Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 33/52] netfilter: move checksum indirection to struct nf_ipv6_ops Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 34/52] netfilter: move checksum_partial " Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 35/52] netfilter: remove saveroute indirection in struct nf_afinfo Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 36/52] netfilter: move route indirection to struct nf_ipv6_ops Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 37/52] netfilter: move reroute " Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 38/52] netfilter: remove route_key_size field in struct nf_afinfo Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 39/52] netfilter: remove struct nf_afinfo and its helper functions Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 40/52] netfilter: meta: secpath support Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 41/52] netfilter: remove defensive check on malformed packets from raw sockets Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 42/52] netfilter: nf_tables: remove nft_dereference() Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 43/52] netfilter: nf_conntrack: add IPS_OFFLOAD status bit Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 44/52] netfilter: nf_tables: add flow table netlink frontend Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 45/52] netfilter: add generic flow table infrastructure Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 46/52] netfilter: flow table support for IPv4 Pablo Neira Ayuso
2018-01-08 20:19 ` Pablo Neira Ayuso [this message]
2018-01-08 20:19 ` [PATCH 48/52] netfilter: flow table support for the mixed IPv4/IPv6 family Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 49/52] netfilter: nf_tables: flow offload expression Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 50/52] netfilter: ipset: use swap macro instead of _manually_ swapping values Pablo Neira Ayuso
2018-01-08 20:19 ` [PATCH 51/52] netfilter: ipset: Fix "don't update counters" mode when counters used at the matching Pablo Neira Ayuso
2018-01-08 20:20 ` [PATCH 52/52] netfilter: ipset: Missing nfnl_lock()/nfnl_unlock() is added to ip_set_net_exit() Pablo Neira Ayuso
2018-01-09 1:55 ` [PATCH 00/52] Netfilter/IPVS updates for net-next David Miller
2018-01-09 15:43 ` Pablo Neira Ayuso
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180108202000.12989-48-pablo@netfilter.org \
--to=pablo@netfilter.org \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.