* [PATCH net-next 3/4] net: Add IPv6 support to VRF device
2015-10-07 22:02 [PATCH net-next 0/4 v2] net: VRF support in IPv6 stack David Ahern
2015-10-07 22:02 ` [PATCH net-next 1/4] net: Add IPv6 support to l3mdev David Ahern
2015-10-07 22:02 ` [PATCH net-next 2/4] net: Export fib6_get_table and nd_tbl David Ahern
@ 2015-10-07 22:02 ` David Ahern
2015-10-07 22:02 ` [PATCH net-next 4/4] net: Add VRF support to IPv6 stack David Ahern
2015-10-11 11:48 ` [PATCH net-next 0/4 v2] net: VRF support in " David Miller
4 siblings, 0 replies; 8+ messages in thread
From: David Ahern @ 2015-10-07 22:02 UTC (permalink / raw)
To: netdev; +Cc: dsahern, David Ahern
Add support for IPv6 to VRF device driver. Implemenation parallels what
has been done for IPv4.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
drivers/net/Kconfig | 4 +-
drivers/net/vrf.c | 279 +++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 281 insertions(+), 2 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index b9ebd0d18a52..f184fb5bd110 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -298,8 +298,10 @@ config NLMON
config NET_VRF
tristate "Virtual Routing and Forwarding (Lite)"
- depends on IP_MULTIPLE_TABLES && IPV6_MULTIPLE_TABLES
+ depends on IP_MULTIPLE_TABLES
depends on NET_L3_MASTER_DEV
+ depends on IPV6 || IPV6=n
+ depends on IPV6_MULTIPLE_TABLES || IPV6=n
---help---
This option enables the support for mapping interfaces into VRF's. The
support enables VRF devices.
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 64499766e00f..c6f9266c4032 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -30,6 +30,7 @@
#include <net/arp.h>
#include <net/ip.h>
#include <net/ip_fib.h>
+#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/rtnetlink.h>
#include <net/route.h>
@@ -57,6 +58,7 @@ struct slave_queue {
struct net_vrf {
struct slave_queue queue;
struct rtable *rth;
+ struct rt6_info *rt6;
u32 tb_id;
};
@@ -104,12 +106,56 @@ static struct dst_ops vrf_dst_ops = {
.default_advmss = vrf_default_advmss,
};
+/* neighbor handling is done with actual device; do not want
+ * to flip skb->dev for those ndisc packets. This really fails
+ * for multiple next protocols (e.g., NEXTHDR_HOP). But it is
+ * a start.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+static bool check_ipv6_frame(const struct sk_buff *skb)
+{
+ const struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->data;
+ size_t hlen = sizeof(*ipv6h);
+ bool rc = true;
+
+ if (skb->len < hlen)
+ goto out;
+
+ if (ipv6h->nexthdr == NEXTHDR_ICMP) {
+ const struct icmp6hdr *icmph;
+
+ if (skb->len < hlen + sizeof(*icmph))
+ goto out;
+
+ icmph = (struct icmp6hdr *)(skb->data + sizeof(*ipv6h));
+ switch (icmph->icmp6_type) {
+ case NDISC_ROUTER_SOLICITATION:
+ case NDISC_ROUTER_ADVERTISEMENT:
+ case NDISC_NEIGHBOUR_SOLICITATION:
+ case NDISC_NEIGHBOUR_ADVERTISEMENT:
+ case NDISC_REDIRECT:
+ rc = false;
+ break;
+ }
+ }
+
+out:
+ return rc;
+}
+#else
+static bool check_ipv6_frame(const struct sk_buff *skb)
+{
+ return false;
+}
+#endif
+
static bool is_ip_rx_frame(struct sk_buff *skb)
{
switch (skb->protocol) {
case htons(ETH_P_IP):
- case htons(ETH_P_IPV6):
return true;
+ case htons(ETH_P_IPV6):
+ return check_ipv6_frame(skb);
}
return false;
}
@@ -169,12 +215,52 @@ static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev,
return stats;
}
+#if IS_ENABLED(CONFIG_IPV6)
+static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct net *net = dev_net(skb->dev);
+ struct flowi6 fl6 = {
+ /* needed to match OIF rule */
+ .flowi6_oif = dev->ifindex,
+ .flowi6_iif = LOOPBACK_IFINDEX,
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .flowlabel = ip6_flowinfo(iph),
+ .flowi6_mark = skb->mark,
+ .flowi6_proto = iph->nexthdr,
+ .flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF,
+ };
+ int ret = NET_XMIT_DROP;
+ struct dst_entry *dst;
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst == (struct dst_entry *)net->ipv6.ip6_null_entry)
+ goto err;
+
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
+
+ ret = ip6_local_out(skb);
+ if (unlikely(net_xmit_eval(ret)))
+ dev->stats.tx_errors++;
+ else
+ ret = NET_XMIT_SUCCESS;
+
+ return ret;
+err:
+ vrf_tx_error(dev, skb);
+ return NET_XMIT_DROP;
+}
+#else
static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
struct net_device *dev)
{
vrf_tx_error(dev, skb);
return NET_XMIT_DROP;
}
+#endif
static int vrf_send_v4_prep(struct sk_buff *skb, struct flowi4 *fl4,
struct net_device *vrf_dev)
@@ -269,6 +355,164 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
return ret;
}
+#if IS_ENABLED(CONFIG_IPV6)
+static struct dst_entry *vrf_ip6_check(struct dst_entry *dst, u32 cookie)
+{
+ return dst;
+}
+
+static int vrf_ip6_local_out(struct sk_buff *skb)
+{
+ return ip6_local_out(skb);
+}
+
+static struct dst_ops vrf_dst_ops6 = {
+ .family = AF_INET6,
+ .local_out = vrf_ip6_local_out,
+ .check = vrf_ip6_check,
+ .mtu = vrf_v4_mtu,
+ .destroy = vrf_dst_destroy,
+ .default_advmss = vrf_default_advmss,
+};
+
+static int init_dst_ops6_kmem_cachep(void)
+{
+ vrf_dst_ops6.kmem_cachep = kmem_cache_create("vrf_ip6_dst_cache",
+ sizeof(struct rt6_info),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+
+ if (!vrf_dst_ops6.kmem_cachep)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void free_dst_ops6_kmem_cachep(void)
+{
+ kmem_cache_destroy(vrf_dst_ops6.kmem_cachep);
+}
+
+static int vrf_input6(struct sk_buff *skb)
+{
+ skb->dev->stats.rx_errors++;
+ kfree_skb(skb);
+ return 0;
+}
+
+/* modelled after ip6_finish_output2 */
+static int vrf_finish_output6(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct net_device *dev = dst->dev;
+ struct neighbour *neigh;
+ struct in6_addr *nexthop;
+ int ret;
+
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->dev = dev;
+
+ rcu_read_lock_bh();
+ nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
+ neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
+ if (unlikely(!neigh))
+ neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
+ if (!IS_ERR(neigh)) {
+ ret = dst_neigh_output(dst, neigh, skb);
+ rcu_read_unlock_bh();
+ return ret;
+ }
+ rcu_read_unlock_bh();
+
+ IP6_INC_STATS(dev_net(dst->dev),
+ ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/* modelled after ip6_output */
+static int vrf_output6(struct sock *sk, struct sk_buff *skb)
+{
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+ dev_net(skb_dst(skb)->dev), sk, skb,
+ NULL, skb_dst(skb)->dev,
+ vrf_finish_output6,
+ !(IP6CB(skb)->flags & IP6SKB_REROUTED));
+}
+
+static void vrf_rt6_destroy(struct net_vrf *vrf)
+{
+ struct dst_entry *dst = (struct dst_entry *)vrf->rt6;
+
+ dst_destroy(dst);
+ free_percpu(vrf->rt6->rt6i_pcpu);
+ vrf->rt6 = NULL;
+}
+
+static int vrf_rt6_create(struct net_device *dev)
+{
+ struct net_vrf *vrf = netdev_priv(dev);
+ struct rt6_info *rt6;
+ struct dst_entry *dst;
+ int cpu;
+ int rc = -ENOMEM;
+
+ rt6 = dst_alloc(&vrf_dst_ops6, dev, 0,
+ DST_OBSOLETE_NONE,
+ (DST_HOST | DST_NOPOLICY | DST_NOXFRM));
+ if (!rt6)
+ goto out;
+
+ rt6->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_KERNEL);
+ if (!rt6->rt6i_pcpu) {
+ dst_destroy((struct dst_entry *)rt6);
+ goto out;
+ }
+ for_each_possible_cpu(cpu) {
+ struct rt6_info **p = per_cpu_ptr(rt6->rt6i_pcpu, cpu);
+ *p = NULL;
+ }
+
+ dst = &rt6->dst;
+ memset(dst + 1, 0, sizeof(*rt6) - sizeof(*dst));
+
+ INIT_LIST_HEAD(&rt6->rt6i_siblings);
+ INIT_LIST_HEAD(&rt6->rt6i_uncached);
+
+ rt6->dst.input = vrf_input6;
+ rt6->dst.output = vrf_output6;
+
+ rt6->rt6i_table = fib6_get_table(dev_net(dev), vrf->tb_id);
+
+ atomic_set(&rt6->dst.__refcnt, 2);
+
+ vrf->rt6 = rt6;
+ rc = 0;
+out:
+ return rc;
+}
+#else
+static int init_dst_ops6_kmem_cachep(void)
+{
+ return 0;
+}
+
+static void free_dst_ops6_kmem_cachep(void)
+{
+}
+
+static void vrf_rt6_destroy(struct net_vrf *vrf)
+{
+}
+
+static int vrf_rt6_create(struct net_device *dev)
+{
+ return 0;
+}
+#endif
+
/* modelled after ip_finish_output2 */
static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
@@ -491,6 +735,7 @@ static void vrf_dev_uninit(struct net_device *dev)
struct slave *slave, *next;
vrf_rtable_destroy(vrf);
+ vrf_rt6_destroy(vrf);
list_for_each_entry_safe(slave, next, head, list)
vrf_del_slave(dev, slave->dev);
@@ -514,10 +759,15 @@ static int vrf_dev_init(struct net_device *dev)
if (!vrf->rth)
goto out_stats;
+ if (vrf_rt6_create(dev) != 0)
+ goto out_rth;
+
dev->flags = IFF_MASTER | IFF_NOARP;
return 0;
+out_rth:
+ vrf_rtable_destroy(vrf);
out_stats:
free_percpu(dev->dstats);
dev->dstats = NULL;
@@ -587,10 +837,30 @@ static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
fl4->flowi4_scope = scope;
}
+#if IS_ENABLED(CONFIG_IPV6)
+static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
+ const struct flowi6 *fl6)
+{
+ struct rt6_info *rt = NULL;
+
+ if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) {
+ struct net_vrf *vrf = netdev_priv(dev);
+
+ rt = vrf->rt6;
+ atomic_inc(&rt->dst.__refcnt);
+ }
+
+ return (struct dst_entry *)rt;
+}
+#endif
+
static const struct l3mdev_ops vrf_l3mdev_ops = {
.l3mdev_fib_table = vrf_fib_table,
.l3mdev_get_rtable = vrf_get_rtable,
.l3mdev_get_saddr = vrf_get_saddr,
+#if IS_ENABLED(CONFIG_IPV6)
+ .l3mdev_get_rt6_dst = vrf_get_rt6_dst,
+#endif
};
static void vrf_get_drvinfo(struct net_device *dev,
@@ -732,6 +1002,10 @@ static int __init vrf_init_module(void)
if (!vrf_dst_ops.kmem_cachep)
return -ENOMEM;
+ rc = init_dst_ops6_kmem_cachep();
+ if (rc != 0)
+ goto error2;
+
register_netdevice_notifier(&vrf_notifier_block);
rc = rtnl_link_register(&vrf_link_ops);
@@ -742,6 +1016,8 @@ static int __init vrf_init_module(void)
error:
unregister_netdevice_notifier(&vrf_notifier_block);
+ free_dst_ops6_kmem_cachep();
+error2:
kmem_cache_destroy(vrf_dst_ops.kmem_cachep);
return rc;
}
@@ -751,6 +1027,7 @@ static void __exit vrf_cleanup_module(void)
rtnl_link_unregister(&vrf_link_ops);
unregister_netdevice_notifier(&vrf_notifier_block);
kmem_cache_destroy(vrf_dst_ops.kmem_cachep);
+ free_dst_ops6_kmem_cachep();
}
module_init(vrf_init_module);
--
1.9.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH net-next 4/4] net: Add VRF support to IPv6 stack
2015-10-07 22:02 [PATCH net-next 0/4 v2] net: VRF support in IPv6 stack David Ahern
` (2 preceding siblings ...)
2015-10-07 22:02 ` [PATCH net-next 3/4] net: Add IPv6 support to VRF device David Ahern
@ 2015-10-07 22:02 ` David Ahern
2015-10-11 11:48 ` [PATCH net-next 0/4 v2] net: VRF support in " David Miller
4 siblings, 0 replies; 8+ messages in thread
From: David Ahern @ 2015-10-07 22:02 UTC (permalink / raw)
To: netdev; +Cc: dsahern, David Ahern
As with IPv4 support for VRFs added to IPv6 stack by replacing hardcoded
table ids with possibly device specific ones and manipulating the oif in
the flowi6. The flow flags are used to skip oif compare in nexthop lookups
if the device is enslaved to a VRF via the L3 master device.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
net/ipv6/addrconf.c | 12 +++++++++---
net/ipv6/icmp.c | 6 +++++-
net/ipv6/ip6_output.c | 6 ++++--
net/ipv6/ndisc.c | 26 +++++++++++++++++++++++---
net/ipv6/route.c | 22 +++++++++++++++++-----
5 files changed, 58 insertions(+), 14 deletions(-)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b2d6f2fc0fbd..e07b1fb52131 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -81,6 +81,7 @@
#include <net/ip.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/l3mdev.h>
#include <linux/if_tunnel.h>
#include <linux/rtnetlink.h>
#include <linux/netconf.h>
@@ -2149,7 +2150,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
unsigned long expires, u32 flags)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_PREFIX,
+ .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX,
.fc_metric = IP6_RT_PRIO_ADDRCONF,
.fc_ifindex = dev->ifindex,
.fc_expires = expires,
@@ -2182,8 +2183,9 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
struct fib6_node *fn;
struct rt6_info *rt = NULL;
struct fib6_table *table;
+ u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX;
- table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX);
+ table = fib6_get_table(dev_net(dev), tb_id);
if (!table)
return NULL;
@@ -2214,7 +2216,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
static void addrconf_add_mroute(struct net_device *dev)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_LOCAL,
+ .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_LOCAL,
.fc_metric = IP6_RT_PRIO_ADDRCONF,
.fc_ifindex = dev->ifindex,
.fc_dst_len = 8,
@@ -3035,6 +3037,10 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
{
struct in6_addr addr;
+ /* no link local addresses on L3 master devices */
+ if (netif_is_l3_master(idev->dev))
+ return;
+
ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) {
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6c2b2132c8d3..efb1c00f2270 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -68,6 +68,7 @@
#include <net/xfrm.h>
#include <net/inet_common.h>
#include <net/dsfield.h>
+#include <net/l3mdev.h>
#include <asm/uaccess.h>
@@ -496,6 +497,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
+ if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev);
+
dst = icmpv6_route_lookup(net, skb, sk, &fl6);
if (IS_ERR(dst))
goto out;
@@ -575,7 +579,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
fl6.daddr = ipv6_hdr(skb)->saddr;
if (saddr)
fl6.saddr = *saddr;
- fl6.flowi6_oif = skb->dev->ifindex;
+ fl6.flowi6_oif = l3mdev_fib_oif(skb->dev);
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
fl6.flowi6_mark = mark;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index caf7d14a1bdd..527870ccf5c1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -55,6 +55,7 @@
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
+#include <net/l3mdev.h>
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
@@ -886,7 +887,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
#ifdef CONFIG_IPV6_SUBTREES
ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
- (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
+ (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
+ (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
dst_release(dst);
dst = NULL;
}
@@ -1038,7 +1040,7 @@ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
if (final_dst)
fl6->daddr = *final_dst;
if (!fl6->flowi6_oif)
- fl6->flowi6_oif = dst->dev->ifindex;
+ fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b3292db05198..0b85242d8469 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -67,6 +67,7 @@
#include <net/flow.h>
#include <net/ip6_checksum.h>
#include <net/inet_common.h>
+#include <net/l3mdev.h>
#include <linux/proc_fs.h>
#include <linux/netfilter.h>
@@ -442,8 +443,11 @@ static void ndisc_send_skb(struct sk_buff *skb,
if (!dst) {
struct flowi6 fl6;
+ int oif = l3mdev_fib_oif(skb->dev);
- icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex);
+ icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
+ if (oif != skb->dev->ifindex)
+ fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
dst = icmp6_dst_alloc(skb->dev, &fl6);
if (IS_ERR(dst)) {
kfree_skb(skb);
@@ -767,7 +771,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
if (ifp) {
-
+have_ifp:
if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
if (dad) {
/*
@@ -793,6 +797,18 @@ static void ndisc_recv_ns(struct sk_buff *skb)
} else {
struct net *net = dev_net(dev);
+ /* perhaps an address on the master device */
+ if (netif_is_l3_slave(dev)) {
+ struct net_device *mdev;
+
+ mdev = netdev_master_upper_dev_get_rcu(dev);
+ if (mdev) {
+ ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1);
+ if (ifp)
+ goto have_ifp;
+ }
+ }
+
idev = in6_dev_get(dev);
if (!idev) {
/* XXX: count this drop? */
@@ -1484,6 +1500,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
struct flowi6 fl6;
int rd_len;
u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
+ int oif = l3mdev_fib_oif(dev);
bool ret;
if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
@@ -1500,7 +1517,10 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
}
icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
- &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
+ &saddr_buf, &ipv6_hdr(skb)->saddr, oif);
+
+ if (oif != skb->dev->ifindex)
+ fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d3d946773a3e..63446bae7f5f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -61,6 +61,7 @@
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
+#include <net/l3mdev.h>
#include <asm/uaccess.h>
@@ -1044,6 +1045,9 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
saved_fn = fn;
+ if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
+ oif = 0;
+
redo_rt6_select:
rt = rt6_select(fn, oif, strict);
if (rt->rt6i_nsiblings)
@@ -1141,7 +1145,7 @@ void ip6_route_input(struct sk_buff *skb)
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct ip_tunnel_info *tun_info;
struct flowi6 fl6 = {
- .flowi6_iif = skb->dev->ifindex,
+ .flowi6_iif = l3mdev_fib_oif(skb->dev),
.daddr = iph->daddr,
.saddr = iph->saddr,
.flowlabel = ip6_flowinfo(iph),
@@ -1165,8 +1169,13 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
struct flowi6 *fl6)
{
+ struct dst_entry *dst;
int flags = 0;
+ dst = l3mdev_rt6_dst_by_oif(net, fl6);
+ if (dst)
+ return dst;
+
fl6->flowi6_iif = LOOPBACK_IFINDEX;
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
@@ -2264,7 +2273,6 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
unsigned int pref)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_INFO,
.fc_metric = IP6_RT_PRIO_USER,
.fc_ifindex = ifindex,
.fc_dst_len = prefixlen,
@@ -2275,6 +2283,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
.fc_nlinfo.nl_net = net,
};
+ cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
cfg.fc_dst = *prefix;
cfg.fc_gateway = *gwaddr;
@@ -2315,7 +2324,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
unsigned int pref)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_DFLT,
+ .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
.fc_metric = IP6_RT_PRIO_USER,
.fc_ifindex = dev->ifindex,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
@@ -2362,7 +2371,8 @@ static void rtmsg_to_fib6_config(struct net *net,
{
memset(cfg, 0, sizeof(*cfg));
- cfg->fc_table = RT6_TABLE_MAIN;
+ cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
+ : RT6_TABLE_MAIN;
cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
cfg->fc_metric = rtmsg->rtmsg_metric;
cfg->fc_expires = rtmsg->rtmsg_info;
@@ -2471,6 +2481,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
const struct in6_addr *addr,
bool anycast)
{
+ u32 tb_id;
struct net *net = dev_net(idev->dev);
struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
DST_NOCOUNT);
@@ -2493,7 +2504,8 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
rt->rt6i_gateway = *addr;
rt->rt6i_dst.addr = *addr;
rt->rt6i_dst.plen = 128;
- rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
+ tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
+ rt->rt6i_table = fib6_get_table(net, tb_id);
rt->dst.flags |= DST_NOCACHE;
atomic_set(&rt->dst.__refcnt, 1);
--
1.9.1
^ permalink raw reply related [flat|nested] 8+ messages in thread