Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next v2 05/21] net/ipv6: Pass net namespace to route functions
From: David Ahern @ 2018-04-18  0:33 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com>

Pass network namespace reference into route add, delete and get
functions.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_route.h | 12 ++++++-----
 net/ipv6/addrconf.c     | 33 ++++++++++++++++--------------
 net/ipv6/anycast.c      | 10 +++++----
 net/ipv6/ndisc.c        | 12 ++++++-----
 net/ipv6/route.c        | 54 +++++++++++++++++++++++++------------------------
 5 files changed, 66 insertions(+), 55 deletions(-)

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 08b132381984..1130a1144dfd 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -101,8 +101,8 @@ void ip6_route_cleanup(void);
 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg);
 
 int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack);
-int ip6_ins_rt(struct rt6_info *);
-int ip6_del_rt(struct rt6_info *);
+int ip6_ins_rt(struct net *net, struct rt6_info *rt);
+int ip6_del_rt(struct net *net, struct rt6_info *rt);
 
 void rt6_flush_exceptions(struct rt6_info *rt);
 int rt6_remove_exception_rt(struct rt6_info *rt);
@@ -137,7 +137,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
 
 void fib6_force_start_gc(struct net *net);
 
-struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
+struct rt6_info *addrconf_dst_alloc(struct net *net, struct inet6_dev *idev,
 				    const struct in6_addr *addr, bool anycast);
 
 struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
@@ -147,9 +147,11 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
  *	support functions for ND
  *
  */
-struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr,
+struct rt6_info *rt6_get_dflt_router(struct net *net,
+				     const struct in6_addr *addr,
 				     struct net_device *dev);
-struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
+struct rt6_info *rt6_add_dflt_router(struct net *net,
+				     const struct in6_addr *gwaddr,
 				     struct net_device *dev, unsigned int pref);
 
 void rt6_purge_dflt_routers(struct net *net);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b2c0175125db..7ff7466c52e5 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1037,7 +1037,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 		goto out;
 	}
 
-	rt = addrconf_dst_alloc(idev, addr, false);
+	rt = addrconf_dst_alloc(net, idev, addr, false);
 	if (IS_ERR(rt)) {
 		err = PTR_ERR(rt);
 		rt = NULL;
@@ -1187,7 +1187,7 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_r
 				       0, RTF_GATEWAY | RTF_DEFAULT);
 	if (rt) {
 		if (del_rt)
-			ip6_del_rt(rt);
+			ip6_del_rt(dev_net(ifp->idev->dev), rt);
 		else {
 			if (!(rt->rt6i_flags & RTF_EXPIRES))
 				rt6_set_expires(rt, expires);
@@ -2666,7 +2666,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 		if (rt) {
 			/* Autoconf prefix route */
 			if (valid_lft == 0) {
-				ip6_del_rt(rt);
+				ip6_del_rt(net, rt);
 				rt = NULL;
 			} else if (addrconf_finite_timeout(rt_expires)) {
 				/* not infinity */
@@ -3333,7 +3333,8 @@ static void addrconf_gre_config(struct net_device *dev)
 }
 #endif
 
-static int fixup_permanent_addr(struct inet6_dev *idev,
+static int fixup_permanent_addr(struct net *net,
+				struct inet6_dev *idev,
 				struct inet6_ifaddr *ifp)
 {
 	/* !rt6i_node means the host route was removed from the
@@ -3343,7 +3344,7 @@ static int fixup_permanent_addr(struct inet6_dev *idev,
 	if (!ifp->rt || !ifp->rt->rt6i_node) {
 		struct rt6_info *rt, *prev;
 
-		rt = addrconf_dst_alloc(idev, &ifp->addr, false);
+		rt = addrconf_dst_alloc(net, idev, &ifp->addr, false);
 		if (IS_ERR(rt))
 			return PTR_ERR(rt);
 
@@ -3367,7 +3368,7 @@ static int fixup_permanent_addr(struct inet6_dev *idev,
 	return 0;
 }
 
-static void addrconf_permanent_addr(struct net_device *dev)
+static void addrconf_permanent_addr(struct net *net, struct net_device *dev)
 {
 	struct inet6_ifaddr *ifp, *tmp;
 	struct inet6_dev *idev;
@@ -3380,7 +3381,7 @@ static void addrconf_permanent_addr(struct net_device *dev)
 
 	list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
 		if ((ifp->flags & IFA_F_PERMANENT) &&
-		    fixup_permanent_addr(idev, ifp) < 0) {
+		    fixup_permanent_addr(net, idev, ifp) < 0) {
 			write_unlock_bh(&idev->lock);
 			in6_ifa_hold(ifp);
 			ipv6_del_addr(ifp);
@@ -3449,7 +3450,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 
 		if (event == NETDEV_UP) {
 			/* restore routes for permanent addresses */
-			addrconf_permanent_addr(dev);
+			addrconf_permanent_addr(net, dev);
 
 			if (!addrconf_link_ready(dev)) {
 				/* device is not ready yet. */
@@ -3735,7 +3736,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		spin_unlock_bh(&ifa->lock);
 
 		if (rt)
-			ip6_del_rt(rt);
+			ip6_del_rt(net, rt);
 
 		if (state != INET6_IFADDR_STATE_DEAD) {
 			__ipv6_ifa_notify(RTM_DELADDR, ifa);
@@ -3853,6 +3854,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 	struct inet6_dev *idev = ifp->idev;
 	struct net_device *dev = idev->dev;
 	bool bump_id, notify = false;
+	struct net *net;
 
 	addrconf_join_solict(dev, &ifp->addr);
 
@@ -3863,8 +3865,9 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 	if (ifp->state == INET6_IFADDR_STATE_DEAD)
 		goto out;
 
+	net = dev_net(dev);
 	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
-	    (dev_net(dev)->ipv6.devconf_all->accept_dad < 1 &&
+	    (net->ipv6.devconf_all->accept_dad < 1 &&
 	     idev->cnf.accept_dad < 1) ||
 	    !(ifp->flags&IFA_F_TENTATIVE) ||
 	    ifp->flags & IFA_F_NODAD) {
@@ -3900,8 +3903,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 	 * Frames right away
 	 */
 	if (ifp->flags & IFA_F_OPTIMISTIC) {
-		ip6_ins_rt(ifp->rt);
-		if (ipv6_use_optimistic_addr(dev_net(dev), idev)) {
+		ip6_ins_rt(net, ifp->rt);
+		if (ipv6_use_optimistic_addr(net, idev)) {
 			/* Because optimistic nodes can use this address,
 			 * notify listeners. If DAD fails, RTM_DELADDR is sent.
 			 */
@@ -5604,7 +5607,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 		 * to do it again
 		 */
 		if (!rcu_access_pointer(ifp->rt->rt6i_node))
-			ip6_ins_rt(ifp->rt);
+			ip6_ins_rt(net, ifp->rt);
 		if (ifp->idev->cnf.forwarding)
 			addrconf_join_anycast(ifp);
 		if (!ipv6_addr_any(&ifp->peer_addr))
@@ -5621,11 +5624,11 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 			rt = addrconf_get_prefix_route(&ifp->peer_addr, 128,
 						       ifp->idev->dev, 0, 0);
 			if (rt)
-				ip6_del_rt(rt);
+				ip6_del_rt(net, rt);
 		}
 		if (ifp->rt) {
 			if (dst_hold_safe(&ifp->rt->dst))
-				ip6_del_rt(ifp->rt);
+				ip6_del_rt(net, ifp->rt);
 		}
 		rt_genid_bump_ipv6(net);
 		break;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index bbcabbba9bd8..1122fb299b86 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -247,6 +247,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 {
 	struct ifacaddr6 *aca;
 	struct rt6_info *rt;
+	struct net *net;
 	int err;
 
 	ASSERT_RTNL();
@@ -265,7 +266,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 		}
 	}
 
-	rt = addrconf_dst_alloc(idev, addr, true);
+	net = dev_net(idev->dev);
+	rt = addrconf_dst_alloc(net, idev, addr, true);
 	if (IS_ERR(rt)) {
 		err = PTR_ERR(rt);
 		goto out;
@@ -286,7 +288,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 	aca_get(aca);
 	write_unlock_bh(&idev->lock);
 
-	ip6_ins_rt(rt);
+	ip6_ins_rt(net, rt);
 
 	addrconf_join_solict(idev->dev, &aca->aca_addr);
 
@@ -329,7 +331,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
 	dst_hold(&aca->aca_rt->dst);
-	ip6_del_rt(aca->aca_rt);
+	ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
 
 	aca_put(aca);
 	return 0;
@@ -357,7 +359,7 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
 		addrconf_leave_solict(idev, &aca->aca_addr);
 
 		dst_hold(&aca->aca_rt->dst);
-		ip6_del_rt(aca->aca_rt);
+		ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
 
 		aca_put(aca);
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 9de4dfb126ba..3fbc3805e69b 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1156,6 +1156,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 	struct neighbour *neigh = NULL;
 	struct inet6_dev *in6_dev;
 	struct rt6_info *rt = NULL;
+	struct net *net;
 	int lifetime;
 	struct ndisc_options ndopts;
 	int optlen;
@@ -1253,9 +1254,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 	/* Do not accept RA with source-addr found on local machine unless
 	 * accept_ra_from_local is set to true.
 	 */
+	net = dev_net(in6_dev->dev);
 	if (!in6_dev->cnf.accept_ra_from_local &&
-	    ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
-			  in6_dev->dev, 0)) {
+	    ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) {
 		ND_PRINTK(2, info,
 			  "RA from local address detected on dev: %s: default router ignored\n",
 			  skb->dev->name);
@@ -1272,7 +1273,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
 #endif
 
-	rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
+	rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
 
 	if (rt) {
 		neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr);
@@ -1285,7 +1286,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 		}
 	}
 	if (rt && lifetime == 0) {
-		ip6_del_rt(rt);
+		ip6_del_rt(net, rt);
 		rt = NULL;
 	}
 
@@ -1294,7 +1295,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 	if (!rt && lifetime) {
 		ND_PRINTK(3, info, "RA: adding default router\n");
 
-		rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
+		rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr,
+					 skb->dev, pref);
 		if (!rt) {
 			ND_PRINTK(0, err,
 				  "RA: %s failed to add default route\n",
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0a99cda9fd7b..045811a3da76 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -850,13 +850,13 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 	}
 
 	if (rinfo->prefix_len == 0)
-		rt = rt6_get_dflt_router(gwaddr, dev);
+		rt = rt6_get_dflt_router(net, gwaddr, dev);
 	else
 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
 					gwaddr, dev);
 
 	if (rt && !lifetime) {
-		ip6_del_rt(rt);
+		ip6_del_rt(net, rt);
 		rt = NULL;
 	}
 
@@ -1014,9 +1014,9 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
 	return err;
 }
 
-int ip6_ins_rt(struct rt6_info *rt)
+int ip6_ins_rt(struct net *net, struct rt6_info *rt)
 {
-	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
+	struct nl_info info = {	.nl_net = net, };
 	struct mx6_config mxc = { .mx = NULL, };
 
 	/* Hold dst to account for the reference from the fib6 tree */
@@ -1121,14 +1121,13 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
 	return pcpu_rt;
 }
 
-static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
+static struct rt6_info *rt6_make_pcpu_route(struct net *net,
+					    struct rt6_info *rt)
 {
 	struct rt6_info *pcpu_rt, *prev, **p;
 
 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
 	if (!pcpu_rt) {
-		struct net *net = dev_net(rt->dst.dev);
-
 		dst_hold(&net->ipv6.ip6_null_entry->dst);
 		return net->ipv6.ip6_null_entry;
 	}
@@ -1787,7 +1786,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 				/* No dst_hold() on rt is needed because grabbing
 				 * rt->rt6i_ref makes sure rt can't be released.
 				 */
-				pcpu_rt = rt6_make_pcpu_route(rt);
+				pcpu_rt = rt6_make_pcpu_route(net, rt);
 				rt6_release(rt);
 			} else {
 				/* rt is already removed from tree */
@@ -2088,7 +2087,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 	if (rt) {
 		if (rt->rt6i_flags & RTF_CACHE) {
 			if (rt6_check_expired(rt)) {
-				ip6_del_rt(rt);
+				ip6_del_rt(dev_net(dst->dev), rt);
 				dst = NULL;
 			}
 		} else {
@@ -2109,7 +2108,7 @@ static void ip6_link_failure(struct sk_buff *skb)
 	if (rt) {
 		if (rt->rt6i_flags & RTF_CACHE) {
 			if (dst_hold_safe(&rt->dst))
-				ip6_del_rt(rt);
+				ip6_del_rt(dev_net(rt->dst.dev), rt);
 		} else {
 			struct fib6_node *fn;
 
@@ -3018,9 +3017,9 @@ int ip6_route_add(struct fib6_config *cfg,
 
 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 {
-	int err;
+	struct net *net = info->nl_net;
 	struct fib6_table *table;
-	struct net *net = dev_net(rt->dst.dev);
+	int err;
 
 	if (rt == net->ipv6.ip6_null_entry) {
 		err = -ENOENT;
@@ -3037,11 +3036,10 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 	return err;
 }
 
-int ip6_del_rt(struct rt6_info *rt)
+int ip6_del_rt(struct net *net, struct rt6_info *rt)
 {
-	struct nl_info info = {
-		.nl_net = dev_net(rt->dst.dev),
-	};
+	struct nl_info info = { .nl_net = net };
+
 	return __ip6_del_rt(rt, &info);
 }
 
@@ -3376,13 +3374,15 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 }
 #endif
 
-struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
+struct rt6_info *rt6_get_dflt_router(struct net *net,
+				     const struct in6_addr *addr,
+				     struct net_device *dev)
 {
 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
 	struct rt6_info *rt;
 	struct fib6_table *table;
 
-	table = fib6_get_table(dev_net(dev), tb_id);
+	table = fib6_get_table(net, tb_id);
 	if (!table)
 		return NULL;
 
@@ -3399,7 +3399,8 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
 	return rt;
 }
 
-struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
+struct rt6_info *rt6_add_dflt_router(struct net *net,
+				     const struct in6_addr *gwaddr,
 				     struct net_device *dev,
 				     unsigned int pref)
 {
@@ -3412,7 +3413,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
 		.fc_protocol = RTPROT_RA,
 		.fc_nlinfo.portid = 0,
 		.fc_nlinfo.nlh = NULL,
-		.fc_nlinfo.nl_net = dev_net(dev),
+		.fc_nlinfo.nl_net = net,
 	};
 
 	cfg.fc_gateway = *gwaddr;
@@ -3425,10 +3426,11 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
 	}
 
-	return rt6_get_dflt_router(gwaddr, dev);
+	return rt6_get_dflt_router(net, gwaddr, dev);
 }
 
-static void __rt6_purge_dflt_routers(struct fib6_table *table)
+static void __rt6_purge_dflt_routers(struct net *net,
+				     struct fib6_table *table)
 {
 	struct rt6_info *rt;
 
@@ -3439,7 +3441,7 @@ static void __rt6_purge_dflt_routers(struct fib6_table *table)
 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
 			if (dst_hold_safe(&rt->dst)) {
 				rcu_read_unlock();
-				ip6_del_rt(rt);
+				ip6_del_rt(net, rt);
 			} else {
 				rcu_read_unlock();
 			}
@@ -3463,7 +3465,7 @@ void rt6_purge_dflt_routers(struct net *net)
 		head = &net->ipv6.fib_table_hash[h];
 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
-				__rt6_purge_dflt_routers(table);
+				__rt6_purge_dflt_routers(net, table);
 		}
 	}
 
@@ -3583,12 +3585,12 @@ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff
  *	Allocate a dst for local (unicast / anycast) address.
  */
 
-struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
+struct rt6_info *addrconf_dst_alloc(struct net *net,
+				    struct inet6_dev *idev,
 				    const struct in6_addr *addr,
 				    bool anycast)
 {
 	u32 tb_id;
-	struct net *net = dev_net(idev->dev);
 	struct net_device *dev = idev->dev;
 	struct rt6_info *rt;
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 06/21] net/ipv6: Move support functions up in route.c
From: David Ahern @ 2018-04-18  0:33 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com>

Code move only.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/ipv6/route.c | 119 +++++++++++++++++++++++++++----------------------------
 1 file changed, 59 insertions(+), 60 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 045811a3da76..0daf4c9c9f2b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -78,7 +78,6 @@ enum rt6_nud_state {
 	RT6_NUD_SUCCEED = 1
 };
 
-static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
@@ -879,6 +878,65 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 }
 #endif
 
+/*
+ *	Misc support functions
+ */
+
+/* called with rcu_lock held */
+static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
+{
+	struct net_device *dev = rt->dst.dev;
+
+	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
+		/* for copies of local routes, dst->dev needs to be the
+		 * device if it is a master device, the master device if
+		 * device is enslaved, and the loopback as the default
+		 */
+		if (netif_is_l3_slave(dev) &&
+		    !rt6_need_strict(&rt->rt6i_dst.addr))
+			dev = l3mdev_master_dev_rcu(dev);
+		else if (!netif_is_l3_master(dev))
+			dev = dev_net(dev)->loopback_dev;
+		/* last case is netif_is_l3_master(dev) is true in which
+		 * case we want dev returned to be dev
+		 */
+	}
+
+	return dev;
+}
+
+static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
+{
+	BUG_ON(from->from);
+
+	rt->rt6i_flags &= ~RTF_EXPIRES;
+	dst_hold(&from->dst);
+	rt->from = from;
+	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
+}
+
+static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
+{
+	rt->dst.input = ort->dst.input;
+	rt->dst.output = ort->dst.output;
+	rt->rt6i_dst = ort->rt6i_dst;
+	rt->dst.error = ort->dst.error;
+	rt->rt6i_idev = ort->rt6i_idev;
+	if (rt->rt6i_idev)
+		in6_dev_hold(rt->rt6i_idev);
+	rt->dst.lastuse = jiffies;
+	rt->rt6i_gateway = ort->rt6i_gateway;
+	rt->rt6i_flags = ort->rt6i_flags;
+	rt6_set_from(rt, ort);
+	rt->rt6i_metric = ort->rt6i_metric;
+#ifdef CONFIG_IPV6_SUBTREES
+	rt->rt6i_src = ort->rt6i_src;
+#endif
+	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
+	rt->rt6i_table = ort->rt6i_table;
+	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
+}
+
 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
 					struct in6_addr *saddr)
 {
@@ -1024,29 +1082,6 @@ int ip6_ins_rt(struct net *net, struct rt6_info *rt)
 	return __ip6_ins_rt(rt, &info, &mxc, NULL);
 }
 
-/* called with rcu_lock held */
-static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
-{
-	struct net_device *dev = rt->dst.dev;
-
-	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
-		/* for copies of local routes, dst->dev needs to be the
-		 * device if it is a master device, the master device if
-		 * device is enslaved, and the loopback as the default
-		 */
-		if (netif_is_l3_slave(dev) &&
-		    !rt6_need_strict(&rt->rt6i_dst.addr))
-			dev = l3mdev_master_dev_rcu(dev);
-		else if (!netif_is_l3_master(dev))
-			dev = dev_net(dev)->loopback_dev;
-		/* last case is netif_is_l3_master(dev) is true in which
-		 * case we want dev returned to be dev
-		 */
-	}
-
-	return dev;
-}
-
 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 					   const struct in6_addr *daddr,
 					   const struct in6_addr *saddr)
@@ -3270,42 +3305,6 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 	neigh_release(neigh);
 }
 
-/*
- *	Misc support functions
- */
-
-static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
-{
-	BUG_ON(from->from);
-
-	rt->rt6i_flags &= ~RTF_EXPIRES;
-	dst_hold(&from->dst);
-	rt->from = from;
-	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
-}
-
-static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
-{
-	rt->dst.input = ort->dst.input;
-	rt->dst.output = ort->dst.output;
-	rt->rt6i_dst = ort->rt6i_dst;
-	rt->dst.error = ort->dst.error;
-	rt->rt6i_idev = ort->rt6i_idev;
-	if (rt->rt6i_idev)
-		in6_dev_hold(rt->rt6i_idev);
-	rt->dst.lastuse = jiffies;
-	rt->rt6i_gateway = ort->rt6i_gateway;
-	rt->rt6i_flags = ort->rt6i_flags;
-	rt6_set_from(rt, ort);
-	rt->rt6i_metric = ort->rt6i_metric;
-#ifdef CONFIG_IPV6_SUBTREES
-	rt->rt6i_src = ort->rt6i_src;
-#endif
-	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
-	rt->rt6i_table = ort->rt6i_table;
-	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
-}
-
 #ifdef CONFIG_IPV6_ROUTE_INFO
 static struct rt6_info *rt6_get_route_info(struct net *net,
 					   const struct in6_addr *prefix, int prefixlen,
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 07/21] net/ipv6: Save route type in rt6_info
From: David Ahern @ 2018-04-18  0:33 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com>

The RTN_ type for IPv6 FIB entries is currently embedded in rt6i_flags
and dst.error. Since dst is going to be removed, it can no longer be
relied on for FIB dumps so save the route type as fib6_type.

fc_type is set in current users based on the algorithm in rt6_fill_node:
  - rt6i_flags contains RTF_LOCAL: fc_type = RTN_LOCAL
  - rt6i_flags contains RTF_ANYCAST: fc_type = RTN_ANYCAST
  - else fc_type = RTN_UNICAST

Similarly, fib6_type is set in the rt6_info templates based on the
RTF_REJECT section of rt6_fill_node converting dst.error to RTN type.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h |  1 +
 net/ipv6/addrconf.c   |  2 ++
 net/ipv6/route.c      | 46 ++++++++++++++++++++--------------------------
 3 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index f0aaf1c8f1a8..0165820bbafb 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -174,6 +174,7 @@ struct rt6_info {
 	int				rt6i_nh_weight;
 	unsigned short			rt6i_nfheader_len;
 	u8				rt6i_protocol;
+	u8				fib6_type;
 	u8				exception_bucket_flushed:1,
 					should_flush:1,
 					unused:6;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7ff7466c52e5..f71fcf2635d5 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2331,6 +2331,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 		.fc_flags = RTF_UP | flags,
 		.fc_nlinfo.nl_net = dev_net(dev),
 		.fc_protocol = RTPROT_KERNEL,
+		.fc_type = RTN_UNICAST,
 	};
 
 	cfg.fc_dst = *pfx;
@@ -2394,6 +2395,7 @@ static void addrconf_add_mroute(struct net_device *dev)
 		.fc_ifindex = dev->ifindex,
 		.fc_dst_len = 8,
 		.fc_flags = RTF_UP,
+		.fc_type = RTN_UNICAST,
 		.fc_nlinfo.nl_net = dev_net(dev),
 	};
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0daf4c9c9f2b..1cc01f5bb773 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -307,6 +307,7 @@ static const struct rt6_info ip6_null_entry_template = {
 	.rt6i_protocol  = RTPROT_KERNEL,
 	.rt6i_metric	= ~(u32) 0,
 	.rt6i_ref	= ATOMIC_INIT(1),
+	.fib6_type	= RTN_UNREACHABLE,
 };
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -324,6 +325,7 @@ static const struct rt6_info ip6_prohibit_entry_template = {
 	.rt6i_protocol  = RTPROT_KERNEL,
 	.rt6i_metric	= ~(u32) 0,
 	.rt6i_ref	= ATOMIC_INIT(1),
+	.fib6_type	= RTN_PROHIBIT,
 };
 
 static const struct rt6_info ip6_blk_hole_entry_template = {
@@ -339,6 +341,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
 	.rt6i_protocol  = RTPROT_KERNEL,
 	.rt6i_metric	= ~(u32) 0,
 	.rt6i_ref	= ATOMIC_INIT(1),
+	.fib6_type	= RTN_BLACKHOLE,
 };
 
 #endif
@@ -2802,6 +2805,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		goto out;
 	}
 
+	if (cfg->fc_type > RTN_MAX) {
+		NL_SET_ERR_MSG(extack, "Invalid route type");
+		goto out;
+	}
+
 	if (cfg->fc_dst_len > 128) {
 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
 		goto out;
@@ -2914,6 +2922,8 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 	rt->rt6i_metric = cfg->fc_metric;
 	rt->rt6i_nh_weight = 1;
 
+	rt->fib6_type = cfg->fc_type;
+
 	/* We cannot add true routes via loopback here,
 	   they would result in kernel looping; promote them to reject routes
 	 */
@@ -3354,6 +3364,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
 				  RTF_UP | RTF_PREF(pref),
 		.fc_protocol = RTPROT_RA,
+		.fc_type = RTN_UNICAST,
 		.fc_nlinfo.portid = 0,
 		.fc_nlinfo.nlh = NULL,
 		.fc_nlinfo.nl_net = net,
@@ -3410,6 +3421,7 @@ struct rt6_info *rt6_add_dflt_router(struct net *net,
 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
 		.fc_protocol = RTPROT_RA,
+		.fc_type = RTN_UNICAST,
 		.fc_nlinfo.portid = 0,
 		.fc_nlinfo.nlh = NULL,
 		.fc_nlinfo.nl_net = net,
@@ -3485,6 +3497,7 @@ static void rtmsg_to_fib6_config(struct net *net,
 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
 	cfg->fc_flags = rtmsg->rtmsg_flags;
+	cfg->fc_type = rtmsg->rtmsg_type;
 
 	cfg->fc_nlinfo.nl_net = net;
 
@@ -3606,10 +3619,13 @@ struct rt6_info *addrconf_dst_alloc(struct net *net,
 
 	rt->rt6i_protocol = RTPROT_KERNEL;
 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
-	if (anycast)
+	if (anycast) {
+		rt->fib6_type = RTN_ANYCAST;
 		rt->rt6i_flags |= RTF_ANYCAST;
-	else
+	} else {
+		rt->fib6_type = RTN_LOCAL;
 		rt->rt6i_flags |= RTF_LOCAL;
+	}
 
 	rt->rt6i_gateway  = *addr;
 	rt->rt6i_dst.addr = *addr;
@@ -4509,30 +4525,8 @@ static int rt6_fill_node(struct net *net,
 	rtm->rtm_table = table;
 	if (nla_put_u32(skb, RTA_TABLE, table))
 		goto nla_put_failure;
-	if (rt->rt6i_flags & RTF_REJECT) {
-		switch (rt->dst.error) {
-		case -EINVAL:
-			rtm->rtm_type = RTN_BLACKHOLE;
-			break;
-		case -EACCES:
-			rtm->rtm_type = RTN_PROHIBIT;
-			break;
-		case -EAGAIN:
-			rtm->rtm_type = RTN_THROW;
-			break;
-		default:
-			rtm->rtm_type = RTN_UNREACHABLE;
-			break;
-		}
-	}
-	else if (rt->rt6i_flags & RTF_LOCAL)
-		rtm->rtm_type = RTN_LOCAL;
-	else if (rt->rt6i_flags & RTF_ANYCAST)
-		rtm->rtm_type = RTN_ANYCAST;
-	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
-		rtm->rtm_type = RTN_LOCAL;
-	else
-		rtm->rtm_type = RTN_UNICAST;
+
+	rtm->rtm_type = rt->fib6_type;
 	rtm->rtm_flags = 0;
 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
 	rtm->rtm_protocol = rt->rt6i_protocol;
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 08/21] net/ipv6: Move nexthop data to fib6_nh
From: David Ahern @ 2018-04-18  0:33 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com>

Introduce fib6_nh structure and move nexthop related data from
rt6_info and rt6_info.dst to fib6_nh. References to dev, gateway or
lwtstate from a FIB lookup perspective are converted to use fib6_nh;
datapath references to dst version are left as is.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  |  32 ++--
 include/net/ip6_fib.h                              |  16 +-
 include/net/ip6_route.h                            |   6 +-
 net/ipv6/addrconf.c                                |   2 +-
 net/ipv6/ip6_fib.c                                 |   6 +-
 net/ipv6/route.c                                   | 162 +++++++++++----------
 6 files changed, 125 insertions(+), 99 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 1904c0323d39..d995a0b52d7c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2770,9 +2770,9 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
 		struct in6_addr *gw;
 		int ifindex, weight;
 
-		ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
-		weight = mlxsw_sp_rt6->rt->rt6i_nh_weight;
-		gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
+		ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
+		weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
+		gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
 							 weight))
 			return false;
@@ -2838,7 +2838,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
 	struct net_device *dev;
 
 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
-		dev = mlxsw_sp_rt6->rt->dst.dev;
+		dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
 		val ^= dev->ifindex;
 	}
 
@@ -3836,9 +3836,9 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
 
-		if (nh->rif && nh->rif->dev == rt->dst.dev &&
+		if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
-				    &rt->rt6i_gateway))
+				    &rt->fib6_nh.nh_gw))
 			return nh;
 		continue;
 	}
@@ -3895,7 +3895,7 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
 
 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
 		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
-				 list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
+				 list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
 		return;
 	}
 
@@ -3905,9 +3905,9 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
 
 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
 		if (nh && nh->offloaded)
-			mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
+			mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
 		else
-			mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
+			mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
 	}
 }
 
@@ -3922,7 +3922,7 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
 
-		rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
+		rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
 	}
 }
 
@@ -4818,8 +4818,8 @@ static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
 					const struct rt6_info *rt,
 					enum mlxsw_sp_ipip_type *ret)
 {
-	return rt->dst.dev &&
-	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
+	return rt->fib6_nh.nh_dev &&
+	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
 }
 
 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
@@ -4829,7 +4829,7 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
 {
 	const struct mlxsw_sp_ipip_ops *ipip_ops;
 	struct mlxsw_sp_ipip_entry *ipip_entry;
-	struct net_device *dev = rt->dst.dev;
+	struct net_device *dev = rt->fib6_nh.nh_dev;
 	struct mlxsw_sp_rif *rif;
 	int err;
 
@@ -4872,11 +4872,11 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
 				  struct mlxsw_sp_nexthop *nh,
 				  const struct rt6_info *rt)
 {
-	struct net_device *dev = rt->dst.dev;
+	struct net_device *dev = rt->fib6_nh.nh_dev;
 
 	nh->nh_grp = nh_grp;
-	nh->nh_weight = rt->rt6i_nh_weight;
-	memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
+	nh->nh_weight = rt->fib6_nh.nh_weight;
+	memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
 
 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 0165820bbafb..f0a88370ba95 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -127,6 +127,16 @@ struct rt6_exception {
 #define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT)
 #define FIB6_MAX_DEPTH 5
 
+struct fib6_nh {
+	struct in6_addr		nh_gw;
+	struct net_device	*nh_dev;
+	struct lwtunnel_state	*nh_lwtstate;
+
+	unsigned int		nh_flags;
+	atomic_t		nh_upper_bound;
+	int			nh_weight;
+};
+
 struct rt6_info {
 	struct dst_entry		dst;
 	struct rt6_info __rcu		*rt6_next;
@@ -149,12 +159,9 @@ struct rt6_info {
 	 */
 	struct list_head		rt6i_siblings;
 	unsigned int			rt6i_nsiblings;
-	atomic_t			rt6i_nh_upper_bound;
 
 	atomic_t			rt6i_ref;
 
-	unsigned int			rt6i_nh_flags;
-
 	/* These are in a separate cache line. */
 	struct rt6key			rt6i_dst ____cacheline_aligned_in_smp;
 	u32				rt6i_flags;
@@ -171,13 +178,14 @@ struct rt6_info {
 	u32				rt6i_metric;
 	u32				rt6i_pmtu;
 	/* more non-fragment space at head required */
-	int				rt6i_nh_weight;
 	unsigned short			rt6i_nfheader_len;
 	u8				rt6i_protocol;
 	u8				fib6_type;
 	u8				exception_bucket_flushed:1,
 					should_flush:1,
 					unused:6;
+
+	struct fib6_nh			fib6_nh;
 };
 
 #define for_each_fib6_node_rt_rcu(fn)					\
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 1130a1144dfd..655e13017a45 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -273,10 +273,10 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
 
 static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b)
 {
-	return a->dst.dev == b->dst.dev &&
+	return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev &&
 	       a->rt6i_idev == b->rt6i_idev &&
-	       ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
-	       !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
+	       ipv6_addr_equal(&a->fib6_nh.nh_gw, &b->fib6_nh.nh_gw) &&
+	       !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate);
 }
 
 #endif
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f71fcf2635d5..483c8772e856 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2369,7 +2369,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 		goto out;
 
 	for_each_fib6_node_rt_rcu(fn) {
-		if (rt->dst.dev->ifindex != dev->ifindex)
+		if (rt->fib6_nh.nh_dev->ifindex != dev->ifindex)
 			continue;
 		if ((rt->rt6i_flags & flags) != flags)
 			continue;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 74d2a3748e2f..64b73e65f114 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2221,6 +2221,7 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
 {
 	struct rt6_info *rt = v;
 	struct ipv6_route_iter *iter = seq->private;
+	const struct net_device *dev;
 
 	seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
 
@@ -2230,14 +2231,15 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "00000000000000000000000000000000 00 ");
 #endif
 	if (rt->rt6i_flags & RTF_GATEWAY)
-		seq_printf(seq, "%pi6", &rt->rt6i_gateway);
+		seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw);
 	else
 		seq_puts(seq, "00000000000000000000000000000000");
 
+	dev = rt->fib6_nh.nh_dev;
 	seq_printf(seq, " %08x %08x %08x %08x %8s\n",
 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
 		   rt->dst.__use, rt->rt6i_flags,
-		   rt->dst.dev ? rt->dst.dev->name : "");
+		   dev ? dev->name : "");
 	iter->w.leaf = NULL;
 	return 0;
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1cc01f5bb773..222af19d3403 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -466,12 +466,15 @@ static struct rt6_info *rt6_multipath_select(const struct net *net,
 	if (!fl6->mp_hash)
 		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
 
-	if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
+	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
 		return match;
 
 	list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
 				 rt6i_siblings) {
-		if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound))
+		int nh_upper_bound;
+
+		nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
+		if (fl6->mp_hash > nh_upper_bound)
 			continue;
 		if (rt6_score_route(sibling, oif, strict) < 0)
 			break;
@@ -495,13 +498,14 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
 	struct rt6_info *local = NULL;
 	struct rt6_info *sprt;
 
-	if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD))
+	if (!oif && ipv6_addr_any(saddr) &&
+	    !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
 		return rt;
 
 	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
-		struct net_device *dev = sprt->dst.dev;
+		const struct net_device *dev = sprt->fib6_nh.nh_dev;
 
-		if (sprt->rt6i_nh_flags & RTNH_F_DEAD)
+		if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
 			continue;
 
 		if (oif) {
@@ -533,7 +537,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
 			return net->ipv6.ip6_null_entry;
 	}
 
-	return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
+	return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
 }
 
 #ifdef CONFIG_IPV6_ROUTER_PREF
@@ -558,7 +562,10 @@ static void rt6_probe_deferred(struct work_struct *w)
 static void rt6_probe(struct rt6_info *rt)
 {
 	struct __rt6_probe_work *work;
+	const struct in6_addr *nh_gw;
 	struct neighbour *neigh;
+	struct net_device *dev;
+
 	/*
 	 * Okay, this does not seem to be appropriate
 	 * for now, however, we need to check if it
@@ -569,8 +576,11 @@ static void rt6_probe(struct rt6_info *rt)
 	 */
 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
 		return;
+
+	nh_gw = &rt->fib6_nh.nh_gw;
+	dev = rt->fib6_nh.nh_dev;
 	rcu_read_lock_bh();
-	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
 	if (neigh) {
 		if (neigh->nud_state & NUD_VALID)
 			goto out;
@@ -592,9 +602,9 @@ static void rt6_probe(struct rt6_info *rt)
 
 	if (work) {
 		INIT_WORK(&work->work, rt6_probe_deferred);
-		work->target = rt->rt6i_gateway;
-		dev_hold(rt->dst.dev);
-		work->dev = rt->dst.dev;
+		work->target = *nh_gw;
+		dev_hold(dev);
+		work->dev = dev;
 		schedule_work(&work->work);
 	}
 
@@ -612,7 +622,8 @@ static inline void rt6_probe(struct rt6_info *rt)
  */
 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 {
-	struct net_device *dev = rt->dst.dev;
+	const struct net_device *dev = rt->fib6_nh.nh_dev;
+
 	if (!oif || dev->ifindex == oif)
 		return 2;
 	if ((dev->flags & IFF_LOOPBACK) &&
@@ -623,15 +634,16 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 
 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
 {
-	struct neighbour *neigh;
 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
+	struct neighbour *neigh;
 
 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 	    !(rt->rt6i_flags & RTF_GATEWAY))
 		return RT6_NUD_SUCCEED;
 
 	rcu_read_lock_bh();
-	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+	neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
+					  &rt->fib6_nh.nh_gw);
 	if (neigh) {
 		read_lock(&neigh->lock);
 		if (neigh->nud_state & NUD_VALID)
@@ -679,11 +691,11 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 	bool match_do_rr = false;
 	struct inet6_dev *idev = rt->rt6i_idev;
 
-	if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
 		goto out;
 
 	if (idev->cnf.ignore_routes_with_linkdown &&
-	    rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
+	    rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
 		goto out;
 
@@ -888,7 +900,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 /* called with rcu_lock held */
 static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
 {
-	struct net_device *dev = rt->dst.dev;
+	struct net_device *dev = rt->fib6_nh.nh_dev;
 
 	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
 		/* for copies of local routes, dst->dev needs to be the
@@ -928,7 +940,7 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
 	if (rt->rt6i_idev)
 		in6_dev_hold(rt->rt6i_idev);
 	rt->dst.lastuse = jiffies;
-	rt->rt6i_gateway = ort->rt6i_gateway;
+	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
 	rt->rt6i_flags = ort->rt6i_flags;
 	rt6_set_from(rt, ort);
 	rt->rt6i_metric = ort->rt6i_metric;
@@ -937,7 +949,7 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
 #endif
 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
 	rt->rt6i_table = ort->rt6i_table;
-	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
+	rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
 }
 
 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
@@ -1308,7 +1320,7 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
 static int rt6_insert_exception(struct rt6_info *nrt,
 				struct rt6_info *ort)
 {
-	struct net *net = dev_net(ort->dst.dev);
+	struct net *net = dev_net(nrt->dst.dev);
 	struct rt6_exception_bucket *bucket;
 	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
@@ -2313,7 +2325,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
 	for_each_fib6_node_rt_rcu(fn) {
-		if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+		if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
 			continue;
 		if (rt6_check_expired(rt))
 			continue;
@@ -2321,14 +2333,14 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 			break;
 		if (!(rt->rt6i_flags & RTF_GATEWAY))
 			continue;
-		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
+		if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
 			continue;
 		/* rt_cache's gateway might be different from its 'parent'
 		 * in the case of an ip redirect.
 		 * So we keep searching in the exception table if the gateway
 		 * is different.
 		 */
-		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
+		if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
 			rt_cache = rt6_find_cached_rt(rt,
 						      &fl6->daddr,
 						      &fl6->saddr);
@@ -2905,7 +2917,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 					   &lwtstate, extack);
 		if (err)
 			goto out;
-		rt->dst.lwtstate = lwtstate_get(lwtstate);
+		rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
 		lwtunnel_set_redirect(&rt->dst);
 	}
 
@@ -2920,7 +2932,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 #endif
 
 	rt->rt6i_metric = cfg->fc_metric;
-	rt->rt6i_nh_weight = 1;
+	rt->fib6_nh.nh_weight = 1;
 
 	rt->fib6_type = cfg->fc_type;
 
@@ -2975,7 +2987,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		if (err)
 			goto out;
 
-		rt->rt6i_gateway = cfg->fc_gateway;
+		rt->fib6_nh.nh_gw = rt->rt6i_gateway = cfg->fc_gateway;
 	}
 
 	err = -ENODEV;
@@ -3010,9 +3022,9 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 install_route:
 	if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
 	    !netif_carrier_ok(dev))
-		rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
-	rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
-	rt->dst.dev = dev;
+		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
+	rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
+	rt->fib6_nh.nh_dev = rt->dst.dev = dev;
 	rt->rt6i_idev = idev;
 	rt->rt6i_table = table;
 
@@ -3171,11 +3183,11 @@ static int ip6_route_del(struct fib6_config *cfg,
 				rt = rt_cache;
 			}
 			if (cfg->fc_ifindex &&
-			    (!rt->dst.dev ||
-			     rt->dst.dev->ifindex != cfg->fc_ifindex))
+			    (!rt->fib6_nh.nh_dev ||
+			     rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
 				continue;
 			if (cfg->fc_flags & RTF_GATEWAY &&
-			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
+			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
 				continue;
 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
 				continue;
@@ -3337,11 +3349,11 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 		goto out;
 
 	for_each_fib6_node_rt_rcu(fn) {
-		if (rt->dst.dev->ifindex != ifindex)
+		if (rt->fib6_nh.nh_dev->ifindex != ifindex)
 			continue;
 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
 			continue;
-		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
+		if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
 			continue;
 		ip6_hold_safe(NULL, &rt, false);
 		break;
@@ -3398,9 +3410,9 @@ struct rt6_info *rt6_get_dflt_router(struct net *net,
 
 	rcu_read_lock();
 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
-		if (dev == rt->dst.dev &&
+		if (dev == rt->fib6_nh.nh_dev &&
 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
-		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
+		    ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
 			break;
 	}
 	if (rt)
@@ -3627,6 +3639,8 @@ struct rt6_info *addrconf_dst_alloc(struct net *net,
 		rt->rt6i_flags |= RTF_LOCAL;
 	}
 
+	rt->fib6_nh.nh_gw = *addr;
+	rt->fib6_nh.nh_dev = dev;
 	rt->rt6i_gateway  = *addr;
 	rt->rt6i_dst.addr = *addr;
 	rt->rt6i_dst.plen = 128;
@@ -3649,7 +3663,7 @@ static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
 
-	if (((void *)rt->dst.dev == dev || !dev) &&
+	if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
 	    rt != net->ipv6.ip6_null_entry &&
 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
 		spin_lock_bh(&rt6_exception_lock);
@@ -3681,7 +3695,7 @@ static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
 	struct in6_addr *gateway = (struct in6_addr *)arg;
 
 	if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
-	    ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
+	    ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
 		return -1;
 	}
 
@@ -3729,8 +3743,8 @@ static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
 
 static bool rt6_is_dead(const struct rt6_info *rt)
 {
-	if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
-	    (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
+	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
+	    (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
 	     rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
 		return true;
 
@@ -3743,11 +3757,11 @@ static int rt6_multipath_total_weight(const struct rt6_info *rt)
 	int total = 0;
 
 	if (!rt6_is_dead(rt))
-		total += rt->rt6i_nh_weight;
+		total += rt->fib6_nh.nh_weight;
 
 	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
 		if (!rt6_is_dead(iter))
-			total += iter->rt6i_nh_weight;
+			total += iter->fib6_nh.nh_weight;
 	}
 
 	return total;
@@ -3758,11 +3772,11 @@ static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
 	int upper_bound = -1;
 
 	if (!rt6_is_dead(rt)) {
-		*weight += rt->rt6i_nh_weight;
+		*weight += rt->fib6_nh.nh_weight;
 		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
 						    total) - 1;
 	}
-	atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
+	atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
 }
 
 static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
@@ -3805,8 +3819,8 @@ static int fib6_ifup(struct rt6_info *rt, void *p_arg)
 	const struct arg_netdev_event *arg = p_arg;
 	struct net *net = dev_net(arg->dev);
 
-	if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
-		rt->rt6i_nh_flags &= ~arg->nh_flags;
+	if (rt != net->ipv6.ip6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
+		rt->fib6_nh.nh_flags &= ~arg->nh_flags;
 		fib6_update_sernum_upto_root(net, rt);
 		rt6_multipath_rebalance(rt);
 	}
@@ -3834,10 +3848,10 @@ static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
 {
 	struct rt6_info *iter;
 
-	if (rt->dst.dev == dev)
+	if (rt->fib6_nh.nh_dev == dev)
 		return true;
 	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
-		if (iter->dst.dev == dev)
+		if (iter->fib6_nh.nh_dev == dev)
 			return true;
 
 	return false;
@@ -3858,11 +3872,12 @@ static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
 	struct rt6_info *iter;
 	unsigned int dead = 0;
 
-	if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD)
+	if (rt->fib6_nh.nh_dev == down_dev ||
+	    rt->fib6_nh.nh_flags & RTNH_F_DEAD)
 		dead++;
 	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
-		if (iter->dst.dev == down_dev ||
-		    iter->rt6i_nh_flags & RTNH_F_DEAD)
+		if (iter->fib6_nh.nh_dev == down_dev ||
+		    iter->fib6_nh.nh_flags & RTNH_F_DEAD)
 			dead++;
 
 	return dead;
@@ -3874,11 +3889,11 @@ static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
 {
 	struct rt6_info *iter;
 
-	if (rt->dst.dev == dev)
-		rt->rt6i_nh_flags |= nh_flags;
+	if (rt->fib6_nh.nh_dev == dev)
+		rt->fib6_nh.nh_flags |= nh_flags;
 	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
-		if (iter->dst.dev == dev)
-			iter->rt6i_nh_flags |= nh_flags;
+		if (iter->fib6_nh.nh_dev == dev)
+			iter->fib6_nh.nh_flags |= nh_flags;
 }
 
 /* called with write lock held for table with rt */
@@ -3893,12 +3908,12 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
 
 	switch (arg->event) {
 	case NETDEV_UNREGISTER:
-		return rt->dst.dev == dev ? -1 : 0;
+		return rt->fib6_nh.nh_dev == dev ? -1 : 0;
 	case NETDEV_DOWN:
 		if (rt->should_flush)
 			return -1;
 		if (!rt->rt6i_nsiblings)
-			return rt->dst.dev == dev ? -1 : 0;
+			return rt->fib6_nh.nh_dev == dev ? -1 : 0;
 		if (rt6_multipath_uses_dev(rt, dev)) {
 			unsigned int count;
 
@@ -3914,10 +3929,10 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
 		}
 		return -2;
 	case NETDEV_CHANGE:
-		if (rt->dst.dev != dev ||
+		if (rt->fib6_nh.nh_dev != dev ||
 		    rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
 			break;
-		rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
+		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
 		rt6_multipath_rebalance(rt);
 		break;
 	}
@@ -3969,7 +3984,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 	   Since RFC 1981 doesn't include administrative MTU increase
 	   update PMTU increase is a MUST. (i.e. jumbo frame)
 	 */
-	if (rt->dst.dev == arg->dev &&
+	if (rt->fib6_nh.nh_dev == arg->dev &&
 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
 		spin_lock_bh(&rt6_exception_lock);
 		if (dst_metric_raw(&rt->dst, RTAX_MTU) &&
@@ -4255,7 +4270,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 			goto cleanup;
 		}
 
-		rt->rt6i_nh_weight = rtnh->rtnh_hops + 1;
+		rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
 
 		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
 		if (err) {
@@ -4412,7 +4427,7 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
 			    + NLA_ALIGN(sizeof(struct rtnexthop))
 			    + nla_total_size(16) /* RTA_GATEWAY */
-			    + lwtunnel_get_encap_size(rt->dst.lwtstate);
+			    + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
 
 		nexthop_len *= rt->rt6i_nsiblings;
 	}
@@ -4430,38 +4445,38 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
 	       + nla_total_size(sizeof(struct rta_cacheinfo))
 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
 	       + nla_total_size(1) /* RTA_PREF */
-	       + lwtunnel_get_encap_size(rt->dst.lwtstate)
+	       + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
 	       + nexthop_len;
 }
 
 static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
 			    unsigned int *flags, bool skip_oif)
 {
-	if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
 		*flags |= RTNH_F_DEAD;
 
-	if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) {
+	if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
 		*flags |= RTNH_F_LINKDOWN;
 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
 			*flags |= RTNH_F_DEAD;
 	}
 
 	if (rt->rt6i_flags & RTF_GATEWAY) {
-		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
+		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
 			goto nla_put_failure;
 	}
 
-	*flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK);
-	if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
+	*flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
+	if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
 		*flags |= RTNH_F_OFFLOAD;
 
 	/* not needed for multipath encoding b/c it has a rtnexthop struct */
-	if (!skip_oif && rt->dst.dev &&
-	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
+	if (!skip_oif && rt->fib6_nh.nh_dev &&
+	    nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
 		goto nla_put_failure;
 
-	if (rt->dst.lwtstate &&
-	    lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
+	if (rt->fib6_nh.nh_lwtstate &&
+	    lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
 		goto nla_put_failure;
 
 	return 0;
@@ -4473,6 +4488,7 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
 /* add multipath next hop */
 static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
 {
+	const struct net_device *dev = rt->fib6_nh.nh_dev;
 	struct rtnexthop *rtnh;
 	unsigned int flags = 0;
 
@@ -4480,8 +4496,8 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
 	if (!rtnh)
 		goto nla_put_failure;
 
-	rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;
-	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
+	rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
+	rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
 
 	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
 		goto nla_put_failure;
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 09/21] net/ipv6: Defer initialization of dst to data path
From: David Ahern @ 2018-04-18  0:33 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com>

Defer setting dst input, output and error until fib entry is copied.

The reject path from ip6_route_info_create is moved to a new function
ip6_rt_init_dst_reject with a helper doing the conversion from fib6_type
to dst error.

The remainder of the new ip6_rt_init_dst is an amalgamtion of dst code
from addrconf_dst_alloc and the non-reject path of ip6_route_info_create.
The dst output function is always ip6_output and the input function is
either ip6_input (local routes), ip6_mc_input (multicast routes) or
ip6_forward (anything else).

A couple of places using dst.error are updated to look at rt6i_flags.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/ipv6/route.c | 115 +++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 74 insertions(+), 41 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 222af19d3403..3b301aafd2ed 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -920,6 +920,75 @@ static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
 	return dev;
 }
 
+static const int fib6_prop[RTN_MAX + 1] = {
+	[RTN_UNSPEC]	= 0,
+	[RTN_UNICAST]	= 0,
+	[RTN_LOCAL]	= 0,
+	[RTN_BROADCAST]	= 0,
+	[RTN_ANYCAST]	= 0,
+	[RTN_MULTICAST]	= 0,
+	[RTN_BLACKHOLE]	= -EINVAL,
+	[RTN_UNREACHABLE] = -EHOSTUNREACH,
+	[RTN_PROHIBIT]	= -EACCES,
+	[RTN_THROW]	= -EAGAIN,
+	[RTN_NAT]	= -EINVAL,
+	[RTN_XRESOLVE]	= -EINVAL,
+};
+
+static int ip6_rt_type_to_error(u8 fib6_type)
+{
+	return fib6_prop[fib6_type];
+}
+
+static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct rt6_info *ort)
+{
+	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
+
+	switch (ort->fib6_type) {
+	case RTN_BLACKHOLE:
+		rt->dst.output = dst_discard_out;
+		rt->dst.input = dst_discard;
+		break;
+	case RTN_PROHIBIT:
+		rt->dst.output = ip6_pkt_prohibit_out;
+		rt->dst.input = ip6_pkt_prohibit;
+		break;
+	case RTN_THROW:
+	case RTN_UNREACHABLE:
+	default:
+		rt->dst.output = ip6_pkt_discard_out;
+		rt->dst.input = ip6_pkt_discard;
+		break;
+	}
+}
+
+static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
+{
+	if (ort->rt6i_flags & RTF_REJECT) {
+		ip6_rt_init_dst_reject(rt, ort);
+		return;
+	}
+
+	rt->dst.error = 0;
+	rt->dst.output = ip6_output;
+
+	if (ort->fib6_type == RTN_LOCAL) {
+		rt->dst.flags |= DST_HOST;
+		rt->dst.input = ip6_input;
+	} else if (ipv6_addr_type(&ort->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) {
+		rt->dst.input = ip6_mc_input;
+	} else {
+		rt->dst.input = ip6_forward;
+	}
+
+	if (ort->fib6_nh.nh_lwtstate) {
+		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
+		lwtunnel_set_redirect(&rt->dst);
+	}
+
+	rt->dst.lastuse = jiffies;
+}
+
 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
 {
 	BUG_ON(from->from);
@@ -932,14 +1001,12 @@ static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
 
 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
 {
-	rt->dst.input = ort->dst.input;
-	rt->dst.output = ort->dst.output;
+	ip6_rt_init_dst(rt, ort);
+
 	rt->rt6i_dst = ort->rt6i_dst;
-	rt->dst.error = ort->dst.error;
 	rt->rt6i_idev = ort->rt6i_idev;
 	if (rt->rt6i_idev)
 		in6_dev_hold(rt->rt6i_idev);
-	rt->dst.lastuse = jiffies;
 	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
 	rt->rt6i_flags = ort->rt6i_flags;
 	rt6_set_from(rt, ort);
@@ -2329,7 +2396,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 			continue;
 		if (rt6_check_expired(rt))
 			continue;
-		if (rt->dst.error)
+		if (rt->rt6i_flags & RTF_REJECT)
 			break;
 		if (!(rt->rt6i_flags & RTF_GATEWAY))
 			continue;
@@ -2357,7 +2424,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 
 	if (!rt)
 		rt = net->ipv6.ip6_null_entry;
-	else if (rt->dst.error) {
+	else if (rt->rt6i_flags & RTF_REJECT) {
 		rt = net->ipv6.ip6_null_entry;
 		goto out;
 	}
@@ -2900,15 +2967,6 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 
 	addr_type = ipv6_addr_type(&cfg->fc_dst);
 
-	if (addr_type & IPV6_ADDR_MULTICAST)
-		rt->dst.input = ip6_mc_input;
-	else if (cfg->fc_flags & RTF_LOCAL)
-		rt->dst.input = ip6_input;
-	else
-		rt->dst.input = ip6_forward;
-
-	rt->dst.output = ip6_output;
-
 	if (cfg->fc_encap) {
 		struct lwtunnel_state *lwtstate;
 
@@ -2918,7 +2976,6 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		if (err)
 			goto out;
 		rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
-		lwtunnel_set_redirect(&rt->dst);
 	}
 
 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
@@ -2958,27 +3015,6 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 			}
 		}
 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
-		switch (cfg->fc_type) {
-		case RTN_BLACKHOLE:
-			rt->dst.error = -EINVAL;
-			rt->dst.output = dst_discard_out;
-			rt->dst.input = dst_discard;
-			break;
-		case RTN_PROHIBIT:
-			rt->dst.error = -EACCES;
-			rt->dst.output = ip6_pkt_prohibit_out;
-			rt->dst.input = ip6_pkt_prohibit;
-			break;
-		case RTN_THROW:
-		case RTN_UNREACHABLE:
-		default:
-			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
-					: (cfg->fc_type == RTN_UNREACHABLE)
-					? -EHOSTUNREACH : -ENETUNREACH;
-			rt->dst.output = ip6_pkt_discard_out;
-			rt->dst.input = ip6_pkt_discard;
-			break;
-		}
 		goto install_route;
 	}
 
@@ -3623,12 +3659,9 @@ struct rt6_info *addrconf_dst_alloc(struct net *net,
 		return ERR_PTR(-ENOMEM);
 
 	in6_dev_hold(idev);
-
-	rt->dst.flags |= DST_HOST;
-	rt->dst.input = ip6_input;
-	rt->dst.output = ip6_output;
 	rt->rt6i_idev = idev;
 
+	rt->dst.flags |= DST_HOST;
 	rt->rt6i_protocol = RTPROT_KERNEL;
 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
 	if (anycast) {
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 10/21] net/ipv6: move metrics from dst to rt6_info
From: David Ahern @ 2018-04-18  0:33 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com>

Similar to IPv4, add fib metrics to the fib struct, which at the moment
is rt6_info. Will be moved to fib6_info in a later patch. Copy metrics
into dst by reference using refcount.

To make the transition:
- add dst_metrics to rt6_info. Default to dst_default_metrics if no
  metrics are passed during route add. No need for a separate pmtu
  entry; it can reference the MTU slot in fib6_metrics

- ip6_convert_metrics allocates memory in the FIB entry and uses
  ip_metrics_convert to copy from netlink attribute to metrics entry

- the convert metrics call is done in ip6_route_info_create simplifying
  the route add path
  + fib6_commit_metrics and fib6_copy_metrics and the temporary
    mx6_config are no longer needed

- add fib6_metric_set helper to change the value of a metric in the
  fib entry since dst_metric_set can no longer be used

- cow_metrics for IPv6 can drop to dst_cow_metrics_generic

- rt6_dst_from_metrics_check is no longer needed

- rt6_fill_node needs the FIB entry and dst as separate arguments to
  keep compatibility with existing output. Current dst address is
  renamed to dest.
  (to be consistent with IPv4 rt6_fill_node really should be split
  into 2 functions similar to fib_dump_info and rt_fill_info)

- rt6_fill_node no longer needs the temporary metrics variable

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h |  17 ++--
 net/core/dst.c        |   1 +
 net/ipv6/ip6_fib.c    |  66 +++++--------
 net/ipv6/ndisc.c      |  10 +-
 net/ipv6/route.c      | 257 +++++++++++++++++++-------------------------------
 5 files changed, 133 insertions(+), 218 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index f0a88370ba95..1f8dc9d12abb 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -94,11 +94,6 @@ struct fib6_gc_args {
 #define FIB6_SUBTREE(fn)	(rcu_dereference_protected((fn)->subtree, 1))
 #endif
 
-struct mx6_config {
-	const u32 *mx;
-	DECLARE_BITMAP(mx_valid, RTAX_MAX);
-};
-
 /*
  *	routing information
  *
@@ -176,7 +171,6 @@ struct rt6_info {
 	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
 
 	u32				rt6i_metric;
-	u32				rt6i_pmtu;
 	/* more non-fragment space at head required */
 	unsigned short			rt6i_nfheader_len;
 	u8				rt6i_protocol;
@@ -185,6 +179,8 @@ struct rt6_info {
 					should_flush:1,
 					unused:6;
 
+	struct dst_metrics		*fib6_metrics;
+#define fib6_pmtu		fib6_metrics->metrics[RTAX_MTU-1]
 	struct fib6_nh			fib6_nh;
 };
 
@@ -390,8 +386,7 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
 		    void *arg);
 
 int fib6_add(struct fib6_node *root, struct rt6_info *rt,
-	     struct nl_info *info, struct mx6_config *mxc,
-	     struct netlink_ext_ack *extack);
+	     struct nl_info *info, struct netlink_ext_ack *extack);
 int fib6_del(struct rt6_info *rt, struct nl_info *info);
 
 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
@@ -420,6 +415,12 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb);
 void fib6_update_sernum(struct net *net, struct rt6_info *rt);
 void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt);
 
+void fib6_metric_set(struct rt6_info *f6i, int metric, u32 val);
+static inline bool fib6_metric_locked(struct rt6_info *f6i, int metric)
+{
+	return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric));
+}
+
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 int fib6_rules_init(void);
 void fib6_rules_cleanup(void);
diff --git a/net/core/dst.c b/net/core/dst.c
index 007aa0b08291..2d9b37f8944a 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -58,6 +58,7 @@ const struct dst_metrics dst_default_metrics = {
 	 */
 	.refcnt = REFCOUNT_INIT(1),
 };
+EXPORT_SYMBOL(dst_default_metrics);
 
 void dst_init(struct dst_entry *dst, struct dst_ops *ops,
 	      struct net_device *dev, int initial_ref, int initial_obsolete,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 64b73e65f114..0d94c56c3e41 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -578,6 +578,24 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	return res;
 }
 
+void fib6_metric_set(struct rt6_info *f6i, int metric, u32 val)
+{
+	if (!f6i)
+		return;
+
+	if (f6i->fib6_metrics == &dst_default_metrics) {
+		struct dst_metrics *p = kzalloc(sizeof(*p), GFP_ATOMIC);
+
+		if (!p)
+			return;
+
+		refcount_set(&p->refcnt, 1);
+		f6i->fib6_metrics = p;
+	}
+
+	f6i->fib6_metrics->metrics[metric - 1] = val;
+}
+
 /*
  *	Routing Table
  *
@@ -801,38 +819,6 @@ static struct fib6_node *fib6_add_1(struct net *net,
 	return ln;
 }
 
-static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc)
-{
-	int i;
-
-	for (i = 0; i < RTAX_MAX; i++) {
-		if (test_bit(i, mxc->mx_valid))
-			mp[i] = mxc->mx[i];
-	}
-}
-
-static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc)
-{
-	if (!mxc->mx)
-		return 0;
-
-	if (dst->flags & DST_HOST) {
-		u32 *mp = dst_metrics_write_ptr(dst);
-
-		if (unlikely(!mp))
-			return -ENOMEM;
-
-		fib6_copy_metrics(mp, mxc);
-	} else {
-		dst_init_metrics(dst, mxc->mx, false);
-
-		/* We've stolen mx now. */
-		mxc->mx = NULL;
-	}
-
-	return 0;
-}
-
 static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
 			  struct net *net)
 {
@@ -866,7 +852,7 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
  */
 
 static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
-			    struct nl_info *info, struct mx6_config *mxc,
+			    struct nl_info *info,
 			    struct netlink_ext_ack *extack)
 {
 	struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
@@ -923,7 +909,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 					rt6_clean_expires(iter);
 				else
 					rt6_set_expires(iter, rt->dst.expires);
-				iter->rt6i_pmtu = rt->rt6i_pmtu;
+				fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
 				return -EEXIST;
 			}
 			/* If we have the same destination and the same metric,
@@ -1002,9 +988,6 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 
 add:
 		nlflags |= NLM_F_CREATE;
-		err = fib6_commit_metrics(&rt->dst, mxc);
-		if (err)
-			return err;
 
 		err = call_fib6_entry_notifiers(info->nl_net,
 						FIB_EVENT_ENTRY_ADD,
@@ -1035,10 +1018,6 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 			return -ENOENT;
 		}
 
-		err = fib6_commit_metrics(&rt->dst, mxc);
-		if (err)
-			return err;
-
 		err = call_fib6_entry_notifiers(info->nl_net,
 						FIB_EVENT_ENTRY_REPLACE,
 						rt, extack);
@@ -1135,8 +1114,7 @@ void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt)
  */
 
 int fib6_add(struct fib6_node *root, struct rt6_info *rt,
-	     struct nl_info *info, struct mx6_config *mxc,
-	     struct netlink_ext_ack *extack)
+	     struct nl_info *info, struct netlink_ext_ack *extack)
 {
 	struct fib6_table *table = rt->rt6i_table;
 	struct fib6_node *fn, *pn = NULL;
@@ -1244,7 +1222,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	}
 #endif
 
-	err = fib6_add_rt2node(fn, rt, info, mxc, extack);
+	err = fib6_add_rt2node(fn, rt, info, extack);
 	if (!err) {
 		__fib6_update_sernum_upto_root(rt, sernum);
 		fib6_start_gc(info->nl_net, rt);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 3fbc3805e69b..b058ea9ecec0 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1323,9 +1323,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 	    ra_msg->icmph.icmp6_hop_limit) {
 		if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
 			in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
-			if (rt)
-				dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
-					       ra_msg->icmph.icmp6_hop_limit);
+			fib6_metric_set(rt, RTAX_HOPLIMIT,
+					ra_msg->icmph.icmp6_hop_limit);
 		} else {
 			ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n");
 		}
@@ -1477,10 +1476,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 			ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
 		} else if (in6_dev->cnf.mtu6 != mtu) {
 			in6_dev->cnf.mtu6 = mtu;
-
-			if (rt)
-				dst_metric_set(&rt->dst, RTAX_MTU, mtu);
-
+			fib6_metric_set(rt, RTAX_MTU, mtu);
 			rt6_mtu_change(skb->dev, mtu);
 		}
 	}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3b301aafd2ed..62aafd06c35f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -96,12 +96,11 @@ static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 					   struct sk_buff *skb, u32 mtu);
 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
 					struct sk_buff *skb);
-static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
 static size_t rt6_nlmsg_size(struct rt6_info *rt);
-static int rt6_fill_node(struct net *net,
-			 struct sk_buff *skb, struct rt6_info *rt,
-			 struct in6_addr *dst, struct in6_addr *src,
+static int rt6_fill_node(struct net *net, struct sk_buff *skb,
+			 struct rt6_info *rt, struct dst_entry *dst,
+			 struct in6_addr *dest, struct in6_addr *src,
 			 int iif, int type, u32 portid, u32 seq,
 			 unsigned int flags);
 static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
@@ -183,23 +182,6 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 	}
 }
 
-static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
-{
-	return dst_metrics_write_ptr(&rt->from->dst);
-}
-
-static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
-{
-	struct rt6_info *rt = (struct rt6_info *)dst;
-
-	if (rt->rt6i_flags & RTF_PCPU)
-		return rt6_pcpu_cow_metrics(rt);
-	else if (rt->rt6i_flags & RTF_CACHE)
-		return NULL;
-	else
-		return dst_cow_metrics_generic(dst, old);
-}
-
 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
 					     struct sk_buff *skb,
 					     const void *daddr)
@@ -249,7 +231,7 @@ static struct dst_ops ip6_dst_ops_template = {
 	.check			=	ip6_dst_check,
 	.default_advmss		=	ip6_default_advmss,
 	.mtu			=	ip6_mtu,
-	.cow_metrics		=	ipv6_cow_metrics,
+	.cow_metrics		=	dst_cow_metrics_generic,
 	.destroy		=	ip6_dst_destroy,
 	.ifdown			=	ip6_dst_ifdown,
 	.negative_advice	=	ip6_negative_advice,
@@ -353,6 +335,7 @@ static void rt6_info_init(struct rt6_info *rt)
 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 	INIT_LIST_HEAD(&rt->rt6i_siblings);
 	INIT_LIST_HEAD(&rt->rt6i_uncached);
+	rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
 }
 
 /* allocate dst with ip6_dst_ops */
@@ -395,6 +378,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 	struct rt6_exception_bucket *bucket;
 	struct rt6_info *from = rt->from;
 	struct inet6_dev *idev;
+	struct dst_metrics *m;
 
 	dst_destroy_metrics_generic(dst);
 	free_percpu(rt->rt6i_pcpu);
@@ -411,6 +395,10 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 		kfree(bucket);
 	}
 
+	m = rt->fib6_metrics;
+	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
+		kfree(m);
+
 	rt->from = NULL;
 	dst_release(&from->dst);
 }
@@ -996,7 +984,11 @@ static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
 	rt->rt6i_flags &= ~RTF_EXPIRES;
 	dst_hold(&from->dst);
 	rt->from = from;
-	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
+	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
+	if (from->fib6_metrics != &dst_default_metrics) {
+		rt->dst._metrics |= DST_METRICS_REFCOUNTED;
+		refcount_inc(&from->fib6_metrics->refcnt);
+	}
 }
 
 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
@@ -1140,7 +1132,6 @@ EXPORT_SYMBOL(rt6_lookup);
  */
 
 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
-			struct mx6_config *mxc,
 			struct netlink_ext_ack *extack)
 {
 	int err;
@@ -1148,7 +1139,7 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
 
 	table = rt->rt6i_table;
 	spin_lock_bh(&table->tb6_lock);
-	err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
+	err = fib6_add(&table->tb6_root, rt, info, extack);
 	spin_unlock_bh(&table->tb6_lock);
 
 	return err;
@@ -1157,11 +1148,10 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
 int ip6_ins_rt(struct net *net, struct rt6_info *rt)
 {
 	struct nl_info info = {	.nl_net = net, };
-	struct mx6_config mxc = { .mx = NULL, };
 
 	/* Hold dst to account for the reference from the fib6 tree */
 	dst_hold(&rt->dst);
-	return __ip6_ins_rt(rt, &info, &mxc, NULL);
+	return __ip6_ins_rt(rt, &info, NULL);
 }
 
 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
@@ -1232,8 +1222,8 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
 	p = this_cpu_ptr(rt->rt6i_pcpu);
 	pcpu_rt = *p;
 
-	if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
-		rt6_dst_from_metrics_check(pcpu_rt);
+	if (pcpu_rt)
+		ip6_hold_safe(NULL, &pcpu_rt, false);
 
 	return pcpu_rt;
 }
@@ -1254,7 +1244,6 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net,
 	prev = cmpxchg(p, NULL, pcpu_rt);
 	BUG_ON(prev);
 
-	rt6_dst_from_metrics_check(pcpu_rt);
 	return pcpu_rt;
 }
 
@@ -1384,6 +1373,16 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
 	return NULL;
 }
 
+static unsigned int fib6_mtu(const struct rt6_info *rt)
+{
+	unsigned int mtu;
+
+	mtu = rt->fib6_pmtu ? : rt->rt6i_idev->cnf.mtu6;
+	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
+
+	return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
+}
+
 static int rt6_insert_exception(struct rt6_info *nrt,
 				struct rt6_info *ort)
 {
@@ -1436,7 +1435,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
 	 * Only insert this exception route if its mtu
 	 * is less than ort's mtu value.
 	 */
-	if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
+	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -1673,12 +1672,12 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
 			struct rt6_info *entry = rt6_ex->rt6i;
 
 			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
-			 * route), the metrics of its rt->dst.from have already
+			 * route), the metrics of its rt->from have already
 			 * been updated.
 			 */
-			if (entry->rt6i_pmtu &&
+			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
 			    rt6_mtu_change_route_allowed(idev, entry, mtu))
-				entry->rt6i_pmtu = mtu;
+				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
 		}
 		bucket++;
 	}
@@ -1844,10 +1843,9 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		trace_fib6_table_lookup(net, rt, table, fl6);
 		return rt;
 	} else if (rt->rt6i_flags & RTF_CACHE) {
-		if (ip6_hold_safe(net, &rt, true)) {
+		if (ip6_hold_safe(net, &rt, true))
 			dst_use_noref(&rt->dst, jiffies);
-			rt6_dst_from_metrics_check(rt);
-		}
+
 		rcu_read_unlock();
 		trace_fib6_table_lookup(net, rt, table, fl6);
 		return rt;
@@ -2147,13 +2145,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
  *	Destination cache support functions
  */
 
-static void rt6_dst_from_metrics_check(struct rt6_info *rt)
-{
-	if (rt->from &&
-	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst))
-		dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);
-}
-
 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
 {
 	u32 rt_cookie = 0;
@@ -2188,8 +2179,6 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 	 * into this function always.
 	 */
 
-	rt6_dst_from_metrics_check(rt);
-
 	if (rt->rt6i_flags & RTF_PCPU ||
 	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
 		return rt6_dst_from_check(rt, cookie);
@@ -2242,8 +2231,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
 {
 	struct net *net = dev_net(rt->dst.dev);
 
+	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
 	rt->rt6i_flags |= RTF_MODIFIED;
-	rt->rt6i_pmtu = mtu;
 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
 }
 
@@ -2289,10 +2278,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 	} else if (daddr) {
 		struct rt6_info *nrt6;
 
-		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
+		nrt6 = ip6_rt_cache_alloc(rt6->from, daddr, saddr);
 		if (nrt6) {
 			rt6_do_update_pmtu(nrt6, mtu);
-			if (rt6_insert_exception(nrt6, rt6))
+			if (rt6_insert_exception(nrt6, rt6->from))
 				dst_release_immediate(&nrt6->dst);
 		}
 	}
@@ -2533,12 +2522,8 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
 
 static unsigned int ip6_mtu(const struct dst_entry *dst)
 {
-	const struct rt6_info *rt = (const struct rt6_info *)dst;
-	unsigned int mtu = rt->rt6i_pmtu;
 	struct inet6_dev *idev;
-
-	if (mtu)
-		goto out;
+	unsigned int mtu;
 
 	mtu = dst_metric_raw(dst, RTAX_MTU);
 	if (mtu)
@@ -2622,60 +2607,24 @@ static int ip6_dst_gc(struct dst_ops *ops)
 	return entries > rt_max_size;
 }
 
-static int ip6_convert_metrics(struct mx6_config *mxc,
-			       const struct fib6_config *cfg)
+static int ip6_convert_metrics(struct net *net, struct rt6_info *rt,
+			       struct fib6_config *cfg)
 {
-	struct net *net = cfg->fc_nlinfo.nl_net;
-	bool ecn_ca = false;
-	struct nlattr *nla;
-	int remaining;
-	u32 *mp;
-
-	if (!cfg->fc_mx)
-		return 0;
-
-	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
-	if (unlikely(!mp))
-		return -ENOMEM;
-
-	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
-		int type = nla_type(nla);
-		u32 val;
-
-		if (!type)
-			continue;
-		if (unlikely(type > RTAX_MAX))
-			goto err;
-
-		if (type == RTAX_CC_ALGO) {
-			char tmp[TCP_CA_NAME_MAX];
+	int err = 0;
 
-			nla_strlcpy(tmp, nla, sizeof(tmp));
-			val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
-			if (val == TCP_CA_UNSPEC)
-				goto err;
-		} else {
-			val = nla_get_u32(nla);
-		}
-		if (type == RTAX_HOPLIMIT && val > 255)
-			val = 255;
-		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
-			goto err;
+	if (cfg->fc_mx) {
+		rt->fib6_metrics = kzalloc(sizeof(*rt->fib6_metrics),
+					   GFP_KERNEL);
+		if (unlikely(!rt->fib6_metrics))
+			return -ENOMEM;
 
-		mp[type - 1] = val;
-		__set_bit(type - 1, mxc->mx_valid);
-	}
+		refcount_set(&rt->fib6_metrics->refcnt, 1);
 
-	if (ecn_ca) {
-		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
-		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
+		err = ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len,
+					 rt->fib6_metrics->metrics);
 	}
 
-	mxc->mx = mp;
-	return 0;
- err:
-	kfree(mp);
-	return -EINVAL;
+	return err;
 }
 
 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
@@ -2955,6 +2904,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		goto out;
 	}
 
+	err = ip6_convert_metrics(net, rt, cfg);
+	if (err < 0)
+		goto out;
+
 	if (cfg->fc_flags & RTF_EXPIRES)
 		rt6_set_expires(rt, jiffies +
 				clock_t_to_jiffies(cfg->fc_expires));
@@ -3078,32 +3031,16 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 	return ERR_PTR(err);
 }
 
-int ip6_route_add(struct fib6_config *cfg,
-		  struct netlink_ext_ack *extack)
+int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack)
 {
-	struct mx6_config mxc = { .mx = NULL, };
 	struct rt6_info *rt;
 	int err;
 
 	rt = ip6_route_info_create(cfg, extack);
-	if (IS_ERR(rt)) {
-		err = PTR_ERR(rt);
-		rt = NULL;
-		goto out;
-	}
-
-	err = ip6_convert_metrics(&mxc, cfg);
-	if (err)
-		goto out;
-
-	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
-
-	kfree(mxc.mx);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
 
-	return err;
-out:
-	if (rt)
-		dst_release_immediate(&rt->dst);
+	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
 
 	return err;
 }
@@ -3157,7 +3094,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
 		if (skb) {
 			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
 
-			if (rt6_fill_node(net, skb, rt,
+			if (rt6_fill_node(net, skb, rt, NULL,
 					  NULL, NULL, 0, RTM_DELROUTE,
 					  info->portid, seq, 0) < 0) {
 				kfree_skb(skb);
@@ -3348,7 +3285,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 	 * a cached route because rt6_insert_exception() will
 	 * takes care of it
 	 */
-	if (rt6_insert_exception(nrt, rt)) {
+	if (rt6_insert_exception(nrt, rt->from)) {
 		dst_release_immediate(&nrt->dst);
 		goto out;
 	}
@@ -4018,11 +3955,14 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 	   update PMTU increase is a MUST. (i.e. jumbo frame)
 	 */
 	if (rt->fib6_nh.nh_dev == arg->dev &&
-	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
+	    !fib6_metric_locked(rt, RTAX_MTU)) {
+		u32 mtu = rt->fib6_pmtu;
+
+		if (mtu >= arg->mtu ||
+		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
+			fib6_metric_set(rt, RTAX_MTU, arg->mtu);
+
 		spin_lock_bh(&rt6_exception_lock);
-		if (dst_metric_raw(&rt->dst, RTAX_MTU) &&
-		    rt6_mtu_change_route_allowed(idev, rt, arg->mtu))
-			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
 		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
 		spin_unlock_bh(&rt6_exception_lock);
 	}
@@ -4183,7 +4123,6 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 struct rt6_nh {
 	struct rt6_info *rt6_info;
 	struct fib6_config r_cfg;
-	struct mx6_config mxc;
 	struct list_head next;
 };
 
@@ -4198,7 +4137,8 @@ static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
 	}
 }
 
-static int ip6_route_info_append(struct list_head *rt6_nh_list,
+static int ip6_route_info_append(struct net *net,
+				 struct list_head *rt6_nh_list,
 				 struct rt6_info *rt, struct fib6_config *r_cfg)
 {
 	struct rt6_nh *nh;
@@ -4214,7 +4154,7 @@ static int ip6_route_info_append(struct list_head *rt6_nh_list,
 	if (!nh)
 		return -ENOMEM;
 	nh->rt6_info = rt;
-	err = ip6_convert_metrics(&nh->mxc, r_cfg);
+	err = ip6_convert_metrics(net, rt, r_cfg);
 	if (err) {
 		kfree(nh);
 		return err;
@@ -4305,7 +4245,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 
 		rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
 
-		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
+		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
+					    rt, &r_cfg);
 		if (err) {
 			dst_release_immediate(&rt->dst);
 			goto cleanup;
@@ -4323,7 +4264,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 	err_nh = NULL;
 	list_for_each_entry(nh, &rt6_nh_list, next) {
 		rt_last = nh->rt6_info;
-		err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
+		err = __ip6_ins_rt(nh->rt6_info, info, extack);
 		/* save reference to first route for notification */
 		if (!rt_notif && !err)
 			rt_notif = nh->rt6_info;
@@ -4372,7 +4313,6 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
 		if (nh->rt6_info)
 			dst_release_immediate(&nh->rt6_info->dst);
-		kfree(nh->mxc.mx);
 		list_del(&nh->next);
 		kfree(nh);
 	}
@@ -4546,16 +4486,16 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
 	return -EMSGSIZE;
 }
 
-static int rt6_fill_node(struct net *net,
-			 struct sk_buff *skb, struct rt6_info *rt,
-			 struct in6_addr *dst, struct in6_addr *src,
+static int rt6_fill_node(struct net *net, struct sk_buff *skb,
+			 struct rt6_info *rt, struct dst_entry *dst,
+			 struct in6_addr *dest, struct in6_addr *src,
 			 int iif, int type, u32 portid, u32 seq,
 			 unsigned int flags)
 {
-	u32 metrics[RTAX_MAX];
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
-	long expires;
+	long expires = 0;
+	u32 *pmetrics;
 	u32 table;
 
 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
@@ -4583,8 +4523,8 @@ static int rt6_fill_node(struct net *net,
 	if (rt->rt6i_flags & RTF_CACHE)
 		rtm->rtm_flags |= RTM_F_CLONED;
 
-	if (dst) {
-		if (nla_put_in6_addr(skb, RTA_DST, dst))
+	if (dest) {
+		if (nla_put_in6_addr(skb, RTA_DST, dest))
 			goto nla_put_failure;
 		rtm->rtm_dst_len = 128;
 	} else if (rtm->rtm_dst_len)
@@ -4612,9 +4552,9 @@ static int rt6_fill_node(struct net *net,
 #endif
 			if (nla_put_u32(skb, RTA_IIF, iif))
 				goto nla_put_failure;
-	} else if (dst) {
+	} else if (dest) {
 		struct in6_addr saddr_buf;
-		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
+		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
 			goto nla_put_failure;
 	}
@@ -4626,10 +4566,8 @@ static int rt6_fill_node(struct net *net,
 			goto nla_put_failure;
 	}
 
-	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
-	if (rt->rt6i_pmtu)
-		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
-	if (rtnetlink_put_metrics(skb, metrics) < 0)
+	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
+	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
@@ -4661,9 +4599,10 @@ static int rt6_fill_node(struct net *net,
 			goto nla_put_failure;
 	}
 
-	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
+	if (rt->rt6i_flags & RTF_EXPIRES && dst)
+		expires = dst->expires - jiffies;
 
-	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
+	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
 		goto nla_put_failure;
 
 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
@@ -4697,10 +4636,9 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 		}
 	}
 
-	return rt6_fill_node(net,
-		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
-		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
-		     NLM_F_MULTI);
+	return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
+			     RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
+			     arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
 }
 
 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
@@ -4814,13 +4752,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 
 	skb_dst_set(skb, &rt->dst);
 	if (fibmatch)
-		err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
+		err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, iif,
 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
 				    nlh->nlmsg_seq, 0);
 	else
-		err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
-				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
-				    nlh->nlmsg_seq, 0);
+		err = rt6_fill_node(net, skb, rt, dst, &fl6.daddr, &fl6.saddr,
+				    iif, RTM_NEWROUTE,
+				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
+				    0);
 	if (err < 0) {
 		kfree_skb(skb);
 		goto errout;
@@ -4846,8 +4785,8 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
 	if (!skb)
 		goto errout;
 
-	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
-				event, info->portid, seq, nlm_flags);
+	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
+			    event, info->portid, seq, nlm_flags);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 11/21] net/ipv6: move expires into rt6_info
From: David Ahern @ 2018-04-18  0:33 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com>

Add expires to rt6_info for FIB entries, and add fib6 helpers to
manage it. Data path use of dst.expires remains.

The transition is fairly straightforward: when working with fib entries,
rt->dst.expires is just rt->expires, rt6_clean_expires is replaced with
fib6_clean_expires, rt6_set_expires becomes fib6_set_expires, and
rt6_check_expired becomes fib6_check_expired, where the fib6 versions
are added by this patch.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h | 27 +++++++++++++++++++++++----
 net/ipv6/addrconf.c   |  6 +++---
 net/ipv6/ip6_fib.c    |  8 ++++----
 net/ipv6/ndisc.c      |  2 +-
 net/ipv6/route.c      | 20 +++++++++++---------
 5 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 1f8dc9d12abb..c73b985734f5 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -179,6 +179,7 @@ struct rt6_info {
 					should_flush:1,
 					unused:6;
 
+	unsigned long			expires;
 	struct dst_metrics		*fib6_metrics;
 #define fib6_pmtu		fib6_metrics->metrics[RTAX_MTU-1]
 	struct fib6_nh			fib6_nh;
@@ -197,6 +198,26 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
 	return ((struct rt6_info *)dst)->rt6i_idev;
 }
 
+static inline void fib6_clean_expires(struct rt6_info *f6i)
+{
+	f6i->rt6i_flags &= ~RTF_EXPIRES;
+	f6i->expires = 0;
+}
+
+static inline void fib6_set_expires(struct rt6_info *f6i,
+				    unsigned long expires)
+{
+	f6i->expires = expires;
+	f6i->rt6i_flags |= RTF_EXPIRES;
+}
+
+static inline bool fib6_check_expired(const struct rt6_info *f6i)
+{
+	if (f6i->rt6i_flags & RTF_EXPIRES)
+		return time_after(jiffies, f6i->expires);
+	return false;
+}
+
 static inline void rt6_clean_expires(struct rt6_info *rt)
 {
 	rt->rt6i_flags &= ~RTF_EXPIRES;
@@ -211,11 +232,9 @@ static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires)
 
 static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
 {
-	struct rt6_info *rt;
+	if (!(rt0->rt6i_flags & RTF_EXPIRES) && rt0->from)
+		rt0->dst.expires = rt0->from->expires;
 
-	for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); rt = rt->from);
-	if (rt && rt != rt0)
-		rt0->dst.expires = rt->dst.expires;
 	dst_set_expires(&rt0->dst, timeout);
 	rt0->rt6i_flags |= RTF_EXPIRES;
 }
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 483c8772e856..a156f1a0b1a7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1190,7 +1190,7 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_r
 			ip6_del_rt(dev_net(ifp->idev->dev), rt);
 		else {
 			if (!(rt->rt6i_flags & RTF_EXPIRES))
-				rt6_set_expires(rt, expires);
+				fib6_set_expires(rt, expires);
 			ip6_rt_put(rt);
 		}
 	}
@@ -2672,9 +2672,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 				rt = NULL;
 			} else if (addrconf_finite_timeout(rt_expires)) {
 				/* not infinity */
-				rt6_set_expires(rt, jiffies + rt_expires);
+				fib6_set_expires(rt, jiffies + rt_expires);
 			} else {
-				rt6_clean_expires(rt);
+				fib6_clean_expires(rt);
 			}
 		} else if (valid_lft) {
 			clock_t expires = 0;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 0d94c56c3e41..f25f4d9831e8 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -906,9 +906,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 				if (!(iter->rt6i_flags & RTF_EXPIRES))
 					return -EEXIST;
 				if (!(rt->rt6i_flags & RTF_EXPIRES))
-					rt6_clean_expires(iter);
+					fib6_clean_expires(iter);
 				else
-					rt6_set_expires(iter, rt->dst.expires);
+					fib6_set_expires(iter, rt->expires);
 				fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
 				return -EEXIST;
 			}
@@ -2003,8 +2003,8 @@ static int fib6_age(struct rt6_info *rt, void *arg)
 	 *	Routes are expired even if they are in use.
 	 */
 
-	if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) {
-		if (time_after(now, rt->dst.expires)) {
+	if (rt->rt6i_flags & RTF_EXPIRES && rt->expires) {
+		if (time_after(now, rt->expires)) {
 			RT6_TRACE("expiring %p\n", rt);
 			return -1;
 		}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b058ea9ecec0..e4d9eea92139 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1318,7 +1318,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 	}
 
 	if (rt)
-		rt6_set_expires(rt, jiffies + (HZ * lifetime));
+		fib6_set_expires(rt, jiffies + (HZ * lifetime));
 	if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
 	    ra_msg->icmph.icmp6_hop_limit) {
 		if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 62aafd06c35f..f6ca55d21fac 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -435,7 +435,7 @@ static bool rt6_check_expired(const struct rt6_info *rt)
 			return true;
 	} else if (rt->from) {
 		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
-			rt6_check_expired(rt->from);
+			fib6_check_expired(rt->from);
 	}
 	return false;
 }
@@ -687,7 +687,7 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
 		goto out;
 
-	if (rt6_check_expired(rt))
+	if (fib6_check_expired(rt))
 		goto out;
 
 	m = rt6_score_route(rt, oif, strict);
@@ -871,9 +871,9 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 
 	if (rt) {
 		if (!addrconf_finite_timeout(lifetime))
-			rt6_clean_expires(rt);
+			fib6_clean_expires(rt);
 		else
-			rt6_set_expires(rt, jiffies + HZ * lifetime);
+			fib6_set_expires(rt, jiffies + HZ * lifetime);
 
 		ip6_rt_put(rt);
 	}
@@ -2383,7 +2383,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	for_each_fib6_node_rt_rcu(fn) {
 		if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
 			continue;
-		if (rt6_check_expired(rt))
+		if (fib6_check_expired(rt))
 			continue;
 		if (rt->rt6i_flags & RTF_REJECT)
 			break;
@@ -2909,10 +2909,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		goto out;
 
 	if (cfg->fc_flags & RTF_EXPIRES)
-		rt6_set_expires(rt, jiffies +
+		fib6_set_expires(rt, jiffies +
 				clock_t_to_jiffies(cfg->fc_expires));
 	else
-		rt6_clean_expires(rt);
+		fib6_clean_expires(rt);
 
 	if (cfg->fc_protocol == RTPROT_UNSPEC)
 		cfg->fc_protocol = RTPROT_BOOT;
@@ -4599,8 +4599,10 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 			goto nla_put_failure;
 	}
 
-	if (rt->rt6i_flags & RTF_EXPIRES && dst)
-		expires = dst->expires - jiffies;
+	if (rt->rt6i_flags & RTF_EXPIRES) {
+		expires = dst ? dst->expires : rt->expires;
+		expires -= jiffies;
+	}
 
 	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
 		goto nla_put_failure;
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 12/21] net/ipv6: Add fib6_null_entry
From: David Ahern @ 2018-04-18  0:33 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com>

ip6_null_entry will stay a dst based return for lookups that fail to
match an entry.

Add a new fib6_null_entry which constitutes the root node and leafs
for fibs. Replace existing references to ip6_null_entry with the
new fib6_null_entry when dealing with FIBs.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/netns/ipv6.h |  3 ++-
 net/ipv6/ip6_fib.c       | 26 ++++++++++----------
 net/ipv6/route.c         | 62 +++++++++++++++++++++++++++++++++---------------
 3 files changed, 58 insertions(+), 33 deletions(-)

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index c29f09cfc9d7..74e4e1e449d5 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -60,7 +60,8 @@ struct netns_ipv6 {
 #endif
 	struct xt_table		*ip6table_nat;
 #endif
-	struct rt6_info         *ip6_null_entry;
+	struct rt6_info         *fib6_null_entry;
+	struct rt6_info		*ip6_null_entry;
 	struct rt6_statistics   *rt6_stats;
 	struct timer_list       ip6_fib_timer;
 	struct hlist_head       *fib_table_hash;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f25f4d9831e8..280b69497ad0 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -231,7 +231,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
 	if (table) {
 		table->tb6_id = id;
 		rcu_assign_pointer(table->tb6_root.leaf,
-				   net->ipv6.ip6_null_entry);
+				   net->ipv6.fib6_null_entry);
 		table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
 		inet_peer_base_init(&table->tb6_peers);
 	}
@@ -369,7 +369,7 @@ struct fib6_dump_arg {
 
 static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
 {
-	if (rt == arg->net->ipv6.ip6_null_entry)
+	if (rt == arg->net->ipv6.fib6_null_entry)
 		return;
 	call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
 }
@@ -658,7 +658,7 @@ static struct fib6_node *fib6_add_1(struct net *net,
 			/* remove null_entry in the root node */
 			} else if (fn->fn_flags & RTN_TL_ROOT &&
 				   rcu_access_pointer(fn->leaf) ==
-				   net->ipv6.ip6_null_entry) {
+				   net->ipv6.fib6_null_entry) {
 				RCU_INIT_POINTER(fn->leaf, NULL);
 			}
 
@@ -1171,9 +1171,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 			if (!sfn)
 				goto failure;
 
-			atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
+			atomic_inc(&info->nl_net->ipv6.fib6_null_entry->rt6i_ref);
 			rcu_assign_pointer(sfn->leaf,
-					   info->nl_net->ipv6.ip6_null_entry);
+					   info->nl_net->ipv6.fib6_null_entry);
 			sfn->fn_flags = RTN_ROOT;
 
 			/* Now add the first leaf node to new subtree */
@@ -1212,7 +1212,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 			if (fn->fn_flags & RTN_TL_ROOT) {
 				/* put back null_entry for root node */
 				rcu_assign_pointer(fn->leaf,
-					    info->nl_net->ipv6.ip6_null_entry);
+					    info->nl_net->ipv6.fib6_null_entry);
 			} else {
 				atomic_inc(&rt->rt6i_ref);
 				rcu_assign_pointer(fn->leaf, rt);
@@ -1251,7 +1251,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 				if (!pn_leaf) {
 					WARN_ON(!pn_leaf);
 					pn_leaf =
-					    info->nl_net->ipv6.ip6_null_entry;
+					    info->nl_net->ipv6.fib6_null_entry;
 				}
 #endif
 				atomic_inc(&pn_leaf->rt6i_ref);
@@ -1494,7 +1494,7 @@ static struct rt6_info *fib6_find_prefix(struct net *net,
 	struct fib6_node *child_left, *child_right;
 
 	if (fn->fn_flags & RTN_ROOT)
-		return net->ipv6.ip6_null_entry;
+		return net->ipv6.fib6_null_entry;
 
 	while (fn) {
 		child_left = rcu_dereference_protected(fn->left,
@@ -1531,7 +1531,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 
 	/* Set fn->leaf to null_entry for root node. */
 	if (fn->fn_flags & RTN_TL_ROOT) {
-		rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry);
+		rcu_assign_pointer(fn->leaf, net->ipv6.fib6_null_entry);
 		return fn;
 	}
 
@@ -1576,7 +1576,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 #if RT6_DEBUG >= 2
 			if (!new_fn_leaf) {
 				WARN_ON(!new_fn_leaf);
-				new_fn_leaf = net->ipv6.ip6_null_entry;
+				new_fn_leaf = net->ipv6.fib6_null_entry;
 			}
 #endif
 			atomic_inc(&new_fn_leaf->rt6i_ref);
@@ -1726,7 +1726,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
 		return -ENOENT;
 	}
 #endif
-	if (!fn || rt == net->ipv6.ip6_null_entry)
+	if (!fn || rt == net->ipv6.fib6_null_entry)
 		return -ENOENT;
 
 	WARN_ON(!(fn->fn_flags & RTN_RTINFO));
@@ -2087,7 +2087,7 @@ static int __net_init fib6_net_init(struct net *net)
 
 	net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
 	rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf,
-			   net->ipv6.ip6_null_entry);
+			   net->ipv6.fib6_null_entry);
 	net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
 		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
 	inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
@@ -2099,7 +2099,7 @@ static int __net_init fib6_net_init(struct net *net)
 		goto out_fib6_main_tbl;
 	net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
 	rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf,
-			   net->ipv6.ip6_null_entry);
+			   net->ipv6.fib6_null_entry);
 	net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
 		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
 	inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f6ca55d21fac..7c141394d4f1 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -276,6 +276,15 @@ static const u32 ip6_template_metrics[RTAX_MAX] = {
 	[RTAX_HOPLIMIT - 1] = 0,
 };
 
+static const struct rt6_info fib6_null_entry_template = {
+	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_protocol  = RTPROT_KERNEL,
+	.rt6i_metric	= ~(u32)0,
+	.rt6i_ref	= ATOMIC_INIT(1),
+	.fib6_type	= RTN_UNREACHABLE,
+	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
+};
+
 static const struct rt6_info ip6_null_entry_template = {
 	.dst = {
 		.__refcnt	= ATOMIC_INIT(1),
@@ -522,10 +531,10 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
 			return local;
 
 		if (flags & RT6_LOOKUP_F_IFACE)
-			return net->ipv6.ip6_null_entry;
+			return net->ipv6.fib6_null_entry;
 	}
 
-	return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
+	return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
 }
 
 #ifdef CONFIG_IPV6_ROUTER_PREF
@@ -758,8 +767,8 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
 	bool do_rr = false;
 	int key_plen;
 
-	if (!leaf || leaf == net->ipv6.ip6_null_entry)
-		return net->ipv6.ip6_null_entry;
+	if (!leaf || leaf == net->ipv6.fib6_null_entry)
+		return net->ipv6.fib6_null_entry;
 
 	rt0 = rcu_dereference(fn->rr_ptr);
 	if (!rt0)
@@ -776,7 +785,7 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
 		key_plen = rt0->rt6i_src.plen;
 #endif
 	if (fn->fn_bit != key_plen)
-		return net->ipv6.ip6_null_entry;
+		return net->ipv6.fib6_null_entry;
 
 	match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
 			     &do_rr);
@@ -797,7 +806,7 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
 		}
 	}
 
-	return match ? match : net->ipv6.ip6_null_entry;
+	return match ? match : net->ipv6.fib6_null_entry;
 }
 
 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
@@ -1063,7 +1072,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 restart:
 	rt = rcu_dereference(fn->leaf);
 	if (!rt) {
-		rt = net->ipv6.ip6_null_entry;
+		rt = net->ipv6.fib6_null_entry;
 	} else {
 		rt = rt6_device_match(net, rt, &fl6->saddr,
 				      fl6->flowi6_oif, flags);
@@ -1071,7 +1080,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 			rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
 						  skb, flags);
 	}
-	if (rt == net->ipv6.ip6_null_entry) {
+	if (rt == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto restart;
@@ -1820,7 +1829,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 	rt = rt6_select(net, fn, oif, strict);
 	if (rt->rt6i_nsiblings)
 		rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
-	if (rt == net->ipv6.ip6_null_entry) {
+	if (rt == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto redo_rt6_select;
@@ -1837,7 +1846,8 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 	if (rt_cache)
 		rt = rt_cache;
 
-	if (rt == net->ipv6.ip6_null_entry) {
+	if (rt == net->ipv6.fib6_null_entry) {
+		rt = net->ipv6.ip6_null_entry;
 		rcu_read_unlock();
 		dst_hold(&rt->dst);
 		trace_fib6_table_lookup(net, rt, table, fl6);
@@ -2412,13 +2422,13 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	}
 
 	if (!rt)
-		rt = net->ipv6.ip6_null_entry;
+		rt = net->ipv6.fib6_null_entry;
 	else if (rt->rt6i_flags & RTF_REJECT) {
 		rt = net->ipv6.ip6_null_entry;
 		goto out;
 	}
 
-	if (rt == net->ipv6.ip6_null_entry) {
+	if (rt == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto restart;
@@ -3051,7 +3061,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 	struct fib6_table *table;
 	int err;
 
-	if (rt == net->ipv6.ip6_null_entry) {
+	if (rt == net->ipv6.fib6_null_entry) {
 		err = -ENOENT;
 		goto out;
 	}
@@ -3081,7 +3091,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
 	struct fib6_table *table;
 	int err = -ENOENT;
 
-	if (rt == net->ipv6.ip6_null_entry)
+	if (rt == net->ipv6.fib6_null_entry)
 		goto out_put;
 	table = rt->rt6i_table;
 	spin_lock_bh(&table->tb6_lock);
@@ -3634,7 +3644,7 @@ static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
 
 	if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
-	    rt != net->ipv6.ip6_null_entry &&
+	    rt != net->ipv6.fib6_null_entry &&
 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
 		spin_lock_bh(&rt6_exception_lock);
 		/* remove prefsrc entry */
@@ -3789,7 +3799,7 @@ static int fib6_ifup(struct rt6_info *rt, void *p_arg)
 	const struct arg_netdev_event *arg = p_arg;
 	struct net *net = dev_net(arg->dev);
 
-	if (rt != net->ipv6.ip6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
+	if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
 		rt->fib6_nh.nh_flags &= ~arg->nh_flags;
 		fib6_update_sernum_upto_root(net, rt);
 		rt6_multipath_rebalance(rt);
@@ -3873,7 +3883,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
 	const struct net_device *dev = arg->dev;
 	struct net *net = dev_net(dev);
 
-	if (rt == net->ipv6.ip6_null_entry)
+	if (rt == net->ipv6.fib6_null_entry)
 		return 0;
 
 	switch (arg->event) {
@@ -4624,7 +4634,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
 	struct net *net = arg->net;
 
-	if (rt == net->ipv6.ip6_null_entry)
+	if (rt == net->ipv6.fib6_null_entry)
 		return 0;
 
 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
@@ -4813,6 +4823,8 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 		return NOTIFY_OK;
 
 	if (event == NETDEV_REGISTER) {
+		net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
+		net->ipv6.fib6_null_entry->rt6i_idev = in6_dev_get(dev);
 		net->ipv6.ip6_null_entry->dst.dev = dev;
 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -4826,6 +4838,7 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 		/* NETDEV_UNREGISTER could be fired for multiple times by
 		 * netdev_wait_allrefs(). Make sure we only call this once.
 		 */
+		in6_dev_put_clear(&net->ipv6.fib6_null_entry->rt6i_idev);
 		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
@@ -5009,11 +5022,17 @@ static int __net_init ip6_route_net_init(struct net *net)
 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
 		goto out_ip6_dst_ops;
 
+	net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
+					    sizeof(*net->ipv6.fib6_null_entry),
+					    GFP_KERNEL);
+	if (!net->ipv6.fib6_null_entry)
+		goto out_ip6_dst_entries;
+
 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
 					   sizeof(*net->ipv6.ip6_null_entry),
 					   GFP_KERNEL);
 	if (!net->ipv6.ip6_null_entry)
-		goto out_ip6_dst_entries;
+		goto out_fib6_null_entry;
 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
 			 ip6_template_metrics, true);
@@ -5060,6 +5079,8 @@ static int __net_init ip6_route_net_init(struct net *net)
 out_ip6_null_entry:
 	kfree(net->ipv6.ip6_null_entry);
 #endif
+out_fib6_null_entry:
+	kfree(net->ipv6.fib6_null_entry);
 out_ip6_dst_entries:
 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
 out_ip6_dst_ops:
@@ -5068,6 +5089,7 @@ static int __net_init ip6_route_net_init(struct net *net)
 
 static void __net_exit ip6_route_net_exit(struct net *net)
 {
+	kfree(net->ipv6.fib6_null_entry);
 	kfree(net->ipv6.ip6_null_entry);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	kfree(net->ipv6.ip6_prohibit_entry);
@@ -5138,6 +5160,8 @@ void __init ip6_route_init_special_entries(void)
 	/* Registering of the loopback is done before this portion of code,
 	 * the loopback reference in rt6_info will not be taken, do it
 	 * manually for init_net */
+	init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
+	init_net.ipv6.fib6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 13/21] net/ipv6: Add rt6_info create function for ip6_pol_route_lookup
From: David Ahern @ 2018-04-18  0:33 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com>

ip6_pol_route_lookup is the lookup function for ip6_route_lookup and
rt6_lookup. At the moment it returns either a reference to a FIB entry
or a cached exception. To move FIB entries to a separate struct, this
lookup function needs to convert FIB entries to an rt6_info that is
returned to the caller.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/ipv6/route.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7c141394d4f1..e293692174ba 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1055,6 +1055,19 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
 	return false;
 }
 
+/* called with rcu_lock held */
+static struct rt6_info *ip6_create_rt_rcu(struct rt6_info *rt)
+{
+	struct net_device *dev = rt->fib6_nh.nh_dev;
+	struct rt6_info *nrt;
+
+	nrt = __ip6_dst_alloc(dev_net(dev), dev, 0);
+	if (nrt)
+		ip6_rt_copy_init(nrt, rt);
+
+	return nrt;
+}
+
 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 					     struct fib6_table *table,
 					     struct flowi6 *fl6,
@@ -1087,18 +1100,26 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 	}
 	/* Search through exception table */
 	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
-	if (rt_cache)
+	if (rt_cache) {
 		rt = rt_cache;
+		if (ip6_hold_safe(net, &rt, true))
+			dst_use_noref(&rt->dst, jiffies);
+	} else if (dst_hold_safe(&rt->dst)) {
+		struct rt6_info *nrt;
 
-	if (ip6_hold_safe(net, &rt, true))
-		dst_use_noref(&rt->dst, jiffies);
+		nrt = ip6_create_rt_rcu(rt);
+		dst_release(&rt->dst);
+		rt = nrt;
+	} else {
+		rt = net->ipv6.ip6_null_entry;
+		dst_hold(&rt->dst);
+	}
 
 	rcu_read_unlock();
 
 	trace_fib6_table_lookup(net, rt, table, fl6);
 
 	return rt;
-
 }
 
 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 14/21] net/ipv6: Move dst flags to booleans in fib entries
From: David Ahern @ 2018-04-18  0:33 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com>

Continuing to wean FIB paths off of dst_entry, use a bool to hold
requests for certain dst settings. Add a helper to convert the
flags to DST flags when a FIB entry is converted to a dst_entry.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h |  5 ++++-
 net/ipv6/addrconf.c   |  4 ++--
 net/ipv6/route.c      | 29 ++++++++++++++++++++++++-----
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index c73b985734f5..159f651dee55 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -177,7 +177,10 @@ struct rt6_info {
 	u8				fib6_type;
 	u8				exception_bucket_flushed:1,
 					should_flush:1,
-					unused:6;
+					dst_nocount:1,
+					dst_nopolicy:1,
+					dst_host:1,
+					unused:3;
 
 	unsigned long			expires;
 	struct dst_metrics		*fib6_metrics;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a156f1a0b1a7..c8df5c6499db 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1046,7 +1046,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 
 	if (net->ipv6.devconf_all->disable_policy ||
 	    idev->cnf.disable_policy)
-		rt->dst.flags |= DST_NOPOLICY;
+		rt->dst_nopolicy = true;
 
 	neigh_parms_data_state_setall(idev->nd_parms);
 
@@ -5981,7 +5981,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
 			int cpu;
 
 			rcu_read_lock();
-			addrconf_set_nopolicy(ifa->rt, val);
+			ifa->rt->dst_nopolicy = val ? true : false;
 			if (rt->rt6i_pcpu) {
 				for_each_possible_cpu(cpu) {
 					struct rt6_info **rtp;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e293692174ba..1a3e0db31b34 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -937,6 +937,20 @@ static int ip6_rt_type_to_error(u8 fib6_type)
 	return fib6_prop[fib6_type];
 }
 
+static unsigned short fib6_info_dst_flags(struct rt6_info *rt)
+{
+	unsigned short flags = 0;
+
+	if (rt->dst_nocount)
+		flags |= DST_NOCOUNT;
+	if (rt->dst_nopolicy)
+		flags |= DST_NOPOLICY;
+	if (rt->dst_host)
+		flags |= DST_HOST;
+
+	return flags;
+}
+
 static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct rt6_info *ort)
 {
 	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
@@ -961,6 +975,8 @@ static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct rt6_info *ort)
 
 static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
 {
+	rt->dst.flags |= fib6_info_dst_flags(ort);
+
 	if (ort->rt6i_flags & RTF_REJECT) {
 		ip6_rt_init_dst_reject(rt, ort);
 		return;
@@ -970,7 +986,6 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
 	rt->dst.output = ip6_output;
 
 	if (ort->fib6_type == RTN_LOCAL) {
-		rt->dst.flags |= DST_HOST;
 		rt->dst.input = ip6_input;
 	} else if (ipv6_addr_type(&ort->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) {
 		rt->dst.input = ip6_mc_input;
@@ -1058,10 +1073,11 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
 /* called with rcu_lock held */
 static struct rt6_info *ip6_create_rt_rcu(struct rt6_info *rt)
 {
+	unsigned short flags = fib6_info_dst_flags(rt);
 	struct net_device *dev = rt->fib6_nh.nh_dev;
 	struct rt6_info *nrt;
 
-	nrt = __ip6_dst_alloc(dev_net(dev), dev, 0);
+	nrt = __ip6_dst_alloc(dev_net(dev), dev, flags);
 	if (nrt)
 		ip6_rt_copy_init(nrt, rt);
 
@@ -1229,12 +1245,13 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 
 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
 {
+	unsigned short flags = fib6_info_dst_flags(rt);
 	struct net_device *dev;
 	struct rt6_info *pcpu_rt;
 
 	rcu_read_lock();
 	dev = ip6_rt_get_dev_rcu(rt);
-	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
+	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, flags);
 	rcu_read_unlock();
 	if (!pcpu_rt)
 		return NULL;
@@ -2965,7 +2982,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
 	rt->rt6i_dst.plen = cfg->fc_dst_len;
 	if (rt->rt6i_dst.plen == 128)
-		rt->dst.flags |= DST_HOST;
+		rt->dst_host = true;
 
 #ifdef CONFIG_IPV6_SUBTREES
 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
@@ -3626,10 +3643,12 @@ struct rt6_info *addrconf_dst_alloc(struct net *net,
 	if (!rt)
 		return ERR_PTR(-ENOMEM);
 
+	rt->dst_nocount = true;
+
 	in6_dev_hold(idev);
 	rt->rt6i_idev = idev;
 
-	rt->dst.flags |= DST_HOST;
+	rt->dst_host = true;
 	rt->rt6i_protocol = RTPROT_KERNEL;
 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
 	if (anycast) {
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 15/21] net/ipv6: Create a neigh_lookup for FIB entries
From: David Ahern @ 2018-04-18  0:33 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com>

The router discovery code has a FIB entry and wants to validate the
gateway has a neighbor entry. Refactor the existing dst_neigh_lookup
for IPv6 and create a new function that takes the gateway and device
and returns a neighbor entry. Use the new function in
ndisc_router_discovery to validate the gateway.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_route.h |  3 +++
 net/ipv6/ndisc.c        |  8 ++++++--
 net/ipv6/route.c        | 33 ++++++++++++++++++++-------------
 3 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 655e13017a45..cb6fb7e16a28 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -279,4 +279,7 @@ static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b)
 	       !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate);
 }
 
+struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
+				   struct net_device *dev, struct sk_buff *skb,
+				   const void *daddr);
 #endif
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index e4d9eea92139..556717154fa3 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1276,7 +1276,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 	rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
 
 	if (rt) {
-		neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr);
+		neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw,
+					 rt->fib6_nh.nh_dev, NULL,
+					  &ipv6_hdr(skb)->saddr);
 		if (!neigh) {
 			ND_PRINTK(0, err,
 				  "RA: %s got default router without neighbour\n",
@@ -1304,7 +1306,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 			return;
 		}
 
-		neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr);
+		neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw,
+					 rt->fib6_nh.nh_dev, NULL,
+					  &ipv6_hdr(skb)->saddr);
 		if (!neigh) {
 			ND_PRINTK(0, err,
 				  "RA: %s got default router without neighbour\n",
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1a3e0db31b34..d635d71f7d51 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -182,12 +182,10 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 	}
 }
 
-static inline const void *choose_neigh_daddr(struct rt6_info *rt,
+static inline const void *choose_neigh_daddr(const struct in6_addr *p,
 					     struct sk_buff *skb,
 					     const void *daddr)
 {
-	struct in6_addr *p = &rt->rt6i_gateway;
-
 	if (!ipv6_addr_any(p))
 		return (const void *) p;
 	else if (skb)
@@ -195,18 +193,27 @@ static inline const void *choose_neigh_daddr(struct rt6_info *rt,
 	return daddr;
 }
 
-static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
-					  struct sk_buff *skb,
-					  const void *daddr)
+struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
+				   struct net_device *dev,
+				   struct sk_buff *skb,
+				   const void *daddr)
 {
-	struct rt6_info *rt = (struct rt6_info *) dst;
 	struct neighbour *n;
 
-	daddr = choose_neigh_daddr(rt, skb, daddr);
-	n = __ipv6_neigh_lookup(dst->dev, daddr);
+	daddr = choose_neigh_daddr(gw, skb, daddr);
+	n = __ipv6_neigh_lookup(dev, daddr);
 	if (n)
 		return n;
-	return neigh_create(&nd_tbl, daddr, dst->dev);
+	return neigh_create(&nd_tbl, daddr, dev);
+}
+
+static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
+					      struct sk_buff *skb,
+					      const void *daddr)
+{
+	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
+
+	return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
 }
 
 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
@@ -214,7 +221,7 @@ static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 	struct net_device *dev = dst->dev;
 	struct rt6_info *rt = (struct rt6_info *)dst;
 
-	daddr = choose_neigh_daddr(rt, NULL, daddr);
+	daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
 	if (!daddr)
 		return;
 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
@@ -239,7 +246,7 @@ static struct dst_ops ip6_dst_ops_template = {
 	.update_pmtu		=	ip6_rt_update_pmtu,
 	.redirect		=	rt6_do_redirect,
 	.local_out		=	__ip6_local_out,
-	.neigh_lookup		=	ip6_neigh_lookup,
+	.neigh_lookup		=	ip6_dst_neigh_lookup,
 	.confirm_neigh		=	ip6_confirm_neigh,
 };
 
@@ -269,7 +276,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 	.redirect		=	ip6_rt_blackhole_redirect,
 	.cow_metrics		=	dst_cow_metrics_generic,
-	.neigh_lookup		=	ip6_neigh_lookup,
+	.neigh_lookup		=	ip6_dst_neigh_lookup,
 };
 
 static const u32 ip6_template_metrics[RTAX_MAX] = {
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 16/21] net/ipv6: Add gfp_flags to route add functions
From: David Ahern @ 2018-04-18  0:33 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com>

Most FIB entries can be added using memory allocated with GFP_KERNEL.
Add gfp_flags to ip6_route_add and addrconf_dst_alloc. Code paths that
can be reached from the packet path (e.g., ndisc and autoconfig) or
atomic notifiers use GFP_ATOMIC; paths from user context (adding
addresses and routes) use GFP_KERNEL.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_route.h |  6 ++++--
 net/ipv6/addrconf.c     | 39 +++++++++++++++++++++++----------------
 net/ipv6/anycast.c      |  2 +-
 net/ipv6/route.c        | 18 ++++++++++--------
 4 files changed, 38 insertions(+), 27 deletions(-)

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index cb6fb7e16a28..ff70266e30d7 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -100,7 +100,8 @@ void ip6_route_cleanup(void);
 
 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg);
 
-int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack);
+int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
+		  struct netlink_ext_ack *extack);
 int ip6_ins_rt(struct net *net, struct rt6_info *rt);
 int ip6_del_rt(struct net *net, struct rt6_info *rt);
 
@@ -138,7 +139,8 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
 void fib6_force_start_gc(struct net *net);
 
 struct rt6_info *addrconf_dst_alloc(struct net *net, struct inet6_dev *idev,
-				    const struct in6_addr *addr, bool anycast);
+				    const struct in6_addr *addr, bool anycast,
+				    gfp_t gfp_flags);
 
 struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
 			       int flags);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c8df5c6499db..915cd0734b27 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1037,7 +1037,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 		goto out;
 	}
 
-	rt = addrconf_dst_alloc(net, idev, addr, false);
+	rt = addrconf_dst_alloc(net, idev, addr, false, gfp_flags);
 	if (IS_ERR(rt)) {
 		err = PTR_ERR(rt);
 		rt = NULL;
@@ -2320,7 +2320,7 @@ static void  ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad
 
 static void
 addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
-		      unsigned long expires, u32 flags)
+		      unsigned long expires, u32 flags, gfp_t gfp_flags)
 {
 	struct fib6_config cfg = {
 		.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX,
@@ -2345,7 +2345,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 		cfg.fc_flags |= RTF_NONEXTHOP;
 #endif
 
-	ip6_route_add(&cfg, NULL);
+	ip6_route_add(&cfg, gfp_flags, NULL);
 }
 
 
@@ -2401,7 +2401,7 @@ static void addrconf_add_mroute(struct net_device *dev)
 
 	ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
 
-	ip6_route_add(&cfg, NULL);
+	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
 }
 
 static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
@@ -2685,7 +2685,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 				expires = jiffies_to_clock_t(rt_expires);
 			}
 			addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
-					      dev, expires, flags);
+					      dev, expires, flags, GFP_ATOMIC);
 		}
 		ip6_rt_put(rt);
 	}
@@ -2900,7 +2900,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
 	if (!IS_ERR(ifp)) {
 		if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
 			addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
-					      expires, flags);
+					      expires, flags, GFP_KERNEL);
 		}
 
 		/* Send a netlink notification if DAD is enabled and
@@ -3053,7 +3053,8 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
 
 	if (addr.s6_addr32[3]) {
 		add_addr(idev, &addr, plen, scope);
-		addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags);
+		addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags,
+				      GFP_ATOMIC);
 		return;
 	}
 
@@ -3078,7 +3079,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
 
 				add_addr(idev, &addr, plen, flag);
 				addrconf_prefix_route(&addr, plen, idev->dev, 0,
-						      pflags);
+						      pflags, GFP_ATOMIC);
 			}
 		}
 	}
@@ -3118,7 +3119,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev,
 	ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags,
 			    INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true, NULL);
 	if (!IS_ERR(ifp)) {
-		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
+		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev,
+				      0, 0, GFP_ATOMIC);
 		addrconf_dad_start(ifp);
 		in6_ifa_put(ifp);
 	}
@@ -3233,7 +3235,8 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
 			addrconf_add_linklocal(idev, &addr,
 					       IFA_F_STABLE_PRIVACY);
 		else if (prefix_route)
-			addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
+			addrconf_prefix_route(&addr, 64, idev->dev,
+					      0, 0, GFP_KERNEL);
 		break;
 	case IN6_ADDR_GEN_MODE_EUI64:
 		/* addrconf_add_linklocal also adds a prefix_route and we
@@ -3243,7 +3246,8 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
 		if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0)
 			addrconf_add_linklocal(idev, &addr, 0);
 		else if (prefix_route)
-			addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
+			addrconf_prefix_route(&addr, 64, idev->dev,
+					      0, 0, GFP_ATOMIC);
 		break;
 	case IN6_ADDR_GEN_MODE_NONE:
 	default:
@@ -3346,7 +3350,8 @@ static int fixup_permanent_addr(struct net *net,
 	if (!ifp->rt || !ifp->rt->rt6i_node) {
 		struct rt6_info *rt, *prev;
 
-		rt = addrconf_dst_alloc(net, idev, &ifp->addr, false);
+		rt = addrconf_dst_alloc(net, idev, &ifp->addr, false,
+					GFP_ATOMIC);
 		if (IS_ERR(rt))
 			return PTR_ERR(rt);
 
@@ -3361,7 +3366,7 @@ static int fixup_permanent_addr(struct net *net,
 
 	if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
 		addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
-				      idev->dev, 0, 0);
+				      idev->dev, 0, 0, GFP_ATOMIC);
 	}
 
 	if (ifp->state == INET6_IFADDR_STATE_PREDAD)
@@ -4572,8 +4577,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
 		ipv6_ifa_notify(0, ifp);
 
 	if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
-		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
-				      expires, flags);
+		addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
+				      ifp->idev->dev, expires, flags,
+				      GFP_KERNEL);
 	} else if (had_prefixroute) {
 		enum cleanup_prefix_rt_t action;
 		unsigned long rt_expires;
@@ -5614,7 +5620,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 			addrconf_join_anycast(ifp);
 		if (!ipv6_addr_any(&ifp->peer_addr))
 			addrconf_prefix_route(&ifp->peer_addr, 128,
-					      ifp->idev->dev, 0, 0);
+					      ifp->idev->dev, 0, 0,
+					      GFP_KERNEL);
 		break;
 	case RTM_DELADDR:
 		if (ifp->idev->cnf.forwarding)
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 1122fb299b86..e456386fe4d5 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -267,7 +267,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 	}
 
 	net = dev_net(idev->dev);
-	rt = addrconf_dst_alloc(net, idev, addr, true);
+	rt = addrconf_dst_alloc(net, idev, addr, true, GFP_ATOMIC);
 	if (IS_ERR(rt)) {
 		err = PTR_ERR(rt);
 		goto out;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d635d71f7d51..56a854b426a6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2866,6 +2866,7 @@ static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
 }
 
 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
+					      gfp_t gfp_flags,
 					      struct netlink_ext_ack *extack)
 {
 	struct net *net = cfg->fc_nlinfo.nl_net;
@@ -3086,12 +3087,13 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 	return ERR_PTR(err);
 }
 
-int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack)
+int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
+		  struct netlink_ext_ack *extack)
 {
 	struct rt6_info *rt;
 	int err;
 
-	rt = ip6_route_info_create(cfg, extack);
+	rt = ip6_route_info_create(cfg, gfp_flags, extack);
 	if (IS_ERR(rt))
 		return PTR_ERR(rt);
 
@@ -3418,7 +3420,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 	if (!prefixlen)
 		cfg.fc_flags |= RTF_DEFAULT;
 
-	ip6_route_add(&cfg, NULL);
+	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
 
 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
 }
@@ -3469,7 +3471,7 @@ struct rt6_info *rt6_add_dflt_router(struct net *net,
 
 	cfg.fc_gateway = *gwaddr;
 
-	if (!ip6_route_add(&cfg, NULL)) {
+	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
 		struct fib6_table *table;
 
 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
@@ -3567,7 +3569,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		rtnl_lock();
 		switch (cmd) {
 		case SIOCADDRT:
-			err = ip6_route_add(&cfg, NULL);
+			err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
 			break;
 		case SIOCDELRT:
 			err = ip6_route_del(&cfg, NULL);
@@ -3640,7 +3642,7 @@ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff
 struct rt6_info *addrconf_dst_alloc(struct net *net,
 				    struct inet6_dev *idev,
 				    const struct in6_addr *addr,
-				    bool anycast)
+				    bool anycast, gfp_t gfp_flags)
 {
 	u32 tb_id;
 	struct net_device *dev = idev->dev;
@@ -4293,7 +4295,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 		}
 
 		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
-		rt = ip6_route_info_create(&r_cfg, extack);
+		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
 		if (IS_ERR(rt)) {
 			err = PTR_ERR(rt);
 			rt = NULL;
@@ -4446,7 +4448,7 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (cfg.fc_mp)
 		return ip6_route_multipath_add(&cfg, extack);
 	else
-		return ip6_route_add(&cfg, extack);
+		return ip6_route_add(&cfg, GFP_KERNEL, extack);
 }
 
 static size_t rt6_nlmsg_size(struct rt6_info *rt)
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 17/21] net/ipv6: Cleanup exception and cache route handling
From: David Ahern @ 2018-04-18  0:33 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com>

IPv6 FIB will only contain FIB entries with exception routes added to
the FIB entry. Once this transformation is complete, FIB lookups will
return a fib6_info with the lookup functions still returning a dst
based rt6_info. The current code uses rt6_info for both paths and
overloads the rt6_info variable usually called 'rt'.

This patch introduces a new 'f6i' variable name for the result of the FIB
lookup and keeps 'rt' as the dst based return variable. 'f6i' becomes a
fib6_info in a later patch which is why it is introduced as f6i now;
avoids the additional churn in the later patch.

In addition, remove RTF_CACHE and dst checks from fib6 add and delete
since they can not happen now and will never happen after the data
type flip.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_route.h |   1 -
 net/ipv6/ip6_fib.c      |  16 +-----
 net/ipv6/route.c        | 142 +++++++++++++++++++++++++++---------------------
 3 files changed, 81 insertions(+), 78 deletions(-)

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index ff70266e30d7..686cdc7f356a 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -106,7 +106,6 @@ int ip6_ins_rt(struct net *net, struct rt6_info *rt);
 int ip6_del_rt(struct net *net, struct rt6_info *rt);
 
 void rt6_flush_exceptions(struct rt6_info *rt);
-int rt6_remove_exception_rt(struct rt6_info *rt);
 void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args,
 			unsigned long now);
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 280b69497ad0..53df4a98a7f7 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1074,7 +1074,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 static void fib6_start_gc(struct net *net, struct rt6_info *rt)
 {
 	if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
-	    (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE)))
+	    (rt->rt6i_flags & RTF_EXPIRES))
 		mod_timer(&net->ipv6.ip6_fib_timer,
 			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
 }
@@ -1125,8 +1125,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 
 	if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
 		return -EINVAL;
-	if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE))
-		return -EINVAL;
 
 	if (info->nlh) {
 		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
@@ -1650,8 +1648,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 
 	RT6_TRACE("fib6_del_route\n");
 
-	WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
-
 	/* Unlink it */
 	*rtp = rt->rt6_next;
 	rt->rt6i_node = NULL;
@@ -1720,21 +1716,11 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
 	struct rt6_info __rcu **rtp;
 	struct rt6_info __rcu **rtp_next;
 
-#if RT6_DEBUG >= 2
-	if (rt->dst.obsolete > 0) {
-		WARN_ON(fn);
-		return -ENOENT;
-	}
-#endif
 	if (!fn || rt == net->ipv6.fib6_null_entry)
 		return -ENOENT;
 
 	WARN_ON(!(fn->fn_flags & RTN_RTINFO));
 
-	/* remove cached dst from exception table */
-	if (rt->rt6i_flags & RTF_CACHE)
-		return rt6_remove_exception_rt(rt);
-
 	/*
 	 *	Walk the leaf entries looking for ourself
 	 */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 56a854b426a6..ad9eaecf539c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1013,8 +1013,8 @@ static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
 	BUG_ON(from->from);
 
 	rt->rt6i_flags &= ~RTF_EXPIRES;
-	dst_hold(&from->dst);
-	rt->from = from;
+	if (dst_hold_safe(&from->dst))
+		rt->from = from;
 	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
 	if (from->fib6_metrics != &dst_default_metrics) {
 		rt->dst._metrics |= DST_METRICS_REFCOUNTED;
@@ -1097,8 +1097,9 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 					     const struct sk_buff *skb,
 					     int flags)
 {
-	struct rt6_info *rt, *rt_cache;
+	struct rt6_info *f6i;
 	struct fib6_node *fn;
+	struct rt6_info *rt;
 
 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
 		flags &= ~RT6_LOOKUP_F_IFACE;
@@ -1106,36 +1107,36 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 	rcu_read_lock();
 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
-	rt = rcu_dereference(fn->leaf);
-	if (!rt) {
-		rt = net->ipv6.fib6_null_entry;
+	f6i = rcu_dereference(fn->leaf);
+	if (!f6i) {
+		f6i = net->ipv6.fib6_null_entry;
 	} else {
-		rt = rt6_device_match(net, rt, &fl6->saddr,
+		f6i = rt6_device_match(net, f6i, &fl6->saddr,
 				      fl6->flowi6_oif, flags);
-		if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
-			rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
-						  skb, flags);
+		if (f6i->rt6i_nsiblings && fl6->flowi6_oif == 0)
+			f6i = rt6_multipath_select(net, f6i, fl6,
+						   fl6->flowi6_oif, skb, flags);
 	}
-	if (rt == net->ipv6.fib6_null_entry) {
+	if (f6i == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto restart;
 	}
+
 	/* Search through exception table */
-	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
-	if (rt_cache) {
-		rt = rt_cache;
+	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+	if (rt) {
 		if (ip6_hold_safe(net, &rt, true))
 			dst_use_noref(&rt->dst, jiffies);
-	} else if (dst_hold_safe(&rt->dst)) {
-		struct rt6_info *nrt;
-
-		nrt = ip6_create_rt_rcu(rt);
-		dst_release(&rt->dst);
-		rt = nrt;
-	} else {
+	} else if (f6i == net->ipv6.fib6_null_entry) {
 		rt = net->ipv6.ip6_null_entry;
 		dst_hold(&rt->dst);
+	} else {
+		rt = ip6_create_rt_rcu(f6i);
+		if (!rt) {
+			rt = net->ipv6.ip6_null_entry;
+			dst_hold(&rt->dst);
+		}
 	}
 
 	rcu_read_unlock();
@@ -1218,9 +1219,6 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 	 *	Clone the route.
 	 */
 
-	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
-		ort = ort->from;
-
 	rcu_read_lock();
 	dev = ip6_rt_get_dev_rcu(ort);
 	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
@@ -1446,11 +1444,6 @@ static int rt6_insert_exception(struct rt6_info *nrt,
 	struct rt6_exception *rt6_ex;
 	int err = 0;
 
-	/* ort can't be a cache or pcpu route */
-	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
-		ort = ort->from;
-	WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
-
 	spin_lock_bh(&rt6_exception_lock);
 
 	if (ort->exception_bucket_flushed) {
@@ -1589,7 +1582,7 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
 }
 
 /* Remove the passed in cached rt from the hash table that contains it */
-int rt6_remove_exception_rt(struct rt6_info *rt)
+static int rt6_remove_exception_rt(struct rt6_info *rt)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_info *from = rt->from;
@@ -1854,7 +1847,8 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			       const struct sk_buff *skb, int flags)
 {
 	struct fib6_node *fn, *saved_fn;
-	struct rt6_info *rt, *rt_cache;
+	struct rt6_info *f6i;
+	struct rt6_info *rt;
 	int strict = 0;
 
 	strict |= flags & RT6_LOOKUP_F_IFACE;
@@ -1871,10 +1865,10 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		oif = 0;
 
 redo_rt6_select:
-	rt = rt6_select(net, fn, oif, strict);
-	if (rt->rt6i_nsiblings)
-		rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
-	if (rt == net->ipv6.fib6_null_entry) {
+	f6i = rt6_select(net, fn, oif, strict);
+	if (f6i->rt6i_nsiblings)
+		f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
+	if (f6i == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto redo_rt6_select;
@@ -1886,18 +1880,17 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		}
 	}
 
-	/*Search through exception table */
-	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
-	if (rt_cache)
-		rt = rt_cache;
-
-	if (rt == net->ipv6.fib6_null_entry) {
+	if (f6i == net->ipv6.fib6_null_entry) {
 		rt = net->ipv6.ip6_null_entry;
 		rcu_read_unlock();
 		dst_hold(&rt->dst);
 		trace_fib6_table_lookup(net, rt, table, fl6);
 		return rt;
-	} else if (rt->rt6i_flags & RTF_CACHE) {
+	}
+
+	/*Search through exception table */
+	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+	if (rt) {
 		if (ip6_hold_safe(net, &rt, true))
 			dst_use_noref(&rt->dst, jiffies);
 
@@ -1905,7 +1898,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		trace_fib6_table_lookup(net, rt, table, fl6);
 		return rt;
 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
-			    !(rt->rt6i_flags & RTF_GATEWAY))) {
+			    !(f6i->rt6i_flags & RTF_GATEWAY))) {
 		/* Create a RTF_CACHE clone which will not be
 		 * owned by the fib6 tree.  It is for the special case where
 		 * the daddr in the skb during the neighbor look-up is different
@@ -1914,16 +1907,16 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 		struct rt6_info *uncached_rt;
 
-		if (ip6_hold_safe(net, &rt, true)) {
-			dst_use_noref(&rt->dst, jiffies);
+		if (ip6_hold_safe(net, &f6i, true)) {
+			dst_use_noref(&f6i->dst, jiffies);
 		} else {
 			rcu_read_unlock();
-			uncached_rt = rt;
+			uncached_rt = f6i;
 			goto uncached_rt_out;
 		}
 		rcu_read_unlock();
 
-		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
+		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
 		dst_release(&rt->dst);
 
 		if (uncached_rt) {
@@ -1946,18 +1939,18 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 		struct rt6_info *pcpu_rt;
 
-		dst_use_noref(&rt->dst, jiffies);
+		dst_use_noref(&f6i->dst, jiffies);
 		local_bh_disable();
-		pcpu_rt = rt6_get_pcpu_route(rt);
+		pcpu_rt = rt6_get_pcpu_route(f6i);
 
 		if (!pcpu_rt) {
 			/* atomic_inc_not_zero() is needed when using rcu */
-			if (atomic_inc_not_zero(&rt->rt6i_ref)) {
+			if (atomic_inc_not_zero(&f6i->rt6i_ref)) {
 				/* No dst_hold() on rt is needed because grabbing
 				 * rt->rt6i_ref makes sure rt can't be released.
 				 */
-				pcpu_rt = rt6_make_pcpu_route(net, rt);
-				rt6_release(rt);
+				pcpu_rt = rt6_make_pcpu_route(net, f6i);
+				rt6_release(f6i);
 			} else {
 				/* rt is already removed from tree */
 				pcpu_rt = net->ipv6.ip6_null_entry;
@@ -2419,7 +2412,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 					     int flags)
 {
 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
-	struct rt6_info *rt, *rt_cache;
+	struct rt6_info *ret = NULL, *rt_cache;
+	struct rt6_info *rt;
 	struct fib6_node *fn;
 
 	/* Get the "current" route for this destination and
@@ -2458,7 +2452,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 			if (rt_cache &&
 			    ipv6_addr_equal(&rdfl->gateway,
 					    &rt_cache->rt6i_gateway)) {
-				rt = rt_cache;
+				ret = rt_cache;
 				break;
 			}
 			continue;
@@ -2469,7 +2463,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	if (!rt)
 		rt = net->ipv6.fib6_null_entry;
 	else if (rt->rt6i_flags & RTF_REJECT) {
-		rt = net->ipv6.ip6_null_entry;
+		ret = net->ipv6.ip6_null_entry;
 		goto out;
 	}
 
@@ -2480,12 +2474,15 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	}
 
 out:
-	ip6_hold_safe(net, &rt, true);
+	if (ret)
+		dst_hold(&ret->dst);
+	else
+		ret = ip6_create_rt_rcu(rt);
 
 	rcu_read_unlock();
 
-	trace_fib6_table_lookup(net, rt, table, fl6);
-	return rt;
+	trace_fib6_table_lookup(net, ret, table, fl6);
+	return ret;
 };
 
 static struct dst_entry *ip6_route_redirect(struct net *net,
@@ -3182,6 +3179,22 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
 	return err;
 }
 
+static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
+{
+	int rc = -ESRCH;
+
+	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
+		goto out;
+
+	if (cfg->fc_flags & RTF_GATEWAY &&
+	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
+		goto out;
+	if (dst_hold_safe(&rt->dst))
+		rc = rt6_remove_exception_rt(rt);
+out:
+	return rc;
+}
+
 static int ip6_route_del(struct fib6_config *cfg,
 			 struct netlink_ext_ack *extack)
 {
@@ -3206,11 +3219,16 @@ static int ip6_route_del(struct fib6_config *cfg,
 	if (fn) {
 		for_each_fib6_node_rt_rcu(fn) {
 			if (cfg->fc_flags & RTF_CACHE) {
+				int rc;
+
 				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
 							      &cfg->fc_src);
-				if (!rt_cache)
-					continue;
-				rt = rt_cache;
+				if (rt_cache) {
+					rc = ip6_del_cached_rt(rt_cache, cfg);
+					if (rc != -ESRCH)
+						return rc;
+				}
+				continue;
 			}
 			if (cfg->fc_ifindex &&
 			    (!rt->fib6_nh.nh_dev ||
@@ -3327,7 +3345,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 				     NEIGH_UPDATE_F_ISROUTER)),
 		     NDISC_REDIRECT, &ndopts);
 
-	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
+	nrt = ip6_rt_cache_alloc(rt->from, &msg->dest, NULL);
 	if (!nrt)
 		goto out;
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 18/21] net/ipv6: introduce fib6_info struct and helpers
From: David Ahern @ 2018-04-18  0:33 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com>

Add fib6_info struct and alloc, destroy, hold and release helpers.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/ip6_fib.c    | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 115 insertions(+)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 159f651dee55..630392ae12d8 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -38,6 +38,7 @@
 #endif
 
 struct rt6_info;
+struct fib6_info;
 
 struct fib6_config {
 	u32		fc_table;
@@ -132,6 +133,46 @@ struct fib6_nh {
 	int			nh_weight;
 };
 
+struct fib6_info {
+	struct fib6_table		*rt6i_table;
+	struct fib6_info __rcu		*rt6_next;
+	struct fib6_node __rcu		*rt6i_node;
+
+	/* Multipath routes:
+	 * siblings is a list of fib6_info that have the the same metric/weight,
+	 * destination, but not the same gateway. nsiblings is just a cache
+	 * to speed up lookup.
+	 */
+	struct list_head		rt6i_siblings;
+	unsigned int			rt6i_nsiblings;
+
+	atomic_t			rt6i_ref;
+	struct inet6_dev		*rt6i_idev;
+	unsigned long			expires;
+	struct dst_metrics		*fib6_metrics;
+#define fib6_pmtu		fib6_metrics->metrics[RTAX_MTU-1]
+
+	struct rt6key			rt6i_dst;
+	u32				rt6i_flags;
+	struct rt6key			rt6i_src;
+	struct rt6key			rt6i_prefsrc;
+
+	struct rt6_info * __percpu	*rt6i_pcpu;
+	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
+
+	u32				rt6i_metric;
+	u8				rt6i_protocol;
+	u8				fib6_type;
+	u8				exception_bucket_flushed:1,
+					should_flush:1,
+					dst_nocount:1,
+					dst_nopolicy:1,
+					dst_host:1,
+					unused:3;
+
+	struct fib6_nh			fib6_nh;
+};
+
 struct rt6_info {
 	struct dst_entry		dst;
 	struct rt6_info __rcu		*rt6_next;
@@ -291,6 +332,20 @@ static inline void ip6_rt_put(struct rt6_info *rt)
 
 void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
 
+struct rt6_info *fib6_info_alloc(gfp_t gfp_flags);
+void fib6_info_destroy(struct rt6_info *f6i);
+
+static inline void fib6_info_hold(struct rt6_info *f6i)
+{
+	atomic_inc(&f6i->rt6i_ref);
+}
+
+static inline void fib6_info_release(struct rt6_info *f6i)
+{
+	if (f6i && atomic_dec_and_test(&f6i->rt6i_ref))
+		fib6_info_destroy(f6i);
+}
+
 static inline void rt6_hold(struct rt6_info *rt)
 {
 	atomic_inc(&rt->rt6i_ref);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 53df4a98a7f7..d07578d84db0 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -145,6 +145,66 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
 	       addr[fn_bit >> 5];
 }
 
+struct rt6_info *fib6_info_alloc(gfp_t gfp_flags)
+{
+	struct rt6_info *f6i;
+
+	f6i = kzalloc(sizeof(*f6i), gfp_flags);
+	if (!f6i)
+		return NULL;
+
+	f6i->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
+	if (!f6i->rt6i_pcpu) {
+		kfree(f6i);
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&f6i->rt6i_siblings);
+	f6i->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
+
+	atomic_inc(&f6i->rt6i_ref);
+
+	return f6i;
+}
+
+void fib6_info_destroy(struct rt6_info *f6i)
+{
+	struct rt6_exception_bucket *bucket;
+
+	WARN_ON(f6i->rt6i_node);
+
+	bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
+	if (bucket) {
+		f6i->rt6i_exception_bucket = NULL;
+		kfree(bucket);
+	}
+
+	if (f6i->rt6i_pcpu) {
+		int cpu;
+
+		for_each_possible_cpu(cpu) {
+			struct rt6_info **ppcpu_rt;
+			struct rt6_info *pcpu_rt;
+
+			ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
+			pcpu_rt = *ppcpu_rt;
+			if (pcpu_rt) {
+				dst_dev_put(&pcpu_rt->dst);
+				dst_release(&pcpu_rt->dst);
+				*ppcpu_rt = NULL;
+			}
+		}
+	}
+
+	if (f6i->rt6i_idev)
+		in6_dev_put(f6i->rt6i_idev);
+	if (f6i->fib6_nh.nh_dev)
+		dev_put(f6i->fib6_nh.nh_dev);
+
+	kfree(f6i);
+}
+EXPORT_SYMBOL_GPL(fib6_info_destroy);
+
 static struct fib6_node *node_alloc(struct net *net)
 {
 	struct fib6_node *fn;
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 19/21] net/ipv6: separate handling of FIB entries from dst based routes
From: David Ahern @ 2018-04-18  0:33 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com>

Last step before flipping the data type for FIB entries:
- use fib6_info_alloc to create FIB entries in ip6_route_info_create
  and addrconf_dst_alloc
- use fib6_info_release in place of dst_release, ip6_rt_put and
  rt6_release
- remove the dst_hold before calling __ip6_ins_rt or ip6_del_rt
- when purging routes, drop per-cpu routes
- replace inc and dec of rt6i_ref with fib6_info_hold and fib6_info_release
- use rt->from since it points to the FIB entry
- drop references to exception bucket, fib6_metrics and per-cpu from
  dst entries (those are relevant for fib entries only)

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h   |   4 +-
 include/net/ip6_route.h |   3 +-
 net/ipv6/addrconf.c     |  18 +++--
 net/ipv6/anycast.c      |   7 +-
 net/ipv6/ip6_fib.c      |  55 ++++++++++------
 net/ipv6/ip6_output.c   |   3 +-
 net/ipv6/ndisc.c        |   6 +-
 net/ipv6/route.c        | 171 +++++++++++++++++-------------------------------
 8 files changed, 115 insertions(+), 152 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 630392ae12d8..6c3d92bb3459 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -314,9 +314,7 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt)
 
 	if (rt->rt6i_flags & RTF_PCPU ||
 	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
-		rt = rt->from;
-
-	rt6_get_cookie_safe(rt, &cookie);
+		rt6_get_cookie_safe(rt->from, &cookie);
 
 	return cookie;
 }
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 686cdc7f356a..57d0d45667f1 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -114,8 +114,7 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
 				      unsigned int prefs,
 				      struct in6_addr *saddr)
 {
-	struct inet6_dev *idev =
-			rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
+	struct inet6_dev *idev = rt ? rt->rt6i_idev : NULL;
 	int err = 0;
 
 	if (rt && rt->rt6i_prefsrc.plen)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 915cd0734b27..e533a447f68c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -916,7 +916,6 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 		pr_warn("Freeing alive inet6 address %p\n", ifp);
 		return;
 	}
-	ip6_rt_put(ifp->rt);
 
 	kfree_rcu(ifp, rcu);
 }
@@ -1102,8 +1101,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	inet6addr_notifier_call_chain(NETDEV_UP, ifa);
 out:
 	if (unlikely(err < 0)) {
-		if (rt)
-			ip6_rt_put(rt);
+		fib6_info_release(rt);
+
 		if (ifa) {
 			if (ifa->idev)
 				in6_dev_put(ifa->idev);
@@ -1191,7 +1190,7 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_r
 		else {
 			if (!(rt->rt6i_flags & RTF_EXPIRES))
 				fib6_set_expires(rt, expires);
-			ip6_rt_put(rt);
+			fib6_info_release(rt);
 		}
 	}
 }
@@ -2375,8 +2374,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 			continue;
 		if ((rt->rt6i_flags & noflags) != 0)
 			continue;
-		if (!dst_hold_safe(&rt->dst))
-			rt = NULL;
+		fib6_info_hold(rt);
 		break;
 	}
 out:
@@ -2687,7 +2685,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 			addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
 					      dev, expires, flags, GFP_ATOMIC);
 		}
-		ip6_rt_put(rt);
+		fib6_info_release(rt);
 	}
 
 	/* Try to figure out our local address for this prefix */
@@ -3361,7 +3359,7 @@ static int fixup_permanent_addr(struct net *net,
 		ifp->rt = rt;
 		spin_unlock(&ifp->lock);
 
-		ip6_rt_put(prev);
+		fib6_info_release(prev);
 	}
 
 	if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
@@ -5636,8 +5634,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 				ip6_del_rt(net, rt);
 		}
 		if (ifp->rt) {
-			if (dst_hold_safe(&ifp->rt->dst))
-				ip6_del_rt(net, ifp->rt);
+			ip6_del_rt(net, ifp->rt);
+			ifp->rt = NULL;
 		}
 		rt_genid_bump_ipv6(net);
 		break;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index e456386fe4d5..3db8fe10322b 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -213,7 +213,7 @@ static void aca_put(struct ifacaddr6 *ac)
 {
 	if (refcount_dec_and_test(&ac->aca_refcnt)) {
 		in6_dev_put(ac->aca_idev);
-		dst_release(&ac->aca_rt->dst);
+		fib6_info_release(ac->aca_rt);
 		kfree(ac);
 	}
 }
@@ -231,6 +231,7 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
 	aca->aca_addr = *addr;
 	in6_dev_hold(idev);
 	aca->aca_idev = idev;
+	fib6_info_hold(rt);
 	aca->aca_rt = rt;
 	aca->aca_users = 1;
 	/* aca_tstamp should be updated upon changes */
@@ -274,7 +275,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 	}
 	aca = aca_alloc(rt, addr);
 	if (!aca) {
-		ip6_rt_put(rt);
+		fib6_info_release(rt);
 		err = -ENOMEM;
 		goto out;
 	}
@@ -330,7 +331,6 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 	write_unlock_bh(&idev->lock);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
-	dst_hold(&aca->aca_rt->dst);
 	ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
 
 	aca_put(aca);
@@ -358,7 +358,6 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
 
 		addrconf_leave_solict(idev, &aca->aca_addr);
 
-		dst_hold(&aca->aca_rt->dst);
 		ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
 
 		aca_put(aca);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index d07578d84db0..4d6bd033dccd 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -170,6 +170,7 @@ struct rt6_info *fib6_info_alloc(gfp_t gfp_flags)
 void fib6_info_destroy(struct rt6_info *f6i)
 {
 	struct rt6_exception_bucket *bucket;
+	struct dst_metrics *m;
 
 	WARN_ON(f6i->rt6i_node);
 
@@ -201,6 +202,10 @@ void fib6_info_destroy(struct rt6_info *f6i)
 	if (f6i->fib6_nh.nh_dev)
 		dev_put(f6i->fib6_nh.nh_dev);
 
+	m = f6i->fib6_metrics;
+	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
+		kfree(m);
+
 	kfree(f6i);
 }
 EXPORT_SYMBOL_GPL(fib6_info_destroy);
@@ -714,7 +719,7 @@ static struct fib6_node *fib6_add_1(struct net *net,
 			/* clean up an intermediate node */
 			if (!(fn->fn_flags & RTN_RTINFO)) {
 				RCU_INIT_POINTER(fn->leaf, NULL);
-				rt6_release(leaf);
+				fib6_info_release(leaf);
 			/* remove null_entry in the root node */
 			} else if (fn->fn_flags & RTN_TL_ROOT &&
 				   rcu_access_pointer(fn->leaf) ==
@@ -898,12 +903,32 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
 			if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
 				new_leaf = fib6_find_prefix(net, table, fn);
 				atomic_inc(&new_leaf->rt6i_ref);
+
 				rcu_assign_pointer(fn->leaf, new_leaf);
-				rt6_release(rt);
+				fib6_info_release(rt);
 			}
 			fn = rcu_dereference_protected(fn->parent,
 				    lockdep_is_held(&table->tb6_lock));
 		}
+
+		if (rt->rt6i_pcpu) {
+			int cpu;
+
+			/* release the reference to this fib entry from
+			 * all of its cached pcpu routes
+			 */
+			for_each_possible_cpu(cpu) {
+				struct rt6_info **ppcpu_rt;
+				struct rt6_info *pcpu_rt;
+
+				ppcpu_rt = per_cpu_ptr(rt->rt6i_pcpu, cpu);
+				pcpu_rt = *ppcpu_rt;
+				if (pcpu_rt) {
+					fib6_info_release(pcpu_rt->from);
+					pcpu_rt->from = NULL;
+				}
+			}
+		}
 	}
 }
 
@@ -1099,7 +1124,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 		fib6_purge_rt(iter, fn, info->nl_net);
 		if (rcu_access_pointer(fn->rr_ptr) == iter)
 			fn->rr_ptr = NULL;
-		rt6_release(iter);
+		fib6_info_release(iter);
 
 		if (nsiblings) {
 			/* Replacing an ECMP route, remove all siblings */
@@ -1115,7 +1140,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 					fib6_purge_rt(iter, fn, info->nl_net);
 					if (rcu_access_pointer(fn->rr_ptr) == iter)
 						fn->rr_ptr = NULL;
-					rt6_release(iter);
+					fib6_info_release(iter);
 					nsiblings--;
 					info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
 				} else {
@@ -1183,9 +1208,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	int replace_required = 0;
 	int sernum = fib6_new_sernum(info->nl_net);
 
-	if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
-		return -EINVAL;
-
 	if (info->nlh) {
 		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
 			allow_create = 0;
@@ -1300,7 +1322,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 			if (pn_leaf == rt) {
 				pn_leaf = NULL;
 				RCU_INIT_POINTER(pn->leaf, NULL);
-				atomic_dec(&rt->rt6i_ref);
+				fib6_info_release(rt);
 			}
 			if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
 				pn_leaf = fib6_find_prefix(info->nl_net, table,
@@ -1312,7 +1334,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 					    info->nl_net->ipv6.fib6_null_entry;
 				}
 #endif
-				atomic_inc(&pn_leaf->rt6i_ref);
+				fib6_info_hold(pn_leaf);
 				rcu_assign_pointer(pn->leaf, pn_leaf);
 			}
 		}
@@ -1334,10 +1356,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	     (fn->fn_flags & RTN_TL_ROOT &&
 	      !rcu_access_pointer(fn->leaf))))
 		fib6_repair_tree(info->nl_net, table, fn);
-	/* Always release dst as dst->__refcnt is guaranteed
-	 * to be taken before entering this function
-	 */
-	dst_release_immediate(&rt->dst);
 	return err;
 }
 
@@ -1637,7 +1655,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 				new_fn_leaf = net->ipv6.fib6_null_entry;
 			}
 #endif
-			atomic_inc(&new_fn_leaf->rt6i_ref);
+			fib6_info_hold(new_fn_leaf);
 			rcu_assign_pointer(fn->leaf, new_fn_leaf);
 			return pn;
 		}
@@ -1693,7 +1711,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 			return pn;
 
 		RCU_INIT_POINTER(pn->leaf, NULL);
-		rt6_release(pn_leaf);
+		fib6_info_release(pn_leaf);
 		fn = pn;
 	}
 }
@@ -1763,7 +1781,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 	call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
 	if (!info->skip_notify)
 		inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
-	rt6_release(rt);
+	fib6_info_release(rt);
 }
 
 /* Need to own table->tb6_lock */
@@ -2261,9 +2279,8 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
 
 	dev = rt->fib6_nh.nh_dev;
 	seq_printf(seq, " %08x %08x %08x %08x %8s\n",
-		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
-		   rt->dst.__use, rt->rt6i_flags,
-		   dev ? dev->name : "");
+		   rt->rt6i_metric, atomic_read(&rt->rt6i_ref), 0,
+		   rt->rt6i_flags, dev ? dev->name : "");
 	iter->w.leaf = NULL;
 	return 0;
 }
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a39b04f9fa6e..3db47986ef38 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -968,7 +968,8 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
 		if (!had_dst)
 			*dst = ip6_route_output(net, sk, fl6);
 		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
-		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
+		err = ip6_route_get_saddr(net, rt ? rt->from : NULL,
+					  &fl6->daddr,
 					  sk ? inet6_sk(sk)->srcprefs : 0,
 					  &fl6->saddr);
 		if (err)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 556717154fa3..a28857088bff 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1283,7 +1283,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 			ND_PRINTK(0, err,
 				  "RA: %s got default router without neighbour\n",
 				  __func__);
-			ip6_rt_put(rt);
+			fib6_info_release(rt);
 			return;
 		}
 	}
@@ -1313,7 +1313,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 			ND_PRINTK(0, err,
 				  "RA: %s got default router without neighbour\n",
 				  __func__);
-			ip6_rt_put(rt);
+			fib6_info_release(rt);
 			return;
 		}
 		neigh->flags |= NTF_ROUTER;
@@ -1499,7 +1499,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 		ND_PRINTK(2, warn, "RA: invalid RA options\n");
 	}
 out:
-	ip6_rt_put(rt);
+	fib6_info_release(rt);
 	if (neigh)
 		neigh_release(neigh);
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ad9eaecf539c..0d861bd07673 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -351,13 +351,11 @@ static void rt6_info_init(struct rt6_info *rt)
 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 	INIT_LIST_HEAD(&rt->rt6i_siblings);
 	INIT_LIST_HEAD(&rt->rt6i_uncached);
-	rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
 }
 
 /* allocate dst with ip6_dst_ops */
-static struct rt6_info *__ip6_dst_alloc(struct net *net,
-					struct net_device *dev,
-					int flags)
+struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
+			       int flags)
 {
 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 					1, DST_OBSOLETE_FORCE_CHK, flags);
@@ -369,35 +367,15 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
 
 	return rt;
 }
-
-struct rt6_info *ip6_dst_alloc(struct net *net,
-			       struct net_device *dev,
-			       int flags)
-{
-	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
-
-	if (rt) {
-		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
-		if (!rt->rt6i_pcpu) {
-			dst_release_immediate(&rt->dst);
-			return NULL;
-		}
-	}
-
-	return rt;
-}
 EXPORT_SYMBOL(ip6_dst_alloc);
 
 static void ip6_dst_destroy(struct dst_entry *dst)
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
-	struct rt6_exception_bucket *bucket;
 	struct rt6_info *from = rt->from;
 	struct inet6_dev *idev;
-	struct dst_metrics *m;
 
 	dst_destroy_metrics_generic(dst);
-	free_percpu(rt->rt6i_pcpu);
 	rt6_uncached_list_del(rt);
 
 	idev = rt->rt6i_idev;
@@ -405,18 +383,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 		rt->rt6i_idev = NULL;
 		in6_dev_put(idev);
 	}
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
-	if (bucket) {
-		rt->rt6i_exception_bucket = NULL;
-		kfree(bucket);
-	}
-
-	m = rt->fib6_metrics;
-	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
-		kfree(m);
 
 	rt->from = NULL;
-	dst_release(&from->dst);
+	fib6_info_release(from);
 }
 
 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -891,7 +860,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 		else
 			fib6_set_expires(rt, jiffies + HZ * lifetime);
 
-		ip6_rt_put(rt);
+		fib6_info_release(rt);
 	}
 	return 0;
 }
@@ -1010,11 +979,9 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
 
 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
 {
-	BUG_ON(from->from);
-
 	rt->rt6i_flags &= ~RTF_EXPIRES;
-	if (dst_hold_safe(&from->dst))
-		rt->from = from;
+	fib6_info_hold(from);
+	rt->from = from;
 	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
 	if (from->fib6_metrics != &dst_default_metrics) {
 		rt->dst._metrics |= DST_METRICS_REFCOUNTED;
@@ -1084,7 +1051,7 @@ static struct rt6_info *ip6_create_rt_rcu(struct rt6_info *rt)
 	struct net_device *dev = rt->fib6_nh.nh_dev;
 	struct rt6_info *nrt;
 
-	nrt = __ip6_dst_alloc(dev_net(dev), dev, flags);
+	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
 	if (nrt)
 		ip6_rt_copy_init(nrt, rt);
 
@@ -1203,8 +1170,6 @@ int ip6_ins_rt(struct net *net, struct rt6_info *rt)
 {
 	struct nl_info info = {	.nl_net = net, };
 
-	/* Hold dst to account for the reference from the fib6 tree */
-	dst_hold(&rt->dst);
 	return __ip6_ins_rt(rt, &info, NULL);
 }
 
@@ -1221,7 +1186,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 
 	rcu_read_lock();
 	dev = ip6_rt_get_dev_rcu(ort);
-	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
+	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
 	rcu_read_unlock();
 	if (!rt)
 		return NULL;
@@ -1256,7 +1221,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
 
 	rcu_read_lock();
 	dev = ip6_rt_get_dev_rcu(rt);
-	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, flags);
+	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
 	rcu_read_unlock();
 	if (!pcpu_rt)
 		return NULL;
@@ -1317,7 +1282,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
 	net = dev_net(rt6_ex->rt6i->dst.dev);
 	rt6_ex->rt6i->rt6i_node = NULL;
 	hlist_del_rcu(&rt6_ex->hlist);
-	rt6_release(rt6_ex->rt6i);
+	ip6_rt_put(rt6_ex->rt6i);
 	kfree_rcu(rt6_ex, rcu);
 	WARN_ON_ONCE(!bucket->depth);
 	bucket->depth--;
@@ -1907,17 +1872,11 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 		struct rt6_info *uncached_rt;
 
-		if (ip6_hold_safe(net, &f6i, true)) {
-			dst_use_noref(&f6i->dst, jiffies);
-		} else {
-			rcu_read_unlock();
-			uncached_rt = f6i;
-			goto uncached_rt_out;
-		}
+		fib6_info_hold(f6i);
 		rcu_read_unlock();
 
 		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
-		dst_release(&rt->dst);
+		fib6_info_release(f6i);
 
 		if (uncached_rt) {
 			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
@@ -1930,7 +1889,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			dst_hold(&uncached_rt->dst);
 		}
 
-uncached_rt_out:
 		trace_fib6_table_lookup(net, uncached_rt, table, fl6);
 		return uncached_rt;
 
@@ -1939,24 +1897,12 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 		struct rt6_info *pcpu_rt;
 
-		dst_use_noref(&f6i->dst, jiffies);
 		local_bh_disable();
 		pcpu_rt = rt6_get_pcpu_route(f6i);
 
-		if (!pcpu_rt) {
-			/* atomic_inc_not_zero() is needed when using rcu */
-			if (atomic_inc_not_zero(&f6i->rt6i_ref)) {
-				/* No dst_hold() on rt is needed because grabbing
-				 * rt->rt6i_ref makes sure rt can't be released.
-				 */
-				pcpu_rt = rt6_make_pcpu_route(net, f6i);
-				rt6_release(f6i);
-			} else {
-				/* rt is already removed from tree */
-				pcpu_rt = net->ipv6.ip6_null_entry;
-				dst_hold(&pcpu_rt->dst);
-			}
-		}
+		if (!pcpu_rt)
+			pcpu_rt = rt6_make_pcpu_route(net, f6i);
+
 		local_bh_enable();
 		rcu_read_unlock();
 		trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
@@ -2193,11 +2139,26 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
  *	Destination cache support functions
  */
 
+static bool fib6_check(struct rt6_info *f6i, u32 cookie)
+{
+	u32 rt_cookie = 0;
+
+	if ((f6i && !rt6_get_cookie_safe(f6i, &rt_cookie)) ||
+	     rt_cookie != cookie)
+		return false;
+
+	if (fib6_check_expired(f6i))
+		return false;
+
+	return true;
+}
+
 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
 {
 	u32 rt_cookie = 0;
 
-	if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
+	if ((rt->from && !rt6_get_cookie_safe(rt->from, &rt_cookie)) ||
+	    rt_cookie != cookie)
 		return NULL;
 
 	if (rt6_check_expired(rt))
@@ -2210,7 +2171,7 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
 {
 	if (!__rt6_check_expired(rt) &&
 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
-	    rt6_check(rt->from, cookie))
+	    fib6_check(rt->from, cookie))
 		return &rt->dst;
 	else
 		return NULL;
@@ -2241,7 +2202,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 	if (rt) {
 		if (rt->rt6i_flags & RTF_CACHE) {
 			if (rt6_check_expired(rt)) {
-				ip6_del_rt(dev_net(dst->dev), rt);
+				rt6_remove_exception_rt(rt);
 				dst = NULL;
 			}
 		} else {
@@ -2262,12 +2223,12 @@ static void ip6_link_failure(struct sk_buff *skb)
 	if (rt) {
 		if (rt->rt6i_flags & RTF_CACHE) {
 			if (dst_hold_safe(&rt->dst))
-				ip6_del_rt(dev_net(rt->dst.dev), rt);
-		} else {
+				rt6_remove_exception_rt(rt);
+		} else if (rt->from) {
 			struct fib6_node *fn;
 
 			rcu_read_lock();
-			fn = rcu_dereference(rt->rt6i_node);
+			fn = rcu_dereference(rt->from->rt6i_node);
 			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
 				fn->fn_sernum = -1;
 			rcu_read_unlock();
@@ -2949,13 +2910,13 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 	if (!table)
 		goto out;
 
-	rt = ip6_dst_alloc(net, NULL,
-			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
-
-	if (!rt) {
-		err = -ENOMEM;
+	err = -ENOMEM;
+	rt = fib6_info_alloc(gfp_flags);
+	if (!rt)
 		goto out;
-	}
+
+	if (cfg->fc_flags & RTF_ADDRCONF)
+		rt->dst_nocount = true;
 
 	err = ip6_convert_metrics(net, rt, cfg);
 	if (err < 0)
@@ -3029,7 +2990,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		if (err)
 			goto out;
 
-		rt->fib6_nh.nh_gw = rt->rt6i_gateway = cfg->fc_gateway;
+		rt->fib6_nh.nh_gw = cfg->fc_gateway;
 	}
 
 	err = -ENODEV;
@@ -3066,7 +3027,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 	    !netif_carrier_ok(dev))
 		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
 	rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
-	rt->fib6_nh.nh_dev = rt->dst.dev = dev;
+	rt->fib6_nh.nh_dev = dev;
 	rt->rt6i_idev = idev;
 	rt->rt6i_table = table;
 
@@ -3078,9 +3039,8 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		dev_put(dev);
 	if (idev)
 		in6_dev_put(idev);
-	if (rt)
-		dst_release_immediate(&rt->dst);
 
+	fib6_info_release(rt);
 	return ERR_PTR(err);
 }
 
@@ -3095,6 +3055,7 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
 		return PTR_ERR(rt);
 
 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
+	fib6_info_release(rt);
 
 	return err;
 }
@@ -3116,7 +3077,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 	spin_unlock_bh(&table->tb6_lock);
 
 out:
-	ip6_rt_put(rt);
+	fib6_info_release(rt);
 	return err;
 }
 
@@ -3170,7 +3131,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
 out_unlock:
 	spin_unlock_bh(&table->tb6_lock);
 out_put:
-	ip6_rt_put(rt);
+	fib6_info_release(rt);
 
 	if (skb) {
 		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
@@ -3241,8 +3202,7 @@ static int ip6_route_del(struct fib6_config *cfg,
 				continue;
 			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
 				continue;
-			if (!dst_hold_safe(&rt->dst))
-				break;
+			fib6_info_hold(rt);
 			rcu_read_unlock();
 
 			/* if gateway was specified only delete the one hop */
@@ -3510,12 +3470,9 @@ static void __rt6_purge_dflt_routers(struct net *net,
 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
-			if (dst_hold_safe(&rt->dst)) {
-				rcu_read_unlock();
-				ip6_del_rt(net, rt);
-			} else {
-				rcu_read_unlock();
-			}
+			fib6_info_hold(rt);
+			rcu_read_unlock();
+			ip6_del_rt(net, rt);
 			goto restart;
 		}
 	}
@@ -3666,7 +3623,7 @@ struct rt6_info *addrconf_dst_alloc(struct net *net,
 	struct net_device *dev = idev->dev;
 	struct rt6_info *rt;
 
-	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
+	rt = fib6_info_alloc(gfp_flags);
 	if (!rt)
 		return ERR_PTR(-ENOMEM);
 
@@ -3687,8 +3644,8 @@ struct rt6_info *addrconf_dst_alloc(struct net *net,
 	}
 
 	rt->fib6_nh.nh_gw = *addr;
+	dev_hold(dev);
 	rt->fib6_nh.nh_dev = dev;
-	rt->rt6i_gateway  = *addr;
 	rt->rt6i_dst.addr = *addr;
 	rt->rt6i_dst.plen = 128;
 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
@@ -4325,7 +4282,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
 					    rt, &r_cfg);
 		if (err) {
-			dst_release_immediate(&rt->dst);
+			fib6_info_release(rt);
 			goto cleanup;
 		}
 
@@ -4342,6 +4299,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 	list_for_each_entry(nh, &rt6_nh_list, next) {
 		rt_last = nh->rt6_info;
 		err = __ip6_ins_rt(nh->rt6_info, info, extack);
+		fib6_info_release(nh->rt6_info);
+
 		/* save reference to first route for notification */
 		if (!rt_notif && !err)
 			rt_notif = nh->rt6_info;
@@ -4389,7 +4348,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 cleanup:
 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
 		if (nh->rt6_info)
-			dst_release_immediate(&nh->rt6_info->dst);
+			fib6_info_release(nh->rt6_info);
 		list_del(&nh->next);
 		kfree(nh);
 	}
@@ -4814,14 +4773,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		goto errout;
 	}
 
-	if (fibmatch && rt->from) {
-		struct rt6_info *ort = rt->from;
-
-		dst_hold(&ort->dst);
-		ip6_rt_put(rt);
-		rt = ort;
-	}
-
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb) {
 		ip6_rt_put(rt);
@@ -4831,12 +4782,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 
 	skb_dst_set(skb, &rt->dst);
 	if (fibmatch)
-		err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, iif,
+		err = rt6_fill_node(net, skb, rt->from, NULL, NULL, NULL, iif,
 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
 				    nlh->nlmsg_seq, 0);
 	else
-		err = rt6_fill_node(net, skb, rt, dst, &fl6.daddr, &fl6.saddr,
-				    iif, RTM_NEWROUTE,
+		err = rt6_fill_node(net, skb, rt->from, dst,
+				    &fl6.daddr, &fl6.saddr, iif, RTM_NEWROUTE,
 				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
 				    0);
 	if (err < 0) {
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 20/21] net/ipv6: Flip FIB entries to fib6_info
From: David Ahern @ 2018-04-18  0:33 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com>

Convert all code paths referencing a FIB entry from
rt6_info to fib6_info.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  |  64 ++---
 include/net/if_inet6.h                             |   4 +-
 include/net/ip6_fib.h                              |  42 ++--
 include/net/ip6_route.h                            |  28 +--
 include/net/netns/ipv6.h                           |   2 +-
 net/ipv6/addrconf.c                                |  20 +-
 net/ipv6/anycast.c                                 |   4 +-
 net/ipv6/ip6_fib.c                                 | 116 ++++-----
 net/ipv6/ndisc.c                                   |   2 +-
 net/ipv6/route.c                                   | 259 +++++++++++----------
 10 files changed, 271 insertions(+), 270 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index d995a0b52d7c..b85b15851841 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -442,7 +442,7 @@ struct mlxsw_sp_fib6_entry {
 
 struct mlxsw_sp_rt6 {
 	struct list_head list;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 };
 
 struct mlxsw_sp_lpm_tree {
@@ -3834,7 +3834,7 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
 
 	for (i = 0; i < nh_grp->count; i++) {
 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
-		struct rt6_info *rt = mlxsw_sp_rt6->rt;
+		struct fib6_info *rt = mlxsw_sp_rt6->rt;
 
 		if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
@@ -3920,7 +3920,7 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
 				  common);
 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
-		struct rt6_info *rt = mlxsw_sp_rt6->rt;
+		struct fib6_info *rt = mlxsw_sp_rt6->rt;
 
 		rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
 	}
@@ -4699,7 +4699,7 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
 }
 
-static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
+static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
 {
 	/* Packets with link-local destination IP arriving to the router
 	 * are trapped to the CPU, so no need to program specific routes
@@ -4721,7 +4721,7 @@ static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
 	return false;
 }
 
-static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
+static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
 {
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
 
@@ -4734,18 +4734,18 @@ static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
 	 * memory.
 	 */
 	mlxsw_sp_rt6->rt = rt;
-	rt6_hold(rt);
+	fib6_info_hold(rt);
 
 	return mlxsw_sp_rt6;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static void mlxsw_sp_rt6_release(struct rt6_info *rt)
+static void mlxsw_sp_rt6_release(struct fib6_info *rt)
 {
-	rt6_release(rt);
+	fib6_info_release(rt);
 }
 #else
-static void mlxsw_sp_rt6_release(struct rt6_info *rt)
+static void mlxsw_sp_rt6_release(struct fib6_info *rt)
 {
 }
 #endif
@@ -4756,13 +4756,13 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
 	kfree(mlxsw_sp_rt6);
 }
 
-static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
+static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
 {
 	/* RTF_CACHE routes are ignored */
 	return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
 }
 
-static struct rt6_info *
+static struct fib6_info *
 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
 {
 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
@@ -4771,7 +4771,7 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
 
 static struct mlxsw_sp_fib6_entry *
 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
-				 const struct rt6_info *nrt, bool replace)
+				 const struct fib6_info *nrt, bool replace)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 
@@ -4779,7 +4779,7 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
 		return NULL;
 
 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
-		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
 
 		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
 		 * virtual router.
@@ -4802,7 +4802,7 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
 
 static struct mlxsw_sp_rt6 *
 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
-			    const struct rt6_info *rt)
+			    const struct fib6_info *rt)
 {
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
 
@@ -4815,7 +4815,7 @@ mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
 }
 
 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
-					const struct rt6_info *rt,
+					const struct fib6_info *rt,
 					enum mlxsw_sp_ipip_type *ret)
 {
 	return rt->fib6_nh.nh_dev &&
@@ -4825,7 +4825,7 @@ static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
 				       struct mlxsw_sp_nexthop_group *nh_grp,
 				       struct mlxsw_sp_nexthop *nh,
-				       const struct rt6_info *rt)
+				       const struct fib6_info *rt)
 {
 	const struct mlxsw_sp_ipip_ops *ipip_ops;
 	struct mlxsw_sp_ipip_entry *ipip_entry;
@@ -4870,7 +4870,7 @@ static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
 				  struct mlxsw_sp_nexthop_group *nh_grp,
 				  struct mlxsw_sp_nexthop *nh,
-				  const struct rt6_info *rt)
+				  const struct fib6_info *rt)
 {
 	struct net_device *dev = rt->fib6_nh.nh_dev;
 
@@ -4897,7 +4897,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
 }
 
 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
-				    const struct rt6_info *rt)
+				    const struct fib6_info *rt)
 {
 	return rt->rt6i_flags & RTF_GATEWAY ||
 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
@@ -4928,7 +4928,7 @@ mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
 	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
 	nh_grp->count = fib6_entry->nrt6;
 	for (i = 0; i < nh_grp->count; i++) {
-		struct rt6_info *rt = mlxsw_sp_rt6->rt;
+		struct fib6_info *rt = mlxsw_sp_rt6->rt;
 
 		nh = &nh_grp->nexthops[i];
 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
@@ -5040,7 +5040,7 @@ mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
 static int
 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_fib6_entry *fib6_entry,
-				struct rt6_info *rt)
+				struct fib6_info *rt)
 {
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
 	int err;
@@ -5068,7 +5068,7 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
 static void
 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_fib6_entry *fib6_entry,
-				struct rt6_info *rt)
+				struct fib6_info *rt)
 {
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
 
@@ -5084,7 +5084,7 @@ mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
 
 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
 					 struct mlxsw_sp_fib_entry *fib_entry,
-					 const struct rt6_info *rt)
+					 const struct fib6_info *rt)
 {
 	/* Packets hitting RTF_REJECT routes need to be discarded by the
 	 * stack. We can rely on their destination device not having a
@@ -5118,7 +5118,7 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
 static struct mlxsw_sp_fib6_entry *
 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
 			   struct mlxsw_sp_fib_node *fib_node,
-			   struct rt6_info *rt)
+			   struct fib6_info *rt)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_entry *fib_entry;
@@ -5168,12 +5168,12 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
 
 static struct mlxsw_sp_fib6_entry *
 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
-			      const struct rt6_info *nrt, bool replace)
+			      const struct fib6_info *nrt, bool replace)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
 
 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
-		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
 
 		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
 			continue;
@@ -5198,7 +5198,7 @@ mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
 			       bool replace)
 {
 	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
-	struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
+	struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 
 	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
@@ -5213,7 +5213,7 @@ mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
 		struct mlxsw_sp_fib6_entry *last;
 
 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
-			struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);
+			struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
 
 			if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
 				break;
@@ -5268,7 +5268,7 @@ mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
 
 static struct mlxsw_sp_fib6_entry *
 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
-			   const struct rt6_info *rt)
+			   const struct fib6_info *rt)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_node *fib_node;
@@ -5287,7 +5287,7 @@ mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
 		return NULL;
 
 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
-		struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+		struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
 
 		if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
 		    rt->rt6i_metric == iter_rt->rt6i_metric &&
@@ -5316,7 +5316,7 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
-				    struct rt6_info *rt, bool replace)
+				    struct fib6_info *rt, bool replace)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_node *fib_node;
@@ -5373,7 +5373,7 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
 }
 
 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
-				     struct rt6_info *rt)
+				     struct fib6_info *rt)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_node *fib_node;
@@ -5836,7 +5836,7 @@ static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
 					 info);
 		fib_work->fen6_info = *fen6_info;
-		rt6_hold(fib_work->fen6_info.rt);
+		fib6_info_hold(fib_work->fen6_info.rt);
 		break;
 	}
 }
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d4088d1a688d..d6089b2e64fe 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -64,7 +64,7 @@ struct inet6_ifaddr {
 	struct delayed_work	dad_work;
 
 	struct inet6_dev	*idev;
-	struct rt6_info		*rt;
+	struct fib6_info	*rt;
 
 	struct hlist_node	addr_lst;
 	struct list_head	if_list;
@@ -144,7 +144,7 @@ struct ipv6_ac_socklist {
 struct ifacaddr6 {
 	struct in6_addr		aca_addr;
 	struct inet6_dev	*aca_idev;
-	struct rt6_info		*aca_rt;
+	struct fib6_info	*aca_rt;
 	struct ifacaddr6	*aca_next;
 	int			aca_users;
 	refcount_t		aca_refcnt;
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 6c3d92bb3459..d41b7bd69fb3 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -75,12 +75,12 @@ struct fib6_node {
 #ifdef CONFIG_IPV6_SUBTREES
 	struct fib6_node __rcu	*subtree;
 #endif
-	struct rt6_info __rcu	*leaf;
+	struct fib6_info __rcu	*leaf;
 
 	__u16			fn_bit;		/* bit key */
 	__u16			fn_flags;
 	int			fn_sernum;
-	struct rt6_info __rcu	*rr_ptr;
+	struct fib6_info __rcu	*rr_ptr;
 	struct rcu_head		rcu;
 };
 
@@ -176,7 +176,7 @@ struct fib6_info {
 struct rt6_info {
 	struct dst_entry		dst;
 	struct rt6_info __rcu		*rt6_next;
-	struct rt6_info			*from;
+	struct fib6_info		*from;
 
 	/*
 	 * Tail elements of dst_entry (__refcnt etc.)
@@ -242,20 +242,20 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
 	return ((struct rt6_info *)dst)->rt6i_idev;
 }
 
-static inline void fib6_clean_expires(struct rt6_info *f6i)
+static inline void fib6_clean_expires(struct fib6_info *f6i)
 {
 	f6i->rt6i_flags &= ~RTF_EXPIRES;
 	f6i->expires = 0;
 }
 
-static inline void fib6_set_expires(struct rt6_info *f6i,
+static inline void fib6_set_expires(struct fib6_info *f6i,
 				    unsigned long expires)
 {
 	f6i->expires = expires;
 	f6i->rt6i_flags |= RTF_EXPIRES;
 }
 
-static inline bool fib6_check_expired(const struct rt6_info *f6i)
+static inline bool fib6_check_expired(const struct fib6_info *f6i)
 {
 	if (f6i->rt6i_flags & RTF_EXPIRES)
 		return time_after(jiffies, f6i->expires);
@@ -288,7 +288,7 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
  * Return true if we can get cookie safely
  * Return false if not
  */
-static inline bool rt6_get_cookie_safe(const struct rt6_info *rt,
+static inline bool rt6_get_cookie_safe(const struct fib6_info *rt,
 				       u32 *cookie)
 {
 	struct fib6_node *fn;
@@ -330,15 +330,15 @@ static inline void ip6_rt_put(struct rt6_info *rt)
 
 void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
 
-struct rt6_info *fib6_info_alloc(gfp_t gfp_flags);
-void fib6_info_destroy(struct rt6_info *f6i);
+struct fib6_info *fib6_info_alloc(gfp_t gfp_flags);
+void fib6_info_destroy(struct fib6_info *f6i);
 
-static inline void fib6_info_hold(struct rt6_info *f6i)
+static inline void fib6_info_hold(struct fib6_info *f6i)
 {
 	atomic_inc(&f6i->rt6i_ref);
 }
 
-static inline void fib6_info_release(struct rt6_info *f6i)
+static inline void fib6_info_release(struct fib6_info *f6i)
 {
 	if (f6i && atomic_dec_and_test(&f6i->rt6i_ref))
 		fib6_info_destroy(f6i);
@@ -371,7 +371,7 @@ enum fib6_walk_state {
 struct fib6_walker {
 	struct list_head lh;
 	struct fib6_node *root, *node;
-	struct rt6_info *leaf;
+	struct fib6_info *leaf;
 	enum fib6_walk_state state;
 	unsigned int skip;
 	unsigned int count;
@@ -435,7 +435,7 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *,
 
 struct fib6_entry_notifier_info {
 	struct fib_notifier_info info; /* must be first */
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 };
 
 /*
@@ -457,14 +457,14 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
 			      const struct in6_addr *saddr, int src_len,
 			      bool exact_match);
 
-void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
+void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg),
 		    void *arg);
 
-int fib6_add(struct fib6_node *root, struct rt6_info *rt,
+int fib6_add(struct fib6_node *root, struct fib6_info *rt,
 	     struct nl_info *info, struct netlink_ext_ack *extack);
-int fib6_del(struct rt6_info *rt, struct nl_info *info);
+int fib6_del(struct fib6_info *rt, struct nl_info *info);
 
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
+void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
 		     unsigned int flags);
 
 void fib6_run_gc(unsigned long expires, struct net *net, bool force);
@@ -487,11 +487,11 @@ void __net_exit fib6_notifier_exit(struct net *net);
 unsigned int fib6_tables_seq_read(struct net *net);
 int fib6_tables_dump(struct net *net, struct notifier_block *nb);
 
-void fib6_update_sernum(struct net *net, struct rt6_info *rt);
-void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt);
+void fib6_update_sernum(struct net *net, struct fib6_info *rt);
+void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt);
 
-void fib6_metric_set(struct rt6_info *f6i, int metric, u32 val);
-static inline bool fib6_metric_locked(struct rt6_info *f6i, int metric)
+void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val);
+static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
 {
 	return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric));
 }
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 57d0d45667f1..d5fb1e4ae7ac 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -66,7 +66,7 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 }
 
-static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
+static inline bool rt6_qualify_for_ecmp(const struct fib6_info *rt)
 {
 	return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
 	       RTF_GATEWAY;
@@ -102,14 +102,14 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg);
 
 int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
 		  struct netlink_ext_ack *extack);
-int ip6_ins_rt(struct net *net, struct rt6_info *rt);
-int ip6_del_rt(struct net *net, struct rt6_info *rt);
+int ip6_ins_rt(struct net *net, struct fib6_info *rt);
+int ip6_del_rt(struct net *net, struct fib6_info *rt);
 
-void rt6_flush_exceptions(struct rt6_info *rt);
-void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args,
+void rt6_flush_exceptions(struct fib6_info *rt);
+void rt6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args,
 			unsigned long now);
 
-static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
+static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *rt,
 				      const struct in6_addr *daddr,
 				      unsigned int prefs,
 				      struct in6_addr *saddr)
@@ -136,9 +136,9 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
 
 void fib6_force_start_gc(struct net *net);
 
-struct rt6_info *addrconf_dst_alloc(struct net *net, struct inet6_dev *idev,
-				    const struct in6_addr *addr, bool anycast,
-				    gfp_t gfp_flags);
+struct fib6_info *addrconf_dst_alloc(struct net *net, struct inet6_dev *idev,
+				     const struct in6_addr *addr, bool anycast,
+				     gfp_t gfp_flags);
 
 struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
 			       int flags);
@@ -147,10 +147,10 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
  *	support functions for ND
  *
  */
-struct rt6_info *rt6_get_dflt_router(struct net *net,
+struct fib6_info *rt6_get_dflt_router(struct net *net,
 				     const struct in6_addr *addr,
 				     struct net_device *dev);
-struct rt6_info *rt6_add_dflt_router(struct net *net,
+struct fib6_info *rt6_add_dflt_router(struct net *net,
 				     const struct in6_addr *gwaddr,
 				     struct net_device *dev, unsigned int pref);
 
@@ -176,14 +176,14 @@ struct rt6_rtnl_dump_arg {
 	struct net *net;
 };
 
-int rt6_dump_route(struct rt6_info *rt, void *p_arg);
+int rt6_dump_route(struct fib6_info *rt, void *p_arg);
 void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
 void rt6_sync_up(struct net_device *dev, unsigned int nh_flags);
 void rt6_disable_ip(struct net_device *dev, unsigned long event);
 void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
-void rt6_multipath_rebalance(struct rt6_info *rt);
+void rt6_multipath_rebalance(struct fib6_info *rt);
 
 void rt6_uncached_list_add(struct rt6_info *rt);
 void rt6_uncached_list_del(struct rt6_info *rt);
@@ -271,7 +271,7 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
 		return daddr;
 }
 
-static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b)
+static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b)
 {
 	return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev &&
 	       a->rt6i_idev == b->rt6i_idev &&
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 74e4e1e449d5..97b3a54579c8 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -60,7 +60,7 @@ struct netns_ipv6 {
 #endif
 	struct xt_table		*ip6table_nat;
 #endif
-	struct rt6_info         *fib6_null_entry;
+	struct fib6_info	*fib6_null_entry;
 	struct rt6_info		*ip6_null_entry;
 	struct rt6_statistics   *rt6_stats;
 	struct timer_list       ip6_fib_timer;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e533a447f68c..a294e86a9b25 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -170,7 +170,7 @@ static void addrconf_type_change(struct net_device *dev,
 				 unsigned long event);
 static int addrconf_ifdown(struct net_device *dev, int how);
 
-static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
+static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 						  int plen,
 						  const struct net_device *dev,
 						  u32 flags, u32 noflags);
@@ -994,7 +994,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC;
 	struct net *net = dev_net(idev->dev);
 	struct inet6_ifaddr *ifa = NULL;
-	struct rt6_info *rt = NULL;
+	struct fib6_info *rt = NULL;
 	int err = 0;
 	int addr_type = ipv6_addr_type(addr);
 
@@ -1178,7 +1178,7 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires)
 static void
 cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt)
 {
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 
 	rt = addrconf_get_prefix_route(&ifp->addr,
 				       ifp->prefix_len,
@@ -2348,13 +2348,13 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 }
 
 
-static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
+static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 						  int plen,
 						  const struct net_device *dev,
 						  u32 flags, u32 noflags)
 {
 	struct fib6_node *fn;
-	struct rt6_info *rt = NULL;
+	struct fib6_info *rt = NULL;
 	struct fib6_table *table;
 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX;
 
@@ -2641,7 +2641,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 	 */
 
 	if (pinfo->onlink) {
-		struct rt6_info *rt;
+		struct fib6_info *rt;
 		unsigned long rt_expires;
 
 		/* Avoid arithmetic overflow. Really, we could
@@ -3346,7 +3346,7 @@ static int fixup_permanent_addr(struct net *net,
 	 * case regenerate the host route.
 	 */
 	if (!ifp->rt || !ifp->rt->rt6i_node) {
-		struct rt6_info *rt, *prev;
+		struct fib6_info *rt, *prev;
 
 		rt = addrconf_dst_alloc(net, idev, &ifp->addr, false,
 					GFP_ATOMIC);
@@ -3713,7 +3713,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
 
 	list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
-		struct rt6_info *rt = NULL;
+		struct fib6_info *rt = NULL;
 		bool keep;
 
 		addrconf_del_dad_work(ifa);
@@ -5626,7 +5626,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 			addrconf_leave_anycast(ifp);
 		addrconf_leave_solict(ifp->idev, &ifp->addr);
 		if (!ipv6_addr_any(&ifp->peer_addr)) {
-			struct rt6_info *rt;
+			struct fib6_info *rt;
 
 			rt = addrconf_get_prefix_route(&ifp->peer_addr, 128,
 						       ifp->idev->dev, 0, 0);
@@ -5982,7 +5982,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
 	list_for_each_entry(ifa, &idev->addr_list, if_list) {
 		spin_lock(&ifa->lock);
 		if (ifa->rt) {
-			struct rt6_info *rt = ifa->rt;
+			struct fib6_info *rt = ifa->rt;
 			int cpu;
 
 			rcu_read_lock();
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 3db8fe10322b..0e35657501a7 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -218,7 +218,7 @@ static void aca_put(struct ifacaddr6 *ac)
 	}
 }
 
-static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
+static struct ifacaddr6 *aca_alloc(struct fib6_info *rt,
 				   const struct in6_addr *addr)
 {
 	struct inet6_dev *idev = rt->rt6i_idev;
@@ -247,7 +247,7 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
 int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 {
 	struct ifacaddr6 *aca;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 	struct net *net;
 	int err;
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 4d6bd033dccd..77cf43b2d858 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -43,7 +43,7 @@ static struct kmem_cache *fib6_node_kmem __read_mostly;
 struct fib6_cleaner {
 	struct fib6_walker w;
 	struct net *net;
-	int (*func)(struct rt6_info *, void *arg);
+	int (*func)(struct fib6_info *, void *arg);
 	int sernum;
 	void *arg;
 };
@@ -54,7 +54,7 @@ struct fib6_cleaner {
 #define FWS_INIT FWS_L
 #endif
 
-static struct rt6_info *fib6_find_prefix(struct net *net,
+static struct fib6_info *fib6_find_prefix(struct net *net,
 					 struct fib6_table *table,
 					 struct fib6_node *fn);
 static struct fib6_node *fib6_repair_tree(struct net *net,
@@ -105,7 +105,7 @@ enum {
 	FIB6_NO_SERNUM_CHANGE = 0,
 };
 
-void fib6_update_sernum(struct net *net, struct rt6_info *rt)
+void fib6_update_sernum(struct net *net, struct fib6_info *rt)
 {
 	struct fib6_node *fn;
 
@@ -145,9 +145,9 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
 	       addr[fn_bit >> 5];
 }
 
-struct rt6_info *fib6_info_alloc(gfp_t gfp_flags)
+struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
 {
-	struct rt6_info *f6i;
+	struct fib6_info *f6i;
 
 	f6i = kzalloc(sizeof(*f6i), gfp_flags);
 	if (!f6i)
@@ -167,7 +167,7 @@ struct rt6_info *fib6_info_alloc(gfp_t gfp_flags)
 	return f6i;
 }
 
-void fib6_info_destroy(struct rt6_info *f6i)
+void fib6_info_destroy(struct fib6_info *f6i)
 {
 	struct rt6_exception_bucket *bucket;
 	struct dst_metrics *m;
@@ -404,7 +404,7 @@ unsigned int fib6_tables_seq_read(struct net *net)
 
 static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
 				    enum fib_event_type event_type,
-				    struct rt6_info *rt)
+				    struct fib6_info *rt)
 {
 	struct fib6_entry_notifier_info info = {
 		.rt = rt,
@@ -415,7 +415,7 @@ static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
 
 static int call_fib6_entry_notifiers(struct net *net,
 				     enum fib_event_type event_type,
-				     struct rt6_info *rt,
+				     struct fib6_info *rt,
 				     struct netlink_ext_ack *extack)
 {
 	struct fib6_entry_notifier_info info = {
@@ -432,7 +432,7 @@ struct fib6_dump_arg {
 	struct notifier_block *nb;
 };
 
-static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
+static void fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
 {
 	if (rt == arg->net->ipv6.fib6_null_entry)
 		return;
@@ -441,7 +441,7 @@ static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
 
 static int fib6_node_dump(struct fib6_walker *w)
 {
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 
 	for_each_fib6_walker_rt(w)
 		fib6_rt_dump(rt, w->args);
@@ -490,7 +490,7 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb)
 static int fib6_dump_node(struct fib6_walker *w)
 {
 	int res;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 
 	for_each_fib6_walker_rt(w) {
 		res = rt6_dump_route(rt, w->args);
@@ -507,7 +507,7 @@ static int fib6_dump_node(struct fib6_walker *w)
 		 */
 		if (rt->rt6i_nsiblings)
 			rt = list_last_entry(&rt->rt6i_siblings,
-					     struct rt6_info,
+					     struct fib6_info,
 					     rt6i_siblings);
 	}
 	w->leaf = NULL;
@@ -643,7 +643,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	return res;
 }
 
-void fib6_metric_set(struct rt6_info *f6i, int metric, u32 val)
+void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val)
 {
 	if (!f6i)
 		return;
@@ -690,7 +690,7 @@ static struct fib6_node *fib6_add_1(struct net *net,
 	fn = root;
 
 	do {
-		struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+		struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
 					    lockdep_is_held(&table->tb6_lock));
 		key = (struct rt6key *)((u8 *)leaf + offset);
 
@@ -884,7 +884,7 @@ static struct fib6_node *fib6_add_1(struct net *net,
 	return ln;
 }
 
-static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
+static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
 			  struct net *net)
 {
 	struct fib6_table *table = rt->rt6i_table;
@@ -897,9 +897,9 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
 		 * to still alive ones.
 		 */
 		while (fn) {
-			struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+			struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
 					    lockdep_is_held(&table->tb6_lock));
-			struct rt6_info *new_leaf;
+			struct fib6_info *new_leaf;
 			if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
 				new_leaf = fib6_find_prefix(net, table, fn);
 				atomic_inc(&new_leaf->rt6i_ref);
@@ -936,15 +936,15 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
  *	Insert routing information in a node.
  */
 
-static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
+static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 			    struct nl_info *info,
 			    struct netlink_ext_ack *extack)
 {
-	struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+	struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
 				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
-	struct rt6_info *iter = NULL;
-	struct rt6_info __rcu **ins;
-	struct rt6_info __rcu **fallback_ins = NULL;
+	struct fib6_info *iter = NULL;
+	struct fib6_info __rcu **ins;
+	struct fib6_info __rcu **fallback_ins = NULL;
 	int replace = (info->nlh &&
 		       (info->nlh->nlmsg_flags & NLM_F_REPLACE));
 	int add = (!info->nlh ||
@@ -1035,7 +1035,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 	/* Link this route to others same route. */
 	if (rt->rt6i_nsiblings) {
 		unsigned int rt6i_nsiblings;
-		struct rt6_info *sibling, *temp_sibling;
+		struct fib6_info *sibling, *temp_sibling;
 
 		/* Find the first route that have the same metric */
 		sibling = leaf;
@@ -1156,7 +1156,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 	return 0;
 }
 
-static void fib6_start_gc(struct net *net, struct rt6_info *rt)
+static void fib6_start_gc(struct net *net, struct fib6_info *rt)
 {
 	if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
 	    (rt->rt6i_flags & RTF_EXPIRES))
@@ -1171,7 +1171,7 @@ void fib6_force_start_gc(struct net *net)
 			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
 }
 
-static void __fib6_update_sernum_upto_root(struct rt6_info *rt,
+static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
 					   int sernum)
 {
 	struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
@@ -1186,7 +1186,7 @@ static void __fib6_update_sernum_upto_root(struct rt6_info *rt,
 	}
 }
 
-void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt)
+void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt)
 {
 	__fib6_update_sernum_upto_root(rt, fib6_new_sernum(net));
 }
@@ -1198,7 +1198,7 @@ void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt)
  *	Need to own table->tb6_lock
  */
 
-int fib6_add(struct fib6_node *root, struct rt6_info *rt,
+int fib6_add(struct fib6_node *root, struct fib6_info *rt,
 	     struct nl_info *info, struct netlink_ext_ack *extack)
 {
 	struct fib6_table *table = rt->rt6i_table;
@@ -1219,7 +1219,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 
 	fn = fib6_add_1(info->nl_net, table, root,
 			&rt->rt6i_dst.addr, rt->rt6i_dst.plen,
-			offsetof(struct rt6_info, rt6i_dst), allow_create,
+			offsetof(struct fib6_info, rt6i_dst), allow_create,
 			replace_required, extack);
 	if (IS_ERR(fn)) {
 		err = PTR_ERR(fn);
@@ -1260,7 +1260,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 
 			sn = fib6_add_1(info->nl_net, table, sfn,
 					&rt->rt6i_src.addr, rt->rt6i_src.plen,
-					offsetof(struct rt6_info, rt6i_src),
+					offsetof(struct fib6_info, rt6i_src),
 					allow_create, replace_required, extack);
 
 			if (IS_ERR(sn)) {
@@ -1279,7 +1279,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 		} else {
 			sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
 					&rt->rt6i_src.addr, rt->rt6i_src.plen,
-					offsetof(struct rt6_info, rt6i_src),
+					offsetof(struct fib6_info, rt6i_src),
 					allow_create, replace_required, extack);
 
 			if (IS_ERR(sn)) {
@@ -1316,7 +1316,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 		 * super-tree leaf node we have to find a new one for it.
 		 */
 		if (pn != fn) {
-			struct rt6_info *pn_leaf =
+			struct fib6_info *pn_leaf =
 				rcu_dereference_protected(pn->leaf,
 				    lockdep_is_held(&table->tb6_lock));
 			if (pn_leaf == rt) {
@@ -1365,7 +1365,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
  */
 
 struct lookup_args {
-	int			offset;		/* key offset on rt6_info	*/
+	int			offset;		/* key offset on fib6_info */
 	const struct in6_addr	*addr;		/* search key			*/
 };
 
@@ -1403,7 +1403,7 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
 		struct fib6_node *subtree = FIB6_SUBTREE(fn);
 
 		if (subtree || fn->fn_flags & RTN_RTINFO) {
-			struct rt6_info *leaf = rcu_dereference(fn->leaf);
+			struct fib6_info *leaf = rcu_dereference(fn->leaf);
 			struct rt6key *key;
 
 			if (!leaf)
@@ -1443,12 +1443,12 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad
 	struct fib6_node *fn;
 	struct lookup_args args[] = {
 		{
-			.offset = offsetof(struct rt6_info, rt6i_dst),
+			.offset = offsetof(struct fib6_info, rt6i_dst),
 			.addr = daddr,
 		},
 #ifdef CONFIG_IPV6_SUBTREES
 		{
-			.offset = offsetof(struct rt6_info, rt6i_src),
+			.offset = offsetof(struct fib6_info, rt6i_src),
 			.addr = saddr,
 		},
 #endif
@@ -1484,7 +1484,7 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root,
 	struct fib6_node *fn, *prev = NULL;
 
 	for (fn = root; fn ; ) {
-		struct rt6_info *leaf = rcu_dereference(fn->leaf);
+		struct fib6_info *leaf = rcu_dereference(fn->leaf);
 		struct rt6key *key;
 
 		/* This node is being deleted */
@@ -1533,7 +1533,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
 	struct fib6_node *fn;
 
 	fn = fib6_locate_1(root, daddr, dst_len,
-			   offsetof(struct rt6_info, rt6i_dst),
+			   offsetof(struct fib6_info, rt6i_dst),
 			   exact_match);
 
 #ifdef CONFIG_IPV6_SUBTREES
@@ -1544,7 +1544,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
 
 			if (subtree) {
 				fn = fib6_locate_1(subtree, saddr, src_len,
-					   offsetof(struct rt6_info, rt6i_src),
+					   offsetof(struct fib6_info, rt6i_src),
 					   exact_match);
 			}
 		}
@@ -1563,7 +1563,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
  *
  */
 
-static struct rt6_info *fib6_find_prefix(struct net *net,
+static struct fib6_info *fib6_find_prefix(struct net *net,
 					 struct fib6_table *table,
 					 struct fib6_node *fn)
 {
@@ -1622,11 +1622,11 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 					    lockdep_is_held(&table->tb6_lock));
 		struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
 					    lockdep_is_held(&table->tb6_lock));
-		struct rt6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
+		struct fib6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
 					    lockdep_is_held(&table->tb6_lock));
-		struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
+		struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
 					    lockdep_is_held(&table->tb6_lock));
-		struct rt6_info *new_fn_leaf;
+		struct fib6_info *new_fn_leaf;
 
 		RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
 		iter++;
@@ -1717,10 +1717,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 }
 
 static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
-			   struct rt6_info __rcu **rtp, struct nl_info *info)
+			   struct fib6_info __rcu **rtp, struct nl_info *info)
 {
 	struct fib6_walker *w;
-	struct rt6_info *rt = rcu_dereference_protected(*rtp,
+	struct fib6_info *rt = rcu_dereference_protected(*rtp,
 				    lockdep_is_held(&table->tb6_lock));
 	struct net *net = info->nl_net;
 
@@ -1741,7 +1741,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 
 	/* Remove this entry from other siblings */
 	if (rt->rt6i_nsiblings) {
-		struct rt6_info *sibling, *next_sibling;
+		struct fib6_info *sibling, *next_sibling;
 
 		list_for_each_entry_safe(sibling, next_sibling,
 					 &rt->rt6i_siblings, rt6i_siblings)
@@ -1785,14 +1785,14 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 }
 
 /* Need to own table->tb6_lock */
-int fib6_del(struct rt6_info *rt, struct nl_info *info)
+int fib6_del(struct fib6_info *rt, struct nl_info *info)
 {
 	struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
 				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
 	struct fib6_table *table = rt->rt6i_table;
 	struct net *net = info->nl_net;
-	struct rt6_info __rcu **rtp;
-	struct rt6_info __rcu **rtp_next;
+	struct fib6_info __rcu **rtp;
+	struct fib6_info __rcu **rtp_next;
 
 	if (!fn || rt == net->ipv6.fib6_null_entry)
 		return -ENOENT;
@@ -1804,7 +1804,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
 	 */
 
 	for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
-		struct rt6_info *cur = rcu_dereference_protected(*rtp,
+		struct fib6_info *cur = rcu_dereference_protected(*rtp,
 					lockdep_is_held(&table->tb6_lock));
 		if (rt == cur) {
 			fib6_del_route(table, fn, rtp, info);
@@ -1948,7 +1948,7 @@ static int fib6_walk(struct net *net, struct fib6_walker *w)
 static int fib6_clean_node(struct fib6_walker *w)
 {
 	int res;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 	struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
 	struct nl_info info = {
 		.nl_net = c->net,
@@ -1983,7 +1983,7 @@ static int fib6_clean_node(struct fib6_walker *w)
 			if (WARN_ON(!rt->rt6i_nsiblings))
 				continue;
 			rt = list_last_entry(&rt->rt6i_siblings,
-					     struct rt6_info, rt6i_siblings);
+					     struct fib6_info, rt6i_siblings);
 			continue;
 		}
 		WARN_ON(res != 0);
@@ -2002,7 +2002,7 @@ static int fib6_clean_node(struct fib6_walker *w)
  */
 
 static void fib6_clean_tree(struct net *net, struct fib6_node *root,
-			    int (*func)(struct rt6_info *, void *arg),
+			    int (*func)(struct fib6_info *, void *arg),
 			    int sernum, void *arg)
 {
 	struct fib6_cleaner c;
@@ -2020,7 +2020,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
 }
 
 static void __fib6_clean_all(struct net *net,
-			     int (*func)(struct rt6_info *, void *),
+			     int (*func)(struct fib6_info *, void *),
 			     int sernum, void *arg)
 {
 	struct fib6_table *table;
@@ -2040,7 +2040,7 @@ static void __fib6_clean_all(struct net *net,
 	rcu_read_unlock();
 }
 
-void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *),
+void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *),
 		    void *arg)
 {
 	__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
@@ -2057,7 +2057,7 @@ static void fib6_flush_trees(struct net *net)
  *	Garbage collection
  */
 
-static int fib6_age(struct rt6_info *rt, void *arg)
+static int fib6_age(struct fib6_info *rt, void *arg)
 {
 	struct fib6_gc_args *gc_args = arg;
 	unsigned long now = jiffies;
@@ -2261,7 +2261,7 @@ struct ipv6_route_iter {
 
 static int ipv6_route_seq_show(struct seq_file *seq, void *v)
 {
-	struct rt6_info *rt = v;
+	struct fib6_info *rt = v;
 	struct ipv6_route_iter *iter = seq->private;
 	const struct net_device *dev;
 
@@ -2353,14 +2353,14 @@ static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
 static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	int r;
-	struct rt6_info *n;
+	struct fib6_info *n;
 	struct net *net = seq_file_net(seq);
 	struct ipv6_route_iter *iter = seq->private;
 
 	if (!v)
 		goto iter_table;
 
-	n = rcu_dereference_bh(((struct rt6_info *)v)->rt6_next);
+	n = rcu_dereference_bh(((struct fib6_info *)v)->rt6_next);
 	if (n) {
 		++*pos;
 		return n;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a28857088bff..102645298692 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1155,7 +1155,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 	struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
 	struct neighbour *neigh = NULL;
 	struct inet6_dev *in6_dev;
-	struct rt6_info *rt = NULL;
+	struct fib6_info *rt = NULL;
 	struct net *net;
 	int lifetime;
 	struct ndisc_options ndopts;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0d861bd07673..2ccf939e1a20 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -96,24 +96,24 @@ static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 					   struct sk_buff *skb, u32 mtu);
 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
 					struct sk_buff *skb);
-static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
-static size_t rt6_nlmsg_size(struct rt6_info *rt);
+static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
+static size_t rt6_nlmsg_size(struct fib6_info *rt);
 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
-			 struct rt6_info *rt, struct dst_entry *dst,
+			 struct fib6_info *rt, struct dst_entry *dst,
 			 struct in6_addr *dest, struct in6_addr *src,
 			 int iif, int type, u32 portid, u32 seq,
 			 unsigned int flags);
-static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
+static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
 					   struct in6_addr *daddr,
 					   struct in6_addr *saddr);
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
-static struct rt6_info *rt6_add_route_info(struct net *net,
+static struct fib6_info *rt6_add_route_info(struct net *net,
 					   const struct in6_addr *prefix, int prefixlen,
 					   const struct in6_addr *gwaddr,
 					   struct net_device *dev,
 					   unsigned int pref);
-static struct rt6_info *rt6_get_route_info(struct net *net,
+static struct fib6_info *rt6_get_route_info(struct net *net,
 					   const struct in6_addr *prefix, int prefixlen,
 					   const struct in6_addr *gwaddr,
 					   struct net_device *dev);
@@ -283,7 +283,7 @@ static const u32 ip6_template_metrics[RTAX_MAX] = {
 	[RTAX_HOPLIMIT - 1] = 0,
 };
 
-static const struct rt6_info fib6_null_entry_template = {
+static const struct fib6_info fib6_null_entry_template = {
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 	.rt6i_protocol  = RTPROT_KERNEL,
 	.rt6i_metric	= ~(u32)0,
@@ -372,7 +372,7 @@ EXPORT_SYMBOL(ip6_dst_alloc);
 static void ip6_dst_destroy(struct dst_entry *dst)
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
-	struct rt6_info *from = rt->from;
+	struct fib6_info *from = rt->from;
 	struct inet6_dev *idev;
 
 	dst_destroy_metrics_generic(dst);
@@ -425,13 +425,13 @@ static bool rt6_check_expired(const struct rt6_info *rt)
 	return false;
 }
 
-static struct rt6_info *rt6_multipath_select(const struct net *net,
-					     struct rt6_info *match,
+static struct fib6_info *rt6_multipath_select(const struct net *net,
+					      struct fib6_info *match,
 					     struct flowi6 *fl6, int oif,
 					     const struct sk_buff *skb,
 					     int strict)
 {
-	struct rt6_info *sibling, *next_sibling;
+	struct fib6_info *sibling, *next_sibling;
 
 	/* We might have already computed the hash for ICMPv6 errors. In such
 	 * case it will always be non-zero. Otherwise now is the time to do it.
@@ -462,14 +462,14 @@ static struct rt6_info *rt6_multipath_select(const struct net *net,
  *	Route lookup. rcu_read_lock() should be held.
  */
 
-static inline struct rt6_info *rt6_device_match(struct net *net,
-						    struct rt6_info *rt,
+static inline struct fib6_info *rt6_device_match(struct net *net,
+						 struct fib6_info *rt,
 						    const struct in6_addr *saddr,
 						    int oif,
 						    int flags)
 {
-	struct rt6_info *local = NULL;
-	struct rt6_info *sprt;
+	struct fib6_info *local = NULL;
+	struct fib6_info *sprt;
 
 	if (!oif && ipv6_addr_any(saddr) &&
 	    !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
@@ -532,7 +532,7 @@ static void rt6_probe_deferred(struct work_struct *w)
 	kfree(work);
 }
 
-static void rt6_probe(struct rt6_info *rt)
+static void rt6_probe(struct fib6_info *rt)
 {
 	struct __rt6_probe_work *work;
 	const struct in6_addr *nh_gw;
@@ -585,7 +585,7 @@ static void rt6_probe(struct rt6_info *rt)
 	rcu_read_unlock_bh();
 }
 #else
-static inline void rt6_probe(struct rt6_info *rt)
+static inline void rt6_probe(struct fib6_info *rt)
 {
 }
 #endif
@@ -593,7 +593,7 @@ static inline void rt6_probe(struct rt6_info *rt)
 /*
  * Default Router Selection (RFC 2461 6.3.6)
  */
-static inline int rt6_check_dev(struct rt6_info *rt, int oif)
+static inline int rt6_check_dev(struct fib6_info *rt, int oif)
 {
 	const struct net_device *dev = rt->fib6_nh.nh_dev;
 
@@ -605,7 +605,7 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 	return 0;
 }
 
-static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
+static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
 {
 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 	struct neighbour *neigh;
@@ -637,8 +637,7 @@ static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
 	return ret;
 }
 
-static int rt6_score_route(struct rt6_info *rt, int oif,
-			   int strict)
+static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
 {
 	int m;
 
@@ -656,8 +655,8 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
 	return m;
 }
 
-static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
-				   int *mpri, struct rt6_info *match,
+static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
+				   int *mpri, struct fib6_info *match,
 				   bool *do_rr)
 {
 	int m;
@@ -696,13 +695,13 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 	return match;
 }
 
-static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
-				     struct rt6_info *leaf,
-				     struct rt6_info *rr_head,
+static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
+				     struct fib6_info *leaf,
+				     struct fib6_info *rr_head,
 				     u32 metric, int oif, int strict,
 				     bool *do_rr)
 {
-	struct rt6_info *rt, *match, *cont;
+	struct fib6_info *rt, *match, *cont;
 	int mpri = -1;
 
 	match = NULL;
@@ -735,11 +734,11 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 	return match;
 }
 
-static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
+static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
 				   int oif, int strict)
 {
-	struct rt6_info *leaf = rcu_dereference(fn->leaf);
-	struct rt6_info *match, *rt0;
+	struct fib6_info *leaf = rcu_dereference(fn->leaf);
+	struct fib6_info *match, *rt0;
 	bool do_rr = false;
 	int key_plen;
 
@@ -767,7 +766,7 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
 			     &do_rr);
 
 	if (do_rr) {
-		struct rt6_info *next = rcu_dereference(rt0->rt6_next);
+		struct fib6_info *next = rcu_dereference(rt0->rt6_next);
 
 		/* no entries matched; do round-robin */
 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
@@ -785,7 +784,7 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
 	return match ? match : net->ipv6.fib6_null_entry;
 }
 
-static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
+static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
 {
 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
 }
@@ -799,7 +798,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 	struct in6_addr prefix_buf, *prefix;
 	unsigned int pref;
 	unsigned long lifetime;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 
 	if (len < sizeof(struct route_info)) {
 		return -EINVAL;
@@ -871,7 +870,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
  */
 
 /* called with rcu_lock held */
-static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
+static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
 {
 	struct net_device *dev = rt->fib6_nh.nh_dev;
 
@@ -913,7 +912,7 @@ static int ip6_rt_type_to_error(u8 fib6_type)
 	return fib6_prop[fib6_type];
 }
 
-static unsigned short fib6_info_dst_flags(struct rt6_info *rt)
+static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
 {
 	unsigned short flags = 0;
 
@@ -927,7 +926,7 @@ static unsigned short fib6_info_dst_flags(struct rt6_info *rt)
 	return flags;
 }
 
-static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct rt6_info *ort)
+static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
 {
 	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
 
@@ -949,7 +948,7 @@ static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct rt6_info *ort)
 	}
 }
 
-static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
+static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
 {
 	rt->dst.flags |= fib6_info_dst_flags(ort);
 
@@ -977,7 +976,7 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
 	rt->dst.lastuse = jiffies;
 }
 
-static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
+static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
 {
 	rt->rt6i_flags &= ~RTF_EXPIRES;
 	fib6_info_hold(from);
@@ -989,7 +988,7 @@ static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
 	}
 }
 
-static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
+static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
 {
 	ip6_rt_init_dst(rt, ort);
 
@@ -1045,7 +1044,7 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
 }
 
 /* called with rcu_lock held */
-static struct rt6_info *ip6_create_rt_rcu(struct rt6_info *rt)
+static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
 {
 	unsigned short flags = fib6_info_dst_flags(rt);
 	struct net_device *dev = rt->fib6_nh.nh_dev;
@@ -1064,7 +1063,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 					     const struct sk_buff *skb,
 					     int flags)
 {
-	struct rt6_info *f6i;
+	struct fib6_info *f6i;
 	struct fib6_node *fn;
 	struct rt6_info *rt;
 
@@ -1152,7 +1151,7 @@ EXPORT_SYMBOL(rt6_lookup);
  * Caller must hold dst before calling it.
  */
 
-static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
+static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
 			struct netlink_ext_ack *extack)
 {
 	int err;
@@ -1166,14 +1165,14 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
 	return err;
 }
 
-int ip6_ins_rt(struct net *net, struct rt6_info *rt)
+int ip6_ins_rt(struct net *net, struct fib6_info *rt)
 {
 	struct nl_info info = {	.nl_net = net, };
 
 	return __ip6_ins_rt(rt, &info, NULL);
 }
 
-static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
+static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
 					   const struct in6_addr *daddr,
 					   const struct in6_addr *saddr)
 {
@@ -1213,7 +1212,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 	return rt;
 }
 
-static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
+static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
 {
 	unsigned short flags = fib6_info_dst_flags(rt);
 	struct net_device *dev;
@@ -1232,7 +1231,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
 }
 
 /* It should be called with rcu_read_lock() acquired */
-static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
+static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
 {
 	struct rt6_info *pcpu_rt, **p;
 
@@ -1246,7 +1245,7 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
 }
 
 static struct rt6_info *rt6_make_pcpu_route(struct net *net,
-					    struct rt6_info *rt)
+					    struct fib6_info *rt)
 {
 	struct rt6_info *pcpu_rt, *prev, **p;
 
@@ -1390,7 +1389,7 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
 	return NULL;
 }
 
-static unsigned int fib6_mtu(const struct rt6_info *rt)
+static unsigned int fib6_mtu(const struct fib6_info *rt)
 {
 	unsigned int mtu;
 
@@ -1401,7 +1400,7 @@ static unsigned int fib6_mtu(const struct rt6_info *rt)
 }
 
 static int rt6_insert_exception(struct rt6_info *nrt,
-				struct rt6_info *ort)
+				struct fib6_info *ort)
 {
 	struct net *net = dev_net(nrt->dst.dev);
 	struct rt6_exception_bucket *bucket;
@@ -1487,7 +1486,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
 	return err;
 }
 
-void rt6_flush_exceptions(struct rt6_info *rt)
+void rt6_flush_exceptions(struct fib6_info *rt)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
@@ -1517,7 +1516,7 @@ void rt6_flush_exceptions(struct rt6_info *rt)
 /* Find cached rt in the hash table inside passed in rt
  * Caller has to hold rcu_read_lock()
  */
-static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
+static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
 					   struct in6_addr *daddr,
 					   struct in6_addr *saddr)
 {
@@ -1550,7 +1549,7 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
 static int rt6_remove_exception_rt(struct rt6_info *rt)
 {
 	struct rt6_exception_bucket *bucket;
-	struct rt6_info *from = rt->from;
+	struct fib6_info *from = rt->from;
 	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
 	int err;
@@ -1595,7 +1594,7 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
 static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
 {
 	struct rt6_exception_bucket *bucket;
-	struct rt6_info *from = rt->from;
+	struct fib6_info *from = rt->from;
 	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
 
@@ -1625,7 +1624,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
 	rcu_read_unlock();
 }
 
-static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
+static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
@@ -1667,7 +1666,7 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
 }
 
 static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
-				       struct rt6_info *rt, int mtu)
+				       struct fib6_info *rt, int mtu)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
@@ -1697,7 +1696,7 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
 
 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
 
-static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
+static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
 					struct in6_addr *gateway)
 {
 	struct rt6_exception_bucket *bucket;
@@ -1776,7 +1775,7 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
 	gc_args->more++;
 }
 
-void rt6_age_exceptions(struct rt6_info *rt,
+void rt6_age_exceptions(struct fib6_info *rt,
 			struct fib6_gc_args *gc_args,
 			unsigned long now)
 {
@@ -1812,7 +1811,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			       const struct sk_buff *skb, int flags)
 {
 	struct fib6_node *fn, *saved_fn;
-	struct rt6_info *f6i;
+	struct fib6_info *f6i;
 	struct rt6_info *rt;
 	int strict = 0;
 
@@ -2139,7 +2138,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
  *	Destination cache support functions
  */
 
-static bool fib6_check(struct rt6_info *f6i, u32 cookie)
+static bool fib6_check(struct fib6_info *f6i, u32 cookie)
 {
 	u32 rt_cookie = 0;
 
@@ -2374,7 +2373,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 {
 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
 	struct rt6_info *ret = NULL, *rt_cache;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 	struct fib6_node *fn;
 
 	/* Get the "current" route for this destination and
@@ -2620,7 +2619,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
 	return entries > rt_max_size;
 }
 
-static int ip6_convert_metrics(struct net *net, struct rt6_info *rt,
+static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
 			       struct fib6_config *cfg)
 {
 	int err = 0;
@@ -2823,12 +2822,12 @@ static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
 	return err;
 }
 
-static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
+static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
 					      gfp_t gfp_flags,
 					      struct netlink_ext_ack *extack)
 {
 	struct net *net = cfg->fc_nlinfo.nl_net;
-	struct rt6_info *rt = NULL;
+	struct fib6_info *rt = NULL;
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev = NULL;
 	struct fib6_table *table;
@@ -3047,7 +3046,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
 		  struct netlink_ext_ack *extack)
 {
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 	int err;
 
 	rt = ip6_route_info_create(cfg, gfp_flags, extack);
@@ -3060,7 +3059,7 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
 	return err;
 }
 
-static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
+static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
 {
 	struct net *net = info->nl_net;
 	struct fib6_table *table;
@@ -3081,14 +3080,14 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 	return err;
 }
 
-int ip6_del_rt(struct net *net, struct rt6_info *rt)
+int ip6_del_rt(struct net *net, struct fib6_info *rt)
 {
 	struct nl_info info = { .nl_net = net };
 
 	return __ip6_del_rt(rt, &info);
 }
 
-static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
+static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
 {
 	struct nl_info *info = &cfg->fc_nlinfo;
 	struct net *net = info->nl_net;
@@ -3102,7 +3101,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
 	spin_lock_bh(&table->tb6_lock);
 
 	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
-		struct rt6_info *sibling, *next_sibling;
+		struct fib6_info *sibling, *next_sibling;
 
 		/* prefer to send a single notification with all hops */
 		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
@@ -3159,8 +3158,9 @@ static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
 static int ip6_route_del(struct fib6_config *cfg,
 			 struct netlink_ext_ack *extack)
 {
-	struct rt6_info *rt, *rt_cache;
+	struct rt6_info *rt_cache;
 	struct fib6_table *table;
+	struct fib6_info *rt;
 	struct fib6_node *fn;
 	int err = -ESRCH;
 
@@ -3336,7 +3336,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 }
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
-static struct rt6_info *rt6_get_route_info(struct net *net,
+static struct fib6_info *rt6_get_route_info(struct net *net,
 					   const struct in6_addr *prefix, int prefixlen,
 					   const struct in6_addr *gwaddr,
 					   struct net_device *dev)
@@ -3344,7 +3344,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
 	int ifindex = dev->ifindex;
 	struct fib6_node *fn;
-	struct rt6_info *rt = NULL;
+	struct fib6_info *rt = NULL;
 	struct fib6_table *table;
 
 	table = fib6_get_table(net, tb_id);
@@ -3363,7 +3363,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 			continue;
 		if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
 			continue;
-		ip6_hold_safe(NULL, &rt, false);
+		fib6_info_hold(rt);
 		break;
 	}
 out:
@@ -3371,7 +3371,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 	return rt;
 }
 
-static struct rt6_info *rt6_add_route_info(struct net *net,
+static struct fib6_info *rt6_add_route_info(struct net *net,
 					   const struct in6_addr *prefix, int prefixlen,
 					   const struct in6_addr *gwaddr,
 					   struct net_device *dev,
@@ -3404,12 +3404,12 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 }
 #endif
 
-struct rt6_info *rt6_get_dflt_router(struct net *net,
+struct fib6_info *rt6_get_dflt_router(struct net *net,
 				     const struct in6_addr *addr,
 				     struct net_device *dev)
 {
 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 	struct fib6_table *table;
 
 	table = fib6_get_table(net, tb_id);
@@ -3424,12 +3424,12 @@ struct rt6_info *rt6_get_dflt_router(struct net *net,
 			break;
 	}
 	if (rt)
-		ip6_hold_safe(NULL, &rt, false);
+		fib6_info_hold(rt);
 	rcu_read_unlock();
 	return rt;
 }
 
-struct rt6_info *rt6_add_dflt_router(struct net *net,
+struct fib6_info *rt6_add_dflt_router(struct net *net,
 				     const struct in6_addr *gwaddr,
 				     struct net_device *dev,
 				     unsigned int pref)
@@ -3463,7 +3463,7 @@ struct rt6_info *rt6_add_dflt_router(struct net *net,
 static void __rt6_purge_dflt_routers(struct net *net,
 				     struct fib6_table *table)
 {
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 
 restart:
 	rcu_read_lock();
@@ -3614,14 +3614,14 @@ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff
  *	Allocate a dst for local (unicast / anycast) address.
  */
 
-struct rt6_info *addrconf_dst_alloc(struct net *net,
+struct fib6_info *addrconf_dst_alloc(struct net *net,
 				    struct inet6_dev *idev,
 				    const struct in6_addr *addr,
 				    bool anycast, gfp_t gfp_flags)
 {
 	u32 tb_id;
 	struct net_device *dev = idev->dev;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 
 	rt = fib6_info_alloc(gfp_flags);
 	if (!rt)
@@ -3661,7 +3661,7 @@ struct arg_dev_net_ip {
 	struct in6_addr *addr;
 };
 
-static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
+static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
 {
 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
@@ -3694,7 +3694,7 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
 
 /* Remove routers and update dst entries when gateway turn into host. */
-static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
+static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
 {
 	struct in6_addr *gateway = (struct in6_addr *)arg;
 
@@ -3725,9 +3725,9 @@ struct arg_netdev_event {
 	};
 };
 
-static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
+static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 	struct fib6_node *fn;
 
 	fn = rcu_dereference_protected(rt->rt6i_node,
@@ -3745,7 +3745,7 @@ static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
 	return NULL;
 }
 
-static bool rt6_is_dead(const struct rt6_info *rt)
+static bool rt6_is_dead(const struct fib6_info *rt)
 {
 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
 	    (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
@@ -3755,9 +3755,9 @@ static bool rt6_is_dead(const struct rt6_info *rt)
 	return false;
 }
 
-static int rt6_multipath_total_weight(const struct rt6_info *rt)
+static int rt6_multipath_total_weight(const struct fib6_info *rt)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 	int total = 0;
 
 	if (!rt6_is_dead(rt))
@@ -3771,7 +3771,7 @@ static int rt6_multipath_total_weight(const struct rt6_info *rt)
 	return total;
 }
 
-static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
+static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
 {
 	int upper_bound = -1;
 
@@ -3783,9 +3783,9 @@ static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
 	atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
 }
 
-static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
+static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 	int weight = 0;
 
 	rt6_upper_bound_set(rt, &weight, total);
@@ -3794,9 +3794,9 @@ static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
 		rt6_upper_bound_set(iter, &weight, total);
 }
 
-void rt6_multipath_rebalance(struct rt6_info *rt)
+void rt6_multipath_rebalance(struct fib6_info *rt)
 {
-	struct rt6_info *first;
+	struct fib6_info *first;
 	int total;
 
 	/* In case the entire multipath route was marked for flushing,
@@ -3818,7 +3818,7 @@ void rt6_multipath_rebalance(struct rt6_info *rt)
 	rt6_multipath_upper_bound_set(first, total);
 }
 
-static int fib6_ifup(struct rt6_info *rt, void *p_arg)
+static int fib6_ifup(struct fib6_info *rt, void *p_arg)
 {
 	const struct arg_netdev_event *arg = p_arg;
 	struct net *net = dev_net(arg->dev);
@@ -3847,10 +3847,10 @@ void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
 	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
 }
 
-static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
+static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
 				   const struct net_device *dev)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 
 	if (rt->fib6_nh.nh_dev == dev)
 		return true;
@@ -3861,19 +3861,19 @@ static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
 	return false;
 }
 
-static void rt6_multipath_flush(struct rt6_info *rt)
+static void rt6_multipath_flush(struct fib6_info *rt)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 
 	rt->should_flush = 1;
 	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
 		iter->should_flush = 1;
 }
 
-static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
+static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
 					     const struct net_device *down_dev)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 	unsigned int dead = 0;
 
 	if (rt->fib6_nh.nh_dev == down_dev ||
@@ -3887,11 +3887,11 @@ static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
 	return dead;
 }
 
-static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
+static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
 				       const struct net_device *dev,
 				       unsigned int nh_flags)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 
 	if (rt->fib6_nh.nh_dev == dev)
 		rt->fib6_nh.nh_flags |= nh_flags;
@@ -3901,7 +3901,7 @@ static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
 }
 
 /* called with write lock held for table with rt */
-static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
+static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
 {
 	const struct arg_netdev_event *arg = p_arg;
 	const struct net_device *dev = arg->dev;
@@ -3968,7 +3968,7 @@ struct rt6_mtu_change_arg {
 	unsigned int mtu;
 };
 
-static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
+static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
 {
 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
 	struct inet6_dev *idev;
@@ -4155,7 +4155,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 }
 
 struct rt6_nh {
-	struct rt6_info *rt6_info;
+	struct fib6_info *fib6_info;
 	struct fib6_config r_cfg;
 	struct list_head next;
 };
@@ -4173,21 +4173,22 @@ static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
 
 static int ip6_route_info_append(struct net *net,
 				 struct list_head *rt6_nh_list,
-				 struct rt6_info *rt, struct fib6_config *r_cfg)
+				 struct fib6_info *rt,
+				 struct fib6_config *r_cfg)
 {
 	struct rt6_nh *nh;
 	int err = -EEXIST;
 
 	list_for_each_entry(nh, rt6_nh_list, next) {
-		/* check if rt6_info already exists */
-		if (rt6_duplicate_nexthop(nh->rt6_info, rt))
+		/* check if fib6_info already exists */
+		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
 			return err;
 	}
 
 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
 	if (!nh)
 		return -ENOMEM;
-	nh->rt6_info = rt;
+	nh->fib6_info = rt;
 	err = ip6_convert_metrics(net, rt, r_cfg);
 	if (err) {
 		kfree(nh);
@@ -4199,8 +4200,8 @@ static int ip6_route_info_append(struct net *net,
 	return 0;
 }
 
-static void ip6_route_mpath_notify(struct rt6_info *rt,
-				   struct rt6_info *rt_last,
+static void ip6_route_mpath_notify(struct fib6_info *rt,
+				   struct fib6_info *rt_last,
 				   struct nl_info *info,
 				   __u16 nlflags)
 {
@@ -4212,7 +4213,7 @@ static void ip6_route_mpath_notify(struct rt6_info *rt,
 	 */
 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
 		rt = list_first_entry(&rt_last->rt6i_siblings,
-				      struct rt6_info,
+				      struct fib6_info,
 				      rt6i_siblings);
 	}
 
@@ -4223,11 +4224,11 @@ static void ip6_route_mpath_notify(struct rt6_info *rt,
 static int ip6_route_multipath_add(struct fib6_config *cfg,
 				   struct netlink_ext_ack *extack)
 {
-	struct rt6_info *rt_notif = NULL, *rt_last = NULL;
+	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
 	struct nl_info *info = &cfg->fc_nlinfo;
 	struct fib6_config r_cfg;
 	struct rtnexthop *rtnh;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 	struct rt6_nh *err_nh;
 	struct rt6_nh *nh, *nh_safe;
 	__u16 nlflags;
@@ -4247,7 +4248,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 	rtnh = (struct rtnexthop *)cfg->fc_mp;
 
 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
-	 * rt6_info structs per nexthop
+	 * fib6_info structs per nexthop
 	 */
 	while (rtnh_ok(rtnh, remaining)) {
 		memcpy(&r_cfg, cfg, sizeof(*cfg));
@@ -4297,16 +4298,16 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 
 	err_nh = NULL;
 	list_for_each_entry(nh, &rt6_nh_list, next) {
-		rt_last = nh->rt6_info;
-		err = __ip6_ins_rt(nh->rt6_info, info, extack);
-		fib6_info_release(nh->rt6_info);
+		rt_last = nh->fib6_info;
+		err = __ip6_ins_rt(nh->fib6_info, info, extack);
+		fib6_info_release(nh->fib6_info);
 
 		/* save reference to first route for notification */
 		if (!rt_notif && !err)
-			rt_notif = nh->rt6_info;
+			rt_notif = nh->fib6_info;
 
-		/* nh->rt6_info is used or freed at this point, reset to NULL*/
-		nh->rt6_info = NULL;
+		/* nh->fib6_info is used or freed at this point, reset to NULL*/
+		nh->fib6_info = NULL;
 		if (err) {
 			if (replace && nhn)
 				ip6_print_replace_route_err(&rt6_nh_list);
@@ -4347,8 +4348,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 
 cleanup:
 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
-		if (nh->rt6_info)
-			fib6_info_release(nh->rt6_info);
+		if (nh->fib6_info)
+			fib6_info_release(nh->fib6_info);
 		list_del(&nh->next);
 		kfree(nh);
 	}
@@ -4428,7 +4429,7 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return ip6_route_add(&cfg, GFP_KERNEL, extack);
 }
 
-static size_t rt6_nlmsg_size(struct rt6_info *rt)
+static size_t rt6_nlmsg_size(struct fib6_info *rt)
 {
 	int nexthop_len = 0;
 
@@ -4458,7 +4459,7 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
 	       + nexthop_len;
 }
 
-static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
+static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
 			    unsigned int *flags, bool skip_oif)
 {
 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
@@ -4495,7 +4496,7 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
 }
 
 /* add multipath next hop */
-static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
+static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
 {
 	const struct net_device *dev = rt->fib6_nh.nh_dev;
 	struct rtnexthop *rtnh;
@@ -4523,7 +4524,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
 }
 
 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
-			 struct rt6_info *rt, struct dst_entry *dst,
+			 struct fib6_info *rt, struct dst_entry *dst,
 			 struct in6_addr *dest, struct in6_addr *src,
 			 int iif, int type, u32 portid, u32 seq,
 			 unsigned int flags)
@@ -4613,7 +4614,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 	 * each as a nexthop within RTA_MULTIPATH.
 	 */
 	if (rt->rt6i_nsiblings) {
-		struct rt6_info *sibling, *next_sibling;
+		struct fib6_info *sibling, *next_sibling;
 		struct nlattr *mp;
 
 		mp = nla_nest_start(skb, RTA_MULTIPATH);
@@ -4655,7 +4656,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 	return -EMSGSIZE;
 }
 
-int rt6_dump_route(struct rt6_info *rt, void *p_arg)
+int rt6_dump_route(struct fib6_info *rt, void *p_arg)
 {
 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
 	struct net *net = arg->net;
@@ -4800,7 +4801,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	return err;
 }
 
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
+void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
 		     unsigned int nlm_flags)
 {
 	struct sk_buff *skb;
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v2 21/21] net/ipv6: Remove unused code and variables for rt6_info
From: David Ahern @ 2018-04-18  0:33 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com>

Drop unneeded elements from rt6_info struct and rearrange layout to
something more relevant for the data path.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h   | 60 +++----------------------------------------------
 net/ipv6/ip6_fib.c      | 22 ------------------
 net/ipv6/route.c        | 27 ++--------------------
 net/ipv6/xfrm6_policy.c |  2 --
 4 files changed, 5 insertions(+), 106 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index d41b7bd69fb3..a36116b92100 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -175,58 +175,20 @@ struct fib6_info {
 
 struct rt6_info {
 	struct dst_entry		dst;
-	struct rt6_info __rcu		*rt6_next;
 	struct fib6_info		*from;
 
-	/*
-	 * Tail elements of dst_entry (__refcnt etc.)
-	 * and these elements (rarely used in hot path) are in
-	 * the same cache line.
-	 */
-	struct fib6_table		*rt6i_table;
-	struct fib6_node __rcu		*rt6i_node;
-
+	struct rt6key			rt6i_dst;
+	struct rt6key			rt6i_src;
 	struct in6_addr			rt6i_gateway;
-
-	/* Multipath routes:
-	 * siblings is a list of rt6_info that have the the same metric/weight,
-	 * destination, but not the same gateway. nsiblings is just a cache
-	 * to speed up lookup.
-	 */
-	struct list_head		rt6i_siblings;
-	unsigned int			rt6i_nsiblings;
-
-	atomic_t			rt6i_ref;
-
-	/* These are in a separate cache line. */
-	struct rt6key			rt6i_dst ____cacheline_aligned_in_smp;
+	struct inet6_dev		*rt6i_idev;
 	u32				rt6i_flags;
-	struct rt6key			rt6i_src;
 	struct rt6key			rt6i_prefsrc;
 
 	struct list_head		rt6i_uncached;
 	struct uncached_list		*rt6i_uncached_list;
 
-	struct inet6_dev		*rt6i_idev;
-	struct rt6_info * __percpu	*rt6i_pcpu;
-	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
-
-	u32				rt6i_metric;
 	/* more non-fragment space at head required */
 	unsigned short			rt6i_nfheader_len;
-	u8				rt6i_protocol;
-	u8				fib6_type;
-	u8				exception_bucket_flushed:1,
-					should_flush:1,
-					dst_nocount:1,
-					dst_nopolicy:1,
-					dst_host:1,
-					unused:3;
-
-	unsigned long			expires;
-	struct dst_metrics		*fib6_metrics;
-#define fib6_pmtu		fib6_metrics->metrics[RTAX_MTU-1]
-	struct fib6_nh			fib6_nh;
 };
 
 #define for_each_fib6_node_rt_rcu(fn)					\
@@ -328,8 +290,6 @@ static inline void ip6_rt_put(struct rt6_info *rt)
 	dst_release(&rt->dst);
 }
 
-void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
-
 struct fib6_info *fib6_info_alloc(gfp_t gfp_flags);
 void fib6_info_destroy(struct fib6_info *f6i);
 
@@ -344,20 +304,6 @@ static inline void fib6_info_release(struct fib6_info *f6i)
 		fib6_info_destroy(f6i);
 }
 
-static inline void rt6_hold(struct rt6_info *rt)
-{
-	atomic_inc(&rt->rt6i_ref);
-}
-
-static inline void rt6_release(struct rt6_info *rt)
-{
-	if (atomic_dec_and_test(&rt->rt6i_ref)) {
-		rt6_free_pcpu(rt);
-		dst_dev_put(&rt->dst);
-		dst_release(&rt->dst);
-	}
-}
-
 enum fib6_walk_state {
 #ifdef CONFIG_IPV6_SUBTREES
 	FWS_S,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 77cf43b2d858..2ab49b7cac22 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -240,28 +240,6 @@ static void node_free(struct net *net, struct fib6_node *fn)
 	net->ipv6.rt6_stats->fib_nodes--;
 }
 
-void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
-{
-	int cpu;
-
-	if (!non_pcpu_rt->rt6i_pcpu)
-		return;
-
-	for_each_possible_cpu(cpu) {
-		struct rt6_info **ppcpu_rt;
-		struct rt6_info *pcpu_rt;
-
-		ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu);
-		pcpu_rt = *ppcpu_rt;
-		if (pcpu_rt) {
-			dst_dev_put(&pcpu_rt->dst);
-			dst_release(&pcpu_rt->dst);
-			*ppcpu_rt = NULL;
-		}
-	}
-}
-EXPORT_SYMBOL_GPL(rt6_free_pcpu);
-
 static void fib6_free_table(struct fib6_table *table)
 {
 	inetpeer_invalidate_tree(&table->tb6_peers);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 2ccf939e1a20..f9c363327d62 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -302,10 +302,6 @@ static const struct rt6_info ip6_null_entry_template = {
 		.output		= ip6_pkt_discard_out,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
-	.rt6i_protocol  = RTPROT_KERNEL,
-	.rt6i_metric	= ~(u32) 0,
-	.rt6i_ref	= ATOMIC_INIT(1),
-	.fib6_type	= RTN_UNREACHABLE,
 };
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -320,10 +316,6 @@ static const struct rt6_info ip6_prohibit_entry_template = {
 		.output		= ip6_pkt_prohibit_out,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
-	.rt6i_protocol  = RTPROT_KERNEL,
-	.rt6i_metric	= ~(u32) 0,
-	.rt6i_ref	= ATOMIC_INIT(1),
-	.fib6_type	= RTN_PROHIBIT,
 };
 
 static const struct rt6_info ip6_blk_hole_entry_template = {
@@ -336,10 +328,6 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
 		.output		= dst_discard_out,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
-	.rt6i_protocol  = RTPROT_KERNEL,
-	.rt6i_metric	= ~(u32) 0,
-	.rt6i_ref	= ATOMIC_INIT(1),
-	.fib6_type	= RTN_BLACKHOLE,
 };
 
 #endif
@@ -349,7 +337,6 @@ static void rt6_info_init(struct rt6_info *rt)
 	struct dst_entry *dst = &rt->dst;
 
 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
-	INIT_LIST_HEAD(&rt->rt6i_siblings);
 	INIT_LIST_HEAD(&rt->rt6i_uncached);
 }
 
@@ -999,12 +986,10 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
 	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
 	rt->rt6i_flags = ort->rt6i_flags;
 	rt6_set_from(rt, ort);
-	rt->rt6i_metric = ort->rt6i_metric;
 #ifdef CONFIG_IPV6_SUBTREES
 	rt->rt6i_src = ort->rt6i_src;
 #endif
 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
-	rt->rt6i_table = ort->rt6i_table;
 	rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
 }
 
@@ -1192,7 +1177,6 @@ static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
 
 	ip6_rt_copy_init(rt, ort);
 	rt->rt6i_flags |= RTF_CACHE;
-	rt->rt6i_metric = 0;
 	rt->dst.flags |= DST_HOST;
 	rt->rt6i_dst.addr = *daddr;
 	rt->rt6i_dst.plen = 128;
@@ -1225,7 +1209,6 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
 	if (!pcpu_rt)
 		return NULL;
 	ip6_rt_copy_init(pcpu_rt, rt);
-	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
 	pcpu_rt->rt6i_flags |= RTF_PCPU;
 	return pcpu_rt;
 }
@@ -1279,9 +1262,8 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
 		return;
 
 	net = dev_net(rt6_ex->rt6i->dst.dev);
-	rt6_ex->rt6i->rt6i_node = NULL;
 	hlist_del_rcu(&rt6_ex->hlist);
-	ip6_rt_put(rt6_ex->rt6i);
+	dst_release(&rt6_ex->rt6i->dst);
 	kfree_rcu(rt6_ex, rcu);
 	WARN_ON_ONCE(!bucket->depth);
 	bucket->depth--;
@@ -1463,8 +1445,6 @@ static int rt6_insert_exception(struct rt6_info *nrt,
 	}
 	rt6_ex->rt6i = nrt;
 	rt6_ex->stamp = jiffies;
-	atomic_inc(&nrt->rt6i_ref);
-	nrt->rt6i_node = ort->rt6i_node;
 	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
 	bucket->depth++;
 	net->ipv6.rt6_stats->fib_rt_cache++;
@@ -2122,7 +2102,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
 		rt->rt6i_idev = in6_dev_get(loopback_dev);
 		rt->rt6i_gateway = ort->rt6i_gateway;
 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
-		rt->rt6i_metric = 0;
 
 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
 #ifdef CONFIG_IPV6_SUBTREES
@@ -2247,8 +2226,7 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
 {
 	return !(rt->rt6i_flags & RTF_CACHE) &&
-		(rt->rt6i_flags & RTF_PCPU ||
-		 rcu_access_pointer(rt->rt6i_node));
+		(rt->rt6i_flags & RTF_PCPU || rt->from);
 }
 
 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
@@ -3313,7 +3291,6 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 	if (on_link)
 		nrt->rt6i_flags &= ~RTF_GATEWAY;
 
-	nrt->rt6i_protocol = RTPROT_REDIRECT;
 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
 
 	/* No need to remove rt from the exception table if rt is
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 416fe67271a9..2cff209d0fc1 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -107,8 +107,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	 * it was magically lost, so this code needs audit */
 	xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
 						   RTF_LOCAL);
-	xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
-	xdst->u.rt6.rt6i_node = rt->rt6i_node;
 	xdst->route_cookie = rt6_get_cookie(rt);
 	xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
 	xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
-- 
2.11.0

^ permalink raw reply related

* Re: Repeatable inet6_dump_fib crash in stock 4.12.0-rc4+
From: David Ahern @ 2018-04-18  0:38 UTC (permalink / raw)
  To: Ben Greear, Michal Kubecek; +Cc: Cong Wang, Eric Dumazet, netdev
In-Reply-To: <0b938b4e-bfe7-c320-4f61-031ae5870159@candelatech.com>

On 4/17/18 5:29 PM, Ben Greear wrote:
> 
> FYI, problem still happens in 4.16.  I'm going to re-enable my hack below
> for this kernel as well...I had hopes it might be fixed...

Interesting. I was hoping the same.

> 
> BUG: unable to handle kernel NULL pointer dereference at 8
> IP: fib6_walk_continue+0x5b/0x140 [ipv6]
> PGD 80000007dfc0c067 P4D 80000007dfc0c067 PUD 7e66ff067 PMD 0
> Oops: 0000 [#1] PREEMPT SMP PTI
> Modules linked in: nf_conntrack_netlink nf_conntrack nfnetlink
> nf_defrag_ipv4 libcrc32c vrf]
> CPU: 3 PID: 15117 Comm: ip Tainted: G           O     4.16.0+ #5
> Hardware name: Iron_Systems,Inc CS-CAD-2U-A02/X10SRL-F, BIOS 2.0b
> 05/02/2017
> RIP: 0010:fib6_walk_continue+0x5b/0x140 [ipv6]
> RSP: 0018:ffffc90008c3bc10 EFLAGS: 00010287
> RAX: ffff88085ac45050 RBX: ffff8807e03008a0 RCX: 0000000000000000
> RDX: 0000000000000000 RSI: ffffc90008c3bc48 RDI: ffffffff8232b240
> RBP: ffff880819167600 R08: 0000000000000008 R09: ffff8807dff10071
> R10: ffffc90008c3bbd0 R11: 0000000000000000 R12: ffff8807e03008a0
> R13: 0000000000000002 R14: ffff8807e05744c8 R15: ffff8807e08ef000
> FS:  00007f2f04342700(0000) GS:ffff88087fcc0000(0000)
> knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 0000000000000008 CR3: 00000007e0556002 CR4: 00000000003606e0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> Call Trace:
>  inet6_dump_fib+0x14b/0x2c0 [ipv6]
>  netlink_dump+0x216/0x2a0
>  netlink_recvmsg+0x254/0x400
>  ? copy_msghdr_from_user+0xb5/0x110
>  ___sys_recvmsg+0xe9/0x230
>  ? find_held_lock+0x3b/0xb0
>  ? __handle_mm_fault+0x617/0x1180
>  ? __audit_syscall_entry+0xb3/0x110
>  ? __sys_recvmsg+0x39/0x70
>  __sys_recvmsg+0x39/0x70
>  do_syscall_64+0x63/0x120
>  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
> RIP: 0033:0x7f2f03a72030
> RSP: 002b:00007fffab3de508 EFLAGS: 00000246 ORIG_RAX: 000000000000002f
> RAX: ffffffffffffffda RBX: 00007fffab3e641c RCX: 00007f2f03a72030
> RDX: 0000000000000000 RSI: 00007fffab3de570 RDI: 0000000000000004
> RBP: 0000000000000000 R08: 0000000000007e6c R09: 00007fffab3e63a8
> R10: 00007fffab3de5b0 R11: 0000000000000246 R12: 00007fffab3e6608
> R13: 000000000066b460 R14: 0000000000007e6c R15: 0000000000000000
> Code: 85 d2 74 17 f6 40 2a 04 74 11 8b 53 2c 85 d2 0f 84 d7 00 00 00 83
> ea 01 89 53 2c c7 4
> RIP: fib6_walk_continue+0x5b/0x140 [ipv6] RSP: ffffc90008c3bc10
> CR2: 0000000000000008
> ---[ end trace bd03458864eb266c ]---
> Kernel panic - not syncing: Fatal exception in interrupt
> Kernel Offset: disabled
> Rebooting in 10 seconds..
> ACPI MEMORY or I/O RESET_REG.
> 


Since you can reproduce, would you mind trying
    https://github.com/dsahern/linux.git ipv6/fib6-change-v2

Hopefully these will be committed upstream soon. It changes the game a
bit with the FIB walker. Would be interesting to know if this problem
goes away.

^ permalink raw reply

* Re: [PATCH bpf-next 08/10] [bpf]: make netronome nfp compatible w/ bpf_xdp_adjust_tail
From: Jakub Kicinski @ 2018-04-18  0:40 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Nikita V. Shirokov, Alexei Starovoitov, Daniel Borkmann, netdev
In-Reply-To: <20180417230828.p745neslgxoy5fus@ast-mbp>

On Tue, 17 Apr 2018 16:08:29 -0700, Alexei Starovoitov wrote:
> On Mon, Apr 16, 2018 at 11:51:29PM -0700, Nikita V. Shirokov wrote:
> > w/ bpf_xdp_adjust_tail helper xdp's data_end pointer could be changed as
> > well (only "decrease" of pointer's location is going to be supported).
> > changing of this pointer will change packet's size.
> > for nfp driver we will just calculate packet's length unconditionally
> > 
> > Signed-off-by: Nikita V. Shirokov <tehnerd@tehnerd.com>
> > ---
> >  drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
> > index 1eb6549f2a54..d9111c077699 100644
> > --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
> > +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
> > @@ -1722,7 +1722,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
> >  
> >  			act = bpf_prog_run_xdp(xdp_prog, &xdp);
> >  
> > -			pkt_len -= xdp.data - orig_data;
> > +			pkt_len = xdp.data_end - xdp.data;  
> 
> Looks correct, but Jakub please review.

Indeed:

Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>

Thanks!

^ permalink raw reply

* Re: [PATCH RFC net-next 06/11] udp: add gso support to virtual devices
From: Dimitris Michailidis @ 2018-04-18  0:43 UTC (permalink / raw)
  To: Willem de Bruijn; +Cc: netdev, Willem de Bruijn
In-Reply-To: <20180417200059.30154-7-willemdebruijn.kernel@gmail.com>

On Tue, Apr 17, 2018 at 1:00 PM, Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
> From: Willem de Bruijn <willemb@google.com>
>
> Virtual devices such as tunnels and bonding can handle large packets.
> Only segment packets when reaching a physical or loopback device.
>
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> ---
>  include/linux/netdev_features.h | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
> index 35b79f47a13d..1e4883bb02a7 100644
> --- a/include/linux/netdev_features.h
> +++ b/include/linux/netdev_features.h
> @@ -80,6 +80,7 @@ enum {
>
>         NETIF_F_GRO_HW_BIT,             /* Hardware Generic receive offload */
>         NETIF_F_HW_TLS_RECORD_BIT,      /* Offload TLS record */
> +       NETIF_F_GSO_UDP_L4_BIT,         /* UDP payload GSO (not UFO) */

Please add an entry for the new flag to
net/core/ethtool.c:netdev_features_strings
and a description to Documentation/networking/netdev-features.txt.

>
>         /*
>          * Add your fresh new feature above and remember to update
> @@ -147,6 +148,7 @@ enum {
>  #define NETIF_F_HW_ESP_TX_CSUM __NETIF_F(HW_ESP_TX_CSUM)
>  #define        NETIF_F_RX_UDP_TUNNEL_PORT  __NETIF_F(RX_UDP_TUNNEL_PORT)
>  #define NETIF_F_HW_TLS_RECORD  __NETIF_F(HW_TLS_RECORD)
> +#define NETIF_F_GSO_UDP_L4     __NETIF_F(GSO_UDP_L4)
>
>  #define for_each_netdev_feature(mask_addr, bit)        \
>         for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT)
> @@ -216,6 +218,7 @@ enum {
>                                  NETIF_F_GSO_GRE_CSUM |                 \
>                                  NETIF_F_GSO_IPXIP4 |                   \
>                                  NETIF_F_GSO_IPXIP6 |                   \
> +                                NETIF_F_GSO_UDP_L4 |                   \
>                                  NETIF_F_GSO_UDP_TUNNEL |               \
>                                  NETIF_F_GSO_UDP_TUNNEL_CSUM)
>
> --
> 2.17.0.484.g0c8726318c-goog
>

^ permalink raw reply

* Re: [PATCH 04/10] net: ax88796: Add block_input/output hooks to ax_plat_data
From: Michael Schmitz @ 2018-04-18  0:53 UTC (permalink / raw)
  To: kbuild test robot
  Cc: kbuild-all, netdev, Linux/m68k, Michael Karcher, Michael Karcher
In-Reply-To: <201804180104.AHpJjbYg%fengguang.wu@intel.com>

I think this is a false positive - we're encouraged to provide the
full parameter list for functions, so the sreuct sk_buff* can't be
avoided.

Cheers,

  Michael


On Wed, Apr 18, 2018 at 6:46 AM, kbuild test robot <lkp@intel.com> wrote:
> Hi Michael,
>
> I love your patch! Perhaps something to improve:
>
> [auto build test WARNING on v4.16]
> [cannot apply to net-next/master net/master v4.17-rc1 next-20180417]
> [if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
>
> url:    https://github.com/0day-ci/linux/commits/Michael-Schmitz/New-network-driver-for-Amiga-X-Surf-100-m68k/20180417-141150
> config: arm-samsung (attached as .config)
> compiler: arm-linux-gnueabi-gcc (Debian 7.2.0-11) 7.2.0
> reproduce:
>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # save the attached .config to linux build tree
>         make.cross ARCH=arm
>
> All warnings (new ones prefixed by >>):
>
>    In file included from arch/arm/mach-s3c24xx/mach-anubis.c:42:0:
>>> include/net/ax88796.h:35:11: warning: 'struct sk_buff' declared inside parameter list will not be visible outside of this definition or declaration
>        struct sk_buff *skb, int ring_offset);
>               ^~~~~~~
>
> vim +35 include/net/ax88796.h
>
>     20
>     21  struct ax_plat_data {
>     22          unsigned int     flags;
>     23          unsigned char    wordlength;    /* 1 or 2 */
>     24          unsigned char    dcr_val;       /* default value for DCR */
>     25          unsigned char    rcr_val;       /* default value for RCR */
>     26          unsigned char    gpoc_val;      /* default value for GPOC */
>     27          u32             *reg_offsets;   /* register offsets */
>     28          u8              *mac_addr;      /* MAC addr (only used when
>     29                                             AXFLG_MAC_FROMPLATFORM is used */
>     30
>     31          /* uses default ax88796 buffer if set to NULL */
>     32          void (*block_output)(struct net_device *dev, int count,
>     33                          const unsigned char *buf, int star_page);
>     34          void (*block_input)(struct net_device *dev, int count,
>   > 35                          struct sk_buff *skb, int ring_offset);
>     36  };
>     37
>
> ---
> 0-DAY kernel test infrastructure                Open Source Technology Center
> https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

^ permalink raw reply

* Re: [PATCH v2 bpf-next 0/3] Add missing types to bpftool, libbpf
From: Alexei Starovoitov @ 2018-04-18  0:58 UTC (permalink / raw)
  To: Andrey Ignatov; +Cc: ast, daniel, kubakici, quentin.monnet, netdev, kernel-team
In-Reply-To: <cover.1523985784.git.rdna@fb.com>

On Tue, Apr 17, 2018 at 10:28:43AM -0700, Andrey Ignatov wrote:
> v1->v2:
> - add new types to bpftool-cgroup man page;
> - add new types to bash completion for bpftool;
> - don't add types that should not be in bpftool cgroup.
> 
> Add support for various BPF prog types and attach types that have been
> added to kernel recently but not to bpftool or libbpf yet.

lgtm. for the set:
Acked-by: Alexei Starovoitov <ast@kernel.org>

^ permalink raw reply

* Re: [PATCH bpf-next v3 2/8] bpf: add documentation for eBPF helpers (01-11)
From: Alexei Starovoitov @ 2018-04-18  1:04 UTC (permalink / raw)
  To: Quentin Monnet; +Cc: daniel, ast, netdev, oss-drivers, linux-doc, linux-man
In-Reply-To: <20180417143438.7018-3-quentin.monnet@netronome.com>

On Tue, Apr 17, 2018 at 03:34:32PM +0100, Quentin Monnet wrote:
> Add documentation for eBPF helper functions to bpf.h user header file.
> This documentation can be parsed with the Python script provided in
> another commit of the patch series, in order to provide a RST document
> that can later be converted into a man page.
> 
> The objective is to make the documentation easily understandable and
> accessible to all eBPF developers, including beginners.
> 
> This patch contains descriptions for the following helper functions, all
> written by Alexei:
> 
> - bpf_map_lookup_elem()
> - bpf_map_update_elem()
> - bpf_map_delete_elem()
> - bpf_probe_read()
> - bpf_ktime_get_ns()
> - bpf_trace_printk()
> - bpf_skb_store_bytes()
> - bpf_l3_csum_replace()
> - bpf_l4_csum_replace()
> - bpf_tail_call()
> - bpf_clone_redirect()
> 
> v3:
> - bpf_map_lookup_elem(): Fix description of restrictions for flags
>   related to the existence of the entry.
> - bpf_trace_printk(): State that trace_pipe can be configured. Fix
>   return value in case an unknown format specifier is met. Add a note on
>   kernel log notice when the helper is used. Edit example.
> - bpf_tail_call(): Improve comment on stack inheritance.
> - bpf_clone_redirect(): Improve description of BPF_F_INGRESS flag.
> 
> Cc: Alexei Starovoitov <ast@kernel.org>
> Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>

Acked-by: Alexei Starovoitov <ast@kernel.org>

^ permalink raw reply

* [PATCH net-next] ipv6: frags: fix a lockdep false positive
From: Eric Dumazet @ 2018-04-18  1:11 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet

lockdep does not know that the locks used by IPv4 defrag
and IPv6 reassembly units are of different classes.

It complains because of following chains :

1) sch_direct_xmit()        (lock txq->_xmit_lock)
    dev_hard_start_xmit()
     xmit_one()
      dev_queue_xmit_nit()
       packet_rcv_fanout()
        ip_check_defrag()
         ip_defrag()
          spin_lock()     (lock frag queue spinlock)

2) ip6_input_finish()
    ipv6_frag_rcv()       (lock frag queue spinlock)
     ip6_frag_queue()
      icmpv6_param_prob() (lock txq->_xmit_lock at some point)

We could add lockdep annotations, but we also can make sure IPv6
calls icmpv6_param_prob() only after the release of the frag queue spinlock,
since this naturally makes frag queue spinlock a leaf in lock hierarchy.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
Note do David: I chose net-next because of recent changes in net-next,
and because it is a false positive, but can respin for net tree
if you prefer. Thanks !

 net/ipv6/reassembly.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 2cdf3dcf8c2c1f7629154f71a7bd199b2bf05fd1..b939b94e7e91ddae1552f0b6f6a54c42ab180615 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -163,7 +163,8 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
 }
 
 static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
-			   struct frag_hdr *fhdr, int nhoff)
+			  struct frag_hdr *fhdr, int nhoff,
+			  u32 *prob_offset)
 {
 	struct sk_buff *prev, *next;
 	struct net_device *dev;
@@ -179,11 +180,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
 
 	if ((unsigned int)end > IPV6_MAXPLEN) {
-		__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
-				IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
-				  ((u8 *)&fhdr->frag_off -
-				   skb_network_header(skb)));
+		*prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb);
 		return -1;
 	}
 
@@ -214,10 +211,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 			/* RFC2460 says always send parameter problem in
 			 * this case. -DaveM
 			 */
-			__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
-					IPSTATS_MIB_INHDRERRORS);
-			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
-					  offsetof(struct ipv6hdr, payload_len));
+			*prob_offset = offsetof(struct ipv6hdr, payload_len);
 			return -1;
 		}
 		if (end > fq->q.len) {
@@ -519,15 +513,22 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	iif = skb->dev ? skb->dev->ifindex : 0;
 	fq = fq_find(net, fhdr->identification, hdr, iif);
 	if (fq) {
+		u32 prob_offset = 0;
 		int ret;
 
 		spin_lock(&fq->q.lock);
 
 		fq->iif = iif;
-		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
+		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff,
+				     &prob_offset);
 
 		spin_unlock(&fq->q.lock);
 		inet_frag_put(&fq->q);
+		if (prob_offset) {
+			__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
+					IPSTATS_MIB_INHDRERRORS);
+			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
+		}
 		return ret;
 	}
 
-- 
2.17.0.484.g0c8726318c-goog

^ permalink raw reply related

* Re: [RFC v2] virtio: support packed ring
From: Tiwei Bie @ 2018-04-18  1:17 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, virtualization, wexu
In-Reply-To: <20180417184810-mutt-send-email-mst@kernel.org>

On Tue, Apr 17, 2018 at 06:54:51PM +0300, Michael S. Tsirkin wrote:
> On Tue, Apr 17, 2018 at 10:56:26PM +0800, Tiwei Bie wrote:
> > On Tue, Apr 17, 2018 at 05:04:59PM +0300, Michael S. Tsirkin wrote:
> > > On Tue, Apr 17, 2018 at 08:47:16PM +0800, Tiwei Bie wrote:
> > > > On Tue, Apr 17, 2018 at 03:17:41PM +0300, Michael S. Tsirkin wrote:
> > > > > On Tue, Apr 17, 2018 at 10:51:33AM +0800, Tiwei Bie wrote:
> > > > > > On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> > > > > > > On 2018年04月13日 15:15, Tiwei Bie wrote:
> > > > > > > > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > > > > > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > > > > [...]
> > > > > > > > > > +static int detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
> > > > > > > > > > +			      void **ctx)
> > > > > > > > > > +{
> > > > > > > > > > +	struct vring_packed_desc *desc;
> > > > > > > > > > +	unsigned int i, j;
> > > > > > > > > > +
> > > > > > > > > > +	/* Clear data ptr. */
> > > > > > > > > > +	vq->desc_state[head].data = NULL;
> > > > > > > > > > +
> > > > > > > > > > +	i = head;
> > > > > > > > > > +
> > > > > > > > > > +	for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > > > > > > > +		desc = &vq->vring_packed.desc[i];
> > > > > > > > > > +		vring_unmap_one_packed(vq, desc);
> > > > > > > > > > +		desc->flags = 0x0;
> > > > > > > > > Looks like this is unnecessary.
> > > > > > > > It's safer to zero it. If we don't zero it, after we
> > > > > > > > call virtqueue_detach_unused_buf_packed() which calls
> > > > > > > > this function, the desc is still available to the
> > > > > > > > device.
> > > > > > > 
> > > > > > > Well detach_unused_buf_packed() should be called after device is stopped,
> > > > > > > otherwise even if you try to clear, there will still be a window that device
> > > > > > > may use it.
> > > > > > 
> > > > > > This is not about whether the device has been stopped or
> > > > > > not. We don't have other places to re-initialize the ring
> > > > > > descriptors and wrap_counter. So they need to be set to
> > > > > > the correct values when doing detach_unused_buf.
> > > > > > 
> > > > > > Best regards,
> > > > > > Tiwei Bie
> > > > > 
> > > > > find vqs is the time to do it.
> > > > 
> > > > The .find_vqs() will call .setup_vq() which will eventually
> > > > call vring_create_virtqueue(). It's a different case. Here
> > > > we're talking about re-initializing the descs and updating
> > > > the wrap counter when detaching the unused descs (In this
> > > > case, split ring just needs to decrease vring.avail->idx).
> > > > 
> > > > Best regards,
> > > > Tiwei Bie
> > > 
> > > There's no requirement that  virtqueue_detach_unused_buf re-initializes
> > > the descs. It happens on cleanup path just before drivers delete the
> > > vqs.
> > 
> > Cool, I wasn't aware of it. I saw split ring decrease
> > vring.avail->idx after detaching an unused desc, so I
> > thought detaching unused desc also needs to make sure
> > that the ring state will be updated correspondingly.
> 
> 
> Hmm. You are right. Seems to be out console driver being out of spec.
> Will have to look at how to fix that :(
> 
> It was done here:
> 
> Commit b3258ff1d6086bd2b9eeb556844a868ad7d49bc8
> Author: Amit Shah <amit.shah@redhat.com>
> Date:   Wed Mar 16 19:12:10 2011 +0530
> 
>     virtio: Decrement avail idx on buffer detach
>     
>     When detaching a buffer from a vq, the avail.idx value should be
>     decremented as well.
>     
>     This was noticed by hot-unplugging a virtio console port and then
>     plugging in a new one on the same number (re-using the vqs which were
>     just 'disowned').  qemu reported
>     
>        'Guest moved used index from 0 to 256'
>     
>     when any IO was attempted on the new port.
>     
>     CC: stable@kernel.org
>     Reported-by: juzhang <juzhang@redhat.com>
>     Signed-off-by: Amit Shah <amit.shah@redhat.com>
>     Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> 
> The spec is quite explicit though:
> 	A driver MUST NOT decrement the available idx on a live virtqueue (ie. there is no way to “unexpose”
> 	buffers).
> 

Hmm.. Got it. Thanks!

Best regards,
Tiwei Bie


> 
> 
> 
> 
> > If there is no such requirement, do you think it's OK
> > to remove below two lines:
> > 
> > vq->avail_idx_shadow--;
> > vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
> > 
> > from virtqueue_detach_unused_buf(), and we could have
> > one generic function to handle both rings:
> > 
> > void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
> > {
> > 	struct vring_virtqueue *vq = to_vvq(_vq);
> > 	unsigned int num, i;
> > 	void *buf;
> > 
> > 	START_USE(vq);
> > 
> > 	num = vq->packed ? vq->vring_packed.num : vq->vring.num;
> > 
> > 	for (i = 0; i < num; i++) {
> > 		if (!vq->desc_state[i].data)
> > 			continue;
> > 		/* detach_buf clears data, so grab it now. */
> > 		buf = vq->desc_state[i].data;
> > 		detach_buf(vq, i, NULL);
> > 		END_USE(vq);
> > 		return buf;
> > 	}
> > 	/* That should have freed everything. */
> > 	BUG_ON(vq->vq.num_free != num);
> > 
> > 	END_USE(vq);
> > 	return NULL;
> > }
> > 
> > Best regards,
> > Tiwei Bie
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox