Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 17/21] net/ipv6: Cleanup exception and cache route handling
From: David Ahern @ 2018-04-16 15:22 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180416152255.2256-1-dsahern@gmail.com>

IPv6 FIB will only contain FIB entries with exception routes added to
the FIB entry. Once this transformation is complete, FIB lookups will
return a fib6_info with the lookup functions still returning a dst
based rt6_info. The current code uses rt6_info for both paths and
overloads the rt6_info variable usually called 'rt'.

This patch introduces a new 'f6i' variable name for the result of the FIB
lookup and keeps 'rt' as the dst based return variable. 'f6i' becomes a
fib6_info in a later patch which is why it is introduced as f6i now;
avoids the additional churn in the later patch.

In addition, remove RTF_CACHE and dst checks from fib6 add and delete
since they can not happen now and will never happen after the data
type flip.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_route.h |   1 -
 net/ipv6/ip6_fib.c      |  16 +-----
 net/ipv6/route.c        | 142 +++++++++++++++++++++++++++---------------------
 3 files changed, 81 insertions(+), 78 deletions(-)

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index ff70266e30d7..686cdc7f356a 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -106,7 +106,6 @@ int ip6_ins_rt(struct net *net, struct rt6_info *rt);
 int ip6_del_rt(struct net *net, struct rt6_info *rt);
 
 void rt6_flush_exceptions(struct rt6_info *rt);
-int rt6_remove_exception_rt(struct rt6_info *rt);
 void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args,
 			unsigned long now);
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 280b69497ad0..53df4a98a7f7 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1074,7 +1074,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 static void fib6_start_gc(struct net *net, struct rt6_info *rt)
 {
 	if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
-	    (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE)))
+	    (rt->rt6i_flags & RTF_EXPIRES))
 		mod_timer(&net->ipv6.ip6_fib_timer,
 			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
 }
@@ -1125,8 +1125,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 
 	if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
 		return -EINVAL;
-	if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE))
-		return -EINVAL;
 
 	if (info->nlh) {
 		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
@@ -1650,8 +1648,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 
 	RT6_TRACE("fib6_del_route\n");
 
-	WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
-
 	/* Unlink it */
 	*rtp = rt->rt6_next;
 	rt->rt6i_node = NULL;
@@ -1720,21 +1716,11 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
 	struct rt6_info __rcu **rtp;
 	struct rt6_info __rcu **rtp_next;
 
-#if RT6_DEBUG >= 2
-	if (rt->dst.obsolete > 0) {
-		WARN_ON(fn);
-		return -ENOENT;
-	}
-#endif
 	if (!fn || rt == net->ipv6.fib6_null_entry)
 		return -ENOENT;
 
 	WARN_ON(!(fn->fn_flags & RTN_RTINFO));
 
-	/* remove cached dst from exception table */
-	if (rt->rt6i_flags & RTF_CACHE)
-		return rt6_remove_exception_rt(rt);
-
 	/*
 	 *	Walk the leaf entries looking for ourself
 	 */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 48fe7337b87b..250fa2a5092a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1013,8 +1013,8 @@ static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
 	BUG_ON(from->from);
 
 	rt->rt6i_flags &= ~RTF_EXPIRES;
-	dst_hold(&from->dst);
-	rt->from = from;
+	if (dst_hold_safe(&from->dst))
+		rt->from = from;
 	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
 	if (from->fib6_metrics != &dst_default_metrics) {
 		rt->dst._metrics |= DST_METRICS_REFCOUNTED;
@@ -1097,8 +1097,9 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 					     const struct sk_buff *skb,
 					     int flags)
 {
-	struct rt6_info *rt, *rt_cache;
+	struct rt6_info *f6i;
 	struct fib6_node *fn;
+	struct rt6_info *rt;
 
 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
 		flags &= ~RT6_LOOKUP_F_IFACE;
@@ -1106,36 +1107,36 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 	rcu_read_lock();
 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
-	rt = rcu_dereference(fn->leaf);
-	if (!rt) {
-		rt = net->ipv6.fib6_null_entry;
+	f6i = rcu_dereference(fn->leaf);
+	if (!f6i) {
+		f6i = net->ipv6.fib6_null_entry;
 	} else {
-		rt = rt6_device_match(net, rt, &fl6->saddr,
+		f6i = rt6_device_match(net, f6i, &fl6->saddr,
 				      fl6->flowi6_oif, flags);
-		if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
-			rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
-						  skb, flags);
+		if (f6i->rt6i_nsiblings && fl6->flowi6_oif == 0)
+			f6i = rt6_multipath_select(net, f6i, fl6,
+						   fl6->flowi6_oif, skb, flags);
 	}
-	if (rt == net->ipv6.fib6_null_entry) {
+	if (f6i == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto restart;
 	}
+
 	/* Search through exception table */
-	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
-	if (rt_cache) {
-		rt = rt_cache;
+	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+	if (rt) {
 		if (ip6_hold_safe(net, &rt, true))
 			dst_use_noref(&rt->dst, jiffies);
-	} else if (dst_hold_safe(&rt->dst)) {
-		struct rt6_info *nrt;
-
-		nrt = ip6_create_rt_rcu(rt);
-		dst_release(&rt->dst);
-		rt = nrt;
-	} else {
+	} else if (f6i == net->ipv6.fib6_null_entry) {
 		rt = net->ipv6.ip6_null_entry;
 		dst_hold(&rt->dst);
+	} else {
+		rt = ip6_create_rt_rcu(f6i);
+		if (!rt) {
+			rt = net->ipv6.ip6_null_entry;
+			dst_hold(&rt->dst);
+		}
 	}
 
 	rcu_read_unlock();
@@ -1218,9 +1219,6 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 	 *	Clone the route.
 	 */
 
-	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
-		ort = ort->from;
-
 	rcu_read_lock();
 	dev = ip6_rt_get_dev_rcu(ort);
 	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
@@ -1446,11 +1444,6 @@ static int rt6_insert_exception(struct rt6_info *nrt,
 	struct rt6_exception *rt6_ex;
 	int err = 0;
 
-	/* ort can't be a cache or pcpu route */
-	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
-		ort = ort->from;
-	WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
-
 	spin_lock_bh(&rt6_exception_lock);
 
 	if (ort->exception_bucket_flushed) {
@@ -1589,7 +1582,7 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
 }
 
 /* Remove the passed in cached rt from the hash table that contains it */
-int rt6_remove_exception_rt(struct rt6_info *rt)
+static int rt6_remove_exception_rt(struct rt6_info *rt)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_info *from = rt->from;
@@ -1854,7 +1847,8 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			       const struct sk_buff *skb, int flags)
 {
 	struct fib6_node *fn, *saved_fn;
-	struct rt6_info *rt, *rt_cache;
+	struct rt6_info *f6i;
+	struct rt6_info *rt;
 	int strict = 0;
 
 	strict |= flags & RT6_LOOKUP_F_IFACE;
@@ -1871,10 +1865,10 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		oif = 0;
 
 redo_rt6_select:
-	rt = rt6_select(net, fn, oif, strict);
-	if (rt->rt6i_nsiblings)
-		rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
-	if (rt == net->ipv6.fib6_null_entry) {
+	f6i = rt6_select(net, fn, oif, strict);
+	if (f6i->rt6i_nsiblings)
+		f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
+	if (f6i == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto redo_rt6_select;
@@ -1886,18 +1880,17 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		}
 	}
 
-	/*Search through exception table */
-	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
-	if (rt_cache)
-		rt = rt_cache;
-
-	if (rt == net->ipv6.fib6_null_entry) {
+	if (f6i == net->ipv6.fib6_null_entry) {
 		rt = net->ipv6.ip6_null_entry;
 		rcu_read_unlock();
 		dst_hold(&rt->dst);
 		trace_fib6_table_lookup(net, rt, table, fl6);
 		return rt;
-	} else if (rt->rt6i_flags & RTF_CACHE) {
+	}
+
+	/*Search through exception table */
+	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+	if (rt) {
 		if (ip6_hold_safe(net, &rt, true))
 			dst_use_noref(&rt->dst, jiffies);
 
@@ -1905,7 +1898,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		trace_fib6_table_lookup(net, rt, table, fl6);
 		return rt;
 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
-			    !(rt->rt6i_flags & RTF_GATEWAY))) {
+			    !(f6i->rt6i_flags & RTF_GATEWAY))) {
 		/* Create a RTF_CACHE clone which will not be
 		 * owned by the fib6 tree.  It is for the special case where
 		 * the daddr in the skb during the neighbor look-up is different
@@ -1914,16 +1907,16 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 		struct rt6_info *uncached_rt;
 
-		if (ip6_hold_safe(net, &rt, true)) {
-			dst_use_noref(&rt->dst, jiffies);
+		if (ip6_hold_safe(net, &f6i, true)) {
+			dst_use_noref(&f6i->dst, jiffies);
 		} else {
 			rcu_read_unlock();
-			uncached_rt = rt;
+			uncached_rt = f6i;
 			goto uncached_rt_out;
 		}
 		rcu_read_unlock();
 
-		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
+		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
 		dst_release(&rt->dst);
 
 		if (uncached_rt) {
@@ -1946,18 +1939,18 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 		struct rt6_info *pcpu_rt;
 
-		dst_use_noref(&rt->dst, jiffies);
+		dst_use_noref(&f6i->dst, jiffies);
 		local_bh_disable();
-		pcpu_rt = rt6_get_pcpu_route(rt);
+		pcpu_rt = rt6_get_pcpu_route(f6i);
 
 		if (!pcpu_rt) {
 			/* atomic_inc_not_zero() is needed when using rcu */
-			if (atomic_inc_not_zero(&rt->rt6i_ref)) {
+			if (atomic_inc_not_zero(&f6i->rt6i_ref)) {
 				/* No dst_hold() on rt is needed because grabbing
 				 * rt->rt6i_ref makes sure rt can't be released.
 				 */
-				pcpu_rt = rt6_make_pcpu_route(net, rt);
-				rt6_release(rt);
+				pcpu_rt = rt6_make_pcpu_route(net, f6i);
+				rt6_release(f6i);
 			} else {
 				/* rt is already removed from tree */
 				pcpu_rt = net->ipv6.ip6_null_entry;
@@ -2419,7 +2412,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 					     int flags)
 {
 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
-	struct rt6_info *rt, *rt_cache;
+	struct rt6_info *ret = NULL, *rt_cache;
+	struct rt6_info *rt;
 	struct fib6_node *fn;
 
 	/* Get the "current" route for this destination and
@@ -2458,7 +2452,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 			if (rt_cache &&
 			    ipv6_addr_equal(&rdfl->gateway,
 					    &rt_cache->rt6i_gateway)) {
-				rt = rt_cache;
+				ret = rt_cache;
 				break;
 			}
 			continue;
@@ -2469,7 +2463,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	if (!rt)
 		rt = net->ipv6.fib6_null_entry;
 	else if (rt->rt6i_flags & RTF_REJECT) {
-		rt = net->ipv6.ip6_null_entry;
+		ret = net->ipv6.ip6_null_entry;
 		goto out;
 	}
 
@@ -2480,12 +2474,15 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	}
 
 out:
-	ip6_hold_safe(net, &rt, true);
+	if (ret)
+		dst_hold(&ret->dst);
+	else
+		ret = ip6_create_rt_rcu(rt);
 
 	rcu_read_unlock();
 
-	trace_fib6_table_lookup(net, rt, table, fl6);
-	return rt;
+	trace_fib6_table_lookup(net, ret, table, fl6);
+	return ret;
 };
 
 static struct dst_entry *ip6_route_redirect(struct net *net,
@@ -3182,6 +3179,22 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
 	return err;
 }
 
+static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
+{
+	int rc = -ESRCH;
+
+	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
+		goto out;
+
+	if (cfg->fc_flags & RTF_GATEWAY &&
+	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
+		goto out;
+	if (dst_hold_safe(&rt->dst))
+		rc = rt6_remove_exception_rt(rt);
+out:
+	return rc;
+}
+
 static int ip6_route_del(struct fib6_config *cfg,
 			 struct netlink_ext_ack *extack)
 {
@@ -3206,11 +3219,16 @@ static int ip6_route_del(struct fib6_config *cfg,
 	if (fn) {
 		for_each_fib6_node_rt_rcu(fn) {
 			if (cfg->fc_flags & RTF_CACHE) {
+				int rc;
+
 				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
 							      &cfg->fc_src);
-				if (!rt_cache)
-					continue;
-				rt = rt_cache;
+				if (rt_cache) {
+					rc = ip6_del_cached_rt(rt_cache, cfg);
+					if (rc != -ESRCH)
+						return rc;
+				}
+				continue;
 			}
 			if (cfg->fc_ifindex &&
 			    (!rt->fib6_nh.nh_dev ||
@@ -3327,7 +3345,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 				     NEIGH_UPDATE_F_ISROUTER)),
 		     NDISC_REDIRECT, &ndopts);
 
-	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
+	nrt = ip6_rt_cache_alloc(rt->from, &msg->dest, NULL);
 	if (!nrt)
 		goto out;
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next 18/21] net/ipv6: introduce fib6_info struct and helpers
From: David Ahern @ 2018-04-16 15:22 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180416152255.2256-1-dsahern@gmail.com>

Add fib6_info struct and alloc, destroy, hold and release helpers.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/ip6_fib.c    | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 115 insertions(+)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 159f651dee55..630392ae12d8 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -38,6 +38,7 @@
 #endif
 
 struct rt6_info;
+struct fib6_info;
 
 struct fib6_config {
 	u32		fc_table;
@@ -132,6 +133,46 @@ struct fib6_nh {
 	int			nh_weight;
 };
 
+struct fib6_info {
+	struct fib6_table		*rt6i_table;
+	struct fib6_info __rcu		*rt6_next;
+	struct fib6_node __rcu		*rt6i_node;
+
+	/* Multipath routes:
+	 * siblings is a list of fib6_info that have the the same metric/weight,
+	 * destination, but not the same gateway. nsiblings is just a cache
+	 * to speed up lookup.
+	 */
+	struct list_head		rt6i_siblings;
+	unsigned int			rt6i_nsiblings;
+
+	atomic_t			rt6i_ref;
+	struct inet6_dev		*rt6i_idev;
+	unsigned long			expires;
+	struct dst_metrics		*fib6_metrics;
+#define fib6_pmtu		fib6_metrics->metrics[RTAX_MTU-1]
+
+	struct rt6key			rt6i_dst;
+	u32				rt6i_flags;
+	struct rt6key			rt6i_src;
+	struct rt6key			rt6i_prefsrc;
+
+	struct rt6_info * __percpu	*rt6i_pcpu;
+	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
+
+	u32				rt6i_metric;
+	u8				rt6i_protocol;
+	u8				fib6_type;
+	u8				exception_bucket_flushed:1,
+					should_flush:1,
+					dst_nocount:1,
+					dst_nopolicy:1,
+					dst_host:1,
+					unused:3;
+
+	struct fib6_nh			fib6_nh;
+};
+
 struct rt6_info {
 	struct dst_entry		dst;
 	struct rt6_info __rcu		*rt6_next;
@@ -291,6 +332,20 @@ static inline void ip6_rt_put(struct rt6_info *rt)
 
 void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
 
+struct rt6_info *fib6_info_alloc(gfp_t gfp_flags);
+void fib6_info_destroy(struct rt6_info *f6i);
+
+static inline void fib6_info_hold(struct rt6_info *f6i)
+{
+	atomic_inc(&f6i->rt6i_ref);
+}
+
+static inline void fib6_info_release(struct rt6_info *f6i)
+{
+	if (f6i && atomic_dec_and_test(&f6i->rt6i_ref))
+		fib6_info_destroy(f6i);
+}
+
 static inline void rt6_hold(struct rt6_info *rt)
 {
 	atomic_inc(&rt->rt6i_ref);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 53df4a98a7f7..d07578d84db0 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -145,6 +145,66 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
 	       addr[fn_bit >> 5];
 }
 
+struct rt6_info *fib6_info_alloc(gfp_t gfp_flags)
+{
+	struct rt6_info *f6i;
+
+	f6i = kzalloc(sizeof(*f6i), gfp_flags);
+	if (!f6i)
+		return NULL;
+
+	f6i->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
+	if (!f6i->rt6i_pcpu) {
+		kfree(f6i);
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&f6i->rt6i_siblings);
+	f6i->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
+
+	atomic_inc(&f6i->rt6i_ref);
+
+	return f6i;
+}
+
+void fib6_info_destroy(struct rt6_info *f6i)
+{
+	struct rt6_exception_bucket *bucket;
+
+	WARN_ON(f6i->rt6i_node);
+
+	bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
+	if (bucket) {
+		f6i->rt6i_exception_bucket = NULL;
+		kfree(bucket);
+	}
+
+	if (f6i->rt6i_pcpu) {
+		int cpu;
+
+		for_each_possible_cpu(cpu) {
+			struct rt6_info **ppcpu_rt;
+			struct rt6_info *pcpu_rt;
+
+			ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
+			pcpu_rt = *ppcpu_rt;
+			if (pcpu_rt) {
+				dst_dev_put(&pcpu_rt->dst);
+				dst_release(&pcpu_rt->dst);
+				*ppcpu_rt = NULL;
+			}
+		}
+	}
+
+	if (f6i->rt6i_idev)
+		in6_dev_put(f6i->rt6i_idev);
+	if (f6i->fib6_nh.nh_dev)
+		dev_put(f6i->fib6_nh.nh_dev);
+
+	kfree(f6i);
+}
+EXPORT_SYMBOL_GPL(fib6_info_destroy);
+
 static struct fib6_node *node_alloc(struct net *net)
 {
 	struct fib6_node *fn;
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next 19/21] net/ipv6: separate handling of FIB entries from dst based routes
From: David Ahern @ 2018-04-16 15:22 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180416152255.2256-1-dsahern@gmail.com>

Last step before flipping the data type for FIB entries:
- use fib6_info_alloc to create FIB entries in ip6_route_info_create
  and addrconf_dst_alloc
- use fib6_info_release in place of dst_release, ip6_rt_put and
  rt6_release
- remove the dst_hold before calling __ip6_ins_rt or ip6_del_rt
- when purging routes, drop per-cpu routes
- replace inc and dec of rt6i_ref with fib6_info_hold and fib6_info_release
- use rt->from since it points to the FIB entry
- drop references to exception bucket, fib6_metrics and per-cpu from
  dst entries (those are relevant for fib entries only)

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h   |   4 +-
 include/net/ip6_route.h |   3 +-
 net/ipv6/addrconf.c     |  18 +++--
 net/ipv6/anycast.c      |   7 +-
 net/ipv6/ip6_fib.c      |  55 ++++++++++------
 net/ipv6/ip6_output.c   |   3 +-
 net/ipv6/ndisc.c        |   6 +-
 net/ipv6/route.c        | 171 +++++++++++++++++-------------------------------
 8 files changed, 115 insertions(+), 152 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 630392ae12d8..6c3d92bb3459 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -314,9 +314,7 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt)
 
 	if (rt->rt6i_flags & RTF_PCPU ||
 	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
-		rt = rt->from;
-
-	rt6_get_cookie_safe(rt, &cookie);
+		rt6_get_cookie_safe(rt->from, &cookie);
 
 	return cookie;
 }
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 686cdc7f356a..57d0d45667f1 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -114,8 +114,7 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
 				      unsigned int prefs,
 				      struct in6_addr *saddr)
 {
-	struct inet6_dev *idev =
-			rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
+	struct inet6_dev *idev = rt ? rt->rt6i_idev : NULL;
 	int err = 0;
 
 	if (rt && rt->rt6i_prefsrc.plen)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8796f00ac714..32d7571e587f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -916,7 +916,6 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 		pr_warn("Freeing alive inet6 address %p\n", ifp);
 		return;
 	}
-	ip6_rt_put(ifp->rt);
 
 	kfree_rcu(ifp, rcu);
 }
@@ -1102,8 +1101,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	inet6addr_notifier_call_chain(NETDEV_UP, ifa);
 out:
 	if (unlikely(err < 0)) {
-		if (rt)
-			ip6_rt_put(rt);
+		fib6_info_release(rt);
+
 		if (ifa) {
 			if (ifa->idev)
 				in6_dev_put(ifa->idev);
@@ -1191,7 +1190,7 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_r
 		else {
 			if (!(rt->rt6i_flags & RTF_EXPIRES))
 				fib6_set_expires(rt, expires);
-			ip6_rt_put(rt);
+			fib6_info_release(rt);
 		}
 	}
 }
@@ -2375,8 +2374,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 			continue;
 		if ((rt->rt6i_flags & noflags) != 0)
 			continue;
-		if (!dst_hold_safe(&rt->dst))
-			rt = NULL;
+		fib6_info_hold(rt);
 		break;
 	}
 out:
@@ -2688,7 +2686,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 			addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
 					      dev, expires, flags, GFP_ATOMIC);
 		}
-		ip6_rt_put(rt);
+		fib6_info_release(rt);
 	}
 
 	/* Try to figure out our local address for this prefix */
@@ -3357,7 +3355,7 @@ static int fixup_permanent_addr(struct net *net,
 		ifp->rt = rt;
 		spin_unlock(&ifp->lock);
 
-		ip6_rt_put(prev);
+		fib6_info_release(prev);
 	}
 
 	if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
@@ -5640,8 +5638,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 				ip6_del_rt(net, rt);
 		}
 		if (ifp->rt) {
-			if (dst_hold_safe(&ifp->rt->dst))
-				ip6_del_rt(net, ifp->rt);
+			ip6_del_rt(net, ifp->rt);
+			ifp->rt = NULL;
 		}
 		rt_genid_bump_ipv6(net);
 		break;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index e456386fe4d5..3db8fe10322b 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -213,7 +213,7 @@ static void aca_put(struct ifacaddr6 *ac)
 {
 	if (refcount_dec_and_test(&ac->aca_refcnt)) {
 		in6_dev_put(ac->aca_idev);
-		dst_release(&ac->aca_rt->dst);
+		fib6_info_release(ac->aca_rt);
 		kfree(ac);
 	}
 }
@@ -231,6 +231,7 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
 	aca->aca_addr = *addr;
 	in6_dev_hold(idev);
 	aca->aca_idev = idev;
+	fib6_info_hold(rt);
 	aca->aca_rt = rt;
 	aca->aca_users = 1;
 	/* aca_tstamp should be updated upon changes */
@@ -274,7 +275,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 	}
 	aca = aca_alloc(rt, addr);
 	if (!aca) {
-		ip6_rt_put(rt);
+		fib6_info_release(rt);
 		err = -ENOMEM;
 		goto out;
 	}
@@ -330,7 +331,6 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 	write_unlock_bh(&idev->lock);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
-	dst_hold(&aca->aca_rt->dst);
 	ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
 
 	aca_put(aca);
@@ -358,7 +358,6 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
 
 		addrconf_leave_solict(idev, &aca->aca_addr);
 
-		dst_hold(&aca->aca_rt->dst);
 		ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
 
 		aca_put(aca);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index d07578d84db0..4d6bd033dccd 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -170,6 +170,7 @@ struct rt6_info *fib6_info_alloc(gfp_t gfp_flags)
 void fib6_info_destroy(struct rt6_info *f6i)
 {
 	struct rt6_exception_bucket *bucket;
+	struct dst_metrics *m;
 
 	WARN_ON(f6i->rt6i_node);
 
@@ -201,6 +202,10 @@ void fib6_info_destroy(struct rt6_info *f6i)
 	if (f6i->fib6_nh.nh_dev)
 		dev_put(f6i->fib6_nh.nh_dev);
 
+	m = f6i->fib6_metrics;
+	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
+		kfree(m);
+
 	kfree(f6i);
 }
 EXPORT_SYMBOL_GPL(fib6_info_destroy);
@@ -714,7 +719,7 @@ static struct fib6_node *fib6_add_1(struct net *net,
 			/* clean up an intermediate node */
 			if (!(fn->fn_flags & RTN_RTINFO)) {
 				RCU_INIT_POINTER(fn->leaf, NULL);
-				rt6_release(leaf);
+				fib6_info_release(leaf);
 			/* remove null_entry in the root node */
 			} else if (fn->fn_flags & RTN_TL_ROOT &&
 				   rcu_access_pointer(fn->leaf) ==
@@ -898,12 +903,32 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
 			if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
 				new_leaf = fib6_find_prefix(net, table, fn);
 				atomic_inc(&new_leaf->rt6i_ref);
+
 				rcu_assign_pointer(fn->leaf, new_leaf);
-				rt6_release(rt);
+				fib6_info_release(rt);
 			}
 			fn = rcu_dereference_protected(fn->parent,
 				    lockdep_is_held(&table->tb6_lock));
 		}
+
+		if (rt->rt6i_pcpu) {
+			int cpu;
+
+			/* release the reference to this fib entry from
+			 * all of its cached pcpu routes
+			 */
+			for_each_possible_cpu(cpu) {
+				struct rt6_info **ppcpu_rt;
+				struct rt6_info *pcpu_rt;
+
+				ppcpu_rt = per_cpu_ptr(rt->rt6i_pcpu, cpu);
+				pcpu_rt = *ppcpu_rt;
+				if (pcpu_rt) {
+					fib6_info_release(pcpu_rt->from);
+					pcpu_rt->from = NULL;
+				}
+			}
+		}
 	}
 }
 
@@ -1099,7 +1124,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 		fib6_purge_rt(iter, fn, info->nl_net);
 		if (rcu_access_pointer(fn->rr_ptr) == iter)
 			fn->rr_ptr = NULL;
-		rt6_release(iter);
+		fib6_info_release(iter);
 
 		if (nsiblings) {
 			/* Replacing an ECMP route, remove all siblings */
@@ -1115,7 +1140,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 					fib6_purge_rt(iter, fn, info->nl_net);
 					if (rcu_access_pointer(fn->rr_ptr) == iter)
 						fn->rr_ptr = NULL;
-					rt6_release(iter);
+					fib6_info_release(iter);
 					nsiblings--;
 					info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
 				} else {
@@ -1183,9 +1208,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	int replace_required = 0;
 	int sernum = fib6_new_sernum(info->nl_net);
 
-	if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
-		return -EINVAL;
-
 	if (info->nlh) {
 		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
 			allow_create = 0;
@@ -1300,7 +1322,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 			if (pn_leaf == rt) {
 				pn_leaf = NULL;
 				RCU_INIT_POINTER(pn->leaf, NULL);
-				atomic_dec(&rt->rt6i_ref);
+				fib6_info_release(rt);
 			}
 			if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
 				pn_leaf = fib6_find_prefix(info->nl_net, table,
@@ -1312,7 +1334,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 					    info->nl_net->ipv6.fib6_null_entry;
 				}
 #endif
-				atomic_inc(&pn_leaf->rt6i_ref);
+				fib6_info_hold(pn_leaf);
 				rcu_assign_pointer(pn->leaf, pn_leaf);
 			}
 		}
@@ -1334,10 +1356,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	     (fn->fn_flags & RTN_TL_ROOT &&
 	      !rcu_access_pointer(fn->leaf))))
 		fib6_repair_tree(info->nl_net, table, fn);
-	/* Always release dst as dst->__refcnt is guaranteed
-	 * to be taken before entering this function
-	 */
-	dst_release_immediate(&rt->dst);
 	return err;
 }
 
@@ -1637,7 +1655,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 				new_fn_leaf = net->ipv6.fib6_null_entry;
 			}
 #endif
-			atomic_inc(&new_fn_leaf->rt6i_ref);
+			fib6_info_hold(new_fn_leaf);
 			rcu_assign_pointer(fn->leaf, new_fn_leaf);
 			return pn;
 		}
@@ -1693,7 +1711,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 			return pn;
 
 		RCU_INIT_POINTER(pn->leaf, NULL);
-		rt6_release(pn_leaf);
+		fib6_info_release(pn_leaf);
 		fn = pn;
 	}
 }
@@ -1763,7 +1781,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 	call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
 	if (!info->skip_notify)
 		inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
-	rt6_release(rt);
+	fib6_info_release(rt);
 }
 
 /* Need to own table->tb6_lock */
@@ -2261,9 +2279,8 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
 
 	dev = rt->fib6_nh.nh_dev;
 	seq_printf(seq, " %08x %08x %08x %08x %8s\n",
-		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
-		   rt->dst.__use, rt->rt6i_flags,
-		   dev ? dev->name : "");
+		   rt->rt6i_metric, atomic_read(&rt->rt6i_ref), 0,
+		   rt->rt6i_flags, dev ? dev->name : "");
 	iter->w.leaf = NULL;
 	return 0;
 }
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2e891d2c30ef..21faeb6aa224 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -972,7 +972,8 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
 		if (!had_dst)
 			*dst = ip6_route_output(net, sk, fl6);
 		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
-		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
+		err = ip6_route_get_saddr(net, rt ? rt->from : NULL,
+					  &fl6->daddr,
 					  sk ? inet6_sk(sk)->srcprefs : 0,
 					  &fl6->saddr);
 		if (err)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 556717154fa3..a28857088bff 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1283,7 +1283,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 			ND_PRINTK(0, err,
 				  "RA: %s got default router without neighbour\n",
 				  __func__);
-			ip6_rt_put(rt);
+			fib6_info_release(rt);
 			return;
 		}
 	}
@@ -1313,7 +1313,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 			ND_PRINTK(0, err,
 				  "RA: %s got default router without neighbour\n",
 				  __func__);
-			ip6_rt_put(rt);
+			fib6_info_release(rt);
 			return;
 		}
 		neigh->flags |= NTF_ROUTER;
@@ -1499,7 +1499,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 		ND_PRINTK(2, warn, "RA: invalid RA options\n");
 	}
 out:
-	ip6_rt_put(rt);
+	fib6_info_release(rt);
 	if (neigh)
 		neigh_release(neigh);
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 250fa2a5092a..a5ffb398bffb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -351,13 +351,11 @@ static void rt6_info_init(struct rt6_info *rt)
 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 	INIT_LIST_HEAD(&rt->rt6i_siblings);
 	INIT_LIST_HEAD(&rt->rt6i_uncached);
-	rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
 }
 
 /* allocate dst with ip6_dst_ops */
-static struct rt6_info *__ip6_dst_alloc(struct net *net,
-					struct net_device *dev,
-					int flags)
+struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
+			       int flags)
 {
 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 					1, DST_OBSOLETE_FORCE_CHK, flags);
@@ -369,35 +367,15 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
 
 	return rt;
 }
-
-struct rt6_info *ip6_dst_alloc(struct net *net,
-			       struct net_device *dev,
-			       int flags)
-{
-	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
-
-	if (rt) {
-		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
-		if (!rt->rt6i_pcpu) {
-			dst_release_immediate(&rt->dst);
-			return NULL;
-		}
-	}
-
-	return rt;
-}
 EXPORT_SYMBOL(ip6_dst_alloc);
 
 static void ip6_dst_destroy(struct dst_entry *dst)
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
-	struct rt6_exception_bucket *bucket;
 	struct rt6_info *from = rt->from;
 	struct inet6_dev *idev;
-	struct dst_metrics *m;
 
 	dst_destroy_metrics_generic(dst);
-	free_percpu(rt->rt6i_pcpu);
 	rt6_uncached_list_del(rt);
 
 	idev = rt->rt6i_idev;
@@ -405,18 +383,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 		rt->rt6i_idev = NULL;
 		in6_dev_put(idev);
 	}
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
-	if (bucket) {
-		rt->rt6i_exception_bucket = NULL;
-		kfree(bucket);
-	}
-
-	m = rt->fib6_metrics;
-	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
-		kfree(m);
 
 	rt->from = NULL;
-	dst_release(&from->dst);
+	fib6_info_release(from);
 }
 
 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -891,7 +860,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 		else
 			fib6_set_expires(rt, jiffies + HZ * lifetime);
 
-		ip6_rt_put(rt);
+		fib6_info_release(rt);
 	}
 	return 0;
 }
@@ -1010,11 +979,9 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
 
 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
 {
-	BUG_ON(from->from);
-
 	rt->rt6i_flags &= ~RTF_EXPIRES;
-	if (dst_hold_safe(&from->dst))
-		rt->from = from;
+	fib6_info_hold(from);
+	rt->from = from;
 	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
 	if (from->fib6_metrics != &dst_default_metrics) {
 		rt->dst._metrics |= DST_METRICS_REFCOUNTED;
@@ -1084,7 +1051,7 @@ static struct rt6_info *ip6_create_rt_rcu(struct rt6_info *rt)
 	struct net_device *dev = rt->fib6_nh.nh_dev;
 	struct rt6_info *nrt;
 
-	nrt = __ip6_dst_alloc(dev_net(dev), dev, flags);
+	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
 	if (nrt)
 		ip6_rt_copy_init(nrt, rt);
 
@@ -1203,8 +1170,6 @@ int ip6_ins_rt(struct net *net, struct rt6_info *rt)
 {
 	struct nl_info info = {	.nl_net = net, };
 
-	/* Hold dst to account for the reference from the fib6 tree */
-	dst_hold(&rt->dst);
 	return __ip6_ins_rt(rt, &info, NULL);
 }
 
@@ -1221,7 +1186,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 
 	rcu_read_lock();
 	dev = ip6_rt_get_dev_rcu(ort);
-	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
+	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
 	rcu_read_unlock();
 	if (!rt)
 		return NULL;
@@ -1256,7 +1221,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
 
 	rcu_read_lock();
 	dev = ip6_rt_get_dev_rcu(rt);
-	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, flags);
+	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
 	rcu_read_unlock();
 	if (!pcpu_rt)
 		return NULL;
@@ -1317,7 +1282,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
 	net = dev_net(rt6_ex->rt6i->dst.dev);
 	rt6_ex->rt6i->rt6i_node = NULL;
 	hlist_del_rcu(&rt6_ex->hlist);
-	rt6_release(rt6_ex->rt6i);
+	ip6_rt_put(rt6_ex->rt6i);
 	kfree_rcu(rt6_ex, rcu);
 	WARN_ON_ONCE(!bucket->depth);
 	bucket->depth--;
@@ -1907,17 +1872,11 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 		struct rt6_info *uncached_rt;
 
-		if (ip6_hold_safe(net, &f6i, true)) {
-			dst_use_noref(&f6i->dst, jiffies);
-		} else {
-			rcu_read_unlock();
-			uncached_rt = f6i;
-			goto uncached_rt_out;
-		}
+		fib6_info_hold(f6i);
 		rcu_read_unlock();
 
 		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
-		dst_release(&rt->dst);
+		fib6_info_release(f6i);
 
 		if (uncached_rt) {
 			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
@@ -1930,7 +1889,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			dst_hold(&uncached_rt->dst);
 		}
 
-uncached_rt_out:
 		trace_fib6_table_lookup(net, uncached_rt, table, fl6);
 		return uncached_rt;
 
@@ -1939,24 +1897,12 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 		struct rt6_info *pcpu_rt;
 
-		dst_use_noref(&f6i->dst, jiffies);
 		local_bh_disable();
 		pcpu_rt = rt6_get_pcpu_route(f6i);
 
-		if (!pcpu_rt) {
-			/* atomic_inc_not_zero() is needed when using rcu */
-			if (atomic_inc_not_zero(&f6i->rt6i_ref)) {
-				/* No dst_hold() on rt is needed because grabbing
-				 * rt->rt6i_ref makes sure rt can't be released.
-				 */
-				pcpu_rt = rt6_make_pcpu_route(net, f6i);
-				rt6_release(f6i);
-			} else {
-				/* rt is already removed from tree */
-				pcpu_rt = net->ipv6.ip6_null_entry;
-				dst_hold(&pcpu_rt->dst);
-			}
-		}
+		if (!pcpu_rt)
+			pcpu_rt = rt6_make_pcpu_route(net, f6i);
+
 		local_bh_enable();
 		rcu_read_unlock();
 		trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
@@ -2193,11 +2139,26 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
  *	Destination cache support functions
  */
 
+static bool fib6_check(struct rt6_info *f6i, u32 cookie)
+{
+	u32 rt_cookie = 0;
+
+	if ((f6i && !rt6_get_cookie_safe(f6i, &rt_cookie)) ||
+	     rt_cookie != cookie)
+		return false;
+
+	if (fib6_check_expired(f6i))
+		return false;
+
+	return true;
+}
+
 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
 {
 	u32 rt_cookie = 0;
 
-	if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
+	if ((rt->from && !rt6_get_cookie_safe(rt->from, &rt_cookie)) ||
+	    rt_cookie != cookie)
 		return NULL;
 
 	if (rt6_check_expired(rt))
@@ -2210,7 +2171,7 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
 {
 	if (!__rt6_check_expired(rt) &&
 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
-	    rt6_check(rt->from, cookie))
+	    fib6_check(rt->from, cookie))
 		return &rt->dst;
 	else
 		return NULL;
@@ -2241,7 +2202,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 	if (rt) {
 		if (rt->rt6i_flags & RTF_CACHE) {
 			if (rt6_check_expired(rt)) {
-				ip6_del_rt(dev_net(dst->dev), rt);
+				rt6_remove_exception_rt(rt);
 				dst = NULL;
 			}
 		} else {
@@ -2262,12 +2223,12 @@ static void ip6_link_failure(struct sk_buff *skb)
 	if (rt) {
 		if (rt->rt6i_flags & RTF_CACHE) {
 			if (dst_hold_safe(&rt->dst))
-				ip6_del_rt(dev_net(rt->dst.dev), rt);
-		} else {
+				rt6_remove_exception_rt(rt);
+		} else if (rt->from) {
 			struct fib6_node *fn;
 
 			rcu_read_lock();
-			fn = rcu_dereference(rt->rt6i_node);
+			fn = rcu_dereference(rt->from->rt6i_node);
 			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
 				fn->fn_sernum = -1;
 			rcu_read_unlock();
@@ -2949,13 +2910,13 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 	if (!table)
 		goto out;
 
-	rt = ip6_dst_alloc(net, NULL,
-			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
-
-	if (!rt) {
-		err = -ENOMEM;
+	err = -ENOMEM;
+	rt = fib6_info_alloc(gfp_flags);
+	if (!rt)
 		goto out;
-	}
+
+	if (cfg->fc_flags & RTF_ADDRCONF)
+		rt->dst_nocount = true;
 
 	err = ip6_convert_metrics(net, rt, cfg);
 	if (err < 0)
@@ -3029,7 +2990,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		if (err)
 			goto out;
 
-		rt->fib6_nh.nh_gw = rt->rt6i_gateway = cfg->fc_gateway;
+		rt->fib6_nh.nh_gw = cfg->fc_gateway;
 	}
 
 	err = -ENODEV;
@@ -3066,7 +3027,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 	    !netif_carrier_ok(dev))
 		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
 	rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
-	rt->fib6_nh.nh_dev = rt->dst.dev = dev;
+	rt->fib6_nh.nh_dev = dev;
 	rt->rt6i_idev = idev;
 	rt->rt6i_table = table;
 
@@ -3078,9 +3039,8 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		dev_put(dev);
 	if (idev)
 		in6_dev_put(idev);
-	if (rt)
-		dst_release_immediate(&rt->dst);
 
+	fib6_info_release(rt);
 	return ERR_PTR(err);
 }
 
@@ -3095,6 +3055,7 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
 		return PTR_ERR(rt);
 
 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
+	fib6_info_release(rt);
 
 	return err;
 }
@@ -3116,7 +3077,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 	spin_unlock_bh(&table->tb6_lock);
 
 out:
-	ip6_rt_put(rt);
+	fib6_info_release(rt);
 	return err;
 }
 
@@ -3170,7 +3131,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
 out_unlock:
 	spin_unlock_bh(&table->tb6_lock);
 out_put:
-	ip6_rt_put(rt);
+	fib6_info_release(rt);
 
 	if (skb) {
 		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
@@ -3241,8 +3202,7 @@ static int ip6_route_del(struct fib6_config *cfg,
 				continue;
 			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
 				continue;
-			if (!dst_hold_safe(&rt->dst))
-				break;
+			fib6_info_hold(rt);
 			rcu_read_unlock();
 
 			/* if gateway was specified only delete the one hop */
@@ -3510,12 +3470,9 @@ static void __rt6_purge_dflt_routers(struct net *net,
 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
-			if (dst_hold_safe(&rt->dst)) {
-				rcu_read_unlock();
-				ip6_del_rt(net, rt);
-			} else {
-				rcu_read_unlock();
-			}
+			fib6_info_hold(rt);
+			rcu_read_unlock();
+			ip6_del_rt(net, rt);
 			goto restart;
 		}
 	}
@@ -3665,7 +3622,7 @@ struct rt6_info *addrconf_dst_alloc(struct net *net,
 	struct net_device *dev = idev->dev;
 	struct rt6_info *rt;
 
-	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
+	rt = fib6_info_alloc(gfp_flags);
 	if (!rt)
 		return ERR_PTR(-ENOMEM);
 
@@ -3686,8 +3643,8 @@ struct rt6_info *addrconf_dst_alloc(struct net *net,
 	}
 
 	rt->fib6_nh.nh_gw = *addr;
+	dev_hold(dev);
 	rt->fib6_nh.nh_dev = dev;
-	rt->rt6i_gateway  = *addr;
 	rt->rt6i_dst.addr = *addr;
 	rt->rt6i_dst.plen = 128;
 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
@@ -4324,7 +4281,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
 					    rt, &r_cfg);
 		if (err) {
-			dst_release_immediate(&rt->dst);
+			fib6_info_release(rt);
 			goto cleanup;
 		}
 
@@ -4341,6 +4298,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 	list_for_each_entry(nh, &rt6_nh_list, next) {
 		rt_last = nh->rt6_info;
 		err = __ip6_ins_rt(nh->rt6_info, info, extack);
+		fib6_info_release(nh->rt6_info);
+
 		/* save reference to first route for notification */
 		if (!rt_notif && !err)
 			rt_notif = nh->rt6_info;
@@ -4388,7 +4347,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 cleanup:
 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
 		if (nh->rt6_info)
-			dst_release_immediate(&nh->rt6_info->dst);
+			fib6_info_release(nh->rt6_info);
 		list_del(&nh->next);
 		kfree(nh);
 	}
@@ -4813,14 +4772,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		goto errout;
 	}
 
-	if (fibmatch && rt->from) {
-		struct rt6_info *ort = rt->from;
-
-		dst_hold(&ort->dst);
-		ip6_rt_put(rt);
-		rt = ort;
-	}
-
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb) {
 		ip6_rt_put(rt);
@@ -4830,12 +4781,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 
 	skb_dst_set(skb, &rt->dst);
 	if (fibmatch)
-		err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, iif,
+		err = rt6_fill_node(net, skb, rt->from, NULL, NULL, NULL, iif,
 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
 				    nlh->nlmsg_seq, 0);
 	else
-		err = rt6_fill_node(net, skb, rt, dst, &fl6.daddr, &fl6.saddr,
-				    iif, RTM_NEWROUTE,
+		err = rt6_fill_node(net, skb, rt->from, dst,
+				    &fl6.daddr, &fl6.saddr, iif, RTM_NEWROUTE,
 				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
 				    0);
 	if (err < 0) {
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next 20/21] net/ipv6: Flip FIB entries to fib6_info
From: David Ahern @ 2018-04-16 15:22 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180416152255.2256-1-dsahern@gmail.com>

Convert all code paths referencing a FIB entry from
rt6_info to fib6_info.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  |  64 ++---
 include/net/if_inet6.h                             |   4 +-
 include/net/ip6_fib.h                              |  42 ++--
 include/net/ip6_route.h                            |  28 +--
 include/net/netns/ipv6.h                           |   2 +-
 net/ipv6/addrconf.c                                |  20 +-
 net/ipv6/anycast.c                                 |   4 +-
 net/ipv6/ip6_fib.c                                 | 116 ++++-----
 net/ipv6/ndisc.c                                   |   2 +-
 net/ipv6/route.c                                   | 259 +++++++++++----------
 10 files changed, 271 insertions(+), 270 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index d995a0b52d7c..b85b15851841 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -442,7 +442,7 @@ struct mlxsw_sp_fib6_entry {
 
 struct mlxsw_sp_rt6 {
 	struct list_head list;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 };
 
 struct mlxsw_sp_lpm_tree {
@@ -3834,7 +3834,7 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
 
 	for (i = 0; i < nh_grp->count; i++) {
 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
-		struct rt6_info *rt = mlxsw_sp_rt6->rt;
+		struct fib6_info *rt = mlxsw_sp_rt6->rt;
 
 		if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
@@ -3920,7 +3920,7 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
 				  common);
 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
-		struct rt6_info *rt = mlxsw_sp_rt6->rt;
+		struct fib6_info *rt = mlxsw_sp_rt6->rt;
 
 		rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
 	}
@@ -4699,7 +4699,7 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
 }
 
-static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
+static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
 {
 	/* Packets with link-local destination IP arriving to the router
 	 * are trapped to the CPU, so no need to program specific routes
@@ -4721,7 +4721,7 @@ static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
 	return false;
 }
 
-static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
+static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
 {
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
 
@@ -4734,18 +4734,18 @@ static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
 	 * memory.
 	 */
 	mlxsw_sp_rt6->rt = rt;
-	rt6_hold(rt);
+	fib6_info_hold(rt);
 
 	return mlxsw_sp_rt6;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static void mlxsw_sp_rt6_release(struct rt6_info *rt)
+static void mlxsw_sp_rt6_release(struct fib6_info *rt)
 {
-	rt6_release(rt);
+	fib6_info_release(rt);
 }
 #else
-static void mlxsw_sp_rt6_release(struct rt6_info *rt)
+static void mlxsw_sp_rt6_release(struct fib6_info *rt)
 {
 }
 #endif
@@ -4756,13 +4756,13 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
 	kfree(mlxsw_sp_rt6);
 }
 
-static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
+static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
 {
 	/* RTF_CACHE routes are ignored */
 	return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
 }
 
-static struct rt6_info *
+static struct fib6_info *
 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
 {
 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
@@ -4771,7 +4771,7 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
 
 static struct mlxsw_sp_fib6_entry *
 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
-				 const struct rt6_info *nrt, bool replace)
+				 const struct fib6_info *nrt, bool replace)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 
@@ -4779,7 +4779,7 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
 		return NULL;
 
 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
-		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
 
 		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
 		 * virtual router.
@@ -4802,7 +4802,7 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
 
 static struct mlxsw_sp_rt6 *
 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
-			    const struct rt6_info *rt)
+			    const struct fib6_info *rt)
 {
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
 
@@ -4815,7 +4815,7 @@ mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
 }
 
 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
-					const struct rt6_info *rt,
+					const struct fib6_info *rt,
 					enum mlxsw_sp_ipip_type *ret)
 {
 	return rt->fib6_nh.nh_dev &&
@@ -4825,7 +4825,7 @@ static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
 				       struct mlxsw_sp_nexthop_group *nh_grp,
 				       struct mlxsw_sp_nexthop *nh,
-				       const struct rt6_info *rt)
+				       const struct fib6_info *rt)
 {
 	const struct mlxsw_sp_ipip_ops *ipip_ops;
 	struct mlxsw_sp_ipip_entry *ipip_entry;
@@ -4870,7 +4870,7 @@ static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
 				  struct mlxsw_sp_nexthop_group *nh_grp,
 				  struct mlxsw_sp_nexthop *nh,
-				  const struct rt6_info *rt)
+				  const struct fib6_info *rt)
 {
 	struct net_device *dev = rt->fib6_nh.nh_dev;
 
@@ -4897,7 +4897,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
 }
 
 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
-				    const struct rt6_info *rt)
+				    const struct fib6_info *rt)
 {
 	return rt->rt6i_flags & RTF_GATEWAY ||
 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
@@ -4928,7 +4928,7 @@ mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
 	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
 	nh_grp->count = fib6_entry->nrt6;
 	for (i = 0; i < nh_grp->count; i++) {
-		struct rt6_info *rt = mlxsw_sp_rt6->rt;
+		struct fib6_info *rt = mlxsw_sp_rt6->rt;
 
 		nh = &nh_grp->nexthops[i];
 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
@@ -5040,7 +5040,7 @@ mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
 static int
 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_fib6_entry *fib6_entry,
-				struct rt6_info *rt)
+				struct fib6_info *rt)
 {
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
 	int err;
@@ -5068,7 +5068,7 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
 static void
 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_fib6_entry *fib6_entry,
-				struct rt6_info *rt)
+				struct fib6_info *rt)
 {
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
 
@@ -5084,7 +5084,7 @@ mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
 
 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
 					 struct mlxsw_sp_fib_entry *fib_entry,
-					 const struct rt6_info *rt)
+					 const struct fib6_info *rt)
 {
 	/* Packets hitting RTF_REJECT routes need to be discarded by the
 	 * stack. We can rely on their destination device not having a
@@ -5118,7 +5118,7 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
 static struct mlxsw_sp_fib6_entry *
 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
 			   struct mlxsw_sp_fib_node *fib_node,
-			   struct rt6_info *rt)
+			   struct fib6_info *rt)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_entry *fib_entry;
@@ -5168,12 +5168,12 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
 
 static struct mlxsw_sp_fib6_entry *
 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
-			      const struct rt6_info *nrt, bool replace)
+			      const struct fib6_info *nrt, bool replace)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
 
 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
-		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
 
 		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
 			continue;
@@ -5198,7 +5198,7 @@ mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
 			       bool replace)
 {
 	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
-	struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
+	struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 
 	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
@@ -5213,7 +5213,7 @@ mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
 		struct mlxsw_sp_fib6_entry *last;
 
 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
-			struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);
+			struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
 
 			if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
 				break;
@@ -5268,7 +5268,7 @@ mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
 
 static struct mlxsw_sp_fib6_entry *
 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
-			   const struct rt6_info *rt)
+			   const struct fib6_info *rt)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_node *fib_node;
@@ -5287,7 +5287,7 @@ mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
 		return NULL;
 
 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
-		struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+		struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
 
 		if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
 		    rt->rt6i_metric == iter_rt->rt6i_metric &&
@@ -5316,7 +5316,7 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
-				    struct rt6_info *rt, bool replace)
+				    struct fib6_info *rt, bool replace)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_node *fib_node;
@@ -5373,7 +5373,7 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
 }
 
 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
-				     struct rt6_info *rt)
+				     struct fib6_info *rt)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_node *fib_node;
@@ -5836,7 +5836,7 @@ static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
 					 info);
 		fib_work->fen6_info = *fen6_info;
-		rt6_hold(fib_work->fen6_info.rt);
+		fib6_info_hold(fib_work->fen6_info.rt);
 		break;
 	}
 }
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d4088d1a688d..d6089b2e64fe 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -64,7 +64,7 @@ struct inet6_ifaddr {
 	struct delayed_work	dad_work;
 
 	struct inet6_dev	*idev;
-	struct rt6_info		*rt;
+	struct fib6_info	*rt;
 
 	struct hlist_node	addr_lst;
 	struct list_head	if_list;
@@ -144,7 +144,7 @@ struct ipv6_ac_socklist {
 struct ifacaddr6 {
 	struct in6_addr		aca_addr;
 	struct inet6_dev	*aca_idev;
-	struct rt6_info		*aca_rt;
+	struct fib6_info	*aca_rt;
 	struct ifacaddr6	*aca_next;
 	int			aca_users;
 	refcount_t		aca_refcnt;
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 6c3d92bb3459..d41b7bd69fb3 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -75,12 +75,12 @@ struct fib6_node {
 #ifdef CONFIG_IPV6_SUBTREES
 	struct fib6_node __rcu	*subtree;
 #endif
-	struct rt6_info __rcu	*leaf;
+	struct fib6_info __rcu	*leaf;
 
 	__u16			fn_bit;		/* bit key */
 	__u16			fn_flags;
 	int			fn_sernum;
-	struct rt6_info __rcu	*rr_ptr;
+	struct fib6_info __rcu	*rr_ptr;
 	struct rcu_head		rcu;
 };
 
@@ -176,7 +176,7 @@ struct fib6_info {
 struct rt6_info {
 	struct dst_entry		dst;
 	struct rt6_info __rcu		*rt6_next;
-	struct rt6_info			*from;
+	struct fib6_info		*from;
 
 	/*
 	 * Tail elements of dst_entry (__refcnt etc.)
@@ -242,20 +242,20 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
 	return ((struct rt6_info *)dst)->rt6i_idev;
 }
 
-static inline void fib6_clean_expires(struct rt6_info *f6i)
+static inline void fib6_clean_expires(struct fib6_info *f6i)
 {
 	f6i->rt6i_flags &= ~RTF_EXPIRES;
 	f6i->expires = 0;
 }
 
-static inline void fib6_set_expires(struct rt6_info *f6i,
+static inline void fib6_set_expires(struct fib6_info *f6i,
 				    unsigned long expires)
 {
 	f6i->expires = expires;
 	f6i->rt6i_flags |= RTF_EXPIRES;
 }
 
-static inline bool fib6_check_expired(const struct rt6_info *f6i)
+static inline bool fib6_check_expired(const struct fib6_info *f6i)
 {
 	if (f6i->rt6i_flags & RTF_EXPIRES)
 		return time_after(jiffies, f6i->expires);
@@ -288,7 +288,7 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
  * Return true if we can get cookie safely
  * Return false if not
  */
-static inline bool rt6_get_cookie_safe(const struct rt6_info *rt,
+static inline bool rt6_get_cookie_safe(const struct fib6_info *rt,
 				       u32 *cookie)
 {
 	struct fib6_node *fn;
@@ -330,15 +330,15 @@ static inline void ip6_rt_put(struct rt6_info *rt)
 
 void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
 
-struct rt6_info *fib6_info_alloc(gfp_t gfp_flags);
-void fib6_info_destroy(struct rt6_info *f6i);
+struct fib6_info *fib6_info_alloc(gfp_t gfp_flags);
+void fib6_info_destroy(struct fib6_info *f6i);
 
-static inline void fib6_info_hold(struct rt6_info *f6i)
+static inline void fib6_info_hold(struct fib6_info *f6i)
 {
 	atomic_inc(&f6i->rt6i_ref);
 }
 
-static inline void fib6_info_release(struct rt6_info *f6i)
+static inline void fib6_info_release(struct fib6_info *f6i)
 {
 	if (f6i && atomic_dec_and_test(&f6i->rt6i_ref))
 		fib6_info_destroy(f6i);
@@ -371,7 +371,7 @@ enum fib6_walk_state {
 struct fib6_walker {
 	struct list_head lh;
 	struct fib6_node *root, *node;
-	struct rt6_info *leaf;
+	struct fib6_info *leaf;
 	enum fib6_walk_state state;
 	unsigned int skip;
 	unsigned int count;
@@ -435,7 +435,7 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *,
 
 struct fib6_entry_notifier_info {
 	struct fib_notifier_info info; /* must be first */
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 };
 
 /*
@@ -457,14 +457,14 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
 			      const struct in6_addr *saddr, int src_len,
 			      bool exact_match);
 
-void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
+void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg),
 		    void *arg);
 
-int fib6_add(struct fib6_node *root, struct rt6_info *rt,
+int fib6_add(struct fib6_node *root, struct fib6_info *rt,
 	     struct nl_info *info, struct netlink_ext_ack *extack);
-int fib6_del(struct rt6_info *rt, struct nl_info *info);
+int fib6_del(struct fib6_info *rt, struct nl_info *info);
 
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
+void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
 		     unsigned int flags);
 
 void fib6_run_gc(unsigned long expires, struct net *net, bool force);
@@ -487,11 +487,11 @@ void __net_exit fib6_notifier_exit(struct net *net);
 unsigned int fib6_tables_seq_read(struct net *net);
 int fib6_tables_dump(struct net *net, struct notifier_block *nb);
 
-void fib6_update_sernum(struct net *net, struct rt6_info *rt);
-void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt);
+void fib6_update_sernum(struct net *net, struct fib6_info *rt);
+void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt);
 
-void fib6_metric_set(struct rt6_info *f6i, int metric, u32 val);
-static inline bool fib6_metric_locked(struct rt6_info *f6i, int metric)
+void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val);
+static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
 {
 	return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric));
 }
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 57d0d45667f1..d5fb1e4ae7ac 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -66,7 +66,7 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 }
 
-static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
+static inline bool rt6_qualify_for_ecmp(const struct fib6_info *rt)
 {
 	return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
 	       RTF_GATEWAY;
@@ -102,14 +102,14 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg);
 
 int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
 		  struct netlink_ext_ack *extack);
-int ip6_ins_rt(struct net *net, struct rt6_info *rt);
-int ip6_del_rt(struct net *net, struct rt6_info *rt);
+int ip6_ins_rt(struct net *net, struct fib6_info *rt);
+int ip6_del_rt(struct net *net, struct fib6_info *rt);
 
-void rt6_flush_exceptions(struct rt6_info *rt);
-void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args,
+void rt6_flush_exceptions(struct fib6_info *rt);
+void rt6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args,
 			unsigned long now);
 
-static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
+static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *rt,
 				      const struct in6_addr *daddr,
 				      unsigned int prefs,
 				      struct in6_addr *saddr)
@@ -136,9 +136,9 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
 
 void fib6_force_start_gc(struct net *net);
 
-struct rt6_info *addrconf_dst_alloc(struct net *net, struct inet6_dev *idev,
-				    const struct in6_addr *addr, bool anycast,
-				    gfp_t gfp_flags);
+struct fib6_info *addrconf_dst_alloc(struct net *net, struct inet6_dev *idev,
+				     const struct in6_addr *addr, bool anycast,
+				     gfp_t gfp_flags);
 
 struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
 			       int flags);
@@ -147,10 +147,10 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
  *	support functions for ND
  *
  */
-struct rt6_info *rt6_get_dflt_router(struct net *net,
+struct fib6_info *rt6_get_dflt_router(struct net *net,
 				     const struct in6_addr *addr,
 				     struct net_device *dev);
-struct rt6_info *rt6_add_dflt_router(struct net *net,
+struct fib6_info *rt6_add_dflt_router(struct net *net,
 				     const struct in6_addr *gwaddr,
 				     struct net_device *dev, unsigned int pref);
 
@@ -176,14 +176,14 @@ struct rt6_rtnl_dump_arg {
 	struct net *net;
 };
 
-int rt6_dump_route(struct rt6_info *rt, void *p_arg);
+int rt6_dump_route(struct fib6_info *rt, void *p_arg);
 void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
 void rt6_sync_up(struct net_device *dev, unsigned int nh_flags);
 void rt6_disable_ip(struct net_device *dev, unsigned long event);
 void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
-void rt6_multipath_rebalance(struct rt6_info *rt);
+void rt6_multipath_rebalance(struct fib6_info *rt);
 
 void rt6_uncached_list_add(struct rt6_info *rt);
 void rt6_uncached_list_del(struct rt6_info *rt);
@@ -271,7 +271,7 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
 		return daddr;
 }
 
-static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b)
+static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b)
 {
 	return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev &&
 	       a->rt6i_idev == b->rt6i_idev &&
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 74e4e1e449d5..97b3a54579c8 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -60,7 +60,7 @@ struct netns_ipv6 {
 #endif
 	struct xt_table		*ip6table_nat;
 #endif
-	struct rt6_info         *fib6_null_entry;
+	struct fib6_info	*fib6_null_entry;
 	struct rt6_info		*ip6_null_entry;
 	struct rt6_statistics   *rt6_stats;
 	struct timer_list       ip6_fib_timer;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 32d7571e587f..5767e3ae8df9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -170,7 +170,7 @@ static void addrconf_type_change(struct net_device *dev,
 				 unsigned long event);
 static int addrconf_ifdown(struct net_device *dev, int how);
 
-static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
+static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 						  int plen,
 						  const struct net_device *dev,
 						  u32 flags, u32 noflags);
@@ -994,7 +994,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC;
 	struct net *net = dev_net(idev->dev);
 	struct inet6_ifaddr *ifa = NULL;
-	struct rt6_info *rt = NULL;
+	struct fib6_info *rt = NULL;
 	int err = 0;
 	int addr_type = ipv6_addr_type(addr);
 
@@ -1178,7 +1178,7 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires)
 static void
 cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt)
 {
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 
 	rt = addrconf_get_prefix_route(&ifp->addr,
 				       ifp->prefix_len,
@@ -2348,13 +2348,13 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 }
 
 
-static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
+static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 						  int plen,
 						  const struct net_device *dev,
 						  u32 flags, u32 noflags)
 {
 	struct fib6_node *fn;
-	struct rt6_info *rt = NULL;
+	struct fib6_info *rt = NULL;
 	struct fib6_table *table;
 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX;
 
@@ -2642,7 +2642,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 	 */
 
 	if (pinfo->onlink) {
-		struct rt6_info *rt;
+		struct fib6_info *rt;
 		unsigned long rt_expires;
 
 		/* Avoid arithmetic overflow. Really, we could
@@ -3342,7 +3342,7 @@ static int fixup_permanent_addr(struct net *net,
 	 * case regenerate the host route.
 	 */
 	if (!ifp->rt || !ifp->rt->rt6i_node) {
-		struct rt6_info *rt, *prev;
+		struct fib6_info *rt, *prev;
 
 		rt = addrconf_dst_alloc(net, idev, &ifp->addr, false,
 					GFP_ATOMIC);
@@ -3709,7 +3709,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
 
 	list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
-		struct rt6_info *rt = NULL;
+		struct fib6_info *rt = NULL;
 		bool keep;
 
 		addrconf_del_dad_work(ifa);
@@ -5630,7 +5630,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 			addrconf_leave_anycast(ifp);
 		addrconf_leave_solict(ifp->idev, &ifp->addr);
 		if (!ipv6_addr_any(&ifp->peer_addr)) {
-			struct rt6_info *rt;
+			struct fib6_info *rt;
 
 			rt = addrconf_get_prefix_route(&ifp->peer_addr, 128,
 						       ifp->idev->dev, 0, 0);
@@ -5986,7 +5986,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
 	list_for_each_entry(ifa, &idev->addr_list, if_list) {
 		spin_lock(&ifa->lock);
 		if (ifa->rt) {
-			struct rt6_info *rt = ifa->rt;
+			struct fib6_info *rt = ifa->rt;
 			int cpu;
 
 			rcu_read_lock();
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 3db8fe10322b..0e35657501a7 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -218,7 +218,7 @@ static void aca_put(struct ifacaddr6 *ac)
 	}
 }
 
-static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
+static struct ifacaddr6 *aca_alloc(struct fib6_info *rt,
 				   const struct in6_addr *addr)
 {
 	struct inet6_dev *idev = rt->rt6i_idev;
@@ -247,7 +247,7 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
 int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 {
 	struct ifacaddr6 *aca;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 	struct net *net;
 	int err;
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 4d6bd033dccd..77cf43b2d858 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -43,7 +43,7 @@ static struct kmem_cache *fib6_node_kmem __read_mostly;
 struct fib6_cleaner {
 	struct fib6_walker w;
 	struct net *net;
-	int (*func)(struct rt6_info *, void *arg);
+	int (*func)(struct fib6_info *, void *arg);
 	int sernum;
 	void *arg;
 };
@@ -54,7 +54,7 @@ struct fib6_cleaner {
 #define FWS_INIT FWS_L
 #endif
 
-static struct rt6_info *fib6_find_prefix(struct net *net,
+static struct fib6_info *fib6_find_prefix(struct net *net,
 					 struct fib6_table *table,
 					 struct fib6_node *fn);
 static struct fib6_node *fib6_repair_tree(struct net *net,
@@ -105,7 +105,7 @@ enum {
 	FIB6_NO_SERNUM_CHANGE = 0,
 };
 
-void fib6_update_sernum(struct net *net, struct rt6_info *rt)
+void fib6_update_sernum(struct net *net, struct fib6_info *rt)
 {
 	struct fib6_node *fn;
 
@@ -145,9 +145,9 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
 	       addr[fn_bit >> 5];
 }
 
-struct rt6_info *fib6_info_alloc(gfp_t gfp_flags)
+struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
 {
-	struct rt6_info *f6i;
+	struct fib6_info *f6i;
 
 	f6i = kzalloc(sizeof(*f6i), gfp_flags);
 	if (!f6i)
@@ -167,7 +167,7 @@ struct rt6_info *fib6_info_alloc(gfp_t gfp_flags)
 	return f6i;
 }
 
-void fib6_info_destroy(struct rt6_info *f6i)
+void fib6_info_destroy(struct fib6_info *f6i)
 {
 	struct rt6_exception_bucket *bucket;
 	struct dst_metrics *m;
@@ -404,7 +404,7 @@ unsigned int fib6_tables_seq_read(struct net *net)
 
 static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
 				    enum fib_event_type event_type,
-				    struct rt6_info *rt)
+				    struct fib6_info *rt)
 {
 	struct fib6_entry_notifier_info info = {
 		.rt = rt,
@@ -415,7 +415,7 @@ static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
 
 static int call_fib6_entry_notifiers(struct net *net,
 				     enum fib_event_type event_type,
-				     struct rt6_info *rt,
+				     struct fib6_info *rt,
 				     struct netlink_ext_ack *extack)
 {
 	struct fib6_entry_notifier_info info = {
@@ -432,7 +432,7 @@ struct fib6_dump_arg {
 	struct notifier_block *nb;
 };
 
-static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
+static void fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
 {
 	if (rt == arg->net->ipv6.fib6_null_entry)
 		return;
@@ -441,7 +441,7 @@ static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
 
 static int fib6_node_dump(struct fib6_walker *w)
 {
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 
 	for_each_fib6_walker_rt(w)
 		fib6_rt_dump(rt, w->args);
@@ -490,7 +490,7 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb)
 static int fib6_dump_node(struct fib6_walker *w)
 {
 	int res;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 
 	for_each_fib6_walker_rt(w) {
 		res = rt6_dump_route(rt, w->args);
@@ -507,7 +507,7 @@ static int fib6_dump_node(struct fib6_walker *w)
 		 */
 		if (rt->rt6i_nsiblings)
 			rt = list_last_entry(&rt->rt6i_siblings,
-					     struct rt6_info,
+					     struct fib6_info,
 					     rt6i_siblings);
 	}
 	w->leaf = NULL;
@@ -643,7 +643,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	return res;
 }
 
-void fib6_metric_set(struct rt6_info *f6i, int metric, u32 val)
+void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val)
 {
 	if (!f6i)
 		return;
@@ -690,7 +690,7 @@ static struct fib6_node *fib6_add_1(struct net *net,
 	fn = root;
 
 	do {
-		struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+		struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
 					    lockdep_is_held(&table->tb6_lock));
 		key = (struct rt6key *)((u8 *)leaf + offset);
 
@@ -884,7 +884,7 @@ static struct fib6_node *fib6_add_1(struct net *net,
 	return ln;
 }
 
-static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
+static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
 			  struct net *net)
 {
 	struct fib6_table *table = rt->rt6i_table;
@@ -897,9 +897,9 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
 		 * to still alive ones.
 		 */
 		while (fn) {
-			struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+			struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
 					    lockdep_is_held(&table->tb6_lock));
-			struct rt6_info *new_leaf;
+			struct fib6_info *new_leaf;
 			if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
 				new_leaf = fib6_find_prefix(net, table, fn);
 				atomic_inc(&new_leaf->rt6i_ref);
@@ -936,15 +936,15 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
  *	Insert routing information in a node.
  */
 
-static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
+static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 			    struct nl_info *info,
 			    struct netlink_ext_ack *extack)
 {
-	struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+	struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
 				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
-	struct rt6_info *iter = NULL;
-	struct rt6_info __rcu **ins;
-	struct rt6_info __rcu **fallback_ins = NULL;
+	struct fib6_info *iter = NULL;
+	struct fib6_info __rcu **ins;
+	struct fib6_info __rcu **fallback_ins = NULL;
 	int replace = (info->nlh &&
 		       (info->nlh->nlmsg_flags & NLM_F_REPLACE));
 	int add = (!info->nlh ||
@@ -1035,7 +1035,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 	/* Link this route to others same route. */
 	if (rt->rt6i_nsiblings) {
 		unsigned int rt6i_nsiblings;
-		struct rt6_info *sibling, *temp_sibling;
+		struct fib6_info *sibling, *temp_sibling;
 
 		/* Find the first route that have the same metric */
 		sibling = leaf;
@@ -1156,7 +1156,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 	return 0;
 }
 
-static void fib6_start_gc(struct net *net, struct rt6_info *rt)
+static void fib6_start_gc(struct net *net, struct fib6_info *rt)
 {
 	if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
 	    (rt->rt6i_flags & RTF_EXPIRES))
@@ -1171,7 +1171,7 @@ void fib6_force_start_gc(struct net *net)
 			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
 }
 
-static void __fib6_update_sernum_upto_root(struct rt6_info *rt,
+static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
 					   int sernum)
 {
 	struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
@@ -1186,7 +1186,7 @@ static void __fib6_update_sernum_upto_root(struct rt6_info *rt,
 	}
 }
 
-void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt)
+void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt)
 {
 	__fib6_update_sernum_upto_root(rt, fib6_new_sernum(net));
 }
@@ -1198,7 +1198,7 @@ void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt)
  *	Need to own table->tb6_lock
  */
 
-int fib6_add(struct fib6_node *root, struct rt6_info *rt,
+int fib6_add(struct fib6_node *root, struct fib6_info *rt,
 	     struct nl_info *info, struct netlink_ext_ack *extack)
 {
 	struct fib6_table *table = rt->rt6i_table;
@@ -1219,7 +1219,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 
 	fn = fib6_add_1(info->nl_net, table, root,
 			&rt->rt6i_dst.addr, rt->rt6i_dst.plen,
-			offsetof(struct rt6_info, rt6i_dst), allow_create,
+			offsetof(struct fib6_info, rt6i_dst), allow_create,
 			replace_required, extack);
 	if (IS_ERR(fn)) {
 		err = PTR_ERR(fn);
@@ -1260,7 +1260,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 
 			sn = fib6_add_1(info->nl_net, table, sfn,
 					&rt->rt6i_src.addr, rt->rt6i_src.plen,
-					offsetof(struct rt6_info, rt6i_src),
+					offsetof(struct fib6_info, rt6i_src),
 					allow_create, replace_required, extack);
 
 			if (IS_ERR(sn)) {
@@ -1279,7 +1279,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 		} else {
 			sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
 					&rt->rt6i_src.addr, rt->rt6i_src.plen,
-					offsetof(struct rt6_info, rt6i_src),
+					offsetof(struct fib6_info, rt6i_src),
 					allow_create, replace_required, extack);
 
 			if (IS_ERR(sn)) {
@@ -1316,7 +1316,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 		 * super-tree leaf node we have to find a new one for it.
 		 */
 		if (pn != fn) {
-			struct rt6_info *pn_leaf =
+			struct fib6_info *pn_leaf =
 				rcu_dereference_protected(pn->leaf,
 				    lockdep_is_held(&table->tb6_lock));
 			if (pn_leaf == rt) {
@@ -1365,7 +1365,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
  */
 
 struct lookup_args {
-	int			offset;		/* key offset on rt6_info	*/
+	int			offset;		/* key offset on fib6_info */
 	const struct in6_addr	*addr;		/* search key			*/
 };
 
@@ -1403,7 +1403,7 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
 		struct fib6_node *subtree = FIB6_SUBTREE(fn);
 
 		if (subtree || fn->fn_flags & RTN_RTINFO) {
-			struct rt6_info *leaf = rcu_dereference(fn->leaf);
+			struct fib6_info *leaf = rcu_dereference(fn->leaf);
 			struct rt6key *key;
 
 			if (!leaf)
@@ -1443,12 +1443,12 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad
 	struct fib6_node *fn;
 	struct lookup_args args[] = {
 		{
-			.offset = offsetof(struct rt6_info, rt6i_dst),
+			.offset = offsetof(struct fib6_info, rt6i_dst),
 			.addr = daddr,
 		},
 #ifdef CONFIG_IPV6_SUBTREES
 		{
-			.offset = offsetof(struct rt6_info, rt6i_src),
+			.offset = offsetof(struct fib6_info, rt6i_src),
 			.addr = saddr,
 		},
 #endif
@@ -1484,7 +1484,7 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root,
 	struct fib6_node *fn, *prev = NULL;
 
 	for (fn = root; fn ; ) {
-		struct rt6_info *leaf = rcu_dereference(fn->leaf);
+		struct fib6_info *leaf = rcu_dereference(fn->leaf);
 		struct rt6key *key;
 
 		/* This node is being deleted */
@@ -1533,7 +1533,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
 	struct fib6_node *fn;
 
 	fn = fib6_locate_1(root, daddr, dst_len,
-			   offsetof(struct rt6_info, rt6i_dst),
+			   offsetof(struct fib6_info, rt6i_dst),
 			   exact_match);
 
 #ifdef CONFIG_IPV6_SUBTREES
@@ -1544,7 +1544,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
 
 			if (subtree) {
 				fn = fib6_locate_1(subtree, saddr, src_len,
-					   offsetof(struct rt6_info, rt6i_src),
+					   offsetof(struct fib6_info, rt6i_src),
 					   exact_match);
 			}
 		}
@@ -1563,7 +1563,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
  *
  */
 
-static struct rt6_info *fib6_find_prefix(struct net *net,
+static struct fib6_info *fib6_find_prefix(struct net *net,
 					 struct fib6_table *table,
 					 struct fib6_node *fn)
 {
@@ -1622,11 +1622,11 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 					    lockdep_is_held(&table->tb6_lock));
 		struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
 					    lockdep_is_held(&table->tb6_lock));
-		struct rt6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
+		struct fib6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
 					    lockdep_is_held(&table->tb6_lock));
-		struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
+		struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
 					    lockdep_is_held(&table->tb6_lock));
-		struct rt6_info *new_fn_leaf;
+		struct fib6_info *new_fn_leaf;
 
 		RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
 		iter++;
@@ -1717,10 +1717,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 }
 
 static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
-			   struct rt6_info __rcu **rtp, struct nl_info *info)
+			   struct fib6_info __rcu **rtp, struct nl_info *info)
 {
 	struct fib6_walker *w;
-	struct rt6_info *rt = rcu_dereference_protected(*rtp,
+	struct fib6_info *rt = rcu_dereference_protected(*rtp,
 				    lockdep_is_held(&table->tb6_lock));
 	struct net *net = info->nl_net;
 
@@ -1741,7 +1741,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 
 	/* Remove this entry from other siblings */
 	if (rt->rt6i_nsiblings) {
-		struct rt6_info *sibling, *next_sibling;
+		struct fib6_info *sibling, *next_sibling;
 
 		list_for_each_entry_safe(sibling, next_sibling,
 					 &rt->rt6i_siblings, rt6i_siblings)
@@ -1785,14 +1785,14 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 }
 
 /* Need to own table->tb6_lock */
-int fib6_del(struct rt6_info *rt, struct nl_info *info)
+int fib6_del(struct fib6_info *rt, struct nl_info *info)
 {
 	struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
 				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
 	struct fib6_table *table = rt->rt6i_table;
 	struct net *net = info->nl_net;
-	struct rt6_info __rcu **rtp;
-	struct rt6_info __rcu **rtp_next;
+	struct fib6_info __rcu **rtp;
+	struct fib6_info __rcu **rtp_next;
 
 	if (!fn || rt == net->ipv6.fib6_null_entry)
 		return -ENOENT;
@@ -1804,7 +1804,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
 	 */
 
 	for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
-		struct rt6_info *cur = rcu_dereference_protected(*rtp,
+		struct fib6_info *cur = rcu_dereference_protected(*rtp,
 					lockdep_is_held(&table->tb6_lock));
 		if (rt == cur) {
 			fib6_del_route(table, fn, rtp, info);
@@ -1948,7 +1948,7 @@ static int fib6_walk(struct net *net, struct fib6_walker *w)
 static int fib6_clean_node(struct fib6_walker *w)
 {
 	int res;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 	struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
 	struct nl_info info = {
 		.nl_net = c->net,
@@ -1983,7 +1983,7 @@ static int fib6_clean_node(struct fib6_walker *w)
 			if (WARN_ON(!rt->rt6i_nsiblings))
 				continue;
 			rt = list_last_entry(&rt->rt6i_siblings,
-					     struct rt6_info, rt6i_siblings);
+					     struct fib6_info, rt6i_siblings);
 			continue;
 		}
 		WARN_ON(res != 0);
@@ -2002,7 +2002,7 @@ static int fib6_clean_node(struct fib6_walker *w)
  */
 
 static void fib6_clean_tree(struct net *net, struct fib6_node *root,
-			    int (*func)(struct rt6_info *, void *arg),
+			    int (*func)(struct fib6_info *, void *arg),
 			    int sernum, void *arg)
 {
 	struct fib6_cleaner c;
@@ -2020,7 +2020,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
 }
 
 static void __fib6_clean_all(struct net *net,
-			     int (*func)(struct rt6_info *, void *),
+			     int (*func)(struct fib6_info *, void *),
 			     int sernum, void *arg)
 {
 	struct fib6_table *table;
@@ -2040,7 +2040,7 @@ static void __fib6_clean_all(struct net *net,
 	rcu_read_unlock();
 }
 
-void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *),
+void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *),
 		    void *arg)
 {
 	__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
@@ -2057,7 +2057,7 @@ static void fib6_flush_trees(struct net *net)
  *	Garbage collection
  */
 
-static int fib6_age(struct rt6_info *rt, void *arg)
+static int fib6_age(struct fib6_info *rt, void *arg)
 {
 	struct fib6_gc_args *gc_args = arg;
 	unsigned long now = jiffies;
@@ -2261,7 +2261,7 @@ struct ipv6_route_iter {
 
 static int ipv6_route_seq_show(struct seq_file *seq, void *v)
 {
-	struct rt6_info *rt = v;
+	struct fib6_info *rt = v;
 	struct ipv6_route_iter *iter = seq->private;
 	const struct net_device *dev;
 
@@ -2353,14 +2353,14 @@ static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
 static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	int r;
-	struct rt6_info *n;
+	struct fib6_info *n;
 	struct net *net = seq_file_net(seq);
 	struct ipv6_route_iter *iter = seq->private;
 
 	if (!v)
 		goto iter_table;
 
-	n = rcu_dereference_bh(((struct rt6_info *)v)->rt6_next);
+	n = rcu_dereference_bh(((struct fib6_info *)v)->rt6_next);
 	if (n) {
 		++*pos;
 		return n;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a28857088bff..102645298692 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1155,7 +1155,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 	struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
 	struct neighbour *neigh = NULL;
 	struct inet6_dev *in6_dev;
-	struct rt6_info *rt = NULL;
+	struct fib6_info *rt = NULL;
 	struct net *net;
 	int lifetime;
 	struct ndisc_options ndopts;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a5ffb398bffb..b53b28a59151 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -96,24 +96,24 @@ static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 					   struct sk_buff *skb, u32 mtu);
 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
 					struct sk_buff *skb);
-static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
-static size_t rt6_nlmsg_size(struct rt6_info *rt);
+static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
+static size_t rt6_nlmsg_size(struct fib6_info *rt);
 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
-			 struct rt6_info *rt, struct dst_entry *dst,
+			 struct fib6_info *rt, struct dst_entry *dst,
 			 struct in6_addr *dest, struct in6_addr *src,
 			 int iif, int type, u32 portid, u32 seq,
 			 unsigned int flags);
-static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
+static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
 					   struct in6_addr *daddr,
 					   struct in6_addr *saddr);
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
-static struct rt6_info *rt6_add_route_info(struct net *net,
+static struct fib6_info *rt6_add_route_info(struct net *net,
 					   const struct in6_addr *prefix, int prefixlen,
 					   const struct in6_addr *gwaddr,
 					   struct net_device *dev,
 					   unsigned int pref);
-static struct rt6_info *rt6_get_route_info(struct net *net,
+static struct fib6_info *rt6_get_route_info(struct net *net,
 					   const struct in6_addr *prefix, int prefixlen,
 					   const struct in6_addr *gwaddr,
 					   struct net_device *dev);
@@ -283,7 +283,7 @@ static const u32 ip6_template_metrics[RTAX_MAX] = {
 	[RTAX_HOPLIMIT - 1] = 0,
 };
 
-static const struct rt6_info fib6_null_entry_template = {
+static const struct fib6_info fib6_null_entry_template = {
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 	.rt6i_protocol  = RTPROT_KERNEL,
 	.rt6i_metric	= ~(u32)0,
@@ -372,7 +372,7 @@ EXPORT_SYMBOL(ip6_dst_alloc);
 static void ip6_dst_destroy(struct dst_entry *dst)
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
-	struct rt6_info *from = rt->from;
+	struct fib6_info *from = rt->from;
 	struct inet6_dev *idev;
 
 	dst_destroy_metrics_generic(dst);
@@ -425,13 +425,13 @@ static bool rt6_check_expired(const struct rt6_info *rt)
 	return false;
 }
 
-static struct rt6_info *rt6_multipath_select(const struct net *net,
-					     struct rt6_info *match,
+static struct fib6_info *rt6_multipath_select(const struct net *net,
+					      struct fib6_info *match,
 					     struct flowi6 *fl6, int oif,
 					     const struct sk_buff *skb,
 					     int strict)
 {
-	struct rt6_info *sibling, *next_sibling;
+	struct fib6_info *sibling, *next_sibling;
 
 	/* We might have already computed the hash for ICMPv6 errors. In such
 	 * case it will always be non-zero. Otherwise now is the time to do it.
@@ -462,14 +462,14 @@ static struct rt6_info *rt6_multipath_select(const struct net *net,
  *	Route lookup. rcu_read_lock() should be held.
  */
 
-static inline struct rt6_info *rt6_device_match(struct net *net,
-						    struct rt6_info *rt,
+static inline struct fib6_info *rt6_device_match(struct net *net,
+						 struct fib6_info *rt,
 						    const struct in6_addr *saddr,
 						    int oif,
 						    int flags)
 {
-	struct rt6_info *local = NULL;
-	struct rt6_info *sprt;
+	struct fib6_info *local = NULL;
+	struct fib6_info *sprt;
 
 	if (!oif && ipv6_addr_any(saddr) &&
 	    !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
@@ -532,7 +532,7 @@ static void rt6_probe_deferred(struct work_struct *w)
 	kfree(work);
 }
 
-static void rt6_probe(struct rt6_info *rt)
+static void rt6_probe(struct fib6_info *rt)
 {
 	struct __rt6_probe_work *work;
 	const struct in6_addr *nh_gw;
@@ -585,7 +585,7 @@ static void rt6_probe(struct rt6_info *rt)
 	rcu_read_unlock_bh();
 }
 #else
-static inline void rt6_probe(struct rt6_info *rt)
+static inline void rt6_probe(struct fib6_info *rt)
 {
 }
 #endif
@@ -593,7 +593,7 @@ static inline void rt6_probe(struct rt6_info *rt)
 /*
  * Default Router Selection (RFC 2461 6.3.6)
  */
-static inline int rt6_check_dev(struct rt6_info *rt, int oif)
+static inline int rt6_check_dev(struct fib6_info *rt, int oif)
 {
 	const struct net_device *dev = rt->fib6_nh.nh_dev;
 
@@ -605,7 +605,7 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 	return 0;
 }
 
-static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
+static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
 {
 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 	struct neighbour *neigh;
@@ -637,8 +637,7 @@ static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
 	return ret;
 }
 
-static int rt6_score_route(struct rt6_info *rt, int oif,
-			   int strict)
+static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
 {
 	int m;
 
@@ -656,8 +655,8 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
 	return m;
 }
 
-static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
-				   int *mpri, struct rt6_info *match,
+static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
+				   int *mpri, struct fib6_info *match,
 				   bool *do_rr)
 {
 	int m;
@@ -696,13 +695,13 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 	return match;
 }
 
-static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
-				     struct rt6_info *leaf,
-				     struct rt6_info *rr_head,
+static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
+				     struct fib6_info *leaf,
+				     struct fib6_info *rr_head,
 				     u32 metric, int oif, int strict,
 				     bool *do_rr)
 {
-	struct rt6_info *rt, *match, *cont;
+	struct fib6_info *rt, *match, *cont;
 	int mpri = -1;
 
 	match = NULL;
@@ -735,11 +734,11 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 	return match;
 }
 
-static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
+static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
 				   int oif, int strict)
 {
-	struct rt6_info *leaf = rcu_dereference(fn->leaf);
-	struct rt6_info *match, *rt0;
+	struct fib6_info *leaf = rcu_dereference(fn->leaf);
+	struct fib6_info *match, *rt0;
 	bool do_rr = false;
 	int key_plen;
 
@@ -767,7 +766,7 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
 			     &do_rr);
 
 	if (do_rr) {
-		struct rt6_info *next = rcu_dereference(rt0->rt6_next);
+		struct fib6_info *next = rcu_dereference(rt0->rt6_next);
 
 		/* no entries matched; do round-robin */
 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
@@ -785,7 +784,7 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
 	return match ? match : net->ipv6.fib6_null_entry;
 }
 
-static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
+static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
 {
 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
 }
@@ -799,7 +798,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 	struct in6_addr prefix_buf, *prefix;
 	unsigned int pref;
 	unsigned long lifetime;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 
 	if (len < sizeof(struct route_info)) {
 		return -EINVAL;
@@ -871,7 +870,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
  */
 
 /* called with rcu_lock held */
-static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
+static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
 {
 	struct net_device *dev = rt->fib6_nh.nh_dev;
 
@@ -913,7 +912,7 @@ static int ip6_rt_type_to_error(u8 fib6_type)
 	return fib6_prop[fib6_type];
 }
 
-static unsigned short fib6_info_dst_flags(struct rt6_info *rt)
+static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
 {
 	unsigned short flags = 0;
 
@@ -927,7 +926,7 @@ static unsigned short fib6_info_dst_flags(struct rt6_info *rt)
 	return flags;
 }
 
-static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct rt6_info *ort)
+static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
 {
 	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
 
@@ -949,7 +948,7 @@ static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct rt6_info *ort)
 	}
 }
 
-static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
+static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
 {
 	rt->dst.flags |= fib6_info_dst_flags(ort);
 
@@ -977,7 +976,7 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
 	rt->dst.lastuse = jiffies;
 }
 
-static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
+static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
 {
 	rt->rt6i_flags &= ~RTF_EXPIRES;
 	fib6_info_hold(from);
@@ -989,7 +988,7 @@ static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
 	}
 }
 
-static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
+static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
 {
 	ip6_rt_init_dst(rt, ort);
 
@@ -1045,7 +1044,7 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
 }
 
 /* called with rcu_lock held */
-static struct rt6_info *ip6_create_rt_rcu(struct rt6_info *rt)
+static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
 {
 	unsigned short flags = fib6_info_dst_flags(rt);
 	struct net_device *dev = rt->fib6_nh.nh_dev;
@@ -1064,7 +1063,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 					     const struct sk_buff *skb,
 					     int flags)
 {
-	struct rt6_info *f6i;
+	struct fib6_info *f6i;
 	struct fib6_node *fn;
 	struct rt6_info *rt;
 
@@ -1152,7 +1151,7 @@ EXPORT_SYMBOL(rt6_lookup);
  * Caller must hold dst before calling it.
  */
 
-static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
+static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
 			struct netlink_ext_ack *extack)
 {
 	int err;
@@ -1166,14 +1165,14 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
 	return err;
 }
 
-int ip6_ins_rt(struct net *net, struct rt6_info *rt)
+int ip6_ins_rt(struct net *net, struct fib6_info *rt)
 {
 	struct nl_info info = {	.nl_net = net, };
 
 	return __ip6_ins_rt(rt, &info, NULL);
 }
 
-static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
+static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
 					   const struct in6_addr *daddr,
 					   const struct in6_addr *saddr)
 {
@@ -1213,7 +1212,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 	return rt;
 }
 
-static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
+static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
 {
 	unsigned short flags = fib6_info_dst_flags(rt);
 	struct net_device *dev;
@@ -1232,7 +1231,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
 }
 
 /* It should be called with rcu_read_lock() acquired */
-static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
+static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
 {
 	struct rt6_info *pcpu_rt, **p;
 
@@ -1246,7 +1245,7 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
 }
 
 static struct rt6_info *rt6_make_pcpu_route(struct net *net,
-					    struct rt6_info *rt)
+					    struct fib6_info *rt)
 {
 	struct rt6_info *pcpu_rt, *prev, **p;
 
@@ -1390,7 +1389,7 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
 	return NULL;
 }
 
-static unsigned int fib6_mtu(const struct rt6_info *rt)
+static unsigned int fib6_mtu(const struct fib6_info *rt)
 {
 	unsigned int mtu;
 
@@ -1401,7 +1400,7 @@ static unsigned int fib6_mtu(const struct rt6_info *rt)
 }
 
 static int rt6_insert_exception(struct rt6_info *nrt,
-				struct rt6_info *ort)
+				struct fib6_info *ort)
 {
 	struct net *net = dev_net(nrt->dst.dev);
 	struct rt6_exception_bucket *bucket;
@@ -1487,7 +1486,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
 	return err;
 }
 
-void rt6_flush_exceptions(struct rt6_info *rt)
+void rt6_flush_exceptions(struct fib6_info *rt)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
@@ -1517,7 +1516,7 @@ void rt6_flush_exceptions(struct rt6_info *rt)
 /* Find cached rt in the hash table inside passed in rt
  * Caller has to hold rcu_read_lock()
  */
-static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
+static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
 					   struct in6_addr *daddr,
 					   struct in6_addr *saddr)
 {
@@ -1550,7 +1549,7 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
 static int rt6_remove_exception_rt(struct rt6_info *rt)
 {
 	struct rt6_exception_bucket *bucket;
-	struct rt6_info *from = rt->from;
+	struct fib6_info *from = rt->from;
 	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
 	int err;
@@ -1595,7 +1594,7 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
 static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
 {
 	struct rt6_exception_bucket *bucket;
-	struct rt6_info *from = rt->from;
+	struct fib6_info *from = rt->from;
 	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
 
@@ -1625,7 +1624,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
 	rcu_read_unlock();
 }
 
-static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
+static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
@@ -1667,7 +1666,7 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
 }
 
 static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
-				       struct rt6_info *rt, int mtu)
+				       struct fib6_info *rt, int mtu)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
@@ -1697,7 +1696,7 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
 
 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
 
-static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
+static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
 					struct in6_addr *gateway)
 {
 	struct rt6_exception_bucket *bucket;
@@ -1776,7 +1775,7 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
 	gc_args->more++;
 }
 
-void rt6_age_exceptions(struct rt6_info *rt,
+void rt6_age_exceptions(struct fib6_info *rt,
 			struct fib6_gc_args *gc_args,
 			unsigned long now)
 {
@@ -1812,7 +1811,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			       const struct sk_buff *skb, int flags)
 {
 	struct fib6_node *fn, *saved_fn;
-	struct rt6_info *f6i;
+	struct fib6_info *f6i;
 	struct rt6_info *rt;
 	int strict = 0;
 
@@ -2139,7 +2138,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
  *	Destination cache support functions
  */
 
-static bool fib6_check(struct rt6_info *f6i, u32 cookie)
+static bool fib6_check(struct fib6_info *f6i, u32 cookie)
 {
 	u32 rt_cookie = 0;
 
@@ -2374,7 +2373,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 {
 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
 	struct rt6_info *ret = NULL, *rt_cache;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 	struct fib6_node *fn;
 
 	/* Get the "current" route for this destination and
@@ -2620,7 +2619,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
 	return entries > rt_max_size;
 }
 
-static int ip6_convert_metrics(struct net *net, struct rt6_info *rt,
+static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
 			       struct fib6_config *cfg)
 {
 	int err = 0;
@@ -2823,12 +2822,12 @@ static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
 	return err;
 }
 
-static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
+static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
 					      gfp_t gfp_flags,
 					      struct netlink_ext_ack *extack)
 {
 	struct net *net = cfg->fc_nlinfo.nl_net;
-	struct rt6_info *rt = NULL;
+	struct fib6_info *rt = NULL;
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev = NULL;
 	struct fib6_table *table;
@@ -3047,7 +3046,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
 		  struct netlink_ext_ack *extack)
 {
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 	int err;
 
 	rt = ip6_route_info_create(cfg, gfp_flags, extack);
@@ -3060,7 +3059,7 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
 	return err;
 }
 
-static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
+static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
 {
 	struct net *net = info->nl_net;
 	struct fib6_table *table;
@@ -3081,14 +3080,14 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 	return err;
 }
 
-int ip6_del_rt(struct net *net, struct rt6_info *rt)
+int ip6_del_rt(struct net *net, struct fib6_info *rt)
 {
 	struct nl_info info = { .nl_net = net };
 
 	return __ip6_del_rt(rt, &info);
 }
 
-static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
+static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
 {
 	struct nl_info *info = &cfg->fc_nlinfo;
 	struct net *net = info->nl_net;
@@ -3102,7 +3101,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
 	spin_lock_bh(&table->tb6_lock);
 
 	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
-		struct rt6_info *sibling, *next_sibling;
+		struct fib6_info *sibling, *next_sibling;
 
 		/* prefer to send a single notification with all hops */
 		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
@@ -3159,8 +3158,9 @@ static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
 static int ip6_route_del(struct fib6_config *cfg,
 			 struct netlink_ext_ack *extack)
 {
-	struct rt6_info *rt, *rt_cache;
+	struct rt6_info *rt_cache;
 	struct fib6_table *table;
+	struct fib6_info *rt;
 	struct fib6_node *fn;
 	int err = -ESRCH;
 
@@ -3336,7 +3336,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 }
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
-static struct rt6_info *rt6_get_route_info(struct net *net,
+static struct fib6_info *rt6_get_route_info(struct net *net,
 					   const struct in6_addr *prefix, int prefixlen,
 					   const struct in6_addr *gwaddr,
 					   struct net_device *dev)
@@ -3344,7 +3344,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
 	int ifindex = dev->ifindex;
 	struct fib6_node *fn;
-	struct rt6_info *rt = NULL;
+	struct fib6_info *rt = NULL;
 	struct fib6_table *table;
 
 	table = fib6_get_table(net, tb_id);
@@ -3363,7 +3363,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 			continue;
 		if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
 			continue;
-		ip6_hold_safe(NULL, &rt, false);
+		fib6_info_hold(rt);
 		break;
 	}
 out:
@@ -3371,7 +3371,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 	return rt;
 }
 
-static struct rt6_info *rt6_add_route_info(struct net *net,
+static struct fib6_info *rt6_add_route_info(struct net *net,
 					   const struct in6_addr *prefix, int prefixlen,
 					   const struct in6_addr *gwaddr,
 					   struct net_device *dev,
@@ -3404,12 +3404,12 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 }
 #endif
 
-struct rt6_info *rt6_get_dflt_router(struct net *net,
+struct fib6_info *rt6_get_dflt_router(struct net *net,
 				     const struct in6_addr *addr,
 				     struct net_device *dev)
 {
 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 	struct fib6_table *table;
 
 	table = fib6_get_table(net, tb_id);
@@ -3424,12 +3424,12 @@ struct rt6_info *rt6_get_dflt_router(struct net *net,
 			break;
 	}
 	if (rt)
-		ip6_hold_safe(NULL, &rt, false);
+		fib6_info_hold(rt);
 	rcu_read_unlock();
 	return rt;
 }
 
-struct rt6_info *rt6_add_dflt_router(struct net *net,
+struct fib6_info *rt6_add_dflt_router(struct net *net,
 				     const struct in6_addr *gwaddr,
 				     struct net_device *dev,
 				     unsigned int pref)
@@ -3463,7 +3463,7 @@ struct rt6_info *rt6_add_dflt_router(struct net *net,
 static void __rt6_purge_dflt_routers(struct net *net,
 				     struct fib6_table *table)
 {
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 
 restart:
 	rcu_read_lock();
@@ -3613,14 +3613,14 @@ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff
  *	Allocate a dst for local (unicast / anycast) address.
  */
 
-struct rt6_info *addrconf_dst_alloc(struct net *net,
+struct fib6_info *addrconf_dst_alloc(struct net *net,
 				    struct inet6_dev *idev,
 				    const struct in6_addr *addr,
 				    bool anycast, gfp_t gfp_flags)
 {
 	u32 tb_id;
 	struct net_device *dev = idev->dev;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 
 	rt = fib6_info_alloc(gfp_flags);
 	if (!rt)
@@ -3660,7 +3660,7 @@ struct arg_dev_net_ip {
 	struct in6_addr *addr;
 };
 
-static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
+static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
 {
 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
@@ -3693,7 +3693,7 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
 
 /* Remove routers and update dst entries when gateway turn into host. */
-static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
+static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
 {
 	struct in6_addr *gateway = (struct in6_addr *)arg;
 
@@ -3724,9 +3724,9 @@ struct arg_netdev_event {
 	};
 };
 
-static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
+static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 	struct fib6_node *fn;
 
 	fn = rcu_dereference_protected(rt->rt6i_node,
@@ -3744,7 +3744,7 @@ static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
 	return NULL;
 }
 
-static bool rt6_is_dead(const struct rt6_info *rt)
+static bool rt6_is_dead(const struct fib6_info *rt)
 {
 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
 	    (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
@@ -3754,9 +3754,9 @@ static bool rt6_is_dead(const struct rt6_info *rt)
 	return false;
 }
 
-static int rt6_multipath_total_weight(const struct rt6_info *rt)
+static int rt6_multipath_total_weight(const struct fib6_info *rt)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 	int total = 0;
 
 	if (!rt6_is_dead(rt))
@@ -3770,7 +3770,7 @@ static int rt6_multipath_total_weight(const struct rt6_info *rt)
 	return total;
 }
 
-static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
+static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
 {
 	int upper_bound = -1;
 
@@ -3782,9 +3782,9 @@ static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
 	atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
 }
 
-static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
+static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 	int weight = 0;
 
 	rt6_upper_bound_set(rt, &weight, total);
@@ -3793,9 +3793,9 @@ static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
 		rt6_upper_bound_set(iter, &weight, total);
 }
 
-void rt6_multipath_rebalance(struct rt6_info *rt)
+void rt6_multipath_rebalance(struct fib6_info *rt)
 {
-	struct rt6_info *first;
+	struct fib6_info *first;
 	int total;
 
 	/* In case the entire multipath route was marked for flushing,
@@ -3817,7 +3817,7 @@ void rt6_multipath_rebalance(struct rt6_info *rt)
 	rt6_multipath_upper_bound_set(first, total);
 }
 
-static int fib6_ifup(struct rt6_info *rt, void *p_arg)
+static int fib6_ifup(struct fib6_info *rt, void *p_arg)
 {
 	const struct arg_netdev_event *arg = p_arg;
 	struct net *net = dev_net(arg->dev);
@@ -3846,10 +3846,10 @@ void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
 	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
 }
 
-static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
+static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
 				   const struct net_device *dev)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 
 	if (rt->fib6_nh.nh_dev == dev)
 		return true;
@@ -3860,19 +3860,19 @@ static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
 	return false;
 }
 
-static void rt6_multipath_flush(struct rt6_info *rt)
+static void rt6_multipath_flush(struct fib6_info *rt)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 
 	rt->should_flush = 1;
 	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
 		iter->should_flush = 1;
 }
 
-static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
+static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
 					     const struct net_device *down_dev)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 	unsigned int dead = 0;
 
 	if (rt->fib6_nh.nh_dev == down_dev ||
@@ -3886,11 +3886,11 @@ static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
 	return dead;
 }
 
-static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
+static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
 				       const struct net_device *dev,
 				       unsigned int nh_flags)
 {
-	struct rt6_info *iter;
+	struct fib6_info *iter;
 
 	if (rt->fib6_nh.nh_dev == dev)
 		rt->fib6_nh.nh_flags |= nh_flags;
@@ -3900,7 +3900,7 @@ static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
 }
 
 /* called with write lock held for table with rt */
-static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
+static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
 {
 	const struct arg_netdev_event *arg = p_arg;
 	const struct net_device *dev = arg->dev;
@@ -3967,7 +3967,7 @@ struct rt6_mtu_change_arg {
 	unsigned int mtu;
 };
 
-static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
+static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
 {
 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
 	struct inet6_dev *idev;
@@ -4154,7 +4154,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 }
 
 struct rt6_nh {
-	struct rt6_info *rt6_info;
+	struct fib6_info *fib6_info;
 	struct fib6_config r_cfg;
 	struct list_head next;
 };
@@ -4172,21 +4172,22 @@ static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
 
 static int ip6_route_info_append(struct net *net,
 				 struct list_head *rt6_nh_list,
-				 struct rt6_info *rt, struct fib6_config *r_cfg)
+				 struct fib6_info *rt,
+				 struct fib6_config *r_cfg)
 {
 	struct rt6_nh *nh;
 	int err = -EEXIST;
 
 	list_for_each_entry(nh, rt6_nh_list, next) {
-		/* check if rt6_info already exists */
-		if (rt6_duplicate_nexthop(nh->rt6_info, rt))
+		/* check if fib6_info already exists */
+		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
 			return err;
 	}
 
 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
 	if (!nh)
 		return -ENOMEM;
-	nh->rt6_info = rt;
+	nh->fib6_info = rt;
 	err = ip6_convert_metrics(net, rt, r_cfg);
 	if (err) {
 		kfree(nh);
@@ -4198,8 +4199,8 @@ static int ip6_route_info_append(struct net *net,
 	return 0;
 }
 
-static void ip6_route_mpath_notify(struct rt6_info *rt,
-				   struct rt6_info *rt_last,
+static void ip6_route_mpath_notify(struct fib6_info *rt,
+				   struct fib6_info *rt_last,
 				   struct nl_info *info,
 				   __u16 nlflags)
 {
@@ -4211,7 +4212,7 @@ static void ip6_route_mpath_notify(struct rt6_info *rt,
 	 */
 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
 		rt = list_first_entry(&rt_last->rt6i_siblings,
-				      struct rt6_info,
+				      struct fib6_info,
 				      rt6i_siblings);
 	}
 
@@ -4222,11 +4223,11 @@ static void ip6_route_mpath_notify(struct rt6_info *rt,
 static int ip6_route_multipath_add(struct fib6_config *cfg,
 				   struct netlink_ext_ack *extack)
 {
-	struct rt6_info *rt_notif = NULL, *rt_last = NULL;
+	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
 	struct nl_info *info = &cfg->fc_nlinfo;
 	struct fib6_config r_cfg;
 	struct rtnexthop *rtnh;
-	struct rt6_info *rt;
+	struct fib6_info *rt;
 	struct rt6_nh *err_nh;
 	struct rt6_nh *nh, *nh_safe;
 	__u16 nlflags;
@@ -4246,7 +4247,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 	rtnh = (struct rtnexthop *)cfg->fc_mp;
 
 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
-	 * rt6_info structs per nexthop
+	 * fib6_info structs per nexthop
 	 */
 	while (rtnh_ok(rtnh, remaining)) {
 		memcpy(&r_cfg, cfg, sizeof(*cfg));
@@ -4296,16 +4297,16 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 
 	err_nh = NULL;
 	list_for_each_entry(nh, &rt6_nh_list, next) {
-		rt_last = nh->rt6_info;
-		err = __ip6_ins_rt(nh->rt6_info, info, extack);
-		fib6_info_release(nh->rt6_info);
+		rt_last = nh->fib6_info;
+		err = __ip6_ins_rt(nh->fib6_info, info, extack);
+		fib6_info_release(nh->fib6_info);
 
 		/* save reference to first route for notification */
 		if (!rt_notif && !err)
-			rt_notif = nh->rt6_info;
+			rt_notif = nh->fib6_info;
 
-		/* nh->rt6_info is used or freed at this point, reset to NULL*/
-		nh->rt6_info = NULL;
+		/* nh->fib6_info is used or freed at this point, reset to NULL*/
+		nh->fib6_info = NULL;
 		if (err) {
 			if (replace && nhn)
 				ip6_print_replace_route_err(&rt6_nh_list);
@@ -4346,8 +4347,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 
 cleanup:
 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
-		if (nh->rt6_info)
-			fib6_info_release(nh->rt6_info);
+		if (nh->fib6_info)
+			fib6_info_release(nh->fib6_info);
 		list_del(&nh->next);
 		kfree(nh);
 	}
@@ -4427,7 +4428,7 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return ip6_route_add(&cfg, GFP_KERNEL, extack);
 }
 
-static size_t rt6_nlmsg_size(struct rt6_info *rt)
+static size_t rt6_nlmsg_size(struct fib6_info *rt)
 {
 	int nexthop_len = 0;
 
@@ -4457,7 +4458,7 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
 	       + nexthop_len;
 }
 
-static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
+static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
 			    unsigned int *flags, bool skip_oif)
 {
 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
@@ -4494,7 +4495,7 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
 }
 
 /* add multipath next hop */
-static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
+static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
 {
 	const struct net_device *dev = rt->fib6_nh.nh_dev;
 	struct rtnexthop *rtnh;
@@ -4522,7 +4523,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
 }
 
 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
-			 struct rt6_info *rt, struct dst_entry *dst,
+			 struct fib6_info *rt, struct dst_entry *dst,
 			 struct in6_addr *dest, struct in6_addr *src,
 			 int iif, int type, u32 portid, u32 seq,
 			 unsigned int flags)
@@ -4612,7 +4613,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 	 * each as a nexthop within RTA_MULTIPATH.
 	 */
 	if (rt->rt6i_nsiblings) {
-		struct rt6_info *sibling, *next_sibling;
+		struct fib6_info *sibling, *next_sibling;
 		struct nlattr *mp;
 
 		mp = nla_nest_start(skb, RTA_MULTIPATH);
@@ -4654,7 +4655,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 	return -EMSGSIZE;
 }
 
-int rt6_dump_route(struct rt6_info *rt, void *p_arg)
+int rt6_dump_route(struct fib6_info *rt, void *p_arg)
 {
 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
 	struct net *net = arg->net;
@@ -4799,7 +4800,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	return err;
 }
 
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
+void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
 		     unsigned int nlm_flags)
 {
 	struct sk_buff *skb;
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next 21/21] net/ipv6: Remove unused code and variables for rt6_info
From: David Ahern @ 2018-04-16 15:22 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, roopa, eric.dumazet, weiwan, kafai, yoshfuji,
	David Ahern
In-Reply-To: <20180416152255.2256-1-dsahern@gmail.com>

Drop unneeded elements from rt6_info struct and rearrange layout to
something more relevant for the data path.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h   | 60 +++----------------------------------------------
 net/ipv6/ip6_fib.c      | 22 ------------------
 net/ipv6/route.c        | 27 ++--------------------
 net/ipv6/xfrm6_policy.c |  2 --
 4 files changed, 5 insertions(+), 106 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index d41b7bd69fb3..a36116b92100 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -175,58 +175,20 @@ struct fib6_info {
 
 struct rt6_info {
 	struct dst_entry		dst;
-	struct rt6_info __rcu		*rt6_next;
 	struct fib6_info		*from;
 
-	/*
-	 * Tail elements of dst_entry (__refcnt etc.)
-	 * and these elements (rarely used in hot path) are in
-	 * the same cache line.
-	 */
-	struct fib6_table		*rt6i_table;
-	struct fib6_node __rcu		*rt6i_node;
-
+	struct rt6key			rt6i_dst;
+	struct rt6key			rt6i_src;
 	struct in6_addr			rt6i_gateway;
-
-	/* Multipath routes:
-	 * siblings is a list of rt6_info that have the the same metric/weight,
-	 * destination, but not the same gateway. nsiblings is just a cache
-	 * to speed up lookup.
-	 */
-	struct list_head		rt6i_siblings;
-	unsigned int			rt6i_nsiblings;
-
-	atomic_t			rt6i_ref;
-
-	/* These are in a separate cache line. */
-	struct rt6key			rt6i_dst ____cacheline_aligned_in_smp;
+	struct inet6_dev		*rt6i_idev;
 	u32				rt6i_flags;
-	struct rt6key			rt6i_src;
 	struct rt6key			rt6i_prefsrc;
 
 	struct list_head		rt6i_uncached;
 	struct uncached_list		*rt6i_uncached_list;
 
-	struct inet6_dev		*rt6i_idev;
-	struct rt6_info * __percpu	*rt6i_pcpu;
-	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
-
-	u32				rt6i_metric;
 	/* more non-fragment space at head required */
 	unsigned short			rt6i_nfheader_len;
-	u8				rt6i_protocol;
-	u8				fib6_type;
-	u8				exception_bucket_flushed:1,
-					should_flush:1,
-					dst_nocount:1,
-					dst_nopolicy:1,
-					dst_host:1,
-					unused:3;
-
-	unsigned long			expires;
-	struct dst_metrics		*fib6_metrics;
-#define fib6_pmtu		fib6_metrics->metrics[RTAX_MTU-1]
-	struct fib6_nh			fib6_nh;
 };
 
 #define for_each_fib6_node_rt_rcu(fn)					\
@@ -328,8 +290,6 @@ static inline void ip6_rt_put(struct rt6_info *rt)
 	dst_release(&rt->dst);
 }
 
-void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
-
 struct fib6_info *fib6_info_alloc(gfp_t gfp_flags);
 void fib6_info_destroy(struct fib6_info *f6i);
 
@@ -344,20 +304,6 @@ static inline void fib6_info_release(struct fib6_info *f6i)
 		fib6_info_destroy(f6i);
 }
 
-static inline void rt6_hold(struct rt6_info *rt)
-{
-	atomic_inc(&rt->rt6i_ref);
-}
-
-static inline void rt6_release(struct rt6_info *rt)
-{
-	if (atomic_dec_and_test(&rt->rt6i_ref)) {
-		rt6_free_pcpu(rt);
-		dst_dev_put(&rt->dst);
-		dst_release(&rt->dst);
-	}
-}
-
 enum fib6_walk_state {
 #ifdef CONFIG_IPV6_SUBTREES
 	FWS_S,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 77cf43b2d858..2ab49b7cac22 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -240,28 +240,6 @@ static void node_free(struct net *net, struct fib6_node *fn)
 	net->ipv6.rt6_stats->fib_nodes--;
 }
 
-void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
-{
-	int cpu;
-
-	if (!non_pcpu_rt->rt6i_pcpu)
-		return;
-
-	for_each_possible_cpu(cpu) {
-		struct rt6_info **ppcpu_rt;
-		struct rt6_info *pcpu_rt;
-
-		ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu);
-		pcpu_rt = *ppcpu_rt;
-		if (pcpu_rt) {
-			dst_dev_put(&pcpu_rt->dst);
-			dst_release(&pcpu_rt->dst);
-			*ppcpu_rt = NULL;
-		}
-	}
-}
-EXPORT_SYMBOL_GPL(rt6_free_pcpu);
-
 static void fib6_free_table(struct fib6_table *table)
 {
 	inetpeer_invalidate_tree(&table->tb6_peers);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b53b28a59151..fb4e996ca15b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -302,10 +302,6 @@ static const struct rt6_info ip6_null_entry_template = {
 		.output		= ip6_pkt_discard_out,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
-	.rt6i_protocol  = RTPROT_KERNEL,
-	.rt6i_metric	= ~(u32) 0,
-	.rt6i_ref	= ATOMIC_INIT(1),
-	.fib6_type	= RTN_UNREACHABLE,
 };
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -320,10 +316,6 @@ static const struct rt6_info ip6_prohibit_entry_template = {
 		.output		= ip6_pkt_prohibit_out,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
-	.rt6i_protocol  = RTPROT_KERNEL,
-	.rt6i_metric	= ~(u32) 0,
-	.rt6i_ref	= ATOMIC_INIT(1),
-	.fib6_type	= RTN_PROHIBIT,
 };
 
 static const struct rt6_info ip6_blk_hole_entry_template = {
@@ -336,10 +328,6 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
 		.output		= dst_discard_out,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
-	.rt6i_protocol  = RTPROT_KERNEL,
-	.rt6i_metric	= ~(u32) 0,
-	.rt6i_ref	= ATOMIC_INIT(1),
-	.fib6_type	= RTN_BLACKHOLE,
 };
 
 #endif
@@ -349,7 +337,6 @@ static void rt6_info_init(struct rt6_info *rt)
 	struct dst_entry *dst = &rt->dst;
 
 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
-	INIT_LIST_HEAD(&rt->rt6i_siblings);
 	INIT_LIST_HEAD(&rt->rt6i_uncached);
 }
 
@@ -999,12 +986,10 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
 	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
 	rt->rt6i_flags = ort->rt6i_flags;
 	rt6_set_from(rt, ort);
-	rt->rt6i_metric = ort->rt6i_metric;
 #ifdef CONFIG_IPV6_SUBTREES
 	rt->rt6i_src = ort->rt6i_src;
 #endif
 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
-	rt->rt6i_table = ort->rt6i_table;
 	rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
 }
 
@@ -1192,7 +1177,6 @@ static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
 
 	ip6_rt_copy_init(rt, ort);
 	rt->rt6i_flags |= RTF_CACHE;
-	rt->rt6i_metric = 0;
 	rt->dst.flags |= DST_HOST;
 	rt->rt6i_dst.addr = *daddr;
 	rt->rt6i_dst.plen = 128;
@@ -1225,7 +1209,6 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
 	if (!pcpu_rt)
 		return NULL;
 	ip6_rt_copy_init(pcpu_rt, rt);
-	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
 	pcpu_rt->rt6i_flags |= RTF_PCPU;
 	return pcpu_rt;
 }
@@ -1279,9 +1262,8 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
 		return;
 
 	net = dev_net(rt6_ex->rt6i->dst.dev);
-	rt6_ex->rt6i->rt6i_node = NULL;
 	hlist_del_rcu(&rt6_ex->hlist);
-	ip6_rt_put(rt6_ex->rt6i);
+	dst_release(&rt6_ex->rt6i->dst);
 	kfree_rcu(rt6_ex, rcu);
 	WARN_ON_ONCE(!bucket->depth);
 	bucket->depth--;
@@ -1463,8 +1445,6 @@ static int rt6_insert_exception(struct rt6_info *nrt,
 	}
 	rt6_ex->rt6i = nrt;
 	rt6_ex->stamp = jiffies;
-	atomic_inc(&nrt->rt6i_ref);
-	nrt->rt6i_node = ort->rt6i_node;
 	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
 	bucket->depth++;
 	net->ipv6.rt6_stats->fib_rt_cache++;
@@ -2122,7 +2102,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
 		rt->rt6i_idev = in6_dev_get(loopback_dev);
 		rt->rt6i_gateway = ort->rt6i_gateway;
 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
-		rt->rt6i_metric = 0;
 
 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
 #ifdef CONFIG_IPV6_SUBTREES
@@ -2247,8 +2226,7 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
 {
 	return !(rt->rt6i_flags & RTF_CACHE) &&
-		(rt->rt6i_flags & RTF_PCPU ||
-		 rcu_access_pointer(rt->rt6i_node));
+		(rt->rt6i_flags & RTF_PCPU || rt->from);
 }
 
 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
@@ -3313,7 +3291,6 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 	if (on_link)
 		nrt->rt6i_flags &= ~RTF_GATEWAY;
 
-	nrt->rt6i_protocol = RTPROT_REDIRECT;
 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
 
 	/* No need to remove rt from the exception table if rt is
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 416fe67271a9..2cff209d0fc1 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -107,8 +107,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	 * it was magically lost, so this code needs audit */
 	xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
 						   RTF_LOCAL);
-	xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
-	xdst->u.rt6.rt6i_node = rt->rt6i_node;
 	xdst->route_cookie = rt6_get_cookie(rt);
 	xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
 	xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
-- 
2.11.0

^ permalink raw reply related

* Re: [PATCH net] tcp: clear tp->packets_out when purging write queue
From: David Miller @ 2018-04-16 15:24 UTC (permalink / raw)
  To: soheil.kdev
  Cc: netdev, ycheng, ncardwell, subashab, hvtaifwkbgefbaei, soheil,
	edumazet
In-Reply-To: <20180415004446.73081-1-soheil.kdev@gmail.com>

From: Soheil Hassas Yeganeh <soheil.kdev@gmail.com>
Date: Sat, 14 Apr 2018 20:44:46 -0400

> From: Soheil Hassas Yeganeh <soheil@google.com>
> 
> Clear tp->packets_out when purging the write queue, otherwise
> tcp_rearm_rto() mistakenly assumes TCP write queue is not empty.
> This results in NULL pointer dereference.
> 
> Also, remove the redundant `tp->packets_out = 0` from
> tcp_disconnect(), since tcp_disconnect() calls
> tcp_write_queue_purge().
> 
> Fixes: a27fd7a8ed38 (tcp: purge write queue upon RST)
> Reported-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
> Reported-by: Sami Farin <hvtaifwkbgefbaei@gmail.com>
> Tested-by: Sami Farin <hvtaifwkbgefbaei@gmail.com>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
> Acked-by: Yuchung Cheng <ycheng@google.com>
> Acked-by: Neal Cardwell <ncardwell@google.com>

Applied and queued up for -stable, thanks.

^ permalink raw reply

* Re: [PATCH] ibmvnic: Clear pending interrupt after device reset
From: David Miller @ 2018-04-16 15:26 UTC (permalink / raw)
  To: tlfalcon; +Cc: netdev, linuxppc-dev, jallen, nfont, benh
In-Reply-To: <1523836416-16531-1-git-send-email-tlfalcon@linux.vnet.ibm.com>

From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Sun, 15 Apr 2018 18:53:36 -0500

> Due to a firmware bug, the hypervisor can send an interrupt to a
> transmit or receive queue just prior to a partition migration, not
> allowing the device enough time to handle it and send an EOI. When
> the partition migrates, the interrupt is lost but an "EOI-pending"
> flag for the interrupt line is still set in firmware. No further
> interrupts will be sent until that flag is cleared, effectively
> freezing that queue. To workaround this, the driver will disable the
> hardware interrupt and send an H_EOI signal prior to re-enabling it.
> This will flush the pending EOI and allow the driver to continue
> operation.
> 
> Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>

Applied, thanks Thomas.

^ permalink raw reply

* Re: Cavium Octeon III network driver.
From: Steven J. Hill @ 2018-04-16 15:29 UTC (permalink / raw)
  To: Florian Fainelli, netdev
In-Reply-To: <cf2162fd-cf99-9ed9-a009-9fc47713d73f@gmail.com>

On 04/14/2018 07:08 PM, Florian Fainelli wrote:
> 
> net-next tree is currently closed, but once it opens back up, you would
> likely want to resubmit those patches. Last I remember they were ready
> to go.
> 
The announcement appears on this list for when it is open, correct?

^ permalink raw reply

* [PATCH net 0/2] tipc: Better check user provided attributes
From: Eric Dumazet @ 2018-04-16 15:29 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet

syzbot reported a crash in __tipc_nl_net_set()

While fixing it, I also had to fix an old bug involving TIPC_NLA_NET_ADDR

Eric Dumazet (2):
  tipc: add policy for TIPC_NLA_NET_ADDR
  tipc: fix possible crash in __tipc_nl_net_set()

 net/tipc/net.c     | 2 ++
 net/tipc/netlink.c | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

-- 
2.17.0.484.g0c8726318c-goog

^ permalink raw reply

* [PATCH net 1/2] tipc: add policy for TIPC_NLA_NET_ADDR
From: Eric Dumazet @ 2018-04-16 15:29 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet, Jon Maloy, Ying Xue
In-Reply-To: <20180416152943.103989-1-edumazet@google.com>

Before syzbot/KMSAN bites, add the missing policy for TIPC_NLA_NET_ADDR

Fixes: 27c21416727a ("tipc: add net set to new netlink api")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Jon Maloy <jon.maloy@ericsson.com>
Cc: Ying Xue <ying.xue@windriver.com>
---
 net/tipc/netlink.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index b76f13f6fea10a53d00ed14a38cdf5cdf7afa44c..d4e0bbeee72793a060befaf8a9d0239731c0d48c 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -79,7 +79,8 @@ const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = {
 
 const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = {
 	[TIPC_NLA_NET_UNSPEC]		= { .type = NLA_UNSPEC },
-	[TIPC_NLA_NET_ID]		= { .type = NLA_U32 }
+	[TIPC_NLA_NET_ID]		= { .type = NLA_U32 },
+	[TIPC_NLA_NET_ADDR]		= { .type = NLA_U32 },
 };
 
 const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
-- 
2.17.0.484.g0c8726318c-goog

^ permalink raw reply related

* [PATCH net 2/2] tipc: fix possible crash in __tipc_nl_net_set()
From: Eric Dumazet @ 2018-04-16 15:29 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet, Jon Maloy, Ying Xue
In-Reply-To: <20180416152943.103989-1-edumazet@google.com>

syzbot reported a crash in __tipc_nl_net_set() caused by NULL dereference.

We need to check that both TIPC_NLA_NET_NODEID and TIPC_NLA_NET_NODEID_W1
are present.

We also need to make sure userland provided u64 attributes.

Fixes: d50ccc2d3909 ("tipc: add 128-bit node identifier")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Jon Maloy <jon.maloy@ericsson.com>
Cc: Ying Xue <ying.xue@windriver.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
---
 net/tipc/net.c     | 2 ++
 net/tipc/netlink.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/net/tipc/net.c b/net/tipc/net.c
index 856f9e97ea293210bea1d2003d2092482732ace9..4fbaa0464405370601cb2fd1dd3b03733836d342 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -252,6 +252,8 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
 		u64 *w0 = (u64 *)&node_id[0];
 		u64 *w1 = (u64 *)&node_id[8];
 
+		if (!attrs[TIPC_NLA_NET_NODEID_W1])
+			return -EINVAL;
 		*w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]);
 		*w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]);
 		tipc_net_init(net, node_id, 0);
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index d4e0bbeee72793a060befaf8a9d0239731c0d48c..6ff2254088f647d4f7410c3335ccdae2e68ec522 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -81,6 +81,8 @@ const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = {
 	[TIPC_NLA_NET_UNSPEC]		= { .type = NLA_UNSPEC },
 	[TIPC_NLA_NET_ID]		= { .type = NLA_U32 },
 	[TIPC_NLA_NET_ADDR]		= { .type = NLA_U32 },
+	[TIPC_NLA_NET_NODEID]		= { .type = NLA_U64 },
+	[TIPC_NLA_NET_NODEID_W1]	= { .type = NLA_U64 },
 };
 
 const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
-- 
2.17.0.484.g0c8726318c-goog

^ permalink raw reply related

* [PATCH net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust
From: Yafang Shao @ 2018-04-16 15:33 UTC (permalink / raw)
  To: davem; +Cc: kuznet, yoshfuji, songliubraving, netdev, linux-kernel,
	Yafang Shao

tcp_rcv_space_adjust is called every time data is copied to user space,
introducing a tcp tracepoint for which could show us when the packet is
copied to user.
This could help us figure out whether there's latency in user process.

When a tcp packet arrives, tcp_rcv_established() will be called and with
the existed tracepoint tcp_probe we could get the time when this packet
arrives.
Then this packet will be copied to user, and tcp_rcv_space_adjust will
be called and with this new introduced tracepoint we could get the time
when this packet is copied to user.

	arrives time : user process time    => latency caused by user
	tcp_probe      tcp_rcv_space_adjust

Hence in the prink message, sk is printed as a key to connect these two
tracepoints.

Maybe we could export sockfd in this new tracepoint as well, then we
could connect this new tracepoint with epoll/read/recv* tracepoint, and
finally that could show us the whole lifespan of this packet. But we
could also implement that with pid as these functions are executed in
process context.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/trace/events/tcp.h | 21 +++++++++++++++------
 net/ipv4/tcp_input.c       |  2 ++
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 878b2be..65a6d22 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -146,10 +146,11 @@
 			       sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
 	),
 
-	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
+	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock=0x%p",
 		  __entry->sport, __entry->dport,
 		  __entry->saddr, __entry->daddr,
-		  __entry->saddr_v6, __entry->daddr_v6)
+		  __entry->saddr_v6, __entry->daddr_v6,
+		  __entry->skaddr)
 );
 
 DEFINE_EVENT(tcp_event_sk, tcp_receive_reset,
@@ -166,6 +167,13 @@
 	TP_ARGS(sk)
 );
 
+DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust,
+
+	TP_PROTO(const struct sock *sk),
+
+	TP_ARGS(sk)
+);
+
 TRACE_EVENT(tcp_set_state,
 
 	TP_PROTO(const struct sock *sk, const int oldstate, const int newstate),
@@ -265,6 +273,7 @@
 	TP_ARGS(sk, skb),
 
 	TP_STRUCT__entry(
+		__field(const void *, skaddr)
 		/* sockaddr_in6 is always bigger than sockaddr_in */
 		__array(__u8, saddr, sizeof(struct sockaddr_in6))
 		__array(__u8, daddr, sizeof(struct sockaddr_in6))
@@ -285,6 +294,8 @@
 		const struct tcp_sock *tp = tcp_sk(sk);
 		const struct inet_sock *inet = inet_sk(sk);
 
+		__entry->skaddr = sk;
+
 		memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
 		memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
 
@@ -305,13 +316,11 @@
 		__entry->srtt = tp->srtt_us >> 3;
 	),
 
-	TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x "
-		  "snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u "
-		  "rcv_wnd=%u",
+	TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock=0x%p",
 		  __entry->saddr, __entry->daddr, __entry->mark,
 		  __entry->length, __entry->snd_nxt, __entry->snd_una,
 		  __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
-		  __entry->srtt, __entry->rcv_wnd)
+		  __entry->srtt, __entry->rcv_wnd, __entry->skaddr)
 );
 
 #endif /* _TRACE_TCP_H */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 367def6..4b4d6b9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -582,6 +582,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
 	u32 copied;
 	int time;
 
+	trace_tcp_rcv_space_adjust(sk);
+
 	tcp_mstamp_refresh(tp);
 	time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
 	if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH net] net: af_packet: fix race in PACKET_{R|T}X_RING
From: David Miller @ 2018-04-16 15:40 UTC (permalink / raw)
  To: edumazet; +Cc: netdev, eric.dumazet
In-Reply-To: <20180416005204.86785-1-edumazet@google.com>

From: Eric Dumazet <edumazet@google.com>
Date: Sun, 15 Apr 2018 17:52:04 -0700

> In order to remove the race caught by syzbot [1], we need
> to lock the socket before using po->tp_version as this could
> change under us otherwise.
> 
> This means lock_sock() and release_sock() must be done by
> packet_set_ring() callers.
 ...
> Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Reported-by: syzbot <syzkaller@googlegroups.com>

The locking in AF_PACKET is very unkind, as has been discussed
before.  Good thing syzbot found this one.

The only other place we access po->tp_version asynchronously
is in the getsockopt() for statistics, and I guess that case
is OK.

Applied and queued up for -stable, thanks Eric.

^ permalink raw reply

* Re: [PATCH net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust
From: Eric Dumazet @ 2018-04-16 15:43 UTC (permalink / raw)
  To: Yafang Shao, davem; +Cc: kuznet, yoshfuji, songliubraving, netdev, linux-kernel
In-Reply-To: <1523892818-12820-1-git-send-email-laoar.shao@gmail.com>



On 04/16/2018 08:33 AM, Yafang Shao wrote:
> tcp_rcv_space_adjust is called every time data is copied to user space,
> introducing a tcp tracepoint for which could show us when the packet is
> copied to user.
> This could help us figure out whether there's latency in user process.
> 
> When a tcp packet arrives, tcp_rcv_established() will be called and with
> the existed tracepoint tcp_probe we could get the time when this packet
> arrives.
> Then this packet will be copied to user, and tcp_rcv_space_adjust will
> be called and with this new introduced tracepoint we could get the time
> when this packet is copied to user.
> 
> 	arrives time : user process time    => latency caused by user
> 	tcp_probe      tcp_rcv_space_adjust
> 
> Hence in the prink message, sk is printed as a key to connect these two
> tracepoints.
> 

socket pointer is not a key.

TCP sockets can be reused pretty fast after free.

I suggest you go for cookie instead, this is an unique 64bit identifier.
( sock_gen_cookie() for details )

^ permalink raw reply

* Re: [Patch nf] nf_conntrack_extend: silent a memory leak warning
From: Pablo Neira Ayuso @ 2018-04-16 15:44 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev, netfilter-devel, Jozsef Kadlecsik, Florian Westphal
In-Reply-To: <20180330202206.19565-1-xiyou.wangcong@gmail.com>

On Fri, Mar 30, 2018 at 01:22:06PM -0700, Cong Wang wrote:
> The following memory leak is false postive:
> 
> unreferenced object 0xffff8f37f156fb38 (size 128):
>   comm "softirq", pid 0, jiffies 4294899665 (age 11.292s)
>   hex dump (first 32 bytes):
>     6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
>     00 00 00 00 30 00 20 00 48 6b 6b 6b 6b 6b 6b 6b  ....0. .Hkkkkkkk
>   backtrace:
>     [<000000004fda266a>] __kmalloc_track_caller+0x10d/0x141
>     [<000000007b0a7e3c>] __krealloc+0x45/0x62
>     [<00000000d08e0bfb>] nf_ct_ext_add+0xdc/0x133
>     [<0000000099b47fd8>] init_conntrack+0x1b1/0x392
>     [<0000000086dc36ec>] nf_conntrack_in+0x1ee/0x34b
>     [<00000000940592de>] nf_hook_slow+0x36/0x95
>     [<00000000d1bd4da7>] nf_hook.constprop.43+0x1c3/0x1dd
>     [<00000000c3673266>] __ip_local_out+0xae/0xb4
>     [<000000003e4192a6>] ip_local_out+0x17/0x33
>     [<00000000b64356de>] igmp_ifc_timer_expire+0x23e/0x26f
>     [<000000006a8f3032>] call_timer_fn+0x14c/0x2a5
>     [<00000000650c1725>] __run_timers.part.34+0x150/0x182
>     [<0000000090e6946e>] run_timer_softirq+0x2a/0x4c
>     [<000000004d1e7293>] __do_softirq+0x1d1/0x3c2
>     [<000000004643557d>] irq_exit+0x53/0xa2
>     [<0000000029ddee8f>] smp_apic_timer_interrupt+0x22a/0x235

Applied, thanks.

^ permalink raw reply

* Re: [PATCH net-next 1/5] virtio: Add support for SCTP checksum offloading
From: Michael S. Tsirkin @ 2018-04-16 15:52 UTC (permalink / raw)
  To: Vlad Yasevich
  Cc: nhorman, netdev, virtualization, linux-sctp, Vladislav Yasevich
In-Reply-To: <f5655ab8-f24f-a5a5-d000-2bbb90ecc552@redhat.com>

On Mon, Apr 16, 2018 at 09:45:48AM -0400, Vlad Yasevich wrote:
> On 04/11/2018 06:49 PM, Michael S. Tsirkin wrote:
> > On Mon, Apr 02, 2018 at 09:40:02AM -0400, Vladislav Yasevich wrote:
> >> To support SCTP checksum offloading, we need to add a new feature
> >> to virtio_net, so we can negotiate support between the hypervisor
> >> and the guest.
> >>
> >> The signalling to the guest that an alternate checksum needs to
> >> be used is done via a new flag in the virtio_net_hdr.  If the
> >> flag is set, the host will know to perform an alternate checksum
> >> calculation, which right now is only CRC32c.
> >>
> >> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
> >> ---
> >>  drivers/net/virtio_net.c        | 11 ++++++++---
> >>  include/linux/virtio_net.h      |  6 ++++++
> >>  include/uapi/linux/virtio_net.h |  2 ++

Could you pls include a virtio TC mailing list on uapi changes?

> >>  3 files changed, 16 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> >> index 7b187ec..b601294 100644
> >> --- a/drivers/net/virtio_net.c
> >> +++ b/drivers/net/virtio_net.c
> >> @@ -2724,9 +2724,14 @@ static int virtnet_probe(struct virtio_device *vdev)
> >>  	/* Do we support "hardware" checksums? */
> >>  	if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
> >>  		/* This opens up the world of extra features. */
> >> -		dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
> >> +		netdev_features_t sctp = 0;
> >> +
> >> +		if (virtio_has_feature(vdev, VIRTIO_NET_F_SCTP_CSUM))
> >> +			sctp |= NETIF_F_SCTP_CRC;
> >> +
> >> +		dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG | sctp;
> >>  		if (csum)
> >> -			dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
> >> +			dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG | sctp;
> >>  
> >>  		if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
> >>  			dev->hw_features |= NETIF_F_TSO
> >> @@ -2952,7 +2957,7 @@ static struct virtio_device_id id_table[] = {
> >>  };
> >>  
> >>  #define VIRTNET_FEATURES \
> >> -	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
> >> +	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,  VIRTIO_NET_F_SCTP_CSUM, \
> >>  	VIRTIO_NET_F_MAC, \
> >>  	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
> >>  	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
> >> diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
> >> index f144216..2e7a64a 100644
> >> --- a/include/linux/virtio_net.h
> >> +++ b/include/linux/virtio_net.h
> >> @@ -39,6 +39,9 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
> >>  
> >>  		if (!skb_partial_csum_set(skb, start, off))
> >>  			return -EINVAL;
> >> +
> >> +		if (hdr->flags & VIRTIO_NET_HDR_F_CSUM_NOT_INET)
> >> +			skb->csum_not_inet = 1;
> >>  	}
> >>  
> >>  	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
> >> @@ -96,6 +99,9 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
> >>  		hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
> >>  	} /* else everything is zero */
> >>  
> >> +	if (skb->csum_not_inet)
> >> +		hdr->flags &= VIRTIO_NET_HDR_F_CSUM_NOT_INET;
> >> +
> >>  	return 0;
> >>  }
> >>  
> >> diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
> >> index 5de6ed3..3f279c8 100644
> >> --- a/include/uapi/linux/virtio_net.h
> >> +++ b/include/uapi/linux/virtio_net.h
> >> @@ -36,6 +36,7 @@
> >>  #define VIRTIO_NET_F_GUEST_CSUM	1	/* Guest handles pkts w/ partial csum */
> >>  #define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Dynamic offload configuration. */
> >>  #define VIRTIO_NET_F_MTU	3	/* Initial MTU advice */
> >> +#define VIRTIO_NET_F_SCTP_CSUM  4	/* SCTP checksum offload support */
> >>  #define VIRTIO_NET_F_MAC	5	/* Host has given MAC address. */
> >>  #define VIRTIO_NET_F_GUEST_TSO4	7	/* Guest can handle TSOv4 in. */
> >>  #define VIRTIO_NET_F_GUEST_TSO6	8	/* Guest can handle TSOv6 in. */
> > 
> > Is this a guest or a host checksum? We should differenciate between the
> > two.
> 
> I suppose this is HOST checksum, since it behaves like VIRTIO_NET_F_CSUM only for
> SCTP.  I couldn't find the use for the GUEST side flag, since there technically
> isn't any validations and there is no additional mappings from VIRTIO flag to a
> NETIF flag.
> 
> If the feature is negotiated, the guest ends up generating partially check-summed
> packets, and the host turns on appropriate flags on it's side.   The host will
> also make sure the checksum if fixed up if the guest doesn't support it.
> So 1 flag is currently all that is needed.
> 
> -vlad

Fine, but let's include HOST in there to make things clear.
Also I think we should use a high flag, like 62.

> > 
> > 
> >> @@ -101,6 +102,7 @@ struct virtio_net_config {
> >>  struct virtio_net_hdr_v1 {
> >>  #define VIRTIO_NET_HDR_F_NEEDS_CSUM	1	/* Use csum_start, csum_offset */
> >>  #define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
> >> +#define VIRTIO_NET_HDR_F_CSUM_NOT_INET  4       /* Checksum is not inet */
> >>  	__u8 flags;
> >>  #define VIRTIO_NET_HDR_GSO_NONE		0	/* Not a GSO frame */
> >>  #define VIRTIO_NET_HDR_GSO_TCPV4	1	/* GSO frame, IPv4 TCP (TSO) */



> >> -- 
> >> 2.9.5

^ permalink raw reply

* [PATCH] ipv6: remove unnecessary check in addrconf_prefix_rcv_add_addr()
From: Lorenzo Bianconi @ 2018-04-16 15:52 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <cover.1523893788.git.lorenzo.bianconi@redhat.com>

Remove unnecessary check on update_lft variable in
addrconf_prefix_rcv_add_addr routine since it is always set to 0.
Moreover remove update_lft re-initialization to 0

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
 net/ipv6/addrconf.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index dffa38004c13..b2c0175125db 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2529,7 +2529,6 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
 		if (IS_ERR_OR_NULL(ifp))
 			return -1;
 
-		update_lft = 0;
 		create = 1;
 		spin_lock_bh(&ifp->lock);
 		ifp->flags |= IFA_F_MANAGETEMPADDR;
@@ -2551,7 +2550,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
 			stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
 		else
 			stored_lft = 0;
-		if (!update_lft && !create && stored_lft) {
+		if (!create && stored_lft) {
 			const u32 minimum_lft = min_t(u32,
 				stored_lft, MIN_VALID_LIFETIME);
 			valid_lft = max(valid_lft, minimum_lft);
-- 
2.14.3

^ permalink raw reply related

* Re: Cavium Octeon III network driver.
From: Florian Fainelli @ 2018-04-16 15:53 UTC (permalink / raw)
  To: Steven J. Hill, netdev
In-Reply-To: <af781f7a-c38c-9f1d-f3b7-b5ad5a667ccf@cavium.com>

On 04/16/2018 08:29 AM, Steven J. Hill wrote:
> On 04/14/2018 07:08 PM, Florian Fainelli wrote:
>>
>> net-next tree is currently closed, but once it opens back up, you would
>> likely want to resubmit those patches. Last I remember they were ready
>> to go.
>>
> The announcement appears on this list for when it is open, correct?

Correct, and the announcement was just made that net-next is open.
-- 
Florian

^ permalink raw reply

* Re: [PATCH] ipv6: remove unnecessary check in addrconf_prefix_rcv_add_addr()
From: Lorenzo Bianconi @ 2018-04-16 15:56 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev
In-Reply-To: <cd402c41ec4a3fb61702ae00115e54cfada4ba1d.1523893788.git.lorenzo.bianconi@redhat.com>

> Remove unnecessary check on update_lft variable in
> addrconf_prefix_rcv_add_addr routine since it is always set to 0.
> Moreover remove update_lft re-initialization to 0
>
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> ---
>  net/ipv6/addrconf.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index dffa38004c13..b2c0175125db 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -2529,7 +2529,6 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
>                 if (IS_ERR_OR_NULL(ifp))
>                         return -1;
>
> -               update_lft = 0;
>                 create = 1;
>                 spin_lock_bh(&ifp->lock);
>                 ifp->flags |= IFA_F_MANAGETEMPADDR;
> @@ -2551,7 +2550,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
>                         stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
>                 else
>                         stored_lft = 0;
> -               if (!update_lft && !create && stored_lft) {
> +               if (!create && stored_lft) {
>                         const u32 minimum_lft = min_t(u32,
>                                 stored_lft, MIN_VALID_LIFETIME);
>                         valid_lft = max(valid_lft, minimum_lft);
> --
> 2.14.3
>

I forgot 'net-next' tag in the subject. Dave should I send a v2?

Regards,
Lorenzo

^ permalink raw reply

* BUG: DSA blocks (some?) multicast traffic
From: Russell King - ARM Linux @ 2018-04-16 16:00 UTC (permalink / raw)
  To: Andrew Lunn, Vivien Didelot; +Cc: netdev

Hi,

Yesterday, I noticed that one of my platforms had become unreachable
over IPv6.  This platform is connected to a Clearfog, which uses
the Marvell 88e6176 switch.

The setup is:

               (rest of network)
                 | | | | | | |
    Source ---- Netgear switch ---- Clearfog switch ---- Target
                                    ^             ^      ^
                                   lan1          lan5   eth2
                                  port 4        port 0

Both the source and target could ping the IPv6 link-local address on
the clearfog, and IPv4 traffic was fine.

The source was attempting to ping the target, and the target had been
attempting to mount a NFS share over IPv6 (and failing.)

Statistics from the Clearfog switch - port 0 is the target, port 4 is
the netgear switch, port 5 is the CPU port:

     Statistic   Port  0  Port  1  Port  2  Port  3  Port  4  Port  5  Port  6
 in_multicasts:    27670        0        0    14552    78196     1674        0
out_multicasts:      439        0      331      332      331   114860      334

As you can see, very few multicast packets have been sent out any of
the ports.

Running tcpdump on the source machine:

21:31:48.805654 xx:xx:xx:15:37:dd > 33:33:ff:00:00:05,
	ethertype IPv6 (0x86dd), length 86:
	fd8f:xxxx:xxxx:xxxx:222:68ff:fe15:37dd > ff02::1:ff00:5:
	ICMP6, neighbor solicitation, who has
	fd8f:xxxx:xxxx:xxxx:200:ff:fe00:5, length 32

tcpdump on the clearfog for port 4:

21:31:48.807640 xx:xx:xx:15:37:dd > 33:33:ff:00:00:05,
	ethertype IPv6 (0x86dd), length 86:
	fd8f:xxxx:xxxx:xxxx:222:68ff:fe15:37dd > ff02::1:ff00:5:
	ICMP6, neighbor solicitation, who has
	fd8f:xxxx:xxxx:xxxx:200:ff:fe00:5, length 32

The port connected to the target doesn't see these multicast messages
which are intended for it.  However, I am able to see messages
multicasted from the target, which is trying to solicit a different
machine on my network:

21:33:22.394653 00:00:00:00:00:05 > 33:33:ff:10:1b:e6,
	ethertype IPv6 (0x86dd), length 86:
	fd8f:xxxx:xxxx:xxxx:200:ff:fe00:5 > ff02::1:ff10:1be6:
	ICMP6, neighbor solicitation, who has
	fd8f:xxxx:xxxx:xxxx:214:fdff:fe10:1be6, length 32

but these neighbour solicitations are also not forwarded to through
the switch to the rest of the network.

Tweaking the DSA switch port register 4 (which had a value of 0x0433)
to 0x043b resolved the issue - multicast traffic started to egress
these ports, and the neighbour solicitations then worked.

I thought maybe this was a 4.16 regression, but it seems my other
clearfog has a similar register setup, so I'm not sure.  I'm pretty
sure that it used to work at some point, as I'm a heavy user of IPv6
internally, and I'm quite certain that I would have noticed a failure
such as this.

The setup on the target is eth2 is part of a Linux bridge device.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 8.8Mbps down 630kbps up
According to speedtest.net: 8.21Mbps down 510kbps up

^ permalink raw reply

* Re: [PATCH net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust
From: Yafang Shao @ 2018-04-16 16:03 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, Alexey Kuznetsov, yoshfuji, Song Liu, netdev, LKML
In-Reply-To: <d2ab1647-c832-c982-5952-ab7a415f7c76@gmail.com>

On Mon, Apr 16, 2018 at 11:43 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
> On 04/16/2018 08:33 AM, Yafang Shao wrote:
>> tcp_rcv_space_adjust is called every time data is copied to user space,
>> introducing a tcp tracepoint for which could show us when the packet is
>> copied to user.
>> This could help us figure out whether there's latency in user process.
>>
>> When a tcp packet arrives, tcp_rcv_established() will be called and with
>> the existed tracepoint tcp_probe we could get the time when this packet
>> arrives.
>> Then this packet will be copied to user, and tcp_rcv_space_adjust will
>> be called and with this new introduced tracepoint we could get the time
>> when this packet is copied to user.
>>
>>       arrives time : user process time    => latency caused by user
>>       tcp_probe      tcp_rcv_space_adjust
>>
>> Hence in the prink message, sk is printed as a key to connect these two
>> tracepoints.
>>
>
> socket pointer is not a key.
>
> TCP sockets can be reused pretty fast after free.
>
> I suggest you go for cookie instead, this is an unique 64bit identifier.
> ( sock_gen_cookie() for details )
>
>

Thanks for your explanation.
Will have a try.

Thanks
Yafang

^ permalink raw reply

* Re: XDP performance regression due to CONFIG_RETPOLINE Spectre V2
From: Alexander Duyck @ 2018-04-16 16:04 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jesper Dangaard Brouer, xdp-newbies@vger.kernel.org,
	netdev@vger.kernel.org, Christoph Hellwig, David Woodhouse,
	William Tu, Björn Töpel, Karlsson, Magnus,
	Arnaldo Carvalho de Melo
In-Reply-To: <20180416122706.GA20624@infradead.org>

On Mon, Apr 16, 2018 at 5:27 AM, Christoph Hellwig <hch@infradead.org> wrote:
> Can you try the following hack which avoids indirect calls entirely
> for the fast path direct mapping case?
>
> ---
> From b256a008c1b305e6a1c2afe7c004c54ad2e96d4b Mon Sep 17 00:00:00 2001
> From: Christoph Hellwig <hch@lst.de>
> Date: Mon, 16 Apr 2018 14:18:14 +0200
> Subject: dma-mapping: bypass dma_ops for direct mappings
>
> Reportedly the retpoline mitigation for spectre causes huge penalties
> for indirect function calls.  This hack bypasses the dma_ops mechanism
> for simple direct mappings.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  include/linux/device.h      |  1 +
>  include/linux/dma-mapping.h | 53 +++++++++++++++++++++++++++----------
>  lib/dma-direct.c            |  4 +--
>  3 files changed, 42 insertions(+), 16 deletions(-)
>
> diff --git a/include/linux/device.h b/include/linux/device.h
> index 0059b99e1f25..725eec4c6653 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -990,6 +990,7 @@ struct device {
>         bool                    offline_disabled:1;
>         bool                    offline:1;
>         bool                    of_node_reused:1;
> +       bool                    is_dma_direct:1;
>  };
>
>  static inline struct device *kobj_to_dev(struct kobject *kobj)
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index f8ab1c0f589e..c5d384ae25d6 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -223,6 +223,13 @@ static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
>  }
>  #endif
>
> +/* do not use directly! */
> +dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
> +               unsigned long offset, size_t size, enum dma_data_direction dir,
> +               unsigned long attrs);
> +int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
> +               int nents, enum dma_data_direction dir, unsigned long attrs);
> +
>  static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
>                                               size_t size,
>                                               enum dma_data_direction dir,
> @@ -232,9 +239,13 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
>         dma_addr_t addr;
>
>         BUG_ON(!valid_dma_direction(dir));
> -       addr = ops->map_page(dev, virt_to_page(ptr),
> -                            offset_in_page(ptr), size,
> -                            dir, attrs);
> +       if (dev->is_dma_direct) {
> +               addr = dma_direct_map_page(dev, virt_to_page(ptr),
> +                               offset_in_page(ptr), size, dir, attrs);
> +       } else {
> +               addr = ops->map_page(dev, virt_to_page(ptr),
> +                               offset_in_page(ptr), size, dir, attrs);
> +       }
>         debug_dma_map_page(dev, virt_to_page(ptr),
>                            offset_in_page(ptr), size,
>                            dir, addr, true);

I'm not sure if I am really a fan of trying to solve this in this way.
It seems like this is going to be optimizing the paths for one case at
the detriment of others. Historically mapping and unmapping has always
been expensive, especially in the case of IOMMU enabled environments.
I would much rather see us focus on having swiotlb_dma_ops replaced
with dma_direct_ops in the cases where the device can access all of
physical memory.

> @@ -249,7 +260,7 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
>         const struct dma_map_ops *ops = get_dma_ops(dev);
>
>         BUG_ON(!valid_dma_direction(dir));
> -       if (ops->unmap_page)
> +       if (!dev->is_dma_direct && ops->unmap_page)

If I understand correctly this is only needed for the swiotlb case and
not the dma_direct case. It would make much more sense to just
overwrite the dev->dma_ops pointer with dma_direct_ops to address all
of the sync and unmap cases.

>                 ops->unmap_page(dev, addr, size, dir, attrs);
>         debug_dma_unmap_page(dev, addr, size, dir, true);
>  }
> @@ -266,7 +277,10 @@ static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
>         int ents;
>
>         BUG_ON(!valid_dma_direction(dir));
> -       ents = ops->map_sg(dev, sg, nents, dir, attrs);
> +       if (dev->is_dma_direct)
> +               ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
> +       else
> +               ents = ops->map_sg(dev, sg, nents, dir, attrs);
>         BUG_ON(ents < 0);
>         debug_dma_map_sg(dev, sg, nents, ents, dir);
>
> @@ -281,7 +295,7 @@ static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg
>
>         BUG_ON(!valid_dma_direction(dir));
>         debug_dma_unmap_sg(dev, sg, nents, dir);
> -       if (ops->unmap_sg)
> +       if (!dev->is_dma_direct && ops->unmap_sg)
>                 ops->unmap_sg(dev, sg, nents, dir, attrs);
>  }
>
> @@ -295,7 +309,10 @@ static inline dma_addr_t dma_map_page_attrs(struct device *dev,
>         dma_addr_t addr;
>
>         BUG_ON(!valid_dma_direction(dir));
> -       addr = ops->map_page(dev, page, offset, size, dir, attrs);
> +       if (dev->is_dma_direct)
> +               addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
> +       else
> +               addr = ops->map_page(dev, page, offset, size, dir, attrs);
>         debug_dma_map_page(dev, page, offset, size, dir, addr, false);
>
>         return addr;
> @@ -309,7 +326,7 @@ static inline void dma_unmap_page_attrs(struct device *dev,
>         const struct dma_map_ops *ops = get_dma_ops(dev);
>
>         BUG_ON(!valid_dma_direction(dir));
> -       if (ops->unmap_page)
> +       if (!dev->is_dma_direct && ops->unmap_page)
>                 ops->unmap_page(dev, addr, size, dir, attrs);
>         debug_dma_unmap_page(dev, addr, size, dir, false);
>  }
> @@ -356,7 +373,7 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
>         const struct dma_map_ops *ops = get_dma_ops(dev);
>
>         BUG_ON(!valid_dma_direction(dir));
> -       if (ops->sync_single_for_cpu)
> +       if (!dev->is_dma_direct && ops->sync_single_for_cpu)
>                 ops->sync_single_for_cpu(dev, addr, size, dir);
>         debug_dma_sync_single_for_cpu(dev, addr, size, dir);
>  }
> @@ -368,7 +385,7 @@ static inline void dma_sync_single_for_device(struct device *dev,
>         const struct dma_map_ops *ops = get_dma_ops(dev);
>
>         BUG_ON(!valid_dma_direction(dir));
> -       if (ops->sync_single_for_device)
> +       if (!dev->is_dma_direct && ops->sync_single_for_device)
>                 ops->sync_single_for_device(dev, addr, size, dir);
>         debug_dma_sync_single_for_device(dev, addr, size, dir);
>  }
> @@ -382,7 +399,7 @@ static inline void dma_sync_single_range_for_cpu(struct device *dev,
>         const struct dma_map_ops *ops = get_dma_ops(dev);
>
>         BUG_ON(!valid_dma_direction(dir));
> -       if (ops->sync_single_for_cpu)
> +       if (!dev->is_dma_direct && ops->sync_single_for_cpu)
>                 ops->sync_single_for_cpu(dev, addr + offset, size, dir);
>         debug_dma_sync_single_range_for_cpu(dev, addr, offset, size, dir);
>  }
> @@ -396,7 +413,7 @@ static inline void dma_sync_single_range_for_device(struct device *dev,
>         const struct dma_map_ops *ops = get_dma_ops(dev);
>
>         BUG_ON(!valid_dma_direction(dir));
> -       if (ops->sync_single_for_device)
> +       if (!dev->is_dma_direct && ops->sync_single_for_device)
>                 ops->sync_single_for_device(dev, addr + offset, size, dir);
>         debug_dma_sync_single_range_for_device(dev, addr, offset, size, dir);
>  }
> @@ -408,7 +425,7 @@ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
>         const struct dma_map_ops *ops = get_dma_ops(dev);
>
>         BUG_ON(!valid_dma_direction(dir));
> -       if (ops->sync_sg_for_cpu)
> +       if (!dev->is_dma_direct && ops->sync_sg_for_cpu)
>                 ops->sync_sg_for_cpu(dev, sg, nelems, dir);
>         debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir);
>  }
> @@ -420,7 +437,7 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
>         const struct dma_map_ops *ops = get_dma_ops(dev);
>
>         BUG_ON(!valid_dma_direction(dir));
> -       if (ops->sync_sg_for_device)
> +       if (!dev->is_dma_direct && ops->sync_sg_for_device)
>                 ops->sync_sg_for_device(dev, sg, nelems, dir);
>         debug_dma_sync_sg_for_device(dev, sg, nelems, dir);
>
> @@ -600,6 +617,8 @@ static inline int dma_supported(struct device *dev, u64 mask)
>         return ops->dma_supported(dev, mask);
>  }
>
> +extern const struct dma_map_ops swiotlb_dma_ops;
> +
>  #ifndef HAVE_ARCH_DMA_SET_MASK
>  static inline int dma_set_mask(struct device *dev, u64 mask)
>  {
> @@ -609,6 +628,12 @@ static inline int dma_set_mask(struct device *dev, u64 mask)
>         dma_check_mask(dev, mask);
>
>         *dev->dma_mask = mask;
> +       if (dev->dma_ops == &dma_direct_ops ||
> +           (dev->dma_ops == &swiotlb_dma_ops &&
> +            mask == DMA_BIT_MASK(64)))
> +               dev->is_dma_direct = true;
> +       else
> +               dev->is_dma_direct = false;

So I am not sure this will work on x86. If I am not mistaken I believe
dev->dma_ops is normally not set and instead the default dma
operations are pulled via get_arch_dma_ops which returns the global
dma_ops pointer.

What you may want to consider as an alternative would be to look at
modifying drivers that are using the swiotlb so that you could just
overwrite the dev->dma_ops with the dma_direct_ops in the cases where
the hardware can support accessing all of physical hardware and where
we aren't forcing the use of the bounce buffers in the swiotlb.

Then for the above code you only have to worry about the map calls,
and you could just do a check against the dma_direct_ops pointer
instead of having to add a new flag.

>         return 0;
>  }
>  #endif
> diff --git a/lib/dma-direct.c b/lib/dma-direct.c
> index c0bba30fef0a..3deb8666974b 100644
> --- a/lib/dma-direct.c
> +++ b/lib/dma-direct.c
> @@ -120,7 +120,7 @@ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
>                 free_pages((unsigned long)cpu_addr, page_order);
>  }
>
> -static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
> +dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
>                 unsigned long offset, size_t size, enum dma_data_direction dir,
>                 unsigned long attrs)
>  {
> @@ -131,7 +131,7 @@ static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
>         return dma_addr;
>  }
>
> -static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
> +int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
>                 int nents, enum dma_data_direction dir, unsigned long attrs)
>  {
>         int i;
> --
> 2.17.0
>
>

^ permalink raw reply

* [PATCH net-next 1/1] tc-testing: add sample action tests
From: Roman Mashak @ 2018-04-16 16:06 UTC (permalink / raw)
  To: davem; +Cc: netdev, kernel, jhs, xiyou.wangcong, jiri, Roman Mashak

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
---
 .../tc-testing/tc-tests/actions/sample.json        | 588 +++++++++++++++++++++
 1 file changed, 588 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/sample.json

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
new file mode 100644
index 000000000000..3aca33c00039
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
@@ -0,0 +1,588 @@
+[
+    {
+        "id": "9784",
+        "name": "Add valid sample action with mandatory arguments",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 10 group 1 index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 2",
+        "matchPattern": "action order [0-9]+: sample rate 1/10 group 1.*index 2 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "5c91",
+        "name": "Add valid sample action with mandatory arguments and continue control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 700 group 2 continue index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 2",
+        "matchPattern": "action order [0-9]+: sample rate 1/700 group 2 continue.*index 2 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "334b",
+        "name": "Add valid sample action with mandatory arguments and drop control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 10000 group 11 drop index 22",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/10000 group 11 drop.*index 22 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "da69",
+        "name": "Add valid sample action with mandatory arguments and reclassify control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 20000 group 72 reclassify index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/20000 group 72 reclassify.*index 100 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "13ce",
+        "name": "Add valid sample action with mandatory arguments and pipe control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 20 group 2 pipe index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/20 group 2 pipe.*index 100 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "1886",
+        "name": "Add valid sample action with mandatory arguments and jump control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 700 group 25 jump 4 index 200",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 200",
+        "matchPattern": "action order [0-9]+: sample rate 1/700 group 25 jump 4.*index 200 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "b6d4",
+        "name": "Add sample action with mandatory arguments and invalid control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 200000 group 52 foo index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/200000 group 52 foo.*index 1 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "a874",
+        "name": "Add invalid sample action without mandatory arguments",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample.*index 1 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "ac01",
+        "name": "Add invalid sample action without mandatory argument rate",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample group 10 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample.*group 10.*index 1 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "4203",
+        "name": "Add invalid sample action without mandatory argument group",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 100 index 10",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "14a7",
+        "name": "Add invalid sample action without mandatory argument group",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 100 index 10",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "8f2e",
+        "name": "Add valid sample action with trunc argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 1024 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 1024 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "45f8",
+        "name": "Add sample action with maximum rate argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 4294967295 group 4 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/4294967295 group 4 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "ad0c",
+        "name": "Add sample action with maximum trunc argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 16000 group 4 trunc 4294967295 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/16000 group 4 trunc_size 4294967295 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "83a9",
+        "name": "Add sample action with maximum group argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 4294 group 4294967295 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 1",
+        "matchPattern": "action order [0-9]+: sample rate 1/4294 group 4294967295 pipe.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "ed27",
+        "name": "Add sample action with invalid rate argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 4294967296 group 4 index 10",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/4294967296 group 4 pipe.*index 10 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "2eae",
+        "name": "Add sample action with invalid group argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 4098 group 5294967299 continue index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 1",
+        "matchPattern": "action order [0-9]+: sample rate 1/4098 group 5294967299 continue.*index 1 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "6ff3",
+        "name": "Add sample action with invalid trunc size",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 112233445566 index 11",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 11",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 112233445566.*index 11 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "2b2a",
+        "name": "Add sample action with invalid index",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 5294967299",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 5294967299",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 5294967299 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "dee2",
+        "name": "Add sample action with maximum allowed index",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 4294967295",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 4294967295 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "560e",
+        "name": "Add sample action with cookie",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 45 cookie aabbccdd",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 45",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 45.*cookie aabbccdd",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "704a",
+        "name": "Replace existing sample action with new rate argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 1024 group 4 index 4"
+        ],
+        "cmdUnderTest": "$TC actions replace action sample rate 2048 group 4 index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/2048 group 4 pipe.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "60eb",
+        "name": "Replace existing sample action with new group argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 1024 group 4 index 4"
+        ],
+        "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "2cce",
+        "name": "Replace existing sample action with new trunc argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 1024 group 4 trunc 48 index 4"
+        ],
+        "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 trunc 64 index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 trunc_size 64 pipe.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "59d1",
+        "name": "Replace existing sample action with new control argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 1024 group 4 reclassify index 4"
+        ],
+        "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 pipe index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    }
+]
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH 1/1] net/mlx4_core: avoid resetting HCA when accessing an offline device
From: David Miller @ 2018-04-16 16:51 UTC (permalink / raw)
  To: yanjun.zhu; +Cc: tariqt, netdev, linux-rdma, haakon.bugge
In-Reply-To: <1523840527-22746-1-git-send-email-yanjun.zhu@oracle.com>

From: Zhu Yanjun <yanjun.zhu@oracle.com>
Date: Sun, 15 Apr 2018 21:02:07 -0400

> While a faulty cable is used or HCA firmware error, HCA device will
> be offline. When the driver is accessing this offline device, the
> following call trace will pop out.
 ...
> In the above call trace, the function mlx4_cmd_poll calls the function
> mlx4_cmd_post to access the HCA while HCA is offline. Then mlx4_cmd_post
> returns an error -EIO. Per -EIO, the function mlx4_cmd_poll calls
> mlx4_cmd_reset_flow to reset HCA. And the above call trace pops out.
> 
> This is not reasonable. Since HCA device is offline when it is being
> accessed, it should not be reset again.
> 
> In this patch, since HCA is offline, the function mlx4_cmd_post returns
> an error -EINVAL. Per -EINVAL, the function mlx4_cmd_poll directly returns
> instead of resetting HCA.
> 
> CC: Srinivas Eeda <srinivas.eeda@oracle.com>
> CC: Junxiao Bi <junxiao.bi@oracle.com>
> Suggested-by: Håkon Bugge <haakon.bugge@oracle.com>
> Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>

Tariq, I'm assuming you'll take this in and send it to me later.

Thanks.

^ permalink raw reply

* Re: [PATCH net] net: Fix one possible memleak in ip_setup_cork
From: David Miller @ 2018-04-16 16:58 UTC (permalink / raw)
  To: gfree.wind; +Cc: kuznet, netdev
In-Reply-To: <1523845005-6353-2-git-send-email-gfree.wind@vip.163.com>

From: gfree.wind@vip.163.com
Date: Mon, 16 Apr 2018 10:16:45 +0800

> From: Gao Feng <gfree.wind@vip.163.com>
> 
> It would allocate memory in this function when the cork->opt is NULL. But
> the memory isn't freed if failed in the latter rt check, and return error
> directly. It causes the memleak if its caller is ip_make_skb which also
> doesn't free the cork->opt when meet a error.
> 
> Now move the rt check ahead to avoid the memleak.
> 
> Signed-off-by: Gao Feng <gfree.wind@vip.163.com>

Looks good, applied and queued up for -stable.

I guess in the other code paths, ip_flush_pending_frames() or similar
would clean up the in-sock cork information.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox