netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next 0/6] ipv6: ipv6_dev_get_saddr() rcu works
@ 2017-10-08  2:30 Eric Dumazet
  2017-10-08  2:30 ` [PATCH net-next 1/6] ipv6: prepare RCU lookups for idev->addr_list Eric Dumazet
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Eric Dumazet @ 2017-10-08  2:30 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet, Hideaki YOSHIFUJI

Sending IPv6 UDP packets on non-connected sockets is quite slow,
because ipv6_dev_get_saddr() is still using an rwlock and silly
reference-counting games on ifa.

Tested:

$ ./super_netperf 16 -H 4444::555:0786 -l 2000 -t UDP_STREAM -- -m 100 &
[1] 12527

Performance is boosted from 2.02 Mpps to 4.28 Mpps

Kernel profile before patches :
  22.62%  [kernel]  [k] _raw_read_lock_bh
   7.04%  [kernel]  [k] refcount_sub_and_test
   6.56%  [kernel]  [k] ipv6_get_saddr_eval
   5.67%  [kernel]  [k] _raw_read_unlock_bh
   5.34%  [kernel]  [k] __ipv6_dev_get_saddr
   4.95%  [kernel]  [k] refcount_inc_not_zero
   4.03%  [kernel]  [k] __ip6addrlbl_match
   3.70%  [kernel]  [k] _raw_spin_lock
   3.44%  [kernel]  [k] ipv6_dev_get_saddr
   3.24%  [kernel]  [k] ip6_pol_route
   3.06%  [kernel]  [k] refcount_add_not_zero
   2.30%  [kernel]  [k] __local_bh_enable_ip
   1.81%  [kernel]  [k] mlx4_en_xmit
   1.20%  [kernel]  [k] __ip6_append_data
   1.12%  [kernel]  [k] __ip6_make_skb
   1.11%  [kernel]  [k] __dev_queue_xmit
   1.06%  [kernel]  [k] l3mdev_master_ifindex_rcu

Kernel profile after patches :
  11.36%  [kernel]  [k] ip6_pol_route
   7.65%  [kernel]  [k] _raw_spin_lock
   7.16%  [kernel]  [k] __ipv6_dev_get_saddr
   6.49%  [kernel]  [k] ipv6_get_saddr_eval
   6.04%  [kernel]  [k] refcount_add_not_zero
   3.34%  [kernel]  [k] __ip6addrlbl_match
   2.62%  [kernel]  [k] __dev_queue_xmit
   2.37%  [kernel]  [k] mlx4_en_xmit
   2.26%  [kernel]  [k] dst_release
   1.89%  [kernel]  [k] __ip6_make_skb
   1.87%  [kernel]  [k] __ip6_append_data
   1.86%  [kernel]  [k] udpv6_sendmsg
   1.86%  [kernel]  [k] ip6t_do_table
   1.64%  [kernel]  [k] ipv6_dev_get_saddr
   1.64%  [kernel]  [k] find_match
   1.51%  [kernel]  [k] l3mdev_master_ifindex_rcu
   1.24%  [kernel]  [k] ipv6_addr_label

Eric Dumazet (6):
  ipv6: prepare RCU lookups for idev->addr_list
  ipv6: ipv6_count_addresses() rcu conversion
  ipv6: ipv6_chk_custom_prefix() rcu conversion
  ipv6: ipv6_chk_prefix() rcu conversion
  ipv6: __ipv6_dev_get_saddr() rcu conversion
  ipv6: avoid cache line dirtying in ipv6_dev_get_saddr()

 net/ipv6/addrconf.c | 70 +++++++++++++++++++----------------------------------
 1 file changed, 25 insertions(+), 45 deletions(-)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH net-next 1/6] ipv6: prepare RCU lookups for idev->addr_list
  2017-10-08  2:30 [PATCH net-next 0/6] ipv6: ipv6_dev_get_saddr() rcu works Eric Dumazet
@ 2017-10-08  2:30 ` Eric Dumazet
  2017-10-08  2:30 ` [PATCH net-next 2/6] ipv6: ipv6_count_addresses() rcu conversion Eric Dumazet
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Dumazet @ 2017-10-08  2:30 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet, Hideaki YOSHIFUJI

inet6_ifa_finish_destroy() already uses kfree_rcu() to free
inet6_ifaddr structs.

We need to use proper list additions/deletions in order
to allow readers to use RCU instead of idev->lock rwlock.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv6/addrconf.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9854d93e45bb5ca919d55e6b875f7dc392a5dae5..d1ff0955b709eaa6b5d94bd8d740334eb1eed6d7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -945,7 +945,7 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
 			break;
 	}
 
-	list_add_tail(&ifp->if_list, p);
+	list_add_tail_rcu(&ifp->if_list, p);
 }
 
 static u32 inet6_addr_hash(const struct in6_addr *addr)
@@ -1204,7 +1204,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 	if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE))
 		action = check_cleanup_prefix_route(ifp, &expires);
 
-	list_del_init(&ifp->if_list);
+	list_del_rcu(&ifp->if_list);
 	__in6_ifa_put(ifp);
 
 	write_unlock_bh(&ifp->idev->lock);
@@ -3562,7 +3562,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	struct net *net = dev_net(dev);
 	struct inet6_dev *idev;
 	struct inet6_ifaddr *ifa, *tmp;
-	struct list_head del_list;
 	int _keep_addr;
 	bool keep_addr;
 	int state, i;
@@ -3654,7 +3653,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	 */
 	keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
 
-	INIT_LIST_HEAD(&del_list);
 	list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
 		struct rt6_info *rt = NULL;
 		bool keep;
@@ -3663,8 +3661,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 		keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
 			!addr_is_local(&ifa->addr);
-		if (!keep)
-			list_move(&ifa->if_list, &del_list);
 
 		write_unlock_bh(&idev->lock);
 		spin_lock_bh(&ifa->lock);
@@ -3698,19 +3694,14 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		}
 
 		write_lock_bh(&idev->lock);
+		if (!keep) {
+			list_del_rcu(&ifa->if_list);
+			in6_ifa_put(ifa);
+		}
 	}
 
 	write_unlock_bh(&idev->lock);
 
-	/* now clean up addresses to be removed */
-	while (!list_empty(&del_list)) {
-		ifa = list_first_entry(&del_list,
-				       struct inet6_ifaddr, if_list);
-		list_del(&ifa->if_list);
-
-		in6_ifa_put(ifa);
-	}
-
 	/* Step 5: Discard anycast and multicast list */
 	if (how) {
 		ipv6_ac_destroy_dev(idev);
-- 
2.14.2.920.gcf0c67979c-goog

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 2/6] ipv6: ipv6_count_addresses() rcu conversion
  2017-10-08  2:30 [PATCH net-next 0/6] ipv6: ipv6_dev_get_saddr() rcu works Eric Dumazet
  2017-10-08  2:30 ` [PATCH net-next 1/6] ipv6: prepare RCU lookups for idev->addr_list Eric Dumazet
@ 2017-10-08  2:30 ` Eric Dumazet
  2017-10-08  2:30 ` [PATCH net-next 3/6] ipv6: ipv6_chk_custom_prefix() " Eric Dumazet
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Dumazet @ 2017-10-08  2:30 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet, Hideaki YOSHIFUJI

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv6/addrconf.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d1ff0955b709eaa6b5d94bd8d740334eb1eed6d7..2e029c8be1f2e2746e804a47bb5a3eb632adaa5d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -152,7 +152,7 @@ static void ipv6_regen_rndid(struct inet6_dev *idev);
 static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
 
 static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
-static int ipv6_count_addresses(struct inet6_dev *idev);
+static int ipv6_count_addresses(const struct inet6_dev *idev);
 static int ipv6_generate_stable_address(struct in6_addr *addr,
 					u8 dad_count,
 					const struct inet6_dev *idev);
@@ -1785,15 +1785,15 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
 	return err;
 }
 
-static int ipv6_count_addresses(struct inet6_dev *idev)
+static int ipv6_count_addresses(const struct inet6_dev *idev)
 {
+	const struct inet6_ifaddr *ifp;
 	int cnt = 0;
-	struct inet6_ifaddr *ifp;
 
-	read_lock_bh(&idev->lock);
-	list_for_each_entry(ifp, &idev->addr_list, if_list)
+	rcu_read_lock();
+	list_for_each_entry_rcu(ifp, &idev->addr_list, if_list)
 		cnt++;
-	read_unlock_bh(&idev->lock);
+	rcu_read_unlock();
 	return cnt;
 }
 
-- 
2.14.2.920.gcf0c67979c-goog

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 3/6] ipv6: ipv6_chk_custom_prefix() rcu conversion
  2017-10-08  2:30 [PATCH net-next 0/6] ipv6: ipv6_dev_get_saddr() rcu works Eric Dumazet
  2017-10-08  2:30 ` [PATCH net-next 1/6] ipv6: prepare RCU lookups for idev->addr_list Eric Dumazet
  2017-10-08  2:30 ` [PATCH net-next 2/6] ipv6: ipv6_count_addresses() rcu conversion Eric Dumazet
@ 2017-10-08  2:30 ` Eric Dumazet
  2017-10-08  2:30 ` [PATCH net-next 4/6] ipv6: ipv6_chk_prefix() " Eric Dumazet
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Dumazet @ 2017-10-08  2:30 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet, Hideaki YOSHIFUJI

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv6/addrconf.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2e029c8be1f2e2746e804a47bb5a3eb632adaa5d..33ee84c2512b50e316a0a6ed38a604a382ce5319 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1859,20 +1859,18 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
 bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
 	const unsigned int prefix_len, struct net_device *dev)
 {
-	struct inet6_dev *idev;
-	struct inet6_ifaddr *ifa;
+	const struct inet6_ifaddr *ifa;
+	const struct inet6_dev *idev;
 	bool ret = false;
 
 	rcu_read_lock();
 	idev = __in6_dev_get(dev);
 	if (idev) {
-		read_lock_bh(&idev->lock);
-		list_for_each_entry(ifa, &idev->addr_list, if_list) {
+		list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
 			ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
 			if (ret)
 				break;
 		}
-		read_unlock_bh(&idev->lock);
 	}
 	rcu_read_unlock();
 
-- 
2.14.2.920.gcf0c67979c-goog

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 4/6] ipv6: ipv6_chk_prefix() rcu conversion
  2017-10-08  2:30 [PATCH net-next 0/6] ipv6: ipv6_dev_get_saddr() rcu works Eric Dumazet
                   ` (2 preceding siblings ...)
  2017-10-08  2:30 ` [PATCH net-next 3/6] ipv6: ipv6_chk_custom_prefix() " Eric Dumazet
@ 2017-10-08  2:30 ` Eric Dumazet
  2017-10-08  2:30 ` [PATCH net-next 5/6] ipv6: __ipv6_dev_get_saddr() " Eric Dumazet
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Dumazet @ 2017-10-08  2:30 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet, Hideaki YOSHIFUJI

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv6/addrconf.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 33ee84c2512b50e316a0a6ed38a604a382ce5319..ea63442209bf268f1a19b5e014cb8c7e34fd40b4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1880,22 +1880,20 @@ EXPORT_SYMBOL(ipv6_chk_custom_prefix);
 
 int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
 {
-	struct inet6_dev *idev;
-	struct inet6_ifaddr *ifa;
+	const struct inet6_ifaddr *ifa;
+	const struct inet6_dev *idev;
 	int	onlink;
 
 	onlink = 0;
 	rcu_read_lock();
 	idev = __in6_dev_get(dev);
 	if (idev) {
-		read_lock_bh(&idev->lock);
-		list_for_each_entry(ifa, &idev->addr_list, if_list) {
+		list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
 			onlink = ipv6_prefix_equal(addr, &ifa->addr,
 						   ifa->prefix_len);
 			if (onlink)
 				break;
 		}
-		read_unlock_bh(&idev->lock);
 	}
 	rcu_read_unlock();
 	return onlink;
-- 
2.14.2.920.gcf0c67979c-goog

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 5/6] ipv6: __ipv6_dev_get_saddr() rcu conversion
  2017-10-08  2:30 [PATCH net-next 0/6] ipv6: ipv6_dev_get_saddr() rcu works Eric Dumazet
                   ` (3 preceding siblings ...)
  2017-10-08  2:30 ` [PATCH net-next 4/6] ipv6: ipv6_chk_prefix() " Eric Dumazet
@ 2017-10-08  2:30 ` Eric Dumazet
  2017-10-08  2:30 ` [PATCH net-next 6/6] ipv6: avoid cache line dirtying in ipv6_dev_get_saddr() Eric Dumazet
  2017-10-09  4:17 ` [PATCH net-next 0/6] ipv6: ipv6_dev_get_saddr() rcu works David Miller
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Dumazet @ 2017-10-08  2:30 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet, Hideaki YOSHIFUJI

Callers hold rcu_read_lock(), so we do not need
the rcu_read_lock()/rcu_read_unlock() pair.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv6/addrconf.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ea63442209bf268f1a19b5e014cb8c7e34fd40b4..20c3ca777529fc49ebf749ca6f7d8c2451258d55 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1558,8 +1558,7 @@ static int __ipv6_dev_get_saddr(struct net *net,
 {
 	struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx];
 
-	read_lock_bh(&idev->lock);
-	list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
+	list_for_each_entry_rcu(score->ifa, &idev->addr_list, if_list) {
 		int i;
 
 		/*
@@ -1625,7 +1624,6 @@ static int __ipv6_dev_get_saddr(struct net *net,
 		}
 	}
 out:
-	read_unlock_bh(&idev->lock);
 	return hiscore_idx;
 }
 
-- 
2.14.2.920.gcf0c67979c-goog

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 6/6] ipv6: avoid cache line dirtying in ipv6_dev_get_saddr()
  2017-10-08  2:30 [PATCH net-next 0/6] ipv6: ipv6_dev_get_saddr() rcu works Eric Dumazet
                   ` (4 preceding siblings ...)
  2017-10-08  2:30 ` [PATCH net-next 5/6] ipv6: __ipv6_dev_get_saddr() " Eric Dumazet
@ 2017-10-08  2:30 ` Eric Dumazet
  2017-10-09  4:17 ` [PATCH net-next 0/6] ipv6: ipv6_dev_get_saddr() rcu works David Miller
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Dumazet @ 2017-10-08  2:30 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet, Hideaki YOSHIFUJI

By extending the RCU read-side section a bit, we can avoid the
very expensive in6_ifa_put()/in6_ifa_hold() calls
done in __ipv6_dev_get_saddr() and ipv6_dev_get_saddr().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv6/addrconf.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 20c3ca777529fc49ebf749ca6f7d8c2451258d55..cab3faad2bf1354c1241a11ac27178ea675aed55 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1608,11 +1608,6 @@ static int __ipv6_dev_get_saddr(struct net *net,
 				}
 				break;
 			} else if (minihiscore < miniscore) {
-				if (hiscore->ifa)
-					in6_ifa_put(hiscore->ifa);
-
-				in6_ifa_hold(score->ifa);
-
 				swap(hiscore, score);
 				hiscore_idx = 1 - hiscore_idx;
 
@@ -1660,6 +1655,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
 	int dst_type;
 	bool use_oif_addr = false;
 	int hiscore_idx = 0;
+	int ret = 0;
 
 	dst_type = __ipv6_addr_type(daddr);
 	dst.addr = daddr;
@@ -1735,15 +1731,14 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
 	}
 
 out:
-	rcu_read_unlock();
-
 	hiscore = &scores[hiscore_idx];
 	if (!hiscore->ifa)
-		return -EADDRNOTAVAIL;
+		ret = -EADDRNOTAVAIL;
+	else
+		*saddr = hiscore->ifa->addr;
 
-	*saddr = hiscore->ifa->addr;
-	in6_ifa_put(hiscore->ifa);
-	return 0;
+	rcu_read_unlock();
+	return ret;
 }
 EXPORT_SYMBOL(ipv6_dev_get_saddr);
 
-- 
2.14.2.920.gcf0c67979c-goog

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next 0/6] ipv6: ipv6_dev_get_saddr() rcu works
  2017-10-08  2:30 [PATCH net-next 0/6] ipv6: ipv6_dev_get_saddr() rcu works Eric Dumazet
                   ` (5 preceding siblings ...)
  2017-10-08  2:30 ` [PATCH net-next 6/6] ipv6: avoid cache line dirtying in ipv6_dev_get_saddr() Eric Dumazet
@ 2017-10-09  4:17 ` David Miller
  6 siblings, 0 replies; 8+ messages in thread
From: David Miller @ 2017-10-09  4:17 UTC (permalink / raw)
  To: edumazet; +Cc: netdev, eric.dumazet, yoshfuji

From: Eric Dumazet <edumazet@google.com>
Date: Sat,  7 Oct 2017 19:30:22 -0700

> Sending IPv6 UDP packets on non-connected sockets is quite slow,
> because ipv6_dev_get_saddr() is still using an rwlock and silly
> reference-counting games on ifa.
> 
> Tested:
> 
> $ ./super_netperf 16 -H 4444::555:0786 -l 2000 -t UDP_STREAM -- -m 100 &
> [1] 12527
> 
> Performance is boosted from 2.02 Mpps to 4.28 Mpps

Awesome, series applied, thanks!

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2017-10-09  4:17 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-08  2:30 [PATCH net-next 0/6] ipv6: ipv6_dev_get_saddr() rcu works Eric Dumazet
2017-10-08  2:30 ` [PATCH net-next 1/6] ipv6: prepare RCU lookups for idev->addr_list Eric Dumazet
2017-10-08  2:30 ` [PATCH net-next 2/6] ipv6: ipv6_count_addresses() rcu conversion Eric Dumazet
2017-10-08  2:30 ` [PATCH net-next 3/6] ipv6: ipv6_chk_custom_prefix() " Eric Dumazet
2017-10-08  2:30 ` [PATCH net-next 4/6] ipv6: ipv6_chk_prefix() " Eric Dumazet
2017-10-08  2:30 ` [PATCH net-next 5/6] ipv6: __ipv6_dev_get_saddr() " Eric Dumazet
2017-10-08  2:30 ` [PATCH net-next 6/6] ipv6: avoid cache line dirtying in ipv6_dev_get_saddr() Eric Dumazet
2017-10-09  4:17 ` [PATCH net-next 0/6] ipv6: ipv6_dev_get_saddr() rcu works David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).