netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next-2.6] net: dont hold rtnl mutex during netlink dump callbacks
@ 2011-04-28  8:56 Eric Dumazet
  2011-04-28 15:40 ` Stephen Hemminger
  2011-04-28 15:43 ` Stephen Hemminger
  0 siblings, 2 replies; 9+ messages in thread
From: Eric Dumazet @ 2011-04-28  8:56 UTC (permalink / raw)
  To: David Miller; +Cc: Patrick McHardy, netdev, Remi Denis-Courmont

Four years ago, Patrick made a change to hold rtnl mutex during netlink
dump callbacks.

I believe it was a wrong move. This slows down concurrent dumps, making
good old /proc/net/ files faster than rtnetlink in some situations.

This occurred to me because one "ip link show dev ..." was _very_ slow
on a workload adding/removing network devices in background.

All dump callbacks are able to use RCU locking now, so this patch does
roughly a revert of commits :

1c2d670f366 : [RTNETLINK]: Hold rtnl_mutex during netlink dump callbacks
6313c1e0992 : [RTNETLINK]: Remove unnecessary locking in dump callbacks

This let writers fight for rtnl mutex and readers going full speed.

It also takes care of phonet : phonet_route_get() is now called from rcu
read section. I renamed it to phonet_route_get_rcu()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
---
 include/net/phonet/pn_dev.h |    2 +-
 net/bridge/br_netlink.c     |    7 ++++---
 net/core/fib_rules.c        |    3 ++-
 net/core/rtnetlink.c        |   12 +++++-------
 net/decnet/dn_dev.c         |   10 ++++++----
 net/ipv6/ip6_fib.c          |    4 +++-
 net/phonet/pn_dev.c         |    6 +-----
 net/phonet/pn_netlink.c     |    4 +++-
 8 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h
index 13649eb..8639de5 100644
--- a/include/net/phonet/pn_dev.h
+++ b/include/net/phonet/pn_dev.h
@@ -51,7 +51,7 @@ void phonet_address_notify(int event, struct net_device *dev, u8 addr);
 int phonet_route_add(struct net_device *dev, u8 daddr);
 int phonet_route_del(struct net_device *dev, u8 daddr);
 void rtm_phonet_notify(int event, struct net_device *dev, u8 dst);
-struct net_device *phonet_route_get(struct net *net, u8 daddr);
+struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr);
 struct net_device *phonet_route_output(struct net *net, u8 daddr);
 
 #define PN_NO_ADDR	0xff
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 134a2ff..ffb0dc4 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -120,8 +120,9 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	int idx;
 
 	idx = 0;
-	for_each_netdev(net, dev) {
-		struct net_bridge_port *port = br_port_get_rtnl(dev);
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev) {
+		struct net_bridge_port *port = br_port_get_rcu(dev);
 
 		/* not a bridge port */
 		if (!port || idx < cb->args[0])
@@ -135,7 +136,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 skip:
 		++idx;
 	}
-
+	rcu_read_unlock();
 	cb->args[0] = idx;
 
 	return skb->len;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 8248ebb..3911586 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -590,7 +590,8 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
 	int idx = 0;
 	struct fib_rule *rule;
 
-	list_for_each_entry(rule, &ops->rules_list, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
 		if (idx < cb->args[1])
 			goto skip;
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d7c4bb4..2963312 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1007,10 +1007,11 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	s_h = cb->args[0];
 	s_idx = cb->args[1];
 
+	rcu_read_lock();
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
 		head = &net->dev_index_head[h];
-		hlist_for_each_entry(dev, node, head, index_hlist) {
+		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
 			if (idx < s_idx)
 				goto cont;
 			if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
@@ -1023,6 +1024,7 @@ cont:
 		}
 	}
 out:
+	rcu_read_unlock();
 	cb->args[1] = idx;
 	cb->args[0] = h;
 
@@ -1879,7 +1881,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	int min_len;
 	int family;
 	int type;
-	int err;
 
 	type = nlh->nlmsg_type;
 	if (type > RTM_MAX)
@@ -1906,11 +1907,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (dumpit == NULL)
 			return -EOPNOTSUPP;
 
-		__rtnl_unlock();
 		rtnl = net->rtnl;
-		err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
-		rtnl_lock();
-		return err;
+		return netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
 	}
 
 	memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
@@ -1980,7 +1978,7 @@ static int __net_init rtnetlink_net_init(struct net *net)
 {
 	struct sock *sk;
 	sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
-				   rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
+				   rtnetlink_rcv, NULL, THIS_MODULE);
 	if (!sk)
 		return -ENOMEM;
 	net->rtnl = sk;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 0dcaa90..404fa15 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -752,7 +752,8 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 	skip_naddr = cb->args[1];
 
 	idx = 0;
-	for_each_netdev(&init_net, dev) {
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, dev) {
 		if (idx < skip_ndevs)
 			goto cont;
 		else if (idx > skip_ndevs) {
@@ -761,11 +762,11 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 			skip_naddr = 0;
 		}
 
-		if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL)
+		if ((dn_db = rcu_dereference(dev->dn_ptr)) == NULL)
 			goto cont;
 
-		for (ifa = rtnl_dereference(dn_db->ifa_list), dn_idx = 0; ifa;
-		     ifa = rtnl_dereference(ifa->ifa_next), dn_idx++) {
+		for (ifa = rcu_dereference(dn_db->ifa_list), dn_idx = 0; ifa;
+		     ifa = rcu_dereference(ifa->ifa_next), dn_idx++) {
 			if (dn_idx < skip_naddr)
 				continue;
 
@@ -778,6 +779,7 @@ cont:
 		idx++;
 	}
 done:
+	rcu_read_unlock();
 	cb->args[0] = idx;
 	cb->args[1] = dn_idx;
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index dd88df0..4076a0b 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -394,10 +394,11 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	arg.net = net;
 	w->args = &arg;
 
+	rcu_read_lock();
 	for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
 		e = 0;
 		head = &net->ipv6.fib_table_hash[h];
-		hlist_for_each_entry(tb, node, head, tb6_hlist) {
+		hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
 			if (e < s_e)
 				goto next;
 			res = fib6_dump_table(tb, skb, cb);
@@ -408,6 +409,7 @@ next:
 		}
 	}
 out:
+	rcu_read_unlock();
 	cb->args[1] = e;
 	cb->args[0] = h;
 
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 947038d..47b3452 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -426,18 +426,14 @@ int phonet_route_del(struct net_device *dev, u8 daddr)
 	return 0;
 }
 
-struct net_device *phonet_route_get(struct net *net, u8 daddr)
+struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr)
 {
 	struct phonet_net *pnn = phonet_pernet(net);
 	struct phonet_routes *routes = &pnn->routes;
 	struct net_device *dev;
 
-	ASSERT_RTNL(); /* no need to hold the device */
-
 	daddr >>= 2;
-	rcu_read_lock();
 	dev = rcu_dereference(routes->table[daddr]);
-	rcu_read_unlock();
 	return dev;
 }
 
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 58b3b1f..438accb 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -264,10 +264,11 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
 	struct net *net = sock_net(skb->sk);
 	u8 addr, addr_idx = 0, addr_start_idx = cb->args[0];
 
+	rcu_read_lock();
 	for (addr = 0; addr < 64; addr++) {
 		struct net_device *dev;
 
-		dev = phonet_route_get(net, addr << 2);
+		dev = phonet_route_get_rcu(net, addr << 2);
 		if (!dev)
 			continue;
 
@@ -279,6 +280,7 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
 	}
 
 out:
+	rcu_read_unlock();
 	cb->args[0] = addr_idx;
 	cb->args[1] = 0;
 



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next-2.6] net: dont hold rtnl mutex during netlink dump callbacks
  2011-04-28  8:56 [PATCH net-next-2.6] net: dont hold rtnl mutex during netlink dump callbacks Eric Dumazet
@ 2011-04-28 15:40 ` Stephen Hemminger
  2011-04-28 15:43 ` Stephen Hemminger
  1 sibling, 0 replies; 9+ messages in thread
From: Stephen Hemminger @ 2011-04-28 15:40 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, Patrick McHardy, netdev, Remi Denis-Courmont

On Thu, 28 Apr 2011 10:56:07 +0200
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> @@ -1980,7 +1978,7 @@ static int __net_init rtnetlink_net_init(struct net *net)
>  {
>  	struct sock *sk;
>  	sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
> -				   rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
> +				   rtnetlink_rcv, NULL, THIS_MODULE);

Looks like only genetlink is still using the mutex argument.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next-2.6] net: dont hold rtnl mutex during netlink dump callbacks
  2011-04-28  8:56 [PATCH net-next-2.6] net: dont hold rtnl mutex during netlink dump callbacks Eric Dumazet
  2011-04-28 15:40 ` Stephen Hemminger
@ 2011-04-28 15:43 ` Stephen Hemminger
  2011-04-28 15:53   ` Eric Dumazet
  2011-05-02 22:27   ` David Miller
  1 sibling, 2 replies; 9+ messages in thread
From: Stephen Hemminger @ 2011-04-28 15:43 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, Patrick McHardy, netdev, Remi Denis-Courmont

On Thu, 28 Apr 2011 10:56:07 +0200
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> Four years ago, Patrick made a change to hold rtnl mutex during netlink
> dump callbacks.
> 
> I believe it was a wrong move. This slows down concurrent dumps, making
> good old /proc/net/ files faster than rtnetlink in some situations.
> 
> This occurred to me because one "ip link show dev ..." was _very_ slow
> on a workload adding/removing network devices in background.
> 
> All dump callbacks are able to use RCU locking now, so this patch does
> roughly a revert of commits :
> 
> 1c2d670f366 : [RTNETLINK]: Hold rtnl_mutex during netlink dump callbacks
> 6313c1e0992 : [RTNETLINK]: Remove unnecessary locking in dump callbacks
> 
> This let writers fight for rtnl mutex and readers going full speed.
> 
> It also takes care of phonet : phonet_route_get() is now called from rcu
> read section. I renamed it to phonet_route_get_rcu()
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> Cc: Patrick McHardy <kaber@trash.net>
> Cc: Remi Denis-Courmont <remi.denis-courmont@nokia.com>

Acked-by: Stephen Hemminger <shemminger@vyatta.com>

-- 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next-2.6] net: dont hold rtnl mutex during netlink dump callbacks
  2011-04-28 15:43 ` Stephen Hemminger
@ 2011-04-28 15:53   ` Eric Dumazet
  2011-05-02 22:27   ` David Miller
  1 sibling, 0 replies; 9+ messages in thread
From: Eric Dumazet @ 2011-04-28 15:53 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: David Miller, Patrick McHardy, netdev, Remi Denis-Courmont

Le jeudi 28 avril 2011 à 08:43 -0700, Stephen Hemminger a écrit :
> On Thu, 28 Apr 2011 10:56:07 +0200
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
> 
> > Four years ago, Patrick made a change to hold rtnl mutex during netlink
> > dump callbacks.
> > 
> > I believe it was a wrong move. This slows down concurrent dumps, making
> > good old /proc/net/ files faster than rtnetlink in some situations.
> > 
> > This occurred to me because one "ip link show dev ..." was _very_ slow
> > on a workload adding/removing network devices in background.
> > 
> > All dump callbacks are able to use RCU locking now, so this patch does
> > roughly a revert of commits :
> > 
> > 1c2d670f366 : [RTNETLINK]: Hold rtnl_mutex during netlink dump callbacks
> > 6313c1e0992 : [RTNETLINK]: Remove unnecessary locking in dump callbacks
> > 
> > This let writers fight for rtnl mutex and readers going full speed.
> > 
> > It also takes care of phonet : phonet_route_get() is now called from rcu
> > read section. I renamed it to phonet_route_get_rcu()
> > 
> > Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> > Cc: Patrick McHardy <kaber@trash.net>
> > Cc: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
> 
> Acked-by: Stephen Hemminger <shemminger@vyatta.com>
> 

Thanks for reviewing Stephen 



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next-2.6] net: dont hold rtnl mutex during netlink dump callbacks
  2011-04-28 15:43 ` Stephen Hemminger
  2011-04-28 15:53   ` Eric Dumazet
@ 2011-05-02 22:27   ` David Miller
  2011-05-17 22:18     ` [PATCH net-next-2.6] net: add rcu protection to netdev->ifalias Eric Dumazet
  1 sibling, 1 reply; 9+ messages in thread
From: David Miller @ 2011-05-02 22:27 UTC (permalink / raw)
  To: shemminger; +Cc: eric.dumazet, kaber, netdev, remi.denis-courmont

From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 28 Apr 2011 08:43:37 -0700

> On Thu, 28 Apr 2011 10:56:07 +0200
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
> 
>> Four years ago, Patrick made a change to hold rtnl mutex during netlink
>> dump callbacks.
>> 
>> I believe it was a wrong move. This slows down concurrent dumps, making
>> good old /proc/net/ files faster than rtnetlink in some situations.
>> 
>> This occurred to me because one "ip link show dev ..." was _very_ slow
>> on a workload adding/removing network devices in background.
>> 
>> All dump callbacks are able to use RCU locking now, so this patch does
>> roughly a revert of commits :
>> 
>> 1c2d670f366 : [RTNETLINK]: Hold rtnl_mutex during netlink dump callbacks
>> 6313c1e0992 : [RTNETLINK]: Remove unnecessary locking in dump callbacks
>> 
>> This let writers fight for rtnl mutex and readers going full speed.
>> 
>> It also takes care of phonet : phonet_route_get() is now called from rcu
>> read section. I renamed it to phonet_route_get_rcu()
>> 
>> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
>> Cc: Patrick McHardy <kaber@trash.net>
>> Cc: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
> 
> Acked-by: Stephen Hemminger <shemminger@vyatta.com>

Applied, thanks everyone!

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH net-next-2.6] net: add rcu protection to netdev->ifalias
  2011-05-02 22:27   ` David Miller
@ 2011-05-17 22:18     ` Eric Dumazet
  2011-05-17 22:25       ` Stephen Hemminger
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2011-05-17 22:18 UTC (permalink / raw)
  To: David Miller; +Cc: shemminger, kaber, netdev, remi.denis-courmont

Le lundi 02 mai 2011 à 15:27 -0700, David Miller a écrit :
> From: Stephen Hemminger <shemminger@vyatta.com>
> Date: Thu, 28 Apr 2011 08:43:37 -0700
> 
> > On Thu, 28 Apr 2011 10:56:07 +0200
> > Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > 
> >> Four years ago, Patrick made a change to hold rtnl mutex during netlink
> >> dump callbacks.
> >> 
> >> I believe it was a wrong move. This slows down concurrent dumps, making
> >> good old /proc/net/ files faster than rtnetlink in some situations.
> >> 
> >> This occurred to me because one "ip link show dev ..." was _very_ slow
> >> on a workload adding/removing network devices in background.
> >> 
> >> All dump callbacks are able to use RCU locking now, so this patch does
> >> roughly a revert of commits :
> >> 
> >> 1c2d670f366 : [RTNETLINK]: Hold rtnl_mutex during netlink dump callbacks
> >> 6313c1e0992 : [RTNETLINK]: Remove unnecessary locking in dump callbacks
> >> 
> >> This let writers fight for rtnl mutex and readers going full speed.
> >> 
> >> It also takes care of phonet : phonet_route_get() is now called from rcu
> >> read section. I renamed it to phonet_route_get_rcu()
> >> 
> >> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> >> Cc: Patrick McHardy <kaber@trash.net>
> >> Cc: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
> > 
> > Acked-by: Stephen Hemminger <shemminger@vyatta.com>
> 
> Applied, thanks everyone!

I think we probably have to make some fixes, because rtnl_fill_ifinfo()
can access fields that were previously protected with RTNL

Let's see if it's doable, before considering re-adding rtnl_lock() in
rtnl_dump_ifinfo() ?

First step : dev->ifalias

Its late here, I'll continue the work tomorrow.

Thanks

[PATCH net-next-2.6] net: add rcu protection to netdev->ifalias

Avoid holding RTNL in show_ifalias() and rtnl_fill_ifinfo() to
manipulate netdev->ifalias

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/linux/netdevice.h |    7 ++++++-
 net/core/dev.c            |   28 ++++++++++++++++------------
 net/core/net-sysfs.c      |   13 +++++++------
 net/core/rtnetlink.c      |    8 ++++++--
 4 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a134d80..09b3df4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -980,6 +980,11 @@ struct net_device_ops {
 						    u32 features);
 };
 
+struct ifalias {
+	struct rcu_head	rcu;
+	char		name[0];
+};
+
 /*
  *	The DEVICE structure.
  *	Actually, this whole structure is a big mistake.  It mixes I/O
@@ -1004,7 +1009,7 @@ struct net_device {
 	/* device name hash chain */
 	struct hlist_node	name_hlist;
 	/* snmp alias */
-	char 			*ifalias;
+	struct ifalias __rcu 	*ifalias;
 
 	/*
 	 *	I/O specific fields
diff --git a/net/core/dev.c b/net/core/dev.c
index 155de20..01e3be0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1035,6 +1035,12 @@ rollback:
 	return err;
 }
 
+
+static void ifalias_kfree_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct ifalias, rcu));
+}
+
 /**
  *	dev_set_alias - change ifalias of a device
  *	@dev: device
@@ -1045,24 +1051,22 @@ rollback:
  */
 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
 {
+	struct ifalias *ifalias, *new = NULL;
 	ASSERT_RTNL();
 
 	if (len >= IFALIASZ)
 		return -EINVAL;
 
-	if (!len) {
-		if (dev->ifalias) {
-			kfree(dev->ifalias);
-			dev->ifalias = NULL;
-		}
-		return 0;
+	ifalias = rtnl_dereference(dev->ifalias);
+	if (len) {
+		new = kmalloc(sizeof(*new) + len + 1, GFP_KERNEL);
+		if (!new)
+			return -ENOMEM;
+		strlcpy(new->name, alias, len + 1);
 	}
-
-	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
-	if (!dev->ifalias)
-		return -ENOMEM;
-
-	strlcpy(dev->ifalias, alias, len+1);
+	if (ifalias)
+		call_rcu(&ifalias->rcu, ifalias_kfree_rcu);
+	rcu_assign_pointer(dev->ifalias, new);
 	return len;
 }
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 1b12217..e2acf15 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -281,13 +281,14 @@ static ssize_t show_ifalias(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
 	const struct net_device *netdev = to_net_dev(dev);
+	struct ifalias *ifalias;
 	ssize_t ret = 0;
 
-	if (!rtnl_trylock())
-		return restart_syscall();
-	if (netdev->ifalias)
-		ret = sprintf(buf, "%s\n", netdev->ifalias);
-	rtnl_unlock();
+	rcu_read_lock();
+	ifalias = rcu_dereference(netdev->ifalias);
+	if (ifalias)
+		ret = sprintf(buf, "%s\n", ifalias->name);
+	rcu_read_unlock();
 	return ret;
 }
 
@@ -1265,7 +1266,7 @@ static void netdev_release(struct device *d)
 
 	BUG_ON(dev->reg_state != NETREG_RELEASED);
 
-	kfree(dev->ifalias);
+	kfree(rcu_dereference_protected(dev->ifalias, 1));
 	kfree((char *)dev - dev->padded);
 }
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d2ba259..c8f0a11 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -849,6 +849,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	const struct rtnl_link_stats64 *stats;
 	struct nlattr *attr, *af_spec;
 	struct rtnl_af_ops *af_ops;
+	struct ifalias *ifalias;
 
 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
 	if (nlh == NULL)
@@ -879,8 +880,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (dev->qdisc)
 		NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc->ops->id);
 
-	if (dev->ifalias)
-		NLA_PUT_STRING(skb, IFLA_IFALIAS, dev->ifalias);
+	rcu_read_lock();
+	ifalias = rcu_dereference(dev->ifalias);
+	if (ifalias)
+		NLA_PUT_STRING(skb, IFLA_IFALIAS, ifalias->name);
+	rcu_read_unlock();
 
 	if (1) {
 		struct rtnl_link_ifmap map = {



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next-2.6] net: add rcu protection to netdev->ifalias
  2011-05-17 22:18     ` [PATCH net-next-2.6] net: add rcu protection to netdev->ifalias Eric Dumazet
@ 2011-05-17 22:25       ` Stephen Hemminger
  2011-05-17 22:39         ` Eric Dumazet
  0 siblings, 1 reply; 9+ messages in thread
From: Stephen Hemminger @ 2011-05-17 22:25 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, kaber, netdev, remi.denis-courmont

On Wed, 18 May 2011 00:18:12 +0200
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> Le lundi 02 mai 2011 à 15:27 -0700, David Miller a écrit :
> > From: Stephen Hemminger <shemminger@vyatta.com>
> > Date: Thu, 28 Apr 2011 08:43:37 -0700
> > 
> > > On Thu, 28 Apr 2011 10:56:07 +0200
> > > Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > > 
> > >> Four years ago, Patrick made a change to hold rtnl mutex during netlink
> > >> dump callbacks.
> > >> 
> > >> I believe it was a wrong move. This slows down concurrent dumps, making
> > >> good old /proc/net/ files faster than rtnetlink in some situations.
> > >> 
> > >> This occurred to me because one "ip link show dev ..." was _very_ slow
> > >> on a workload adding/removing network devices in background.
> > >> 
> > >> All dump callbacks are able to use RCU locking now, so this patch does
> > >> roughly a revert of commits :
> > >> 
> > >> 1c2d670f366 : [RTNETLINK]: Hold rtnl_mutex during netlink dump callbacks
> > >> 6313c1e0992 : [RTNETLINK]: Remove unnecessary locking in dump callbacks
> > >> 
> > >> This let writers fight for rtnl mutex and readers going full speed.
> > >> 
> > >> It also takes care of phonet : phonet_route_get() is now called from rcu
> > >> read section. I renamed it to phonet_route_get_rcu()
> > >> 
> > >> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> > >> Cc: Patrick McHardy <kaber@trash.net>
> > >> Cc: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
> > > 
> > > Acked-by: Stephen Hemminger <shemminger@vyatta.com>
> > 
> > Applied, thanks everyone!
> 
> I think we probably have to make some fixes, because rtnl_fill_ifinfo()
> can access fields that were previously protected with RTNL
> 
> Let's see if it's doable, before considering re-adding rtnl_lock() in
> rtnl_dump_ifinfo() ?
> 
> First step : dev->ifalias
> 
> Its late here, I'll continue the work tomorrow.
> 
> Thanks
> 
> [PATCH net-next-2.6] net: add rcu protection to netdev->ifalias
> 
> Avoid holding RTNL in show_ifalias() and rtnl_fill_ifinfo() to
> manipulate netdev->ifalias
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

I am beginning to think the idea of optimizing case of "ip link show"
running in background with adding/removing network devices is silly. 
And switching to RCU leads to all sorts of corner cases where only
partial data will be read. Is it really worth it?


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next-2.6] net: add rcu protection to netdev->ifalias
  2011-05-17 22:25       ` Stephen Hemminger
@ 2011-05-17 22:39         ` Eric Dumazet
  2011-05-18  4:35           ` Eric Dumazet
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2011-05-17 22:39 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, kaber, netdev, remi.denis-courmont

Le mardi 17 mai 2011 à 15:25 -0700, Stephen Hemminger a écrit :

> I am beginning to think the idea of optimizing case of "ip link show"
> running in background with adding/removing network devices is silly. 
> And switching to RCU leads to all sorts of corner cases where only
> partial data will be read. Is it really worth it?
> 

Just try "modprobe dummy numdummies=1000", and watch load average
raising, because so many processes want to hold RTNL, and the
rtnl_trylock() crazy thing make them spin (restart syscall, and find
RTNL locked again... wooo...)

Some workloads hit RTNL quite hard, and monitoring tools were sometime
frozen for unexpected long times. (before the patch, when we used to
hold RTNL in dump())

I dont know, if it happens to be too hard, we'll just stick again rtnl
for "ip link show" ;)




^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next-2.6] net: add rcu protection to netdev->ifalias
  2011-05-17 22:39         ` Eric Dumazet
@ 2011-05-18  4:35           ` Eric Dumazet
  0 siblings, 0 replies; 9+ messages in thread
From: Eric Dumazet @ 2011-05-18  4:35 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, kaber, netdev, remi.denis-courmont

Le mercredi 18 mai 2011 à 00:39 +0200, Eric Dumazet a écrit :

> I dont know, if it happens to be too hard, we'll just stick again rtnl
> for "ip link show" ;)
> 
> 

I believe I'll take this path, its a bit too hard for the moment, and
need several preliminary steps :

It is making sense trying to get rid of rtnl_trylock() hack with more
fine grained locks, and thus lower pressure on RTNL.

Some synchronize_rcu() calls are done whith RTNL held : All processes
hitting rtnl_trylock() have to enter a busy loop, restarting syscall as
long as the RTNL owner is blocked in synchronize_rcu().





^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2011-05-18  4:35 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-04-28  8:56 [PATCH net-next-2.6] net: dont hold rtnl mutex during netlink dump callbacks Eric Dumazet
2011-04-28 15:40 ` Stephen Hemminger
2011-04-28 15:43 ` Stephen Hemminger
2011-04-28 15:53   ` Eric Dumazet
2011-05-02 22:27   ` David Miller
2011-05-17 22:18     ` [PATCH net-next-2.6] net: add rcu protection to netdev->ifalias Eric Dumazet
2011-05-17 22:25       ` Stephen Hemminger
2011-05-17 22:39         ` Eric Dumazet
2011-05-18  4:35           ` Eric Dumazet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).