From: Eric Dumazet <eric.dumazet@gmail.com>
To: Stephen Hemminger <shemminger@vyatta.com>
Cc: David Miller <davem@davemloft.net>, netdev@vger.kernel.org
Subject: Re: [PATCH net-next-2.6] net: Introduce dev_get_by_index_rcu()
Date: Tue, 20 Oct 2009 07:18:49 +0200 [thread overview]
Message-ID: <4ADD4839.9010500@gmail.com> (raw)
In-Reply-To: <20091020140632.79efb738@s6510>
Stephen Hemminger a écrit :
> On Tue, 20 Oct 2009 07:03:44 +0200
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>> David Miller a écrit :
>>> From: Eric Dumazet <eric.dumazet@gmail.com>
>>> Date: Tue, 20 Oct 2009 06:23:54 +0200
>>>
>>>> I wonder if the whole thing could use RCU somehow, since some
>>>> workloads hit this dev_base_lock rwlock pretty hard...
>>> True, but for now we'll put your fix in :-)
>> [PATCH net-next-2.6] net: Introduce dev_get_by_index_rcu()
>>
>> Some workloads hit dev_base_lock rwlock pretty hard.
>> We can use RCU lookups to avoid touching this rwlock.
>>
>> netdevices are already freed after a RCU grace period, so this patch
>> adds no penalty at device dismantle time.
>>
>> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
>
> All usage dev_base_lock should be replaceable by using combination of rtnl_mutex
> and RCU?
Yes, probably, but I believe we should make step-by-step patches?
1) __dev_get_by_index() is faster than dev_get_by_index_rcu()
2) I am not sure holding RTNL means we also have rcu_read_lock() implied.
However dev_ifname() could use rcu_read_lock() in the same patch,
here is an updated version.
[PATCH net-next-2.6] net: Introduce dev_get_by_index_rcu()
Some workloads hit dev_base_lock rwlock pretty hard.
We can use RCU lookups to avoid touching this rwlock.
netdevices are already freed after an RCU grace period, so this patch
adds no penalty at device dismantle time.
dev_ifname() converted to dev_get_by_index_rcu()
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
include/linux/netdevice.h | 1
net/core/dev.c | 48 ++++++++++++++++++++++++++++--------
2 files changed, 39 insertions(+), 10 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8380009..4eda680 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1127,6 +1127,7 @@ extern void netdev_resync_ops(struct net_device *dev);
extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
extern struct net_device *dev_get_by_index(struct net *net, int ifindex);
extern struct net_device *__dev_get_by_index(struct net *net, int ifindex);
+extern struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
extern int dev_restart(struct net_device *dev);
#ifdef CONFIG_NETPOLL_TRAP
extern int netpoll_trap(void);
diff --git a/net/core/dev.c b/net/core/dev.c
index 28b0b9e..4564596 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -217,12 +217,15 @@ static int list_netdevice(struct net_device *dev)
write_lock_bh(&dev_base_lock);
list_add_tail(&dev->dev_list, &net->dev_base_head);
hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
- hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
+ hlist_add_head_rcu(&dev->index_hlist,
+ dev_index_hash(net, dev->ifindex));
write_unlock_bh(&dev_base_lock);
return 0;
}
-/* Device list removal */
+/* Device list removal
+ * caller must respect a RCU grace period before freeing/reusing dev
+ */
static void unlist_netdevice(struct net_device *dev)
{
ASSERT_RTNL();
@@ -231,7 +234,7 @@ static void unlist_netdevice(struct net_device *dev)
write_lock_bh(&dev_base_lock);
list_del(&dev->dev_list);
hlist_del(&dev->name_hlist);
- hlist_del(&dev->index_hlist);
+ hlist_del_rcu(&dev->index_hlist);
write_unlock_bh(&dev_base_lock);
}
@@ -649,6 +652,31 @@ struct net_device *__dev_get_by_index(struct net *net, int ifindex)
}
EXPORT_SYMBOL(__dev_get_by_index);
+/**
+ * dev_get_by_index_rcu - find a device by its ifindex
+ * @net: the applicable net namespace
+ * @ifindex: index of device
+ *
+ * Search for an interface by index. Returns %NULL if the device
+ * is not found or a pointer to the device. The device has not
+ * had its reference counter increased so the caller must be careful
+ * about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
+{
+ struct hlist_node *p;
+ struct net_device *dev;
+ struct hlist_head *head = dev_index_hash(net, ifindex);
+
+ hlist_for_each_entry_rcu(dev, p, head, index_hlist)
+ if (dev->ifindex == ifindex)
+ return dev;
+
+ return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_index_rcu);
+
/**
* dev_get_by_index - find a device by its ifindex
@@ -665,11 +693,11 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
struct net_device *dev;
- read_lock(&dev_base_lock);
- dev = __dev_get_by_index(net, ifindex);
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, ifindex);
if (dev)
dev_hold(dev);
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
return dev;
}
EXPORT_SYMBOL(dev_get_by_index);
@@ -2930,15 +2958,15 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
- read_lock(&dev_base_lock);
- dev = __dev_get_by_index(net, ifr.ifr_ifindex);
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
if (!dev) {
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
return -ENODEV;
}
strcpy(ifr.ifr_name, dev->name);
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
return -EFAULT;
next prev parent reply other threads:[~2009-10-20 5:18 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-10-19 16:41 [PATCH] net: Fix IP_MULTICAST_IF Eric Dumazet
2009-10-20 3:59 ` David Miller
2009-10-20 4:07 ` Eric Dumazet
2009-10-20 4:16 ` Eric Dumazet
2009-10-20 4:21 ` David Miller
2009-10-20 4:20 ` David Miller
2009-10-20 4:23 ` Eric Dumazet
2009-10-20 4:28 ` David Miller
2009-10-20 5:03 ` [PATCH net-next-2.6] net: Introduce dev_get_by_index_rcu() Eric Dumazet
2009-10-20 5:06 ` Stephen Hemminger
2009-10-20 5:18 ` Eric Dumazet [this message]
2009-10-29 8:43 ` David Miller
2009-10-20 12:35 ` [PATCH] ifb: should not use __dev_get_by_index() without locks Eric Dumazet
2009-10-23 4:54 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4ADD4839.9010500@gmail.com \
--to=eric.dumazet@gmail.com \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
--cc=shemminger@vyatta.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).