* [NETLINK 01/04]: Switch cb_lock spinlock to mutex and allow to override it
2007-04-16 7:51 [NETLINK 00/04]: Consistent dump callback locking Patrick McHardy
@ 2007-04-16 7:51 ` Patrick McHardy
2007-04-16 23:55 ` David Miller
2007-04-16 7:51 ` [RTNETLINK 02/04]: Hold rtnl_mutex during netlink dump callbacks Patrick McHardy
` (2 subsequent siblings)
3 siblings, 1 reply; 10+ messages in thread
From: Patrick McHardy @ 2007-04-16 7:51 UTC (permalink / raw)
To: davem; +Cc: netdev, Patrick McHardy
[NETLINK]: Switch cb_lock spinlock to mutex and allow to override it
Switch cb_lock to mutex and allow netlink kernel users to override it
with a subsystem specific mutex for consistent locking in dump callbacks.
All netlink_dump_start users have been audited not to rely on any
side-effects of the previously used spinlock.
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
commit 8373a426e9d692f74d10867c2dc3ea332e09154c
tree b0c1c2cc07d75444d8e5cb3da0b521f863324d93
parent 7d91d7463ee62d4c45d3fdd78ba92dee2920b342
author Patrick McHardy <kaber@trash.net> Mon, 16 Apr 2007 09:05:17 +0200
committer Patrick McHardy <kaber@trash.net> Mon, 16 Apr 2007 09:05:17 +0200
drivers/connector/connector.c | 2 +-
drivers/scsi/scsi_netlink.c | 3 ++-
drivers/scsi/scsi_transport_iscsi.c | 2 +-
fs/ecryptfs/netlink.c | 2 +-
include/linux/netlink.h | 5 ++++-
kernel/audit.c | 2 +-
lib/kobject_uevent.c | 2 +-
net/bridge/netfilter/ebt_ulog.c | 2 +-
net/core/rtnetlink.c | 2 +-
net/decnet/netfilter/dn_rtmsg.c | 2 +-
net/ipv4/fib_frontend.c | 3 ++-
net/ipv4/inet_diag.c | 2 +-
net/ipv4/netfilter/ip_queue.c | 2 +-
net/ipv4/netfilter/ipt_ULOG.c | 2 +-
net/ipv6/netfilter/ip6_queue.c | 2 +-
net/netfilter/nfnetlink.c | 2 +-
net/netlink/af_netlink.c | 38 +++++++++++++++++++++--------------
net/netlink/genetlink.c | 2 +-
net/xfrm/xfrm_user.c | 2 +-
security/selinux/netlink.c | 2 +-
20 files changed, 47 insertions(+), 34 deletions(-)
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 7f9c4fb..a7b9e9b 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -448,7 +448,7 @@ static int __devinit cn_init(void)
dev->nls = netlink_kernel_create(NETLINK_CONNECTOR,
CN_NETLINK_USERS + 0xf,
- dev->input, THIS_MODULE);
+ dev->input, NULL, THIS_MODULE);
if (!dev->nls)
return -EIO;
diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
index 45646a2..4bf9aa5 100644
--- a/drivers/scsi/scsi_netlink.c
+++ b/drivers/scsi/scsi_netlink.c
@@ -168,7 +168,8 @@ scsi_netlink_init(void)
}
scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT,
- SCSI_NL_GRP_CNT, scsi_nl_rcv, THIS_MODULE);
+ SCSI_NL_GRP_CNT, scsi_nl_rcv, NULL,
+ THIS_MODULE);
if (!scsi_nl_sock) {
printk(KERN_ERR "%s: register of recieve handler failed\n",
__FUNCTION__);
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 10590cd..aabaa05 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1435,7 +1435,7 @@ static __init int iscsi_transport_init(void)
if (err)
goto unregister_conn_class;
- nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx,
+ nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, NULL,
THIS_MODULE);
if (!nls) {
err = -ENOBUFS;
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c
index 8405d21..fe91863 100644
--- a/fs/ecryptfs/netlink.c
+++ b/fs/ecryptfs/netlink.c
@@ -229,7 +229,7 @@ int ecryptfs_init_netlink(void)
ecryptfs_nl_sock = netlink_kernel_create(NETLINK_ECRYPTFS, 0,
ecryptfs_receive_nl_message,
- THIS_MODULE);
+ NULL, THIS_MODULE);
if (!ecryptfs_nl_sock) {
rc = -EIO;
ecryptfs_printk(KERN_ERR, "Failed to create netlink socket\n");
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 0d11f6a..f41688f 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -157,7 +157,10 @@ struct netlink_skb_parms
#define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds)
-extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct module *module);
+extern struct sock *netlink_kernel_create(int unit, unsigned int groups,
+ void (*input)(struct sock *sk, int len),
+ struct mutex *cb_mutex,
+ struct module *module);
extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
extern int netlink_has_listeners(struct sock *sk, unsigned int group);
extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock);
diff --git a/kernel/audit.c b/kernel/audit.c
index 80a7457..4e9d208 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -795,7 +795,7 @@ static int __init audit_init(void)
printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
audit_default ? "enabled" : "disabled");
audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive,
- THIS_MODULE);
+ NULL, THIS_MODULE);
if (!audit_sock)
audit_panic("cannot initialize netlink socket");
else
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 84272ed..82fc179 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -293,7 +293,7 @@ EXPORT_SYMBOL_GPL(add_uevent_var);
static int __init kobject_uevent_init(void)
{
uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL,
- THIS_MODULE);
+ NULL, THIS_MODULE);
if (!uevent_sock) {
printk(KERN_ERR
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 8b84cd4..9411db6 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -302,7 +302,7 @@ static int __init ebt_ulog_init(void)
}
ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS,
- NULL, THIS_MODULE);
+ NULL, NULL, THIS_MODULE);
if (!ebtulognl)
ret = -ENOMEM;
else if ((ret = ebt_register_watcher(&ulog)))
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5266df3..648a7b6 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -972,7 +972,7 @@ void __init rtnetlink_init(void)
panic("rtnetlink_init: cannot allocate rta_buf\n");
rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
- THIS_MODULE);
+ NULL, THIS_MODULE);
if (rtnl == NULL)
panic("rtnetlink_init: cannot initialize rtnetlink\n");
netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 2ee47ba..6962346 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -138,7 +138,7 @@ static int __init dn_rtmsg_init(void)
int rv = 0;
dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX,
- dnrmg_receive_user_sk, THIS_MODULE);
+ dnrmg_receive_user_sk, NULL, THIS_MODULE);
if (dnrmg == NULL) {
printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
return -ENOMEM;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 60f119d..4764310 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -821,7 +821,8 @@ static void nl_fib_input(struct sock *sk, int len)
static void nl_fib_lookup_init(void)
{
- netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
+ netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL,
+ THIS_MODULE);
}
static void fib_disable_ip(struct net_device *dev, int force)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 0148f0e..dbeacd8 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -893,7 +893,7 @@ static int __init inet_diag_init(void)
goto out;
idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv,
- THIS_MODULE);
+ NULL, THIS_MODULE);
if (idiagnl == NULL)
goto out_free_table;
err = 0;
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 0d72693..702d94d 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -668,7 +668,7 @@ static int __init ip_queue_init(void)
netlink_register_notifier(&ipq_nl_notifier);
ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk,
- THIS_MODULE);
+ NULL, THIS_MODULE);
if (ipqnl == NULL) {
printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
goto cleanup_netlink_notifier;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 60dcf86..ebca0d0 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -419,7 +419,7 @@ static int __init ipt_ulog_init(void)
setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
- THIS_MODULE);
+ NULL, THIS_MODULE);
if (!nflognl)
return -ENOMEM;
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index bfae9fd..0004db3 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -657,7 +657,7 @@ static int __init ip6_queue_init(void)
struct proc_dir_entry *proc;
netlink_register_notifier(&ipq_nl_notifier);
- ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk,
+ ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, NULL,
THIS_MODULE);
if (ipqnl == NULL) {
printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index b0da853..8797e69 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -265,7 +265,7 @@ static int __init nfnetlink_init(void)
printk("Netfilter messages via NETLINK v%s.\n", nfversion);
nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX,
- nfnetlink_rcv, THIS_MODULE);
+ nfnetlink_rcv, NULL, THIS_MODULE);
if (!nfnl) {
printk(KERN_ERR "cannot initialize nfnetlink!\n");
return -1;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 6701fbd..0be19b7 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -56,6 +56,7 @@
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/selinux.h>
+#include <linux/mutex.h>
#include <net/sock.h>
#include <net/scm.h>
@@ -76,7 +77,8 @@ struct netlink_sock {
unsigned long state;
wait_queue_head_t wait;
struct netlink_callback *cb;
- spinlock_t cb_lock;
+ struct mutex *cb_mutex;
+ struct mutex cb_def_mutex;
void (*data_ready)(struct sock *sk, int bytes);
struct module *module;
};
@@ -108,6 +110,7 @@ struct netlink_table {
unsigned long *listeners;
unsigned int nl_nonroot;
unsigned int groups;
+ struct mutex *cb_mutex;
struct module *module;
int registered;
};
@@ -370,7 +373,8 @@ static struct proto netlink_proto = {
.obj_size = sizeof(struct netlink_sock),
};
-static int __netlink_create(struct socket *sock, int protocol)
+static int __netlink_create(struct socket *sock, struct mutex *cb_mutex,
+ int protocol)
{
struct sock *sk;
struct netlink_sock *nlk;
@@ -384,7 +388,8 @@ static int __netlink_create(struct socket *sock, int protocol)
sock_init_data(sock, sk);
nlk = nlk_sk(sk);
- spin_lock_init(&nlk->cb_lock);
+ nlk->cb_mutex = cb_mutex ? : &nlk->cb_def_mutex;
+ mutex_init(nlk->cb_mutex);
init_waitqueue_head(&nlk->wait);
sk->sk_destruct = netlink_sock_destruct;
@@ -395,6 +400,7 @@ static int __netlink_create(struct socket *sock, int protocol)
static int netlink_create(struct socket *sock, int protocol)
{
struct module *module = NULL;
+ struct mutex *cb_mutex;
struct netlink_sock *nlk;
int err = 0;
@@ -417,9 +423,10 @@ static int netlink_create(struct socket *sock, int protocol)
if (nl_table[protocol].registered &&
try_module_get(nl_table[protocol].module))
module = nl_table[protocol].module;
+ cb_mutex = nl_table[protocol].cb_mutex;
netlink_unlock_table();
- if ((err = __netlink_create(sock, protocol)) < 0)
+ if ((err = __netlink_create(sock, cb_mutex, protocol)) < 0)
goto out_module;
nlk = nlk_sk(sock->sk);
@@ -443,14 +450,14 @@ static int netlink_release(struct socket *sock)
netlink_remove(sk);
nlk = nlk_sk(sk);
- spin_lock(&nlk->cb_lock);
+ mutex_lock(nlk->cb_mutex);
if (nlk->cb) {
if (nlk->cb->done)
nlk->cb->done(nlk->cb);
netlink_destroy_callback(nlk->cb);
nlk->cb = NULL;
}
- spin_unlock(&nlk->cb_lock);
+ mutex_unlock(nlk->cb_mutex);
/* OK. Socket is unlinked, and, therefore,
no new packets will arrive */
@@ -1266,7 +1273,7 @@ static void netlink_data_ready(struct sock *sk, int len)
struct sock *
netlink_kernel_create(int unit, unsigned int groups,
void (*input)(struct sock *sk, int len),
- struct module *module)
+ struct mutex *cb_mutex, struct module *module)
{
struct socket *sock;
struct sock *sk;
@@ -1281,7 +1288,7 @@ netlink_kernel_create(int unit, unsigned int groups,
if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
return NULL;
- if (__netlink_create(sock, unit) < 0)
+ if (__netlink_create(sock, cb_mutex, unit) < 0)
goto out_sock_release;
if (groups < 32)
@@ -1305,6 +1312,7 @@ netlink_kernel_create(int unit, unsigned int groups,
netlink_table_grab();
nl_table[unit].groups = groups;
nl_table[unit].listeners = listeners;
+ nl_table[unit].cb_mutex = cb_mutex;
nl_table[unit].module = module;
nl_table[unit].registered = 1;
netlink_table_ungrab();
@@ -1347,7 +1355,7 @@ static int netlink_dump(struct sock *sk)
if (!skb)
goto errout;
- spin_lock(&nlk->cb_lock);
+ mutex_lock(nlk->cb_mutex);
cb = nlk->cb;
if (cb == NULL) {
@@ -1358,7 +1366,7 @@ static int netlink_dump(struct sock *sk)
len = cb->dump(skb, cb);
if (len > 0) {
- spin_unlock(&nlk->cb_lock);
+ mutex_unlock(nlk->cb_mutex);
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk, len);
return 0;
@@ -1376,13 +1384,13 @@ static int netlink_dump(struct sock *sk)
if (cb->done)
cb->done(cb);
nlk->cb = NULL;
- spin_unlock(&nlk->cb_lock);
+ mutex_unlock(nlk->cb_mutex);
netlink_destroy_callback(cb);
return 0;
errout_skb:
- spin_unlock(&nlk->cb_lock);
+ mutex_unlock(nlk->cb_mutex);
kfree_skb(skb);
errout:
return err;
@@ -1414,15 +1422,15 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
}
nlk = nlk_sk(sk);
/* A dump is in progress... */
- spin_lock(&nlk->cb_lock);
+ mutex_lock(nlk->cb_mutex);
if (nlk->cb) {
- spin_unlock(&nlk->cb_lock);
+ mutex_unlock(nlk->cb_mutex);
netlink_destroy_callback(cb);
sock_put(sk);
return -EBUSY;
}
nlk->cb = cb;
- spin_unlock(&nlk->cb_lock);
+ mutex_unlock(nlk->cb_mutex);
netlink_dump(sk);
sock_put(sk);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index fac2e7a..6e31234 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -558,7 +558,7 @@ static int __init genl_init(void)
netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV);
genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID,
- genl_rcv, THIS_MODULE);
+ genl_rcv, NULL, THIS_MODULE);
if (genl_sock == NULL)
panic("GENL: Cannot initialize generic netlink\n");
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 2f6671f..a5a5a1a 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2445,7 +2445,7 @@ static int __init xfrm_user_init(void)
printk(KERN_INFO "Initializing XFRM netlink socket\n");
nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX,
- xfrm_netlink_rcv, THIS_MODULE);
+ xfrm_netlink_rcv, NULL, THIS_MODULE);
if (nlsk == NULL)
return -ENOMEM;
rcu_assign_pointer(xfrm_nl, nlsk);
diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c
index 33f2e06..f49046d 100644
--- a/security/selinux/netlink.c
+++ b/security/selinux/netlink.c
@@ -104,7 +104,7 @@ void selnl_notify_policyload(u32 seqno)
static int __init selnl_init(void)
{
- selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL,
+ selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL, NULL,
THIS_MODULE);
if (selnl == NULL)
panic("SELinux: Cannot create netlink socket.");
^ permalink raw reply related [flat|nested] 10+ messages in thread* [RTNETLINK 03/04]: Remove unnecessary locking in dump callbacks
2007-04-16 7:51 [NETLINK 00/04]: Consistent dump callback locking Patrick McHardy
2007-04-16 7:51 ` [NETLINK 01/04]: Switch cb_lock spinlock to mutex and allow to override it Patrick McHardy
2007-04-16 7:51 ` [RTNETLINK 02/04]: Hold rtnl_mutex during netlink dump callbacks Patrick McHardy
@ 2007-04-16 7:51 ` Patrick McHardy
2007-04-17 0:01 ` David Miller
2007-04-16 7:51 ` [NET_SCHED 04/04]: Eliminate qdisc_tree_lock Patrick McHardy
3 siblings, 1 reply; 10+ messages in thread
From: Patrick McHardy @ 2007-04-16 7:51 UTC (permalink / raw)
To: davem; +Cc: netdev, Patrick McHardy
[RTNETLINK]: Remove unnecessary locking in dump callbacks
Since we're now holding the rtnl during the entire dump operation, we can
remove additional locking for rtnl protected data. This patch does that
for all simple cases (dev_base_lock for dev_base walking, RCU protection
for FIB rule dumping).
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
commit 49310148887f3d7a6b78809b0a771628be4ef0a8
tree 901a913e7b2570765df8aeb5b628c3783ee4934a
parent 8d28b6a3d70b6c98bba6d1e60e2ecf8eb9fbbb2b
author Patrick McHardy <kaber@trash.net> Mon, 16 Apr 2007 09:05:32 +0200
committer Patrick McHardy <kaber@trash.net> Mon, 16 Apr 2007 09:05:32 +0200
net/bridge/br_netlink.c | 2 --
net/core/fib_rules.c | 4 +---
net/core/rtnetlink.c | 2 --
net/decnet/dn_dev.c | 3 ---
net/ipv4/devinet.c | 12 ++----------
net/ipv6/addrconf.c | 2 --
6 files changed, 3 insertions(+), 22 deletions(-)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 5e84ade..35facc0 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -109,7 +109,6 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct net_device *dev;
int idx;
- read_lock(&dev_base_lock);
for (dev = dev_base, idx = 0; dev; dev = dev->next) {
/* not a bridge port */
if (dev->br_port == NULL || idx < cb->args[0])
@@ -122,7 +121,6 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
skip:
++idx;
}
- read_unlock(&dev_base_lock);
cb->args[0] = idx;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index cb2dae1..8c5474e 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -495,8 +495,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
int idx = 0;
struct fib_rule *rule;
- rcu_read_lock();
- list_for_each_entry_rcu(rule, ops->rules_list, list) {
+ list_for_each_entry(rule, ops->rules_list, list) {
if (idx < cb->args[1])
goto skip;
@@ -507,7 +506,6 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
skip:
idx++;
}
- rcu_read_unlock();
cb->args[1] = idx;
rules_ops_put(ops);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 62f5c7f..bc95fab 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -543,7 +543,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
int s_idx = cb->args[0];
struct net_device *dev;
- read_lock(&dev_base_lock);
for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
if (idx < s_idx)
continue;
@@ -552,7 +551,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0)
break;
}
- read_unlock(&dev_base_lock);
cb->args[0] = idx;
return skb->len;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 61be2ca..5c2a995 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -799,7 +799,6 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
skip_ndevs = cb->args[0];
skip_naddr = cb->args[1];
- read_lock(&dev_base_lock);
for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
if (idx < skip_ndevs)
continue;
@@ -824,8 +823,6 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
}
}
done:
- read_unlock(&dev_base_lock);
-
cb->args[0] = idx;
cb->args[1] = dn_idx;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 9bdc795..088888d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1182,17 +1182,13 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
int s_ip_idx, s_idx = cb->args[0];
s_ip_idx = ip_idx = cb->args[1];
- read_lock(&dev_base_lock);
for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
if (idx < s_idx)
continue;
if (idx > s_idx)
s_ip_idx = 0;
- rcu_read_lock();
- if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
- rcu_read_unlock();
+ if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
continue;
- }
for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
ifa = ifa->ifa_next, ip_idx++) {
@@ -1200,16 +1196,12 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
continue;
if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
- RTM_NEWADDR, NLM_F_MULTI) <= 0) {
- rcu_read_unlock();
+ RTM_NEWADDR, NLM_F_MULTI) <= 0)
goto done;
- }
}
- rcu_read_unlock();
}
done:
- read_unlock(&dev_base_lock);
cb->args[0] = idx;
cb->args[1] = ip_idx;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index adb171e..ed65456 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3222,7 +3222,6 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
s_idx = cb->args[0];
s_ip_idx = ip_idx = cb->args[1];
- read_lock(&dev_base_lock);
for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
if (idx < s_idx)
@@ -3284,7 +3283,6 @@ done:
read_unlock_bh(&idev->lock);
in6_dev_put(idev);
}
- read_unlock(&dev_base_lock);
cb->args[0] = idx;
cb->args[1] = ip_idx;
return skb->len;
^ permalink raw reply related [flat|nested] 10+ messages in thread* [NET_SCHED 04/04]: Eliminate qdisc_tree_lock
2007-04-16 7:51 [NETLINK 00/04]: Consistent dump callback locking Patrick McHardy
` (2 preceding siblings ...)
2007-04-16 7:51 ` [RTNETLINK 03/04]: Remove unnecessary locking in " Patrick McHardy
@ 2007-04-16 7:51 ` Patrick McHardy
2007-04-17 0:06 ` David Miller
3 siblings, 1 reply; 10+ messages in thread
From: Patrick McHardy @ 2007-04-16 7:51 UTC (permalink / raw)
To: davem; +Cc: netdev, Patrick McHardy
[NET_SCHED]: Eliminate qdisc_tree_lock
Since we're now holding the rtnl during the entire dump operation, we
can remove qdisc_tree_lock, whose only purpose is to protect dump
callbacks from concurrent changes to the qdisc tree.
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
commit cc965189583f5a980e1fc6acaf708c47d3661a22
tree faede9cc5c6b86b39517c56c71caa6942e748f20
parent 49310148887f3d7a6b78809b0a771628be4ef0a8
author Patrick McHardy <kaber@trash.net> Mon, 16 Apr 2007 09:05:39 +0200
committer Patrick McHardy <kaber@trash.net> Mon, 16 Apr 2007 09:05:39 +0200
include/net/pkt_sched.h | 2 --
net/sched/cls_api.c | 2 --
net/sched/sch_api.c | 22 +++-------------------
net/sched/sch_generic.c | 29 +++++++----------------------
4 files changed, 10 insertions(+), 45 deletions(-)
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index b2cc9a8..5754d53 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -13,8 +13,6 @@ struct qdisc_walker
int (*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *);
};
-extern rwlock_t qdisc_tree_lock;
-
#define QDISC_ALIGNTO 32
#define QDISC_ALIGN(len) (((len) + QDISC_ALIGNTO-1) & ~(QDISC_ALIGNTO-1))
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ca3da50..ebf94ed 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -400,7 +400,6 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
return skb->len;
- read_lock(&qdisc_tree_lock);
if (!tcm->tcm_parent)
q = dev->qdisc_sleeping;
else
@@ -457,7 +456,6 @@ errout:
if (cl)
cops->put(q, cl);
out:
- read_unlock(&qdisc_tree_lock);
dev_put(dev);
return skb->len;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 2e863bd..0ce6914 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -191,7 +191,7 @@ int unregister_qdisc(struct Qdisc_ops *qops)
(root qdisc, all its children, children of children etc.)
*/
-static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
+struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
struct Qdisc *q;
@@ -202,16 +202,6 @@ static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
return NULL;
}
-struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
-{
- struct Qdisc *q;
-
- read_lock(&qdisc_tree_lock);
- q = __qdisc_lookup(dev, handle);
- read_unlock(&qdisc_tree_lock);
- return q;
-}
-
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
unsigned long cl;
@@ -405,7 +395,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
if (n == 0)
return;
while ((parentid = sch->parent)) {
- sch = __qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
+ sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
cops = sch->ops->cl_ops;
if (cops->qlen_notify) {
cl = cops->get(sch, parentid);
@@ -905,7 +895,6 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
continue;
if (idx > s_idx)
s_q_idx = 0;
- read_lock(&qdisc_tree_lock);
q_idx = 0;
list_for_each_entry(q, &dev->qdisc_list, list) {
if (q_idx < s_q_idx) {
@@ -913,13 +902,10 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
continue;
}
if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
- read_unlock(&qdisc_tree_lock);
+ cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
goto done;
- }
q_idx++;
}
- read_unlock(&qdisc_tree_lock);
}
done:
@@ -1142,7 +1128,6 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
t = 0;
- read_lock(&qdisc_tree_lock);
list_for_each_entry(q, &dev->qdisc_list, list) {
if (t < s_t || !q->ops->cl_ops ||
(tcm->tcm_parent &&
@@ -1164,7 +1149,6 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
break;
t++;
}
- read_unlock(&qdisc_tree_lock);
cb->args[0] = t;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 52eb343..1894eb7 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -36,34 +36,23 @@
/* Main transmission queue. */
-/* Main qdisc structure lock.
-
- However, modifications
- to data, participating in scheduling must be additionally
- protected with dev->queue_lock spinlock.
-
- The idea is the following:
- - enqueue, dequeue are serialized via top level device
- spinlock dev->queue_lock.
- - tree walking is protected by read_lock(qdisc_tree_lock)
- and this lock is used only in process context.
- - updates to tree are made only under rtnl semaphore,
- hence this lock may be made without local bh disabling.
-
- qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
+/* Modifications to data participating in scheduling must be protected with
+ * dev->queue_lock spinlock.
+ *
+ * The idea is the following:
+ * - enqueue, dequeue are serialized via top level device
+ * spinlock dev->queue_lock.
+ * - updates to tree and tree walking are only done under the rtnl mutex.
*/
-DEFINE_RWLOCK(qdisc_tree_lock);
void qdisc_lock_tree(struct net_device *dev)
{
- write_lock(&qdisc_tree_lock);
spin_lock_bh(&dev->queue_lock);
}
void qdisc_unlock_tree(struct net_device *dev)
{
spin_unlock_bh(&dev->queue_lock);
- write_unlock(&qdisc_tree_lock);
}
/*
@@ -528,15 +517,11 @@ void dev_activate(struct net_device *dev)
printk(KERN_INFO "%s: activation failed\n", dev->name);
return;
}
- write_lock(&qdisc_tree_lock);
list_add_tail(&qdisc->list, &dev->qdisc_list);
- write_unlock(&qdisc_tree_lock);
} else {
qdisc = &noqueue_qdisc;
}
- write_lock(&qdisc_tree_lock);
dev->qdisc_sleeping = qdisc;
- write_unlock(&qdisc_tree_lock);
}
if (!netif_carrier_ok(dev))
^ permalink raw reply related [flat|nested] 10+ messages in thread