Linux Container Development
 help / color / mirror / Atom feed
From: "Denis V. Lunev" <den-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
To: ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org
Cc: containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org
Subject: [PATCH] [NETNS45] network namespace locking rules
Date: Fri, 28 Sep 2007 18:36:54 +0400	[thread overview]
Message-ID: <20070928143654.GA14129@iris.sw.ru> (raw)

The current locking for the network namespace list and its initialization
is broken: for_each_net is called with only rtnl_lock held in
register_netdevice_notifier.

Locking:
    net_mutex -> rtnl_lock() -> dev_base_lock
Reasoning:
  - net_mutex serializes the addition/removal of
    subsystems/modules and the creation/destruction of network
    namespaces as a whole
  - the loopback device is one such subsystem and it takes
    rtnl_lock internally
  - the per-namespace RTNL netlink socket requires iterating over the
    namespace list inside rtnl_unlock, which is called with net_mutex held
Summary:
    net_namespace_list is guarded by both rtnl_lock and net_mutex and
    can be safely iterated while holding either of them

Signed-off-by: Denis V. Lunev <den-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>

--------

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index b8186ea..2845992 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -174,8 +174,21 @@ static inline void release_net(struct net *net)
 	atomic_dec(&net->use_count);
 }
 
-extern void net_lock(void);
-extern void net_unlock(void);
+/*
+ * Locking:
+ *     net_mutex -> rtnl_lock() -> dev_base_lock
+ * Reasoning:
+ *   - net_mutex serializes the addition/removal of
+ *     subsystems/modules and the creation/destruction of network
+ *     namespaces as a whole
+ *   - the loopback device is one such subsystem and it takes
+ *     rtnl_lock internally
+ *   - the per-namespace RTNL netlink socket requires iterating over the
+ *     namespace list inside rtnl_unlock, which is called with net_mutex held
+ * Summary:
+ *     net_namespace_list is guarded by both rtnl_lock and net_mutex and
+ *     can be safely iterated while holding either of them
+ */
 
 #define for_each_net(VAR)				\
 	list_for_each_entry(VAR, &net_namespace_list, list)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 026e39a..07682a2 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -10,6 +10,7 @@
 
 /*
  *	Our network namespace constructor/destructor lists
+ *	Locking rules are described in detail in include/net/net_namespace.h
  */
 
 static LIST_HEAD(pernet_list);
@@ -24,16 +25,6 @@ static struct kmem_cache *net_cachep;
 struct net init_net;
 EXPORT_SYMBOL_GPL(init_net);
 
-void net_lock(void)
-{
-	mutex_lock(&net_list_mutex);
-}
-
-void net_unlock(void)
-{
-	mutex_unlock(&net_list_mutex);
-}
-
 static struct net *net_alloc(void)
 {
 	return kmem_cache_alloc(net_cachep, GFP_KERNEL);
@@ -71,9 +62,9 @@ static void cleanup_net(struct work_struct *work)
 	mutex_lock(&net_mutex);
 
 	/* Don't let anyone else find us. */
-	net_lock();
+	rtnl_lock();
 	list_del(&net->list);
-	net_unlock();
+	rtnl_unlock();
 
 	/* Run all of the network namespace exit methods */
 	pernet_count = 0;
@@ -193,9 +184,9 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 	if (err)
 		goto out_unlock;
 
-	net_lock();
+	rtnl_lock();
 	list_add_tail(&new_net->list, &net_namespace_list);
-	net_unlock();
+	rtnl_unlock();
 
 
 out_unlock:
@@ -220,14 +211,13 @@ static int __init net_ns_init(void)
 	mutex_lock(&net_mutex);
 	err = setup_net(&init_net);
 
-	net_lock();
+	rtnl_lock();
 	list_add_tail(&init_net.list, &net_namespace_list);
-	net_unlock();
+	rtnl_unlock();
 
 	mutex_unlock(&net_mutex);
 	if (err)
 		panic("Could not setup the initial network namespace");
-
 	return 0;
 }
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 82ebc23..e610313 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -73,16 +73,24 @@ void __rtnl_unlock(void)
 void rtnl_unlock(void)
 {
 	struct net *net;
-	mutex_unlock(&rtnl_mutex);
-	
-	net_lock();
+
+retry:
 	for_each_net(net) {
 		struct sock *rtnl = net->rtnl;
+
+		if (rtnl == NULL || rtnl->sk_receive_queue.qlen == 0)
+			continue;
+
+		get_net(net);
+		mutex_unlock(&rtnl_mutex);
 		if (rtnl && rtnl->sk_receive_queue.qlen)
 			rtnl->sk_data_ready(rtnl, 0);
-	}
-	net_unlock();
+		mutex_lock(&rtnl_mutex);
+		put_net(net);
 
+		goto retry;
+	}
+	mutex_unlock(&rtnl_mutex);
 	netdev_run_todo();
 }

             reply	other threads:[~2007-09-28 14:36 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-09-28 14:36 Denis V. Lunev [this message]
     [not found] ` <20070928143654.GA14129-aPCOdVxUTlgvJsYlp49lxw@public.gmane.org>
2007-09-28 15:10   ` [PATCH] [NETNS45] network namespace locking rules Daniel Lezcano
     [not found]     ` <46FD196C.6080309-GANU6spQydw@public.gmane.org>
2007-09-28 16:33       ` Denis V. Lunev
2007-09-28 16:54   ` Eric W. Biederman
     [not found]     ` <m1d4w2d92m.fsf-T1Yj925okcoyDheHMi7gv2pdwda3JcWeAL8bYrjMMd8@public.gmane.org>
2007-09-28 17:02       ` Denis V. Lunev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070928143654.GA14129@iris.sw.ru \
    --to=den-gefaqzzx7r8dnm+yrofe0a@public.gmane.org \
    --cc=containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org \
    --cc=ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox