All of lore.kernel.org
 help / color / mirror / Atom feed
From: menage@google.com (Paul Menage)
To: a.p.zijlstra@chello.nl, maxk@qualcomm.com, pj@sgi.com,
	vegard.nossum@gmail.com
Cc: linux-kernel@vger.kernel.org
Subject: [RFC][PATCH] CGroups: Add a per-subsystem hierarchy lock
Date: Fri, 27 Jun 2008 00:59:12 -0700 (PDT)	[thread overview]
Message-ID: <20080627075912.92AE43D6907@localhost> (raw)


This patch adds a hierarchy_mutex to the cgroup_subsys object that
protects changes to the hierarchy observed by that subsystem. It is
taken by the cgroup subsystem for the following operations:

- linking a cgroup into that subsystem's cgroup tree
- unlinking a cgroup from that subsystem's cgroup tree
- moving the subsystem to/from a hierarchy (including across the
  bind() callback)

Thus if the subsystem holds its own hierarchy_mutex, it can safely
traverse its ts own hierarchy.

This patch also changes the cpuset hotplug code to take
cpuset_subsys.hierarchy_mutex rather than cgroup_mutex, removing a
cyclical locking dependancy between cpu_hotplug.lock and cgroup_mutex.

---

This is just a first cut - a full version of this patch would also
make use of the hierarchy_mutex at other places in cpuset.c that use
the cgroup hierarchy, but I wanted to see what people thought of the
concept. It appears to keep lockdep quiet.

 include/linux/cgroup.h |   10 ++++++++--
 kernel/cgroup.c        |   35 ++++++++++++++++++++++++++++++++++-
 kernel/cpuset.c        |    4 ++--
 3 files changed, 44 insertions(+), 5 deletions(-)

Index: lockfix-2.6.26-rc5-mm3/include/linux/cgroup.h
===================================================================
--- lockfix-2.6.26-rc5-mm3.orig/include/linux/cgroup.h
+++ lockfix-2.6.26-rc5-mm3/include/linux/cgroup.h
@@ -319,9 +319,15 @@ struct cgroup_subsys {
 #define MAX_CGROUP_TYPE_NAMELEN 32
 	const char *name;
 
-	/* Protected by RCU */
-	struct cgroupfs_root *root;
+	/*
+	 * Protects sibling/children links of cgroups in this
+	 * hierarchy, plus protects which hierarchy (or none) the
+	 * subsystem is a part of (i.e. root/sibling)
+	 */
+	struct mutex hierarchy_mutex;
 
+	/* Protected by RCU, this->hierarchy_mutex and cgroup_lock() */
+	struct cgroupfs_root *root;
 	struct list_head sibling;
 
 	void *private;
Index: lockfix-2.6.26-rc5-mm3/kernel/cgroup.c
===================================================================
--- lockfix-2.6.26-rc5-mm3.orig/kernel/cgroup.c
+++ lockfix-2.6.26-rc5-mm3/kernel/cgroup.c
@@ -728,23 +728,26 @@ static int rebind_subsystems(struct cgro
 			BUG_ON(cgrp->subsys[i]);
 			BUG_ON(!dummytop->subsys[i]);
 			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
+			mutex_lock(&ss->hierarchy_mutex);
 			cgrp->subsys[i] = dummytop->subsys[i];
 			cgrp->subsys[i]->cgroup = cgrp;
 			list_add(&ss->sibling, &root->subsys_list);
 			rcu_assign_pointer(ss->root, root);
 			if (ss->bind)
 				ss->bind(ss, cgrp);
-
+			mutex_unlock(&ss->hierarchy_mutex);
 		} else if (bit & removed_bits) {
 			/* We're removing this subsystem */
 			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
 			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
+			mutex_lock(&ss->hierarchy_mutex);
 			if (ss->bind)
 				ss->bind(ss, dummytop);
 			dummytop->subsys[i]->cgroup = dummytop;
 			cgrp->subsys[i] = NULL;
 			rcu_assign_pointer(subsys[i]->root, &rootnode);
 			list_del(&ss->sibling);
+			mutex_unlock(&ss->hierarchy_mutex);
 		} else if (bit & final_bits) {
 			/* Subsystem state should already exist */
 			BUG_ON(!cgrp->subsys[i]);
@@ -2291,6 +2294,29 @@ static void init_cgroup_css(struct cgrou
 	cgrp->subsys[ss->subsys_id] = css;
 }
 
+static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
+{
+	/* We need to take each hierarchy_mutex in a consistent order */
+	int i;
+
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		struct cgroup_subsys *ss = subsys[i];
+		if (ss->root == root)
+			mutex_lock_nested(&ss->hierarchy_mutex, i);
+	}
+}
+
+static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
+{
+	int i;
+
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		struct cgroup_subsys *ss = subsys[i];
+		if (ss->root == root)
+			mutex_unlock(&ss->hierarchy_mutex);
+	}
+}
+
 /*
  * cgroup_create - create a cgroup
  * @parent: cgroup that will be parent of the new cgroup
@@ -2342,8 +2368,10 @@ static long cgroup_create(struct cgroup 
 		init_cgroup_css(css, ss, cgrp);
 	}
 
+	cgroup_lock_hierarchy(root);
 	list_add(&cgrp->sibling, &cgrp->parent->children);
 	root->number_of_cgroups++;
+	cgroup_unlock_hierarchy(root);
 
 	err = cgroup_create_dir(cgrp, dentry, mode);
 	if (err < 0)
@@ -2460,8 +2488,12 @@ static int cgroup_rmdir(struct inode *un
 	if (!list_empty(&cgrp->release_list))
 		list_del(&cgrp->release_list);
 	spin_unlock(&release_list_lock);
+
+	cgroup_lock_hierarchy(root);
 	/* delete my sibling from parent->children */
 	list_del(&cgrp->sibling);
+	cgroup_unlock_hierarchy(root);
+
 	spin_lock(&cgrp->dentry->d_lock);
 	d = dget(cgrp->dentry);
 	cgrp->dentry = NULL;
@@ -2504,6 +2536,7 @@ static void __init cgroup_init_subsys(st
 	 * need to invoke fork callbacks here. */
 	BUG_ON(!list_empty(&init_task.tasks));
 
+	mutex_init(&ss->hierarchy_mutex);
 	ss->active = 1;
 }
 
Index: lockfix-2.6.26-rc5-mm3/kernel/cpuset.c
===================================================================
--- lockfix-2.6.26-rc5-mm3.orig/kernel/cpuset.c
+++ lockfix-2.6.26-rc5-mm3/kernel/cpuset.c
@@ -1929,7 +1929,7 @@ static void scan_for_empty_cpusets(const
 
 static void common_cpu_mem_hotplug_unplug(void)
 {
-	cgroup_lock();
+	mutex_lock(&cpuset_subsys.hierarchy_mutex);
 
 	top_cpuset.cpus_allowed = cpu_online_map;
 	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
@@ -1941,7 +1941,7 @@ static void common_cpu_mem_hotplug_unplu
 	 */
 	rebuild_sched_domains();
 
-	cgroup_unlock();
+	mutex_unlock(&cpuset_subsys.hierarchy_mutex);
 }
 
 /*

             reply	other threads:[~2008-06-27  7:59 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-06-27  7:59 Paul Menage [this message]
2008-06-28  0:02 ` [RFC][PATCH] CGroups: Add a per-subsystem hierarchy lock Paul Menage
2008-07-02  3:55   ` Max Krasnyansky
2008-07-02 22:31     ` Paul Menage
2008-07-03  4:46       ` Max Krasnyansky
2008-07-10 17:26         ` Max Krasnyansky
2008-07-11  0:32         ` Paul Menage
2008-07-12 22:48           ` Max Krasnyansky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080627075912.92AE43D6907@localhost \
    --to=menage@google.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maxk@qualcomm.com \
    --cc=pj@sgi.com \
    --cc=vegard.nossum@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.