All of lore.kernel.org
 help / color / mirror / Atom feed
From: Li Zefan <lizf-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
To: "akpm >> Andrew Morton"
	<akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org,
	Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>,
	LKML <linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>,
	Stephane Eranian
	<eranian-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Subject: [PATCH 2/7] cgroups: Allow to bind a subsystem to a cgroup hierarchy
Date: Fri, 22 Oct 2010 16:09:56 +0800	[thread overview]
Message-ID: <4CC146D4.7030009@cn.fujitsu.com> (raw)
In-Reply-To: <4CC146A4.9090505-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>

Stephane posted a patchset to add perf_cgroup subsystem, so perf can
be used to monitor all threads belonging to a cgroup.

But if you have already mounted a cgroup hierarchy without perf_cgroup
and the hierarchy has sub-cgroups, you can't bind perf_cgroup to it,
and thus you're not able to use per-cgroup perf feature.

This patchset alleviates the pain, so that a subsystem can be bound/unbound
to/from a hierarchy which has sub-cgroups in it.

For a cgroup subsystem to become bindable, the can_bind flag of
struct cgroup_subsys should be set, and a ->bind() callback should be
provided if necessary.

But due to some constraints, not all subsystems can take advantage of
this patch. For example, we can't decide a cgroup's cpuset.mems and
cpuset.cpus automatically, so cpuset is not bindable.

Usage:

# mount -t cgroup -o cpuset xxx /mnt
# mkdir /mnt/tmp
# echo $$ > /mnt/tmp/tasks

(assume cpuacct is bindable, and we add cpuacct to the hierarchy)
# mount -o remount,cpuset,cpuacct xxx /mnt

Signed-off-by: Li Zefan <lizf-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
---
 include/linux/cgroup.h |    5 +
 kernel/cgroup.c        |  225 ++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 187 insertions(+), 43 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e23ded6..49369ff 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -490,6 +490,11 @@ struct cgroup_subsys {
 	 * (not available in early_init time.)
 	 */
 	unsigned int use_id:1;
+	/*
+	 * Indicate if this subsystem can be bound/unbound to/from a cgroup
+	 * hierarchy which has child cgroups.
+	 */
+	unsigned int can_bind:1;
 
 #define MAX_CGROUP_TYPE_NAMELEN 32
 	const char *name;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 6c36750..46df5f8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -57,6 +57,7 @@
 #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
 #include <linux/eventfd.h>
 #include <linux/poll.h>
+#include <linux/bitops.h>
 
 #include <asm/atomic.h>
 
@@ -870,18 +871,13 @@ static void remove_dir(struct dentry *d)
 
 static void cgroup_clear_directory(struct dentry *dentry)
 {
-	struct list_head *node;
+	struct dentry *d, *tmp;
 
 	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
 	spin_lock(&dcache_lock);
-	node = dentry->d_subdirs.next;
-	while (node != &dentry->d_subdirs) {
-		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
-		list_del_init(node);
-		if (d->d_inode) {
-			/* This should never be called on a cgroup
-			 * directory with child cgroups */
-			BUG_ON(d->d_inode->i_mode & S_IFDIR);
+	list_for_each_entry_safe(d, tmp, &dentry->d_subdirs, d_u.d_child) {
+		if (d->d_inode && !(d->d_inode->i_mode & S_IFDIR)) {
+			list_del_init(&d->d_u.d_child);
 			d = dget_locked(d);
 			spin_unlock(&dcache_lock);
 			d_delete(d);
@@ -889,7 +885,6 @@ static void cgroup_clear_directory(struct dentry *dentry)
 			dput(d);
 			spin_lock(&dcache_lock);
 		}
-		node = dentry->d_subdirs.next;
 	}
 	spin_unlock(&dcache_lock);
 }
@@ -934,6 +929,145 @@ void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
 	css_put(css);
 }
 
+static void init_cgroup_css(struct cgroup_subsys_state *css,
+			       struct cgroup_subsys *ss,
+			       struct cgroup *cgrp)
+{
+	css->cgroup = cgrp;
+	atomic_set(&css->refcnt, 1);
+	css->flags = 0;
+	css->id = NULL;
+	if (cgrp == dummytop)
+		set_bit(CSS_ROOT, &css->flags);
+	BUG_ON(cgrp->subsys[ss->subsys_id]);
+	cgrp->subsys[ss->subsys_id] = css;
+}
+
+/*
+ * cgroup_walk_hierarchy - iterate through a cgroup hierarchy
+ * @process_cgroup: callback called on each cgroup in the hierarchy
+ * @data: will be passed to @process_cgroup
+ * @top_cgrp: the root cgroup of the hierarchy
+ *
+ * For such a hierarchy:
+ *        a1        c1
+ *      /         /
+ * Root - a2 - b1 - c2
+ *      \
+ *        a3
+ *
+ * The iterating order is: a1, a2, b1, c1, c2, a3. So a parent will be
+ * processed before its children.
+ */
+static int cgroup_walk_hierarchy(int (*process_cgroup)(struct cgroup *, void *),
+				 void *data, struct cgroup *top_cgrp)
+{
+	struct cgroup *parent = top_cgrp;
+	struct cgroup *child;
+	struct list_head *node;
+	int ret;
+
+	node = parent->children.next;
+repeat:
+	while (node != &parent->children) {
+		child = list_entry(node, struct cgroup, sibling);
+
+		ret = process_cgroup(child, data);
+		if (ret)
+			return ret;
+
+		if (!list_empty(&child->children)) {
+			parent = child;
+			node = parent->children.next;
+			goto repeat;
+		} else
+			node = node->next;
+	}
+
+	if (parent != top_cgrp) {
+		child = parent;
+		parent = child->parent;
+		node = child->sibling.next;
+		goto repeat;
+	}
+
+	return 0;
+}
+
+static int hierarchy_attach_css_failed(struct cgroup *cgrp, void *data)
+{
+	unsigned long added_bits = (unsigned long)data;
+	int i;
+
+	for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT)
+		if (cgrp->subsys[i])
+			subsys[i]->destroy(subsys[i], cgrp);
+	/* Return 0 so the walk continues and every cgroup is rolled back. */
+	return 0;
+}
+
+static int hierarchy_attach_css(struct cgroup *cgrp, void *data)
+{
+	unsigned long added_bits = (unsigned long)data;
+	int i;
+	int ret = 0;
+
+	for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT) {
+		struct cgroup_subsys_state *css;
+		struct cgroup_subsys *ss = subsys[i];
+
+		css = ss->create(ss, cgrp);
+		if (IS_ERR(css)) {
+			ret = PTR_ERR(css);
+			break;
+		}
+		init_cgroup_css(css, ss, cgrp);
+
+		if (ss->use_id) {
+			ret = alloc_css_id(ss, cgrp->parent, cgrp);
+			if (ret)
+				break;
+		}
+	}
+
+	if (ret)
+		cgroup_walk_hierarchy(hierarchy_attach_css_failed, data,
+				      cgrp->top_cgroup);
+	return ret;
+}
+
+static int hierarchy_update_css_sets(struct cgroup *cgrp, void *data)
+{
+	unsigned long added_bits = (unsigned long)data;
+	int i;
+	struct cg_cgroup_link *link;
+
+	write_lock(&css_set_lock);
+	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
+		struct css_set *cg = link->cg;
+		struct hlist_head *hhead;
+
+		for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT)
+			cg->subsys[i] = cgrp->subsys[i];
+
+		/* rehash */
+		hlist_del(&cg->hlist);
+		hhead = css_set_hash(cg->subsys);
+		hlist_add_head(&cg->hlist, hhead);
+	}
+	write_unlock(&css_set_lock);
+
+	return 0;
+}
+
+static int hierarchy_populate_dir(struct cgroup *cgrp, void *data)
+{
+	mutex_lock_nested(&cgrp->dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+	cgroup_populate_dir(cgrp);
+	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
+	return 0;
+}
+
 /*
  * Call with cgroup_mutex held. Drops reference counts on modules, including
  * any duplicate ones that parse_cgroupfs_options took. If this function
@@ -945,36 +1079,53 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 	unsigned long added_bits, removed_bits;
 	struct cgroup *cgrp = &root->top_cgroup;
 	int i;
+	int err;
 
 	BUG_ON(!mutex_is_locked(&cgroup_mutex));
 
 	removed_bits = root->actual_subsys_bits & ~final_bits;
 	added_bits = final_bits & ~root->actual_subsys_bits;
+
 	/* Check that any added subsystems are currently free */
-	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-		unsigned long bit = 1UL << i;
-		struct cgroup_subsys *ss = subsys[i];
-		if (!(bit & added_bits))
-			continue;
+	for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT) {
 		/*
 		 * Nobody should tell us to do a subsys that doesn't exist:
 		 * parse_cgroupfs_options should catch that case and refcounts
 		 * ensure that subsystems won't disappear once selected.
 		 */
-		BUG_ON(ss == NULL);
-		if (ss->root != &rootnode) {
+		BUG_ON(subsys[i] == NULL);
+		if (subsys[i]->root != &rootnode) {
 			/* Subsystem isn't free */
 			return -EBUSY;
 		}
 	}
 
-	/* Currently we don't handle adding/removing subsystems when
-	 * any child cgroups exist. This is theoretically supportable
-	 * but involves complex error handling, so it's being left until
-	 * later */
-	if (root->number_of_cgroups > 1)
+	/* removing will be supported later */
+	if (root->number_of_cgroups > 1 && removed_bits)
 		return -EBUSY;
 
+	if (root->number_of_cgroups > 1) {
+		for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT)
+			if (!subsys[i]->can_bind)
+				return -EBUSY;
+	}
+	for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT) {
+		BUG_ON(cgrp->subsys[i]);
+		BUG_ON(!dummytop->subsys[i]);
+		BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
+
+		cgrp->subsys[i] = dummytop->subsys[i];
+		cgrp->subsys[i]->cgroup = cgrp;
+	}
+
+	err = cgroup_walk_hierarchy(hierarchy_attach_css,
+				    (void *)added_bits, cgrp);
+	if (err)
+		goto failed;
+
+	cgroup_walk_hierarchy(hierarchy_update_css_sets,
+			      (void *)added_bits, cgrp);
+
 	/* Process each subsystem */
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
@@ -982,12 +1133,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 		if (bit & added_bits) {
 			/* We're binding this subsystem to this hierarchy */
 			BUG_ON(ss == NULL);
-			BUG_ON(cgrp->subsys[i]);
-			BUG_ON(!dummytop->subsys[i]);
-			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
 			mutex_lock(&ss->hierarchy_mutex);
-			cgrp->subsys[i] = dummytop->subsys[i];
-			cgrp->subsys[i]->cgroup = cgrp;
 			list_move(&ss->sibling, &root->subsys_list);
 			ss->root = root;
 			if (ss->bind)
@@ -1000,10 +1146,10 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
 			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
 			mutex_lock(&ss->hierarchy_mutex);
-			if (ss->bind)
-				ss->bind(ss, dummytop);
 			dummytop->subsys[i]->cgroup = dummytop;
 			cgrp->subsys[i] = NULL;
+			if (ss->bind)
+				ss->bind(ss, dummytop);
 			subsys[i]->root = &rootnode;
 			list_move(&ss->sibling, &rootnode.subsys_list);
 			mutex_unlock(&ss->hierarchy_mutex);
@@ -1030,6 +1176,12 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 	synchronize_rcu();
 
 	return 0;
+
+failed:
+	for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT)
+		cgrp->subsys[i] = NULL;
+
+	return err;
 }
 
 static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
@@ -1285,6 +1437,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 
 	/* (re)populate subsystem files */
 	cgroup_populate_dir(cgrp);
+	cgroup_walk_hierarchy(hierarchy_populate_dir, NULL, cgrp);
 
 	if (opts.release_agent)
 		strcpy(root->release_agent_path, opts.release_agent);
@@ -3313,20 +3466,6 @@ static int cgroup_populate_dir(struct cgroup *cgrp)
 	return 0;
 }
 
-static void init_cgroup_css(struct cgroup_subsys_state *css,
-			       struct cgroup_subsys *ss,
-			       struct cgroup *cgrp)
-{
-	css->cgroup = cgrp;
-	atomic_set(&css->refcnt, 1);
-	css->flags = 0;
-	css->id = NULL;
-	if (cgrp == dummytop)
-		set_bit(CSS_ROOT, &css->flags);
-	BUG_ON(cgrp->subsys[ss->subsys_id]);
-	cgrp->subsys[ss->subsys_id] = css;
-}
-
 static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
 {
 	/* We need to take each hierarchy_mutex in a consistent order */
-- 
1.7.0.1

WARNING: multiple messages have this Message-ID (diff)
From: Li Zefan <lizf@cn.fujitsu.com>
To: "akpm >> Andrew Morton" <akpm@linux-foundation.org>
Cc: Paul Menage <menage@google.com>,
	Stephane Eranian <eranian@google.com>,
	LKML <linux-kernel@vger.kernel.org>,
	containers@lists.linux-foundation.org
Subject: [PATCH 2/7] cgroups: Allow to bind a subsystem to a cgroup hierarchy
Date: Fri, 22 Oct 2010 16:09:56 +0800	[thread overview]
Message-ID: <4CC146D4.7030009@cn.fujitsu.com> (raw)
In-Reply-To: <4CC146A4.9090505@cn.fujitsu.com>

Stephane posted a patchset to add perf_cgroup subsystem, so perf can
be used to monitor all threads belonging to a cgroup.

But if you have already mounted a cgroup hierarchy without perf_cgroup
and the hierarchy has sub-cgroups, you can't bind perf_cgroup to it,
and thus you're not able to use per-cgroup perf feature.

This patchset alleviates the pain, so that a subsystem can be bound/unbound
to/from a hierarchy which has sub-cgroups in it.

For a cgroup subsystem to become bindable, the can_bind flag of
struct cgroup_subsys should be set, and a ->bind() callback should be
provided if necessary.

But due to some constraints, not all subsystems can take advantage of
this patch. For example, we can't decide a cgroup's cpuset.mems and
cpuset.cpus automatically, so cpuset is not bindable.

Usage:

# mount -t cgroup -o cpuset xxx /mnt
# mkdir /mnt/tmp
# echo $$ > /mnt/tmp/tasks

(assume cpuacct is bindable, and we add cpuacct to the hierarchy)
# mount -o remount,cpuset,cpuacct xxx /mnt

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
---
 include/linux/cgroup.h |    5 +
 kernel/cgroup.c        |  225 ++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 187 insertions(+), 43 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e23ded6..49369ff 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -490,6 +490,11 @@ struct cgroup_subsys {
 	 * (not available in early_init time.)
 	 */
 	unsigned int use_id:1;
+	/*
+	 * Indicate if this subsystem can be bound/unbound to/from a cgroup
+	 * hierarchy which has child cgroups.
+	 */
+	unsigned int can_bind:1;
 
 #define MAX_CGROUP_TYPE_NAMELEN 32
 	const char *name;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 6c36750..46df5f8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -57,6 +57,7 @@
 #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
 #include <linux/eventfd.h>
 #include <linux/poll.h>
+#include <linux/bitops.h>
 
 #include <asm/atomic.h>
 
@@ -870,18 +871,13 @@ static void remove_dir(struct dentry *d)
 
 static void cgroup_clear_directory(struct dentry *dentry)
 {
-	struct list_head *node;
+	struct dentry *d, *tmp;
 
 	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
 	spin_lock(&dcache_lock);
-	node = dentry->d_subdirs.next;
-	while (node != &dentry->d_subdirs) {
-		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
-		list_del_init(node);
-		if (d->d_inode) {
-			/* This should never be called on a cgroup
-			 * directory with child cgroups */
-			BUG_ON(d->d_inode->i_mode & S_IFDIR);
+	list_for_each_entry_safe(d, tmp, &dentry->d_subdirs, d_u.d_child) {
+		if (d->d_inode && !(d->d_inode->i_mode & S_IFDIR)) {
+			list_del_init(&d->d_u.d_child);
 			d = dget_locked(d);
 			spin_unlock(&dcache_lock);
 			d_delete(d);
@@ -889,7 +885,6 @@ static void cgroup_clear_directory(struct dentry *dentry)
 			dput(d);
 			spin_lock(&dcache_lock);
 		}
-		node = dentry->d_subdirs.next;
 	}
 	spin_unlock(&dcache_lock);
 }
@@ -934,6 +929,145 @@ void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
 	css_put(css);
 }
 
+static void init_cgroup_css(struct cgroup_subsys_state *css,
+			       struct cgroup_subsys *ss,
+			       struct cgroup *cgrp)
+{
+	css->cgroup = cgrp;
+	atomic_set(&css->refcnt, 1);
+	css->flags = 0;
+	css->id = NULL;
+	if (cgrp == dummytop)
+		set_bit(CSS_ROOT, &css->flags);
+	BUG_ON(cgrp->subsys[ss->subsys_id]);
+	cgrp->subsys[ss->subsys_id] = css;
+}
+
+/*
+ * cgroup_walk_hierarchy - iterate through a cgroup hierarchy
+ * @process_cgroup: callback called on each cgroup in the hierarchy
+ * @data: will be passed to @process_cgroup
+ * @top_cgrp: the root cgroup of the hierarchy
+ *
+ * For such a hierarchy:
+ *        a1        c1
+ *      /         /
+ * Root - a2 - b1 - c2
+ *      \
+ *        a3
+ *
+ * The iterating order is: a1, a2, b1, c1, c2, a3. So a parent will be
+ * processed before its children.
+ */
+static int cgroup_walk_hierarchy(int (*process_cgroup)(struct cgroup *, void *),
+				 void *data, struct cgroup *top_cgrp)
+{
+	struct cgroup *parent = top_cgrp;
+	struct cgroup *child;
+	struct list_head *node;
+	int ret;
+
+	node = parent->children.next;
+repeat:
+	while (node != &parent->children) {
+		child = list_entry(node, struct cgroup, sibling);
+
+		ret = process_cgroup(child, data);
+		if (ret)
+			return ret;
+
+		if (!list_empty(&child->children)) {
+			parent = child;
+			node = parent->children.next;
+			goto repeat;
+		} else
+			node = node->next;
+	}
+
+	if (parent != top_cgrp) {
+		child = parent;
+		parent = child->parent;
+		node = child->sibling.next;
+		goto repeat;
+	}
+
+	return 0;
+}
+
+static int hierarchy_attach_css_failed(struct cgroup *cgrp, void *data)
+{
+	unsigned long added_bits = (unsigned long)data;
+	int i;
+
+	for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT)
+		if (cgrp->subsys[i])
+			subsys[i]->destroy(subsys[i], cgrp);
+	/* Return 0 so the walk continues and every cgroup is rolled back. */
+	return 0;
+}
+
+static int hierarchy_attach_css(struct cgroup *cgrp, void *data)
+{
+	unsigned long added_bits = (unsigned long)data;
+	int i;
+	int ret = 0;
+
+	for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT) {
+		struct cgroup_subsys_state *css;
+		struct cgroup_subsys *ss = subsys[i];
+
+		css = ss->create(ss, cgrp);
+		if (IS_ERR(css)) {
+			ret = PTR_ERR(css);
+			break;
+		}
+		init_cgroup_css(css, ss, cgrp);
+
+		if (ss->use_id) {
+			ret = alloc_css_id(ss, cgrp->parent, cgrp);
+			if (ret)
+				break;
+		}
+	}
+
+	if (ret)
+		cgroup_walk_hierarchy(hierarchy_attach_css_failed, data,
+				      cgrp->top_cgroup);
+	return ret;
+}
+
+static int hierarchy_update_css_sets(struct cgroup *cgrp, void *data)
+{
+	unsigned long added_bits = (unsigned long)data;
+	int i;
+	struct cg_cgroup_link *link;
+
+	write_lock(&css_set_lock);
+	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
+		struct css_set *cg = link->cg;
+		struct hlist_head *hhead;
+
+		for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT)
+			cg->subsys[i] = cgrp->subsys[i];
+
+		/* rehash */
+		hlist_del(&cg->hlist);
+		hhead = css_set_hash(cg->subsys);
+		hlist_add_head(&cg->hlist, hhead);
+	}
+	write_unlock(&css_set_lock);
+
+	return 0;
+}
+
+static int hierarchy_populate_dir(struct cgroup *cgrp, void *data)
+{
+	mutex_lock_nested(&cgrp->dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+	cgroup_populate_dir(cgrp);
+	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
+	return 0;
+}
+
 /*
  * Call with cgroup_mutex held. Drops reference counts on modules, including
  * any duplicate ones that parse_cgroupfs_options took. If this function
@@ -945,36 +1079,53 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 	unsigned long added_bits, removed_bits;
 	struct cgroup *cgrp = &root->top_cgroup;
 	int i;
+	int err;
 
 	BUG_ON(!mutex_is_locked(&cgroup_mutex));
 
 	removed_bits = root->actual_subsys_bits & ~final_bits;
 	added_bits = final_bits & ~root->actual_subsys_bits;
+
 	/* Check that any added subsystems are currently free */
-	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-		unsigned long bit = 1UL << i;
-		struct cgroup_subsys *ss = subsys[i];
-		if (!(bit & added_bits))
-			continue;
+	for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT) {
 		/*
 		 * Nobody should tell us to do a subsys that doesn't exist:
 		 * parse_cgroupfs_options should catch that case and refcounts
 		 * ensure that subsystems won't disappear once selected.
 		 */
-		BUG_ON(ss == NULL);
-		if (ss->root != &rootnode) {
+		BUG_ON(subsys[i] == NULL);
+		if (subsys[i]->root != &rootnode) {
 			/* Subsystem isn't free */
 			return -EBUSY;
 		}
 	}
 
-	/* Currently we don't handle adding/removing subsystems when
-	 * any child cgroups exist. This is theoretically supportable
-	 * but involves complex error handling, so it's being left until
-	 * later */
-	if (root->number_of_cgroups > 1)
+	/* removing will be supported later */
+	if (root->number_of_cgroups > 1 && removed_bits)
 		return -EBUSY;
 
+	if (root->number_of_cgroups > 1) {
+		for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT)
+			if (!subsys[i]->can_bind)
+				return -EBUSY;
+	}
+	for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT) {
+		BUG_ON(cgrp->subsys[i]);
+		BUG_ON(!dummytop->subsys[i]);
+		BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
+
+		cgrp->subsys[i] = dummytop->subsys[i];
+		cgrp->subsys[i]->cgroup = cgrp;
+	}
+
+	err = cgroup_walk_hierarchy(hierarchy_attach_css,
+				    (void *)added_bits, cgrp);
+	if (err)
+		goto failed;
+
+	cgroup_walk_hierarchy(hierarchy_update_css_sets,
+			      (void *)added_bits, cgrp);
+
 	/* Process each subsystem */
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
@@ -982,12 +1133,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 		if (bit & added_bits) {
 			/* We're binding this subsystem to this hierarchy */
 			BUG_ON(ss == NULL);
-			BUG_ON(cgrp->subsys[i]);
-			BUG_ON(!dummytop->subsys[i]);
-			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
 			mutex_lock(&ss->hierarchy_mutex);
-			cgrp->subsys[i] = dummytop->subsys[i];
-			cgrp->subsys[i]->cgroup = cgrp;
 			list_move(&ss->sibling, &root->subsys_list);
 			ss->root = root;
 			if (ss->bind)
@@ -1000,10 +1146,10 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
 			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
 			mutex_lock(&ss->hierarchy_mutex);
-			if (ss->bind)
-				ss->bind(ss, dummytop);
 			dummytop->subsys[i]->cgroup = dummytop;
 			cgrp->subsys[i] = NULL;
+			if (ss->bind)
+				ss->bind(ss, dummytop);
 			subsys[i]->root = &rootnode;
 			list_move(&ss->sibling, &rootnode.subsys_list);
 			mutex_unlock(&ss->hierarchy_mutex);
@@ -1030,6 +1176,12 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 	synchronize_rcu();
 
 	return 0;
+
+failed:
+	for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT)
+		cgrp->subsys[i] = NULL;
+
+	return err;
 }
 
 static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
@@ -1285,6 +1437,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 
 	/* (re)populate subsystem files */
 	cgroup_populate_dir(cgrp);
+	cgroup_walk_hierarchy(hierarchy_populate_dir, NULL, cgrp);
 
 	if (opts.release_agent)
 		strcpy(root->release_agent_path, opts.release_agent);
@@ -3313,20 +3466,6 @@ static int cgroup_populate_dir(struct cgroup *cgrp)
 	return 0;
 }
 
-static void init_cgroup_css(struct cgroup_subsys_state *css,
-			       struct cgroup_subsys *ss,
-			       struct cgroup *cgrp)
-{
-	css->cgroup = cgrp;
-	atomic_set(&css->refcnt, 1);
-	css->flags = 0;
-	css->id = NULL;
-	if (cgrp == dummytop)
-		set_bit(CSS_ROOT, &css->flags);
-	BUG_ON(cgrp->subsys[ss->subsys_id]);
-	cgrp->subsys[ss->subsys_id] = css;
-}
-
 static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
 {
 	/* We need to take each hierarchy_mutex in a consistent order */
-- 
1.7.0.1


  parent reply	other threads:[~2010-10-22  8:09 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-22  8:09 [PATCH 0/7] cgroups: Allow to bind/unbind subsystems to/from non-trival hierarchy Li Zefan
2010-10-22  8:09 ` [PATCH 1/7] cgroups: Shrink struct cgroup_subsys Li Zefan
2010-10-28 23:34   ` Paul Menage
2010-11-08  5:23     ` Li Zefan
2010-11-09 21:05       ` Paul Menage
     [not found]         ` <AANLkTim6d1fQLZbkmZST3PTN0RMSs3m=oossF81pYBn9-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-11-10  0:52           ` Li Zefan
2010-11-10  0:52         ` Li Zefan
     [not found]           ` <4CD9ECD2.3030805-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2010-11-10  1:53             ` Paul Menage
2010-11-10  1:53           ` Paul Menage
     [not found]             ` <AANLkTinMr7VE4Os7rXWjiHWOVysv=oE0vHKduLWCN0bC-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-11-10  2:06               ` Li Zefan
2010-11-10  2:06             ` Li Zefan
2010-11-10  2:15               ` Paul Menage
     [not found]               ` <4CD9FE2D.2070108-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2010-11-10  2:15                 ` Paul Menage
     [not found]       ` <4CD78946.5060405-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2010-11-09 21:05         ` Paul Menage
     [not found]     ` <AANLkTikf-1kLStxqi5UjP=vn3pqVBHy0OA7ibeWTkJ5z-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-11-08  5:23       ` Li Zefan
     [not found]   ` <4CC146BA.7080009-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2010-10-28 23:34     ` Paul Menage
2010-10-22  8:10 ` [PATCH 3/7] cgroups: Allow to unbind subsystem from a cgroup hierarachy Li Zefan
     [not found]   ` <4CC146F5.9060006-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2010-10-29  0:02     ` Paul Menage
2010-10-29  0:02   ` Paul Menage
2010-10-22  8:11 ` [PATCH 4/7] cgroups: Mark some subsystems bindable Li Zefan
     [not found] ` <4CC146A4.9090505-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2010-10-22  8:09   ` [PATCH 1/7] cgroups: Shrink struct cgroup_subsys Li Zefan
2010-10-22  8:09   ` Li Zefan [this message]
2010-10-22  8:09     ` [PATCH 2/7] cgroups: Allow to bind a subsystem to a cgroup hierarchy Li Zefan
2010-10-22 12:47     ` Peter Zijlstra
2010-10-25  0:59       ` Li Zefan
2010-10-25  0:59       ` Li Zefan
     [not found]     ` <4CC146D4.7030009-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2010-10-22 12:47       ` Peter Zijlstra
2010-10-22 21:38       ` Matt Helsley
2010-10-28 23:55       ` Paul Menage
2010-10-28 23:55         ` Paul Menage
     [not found]         ` <AANLkTimCD90s+y_6y=LyOL1QqEOOAaT+b2b4guDrzo_g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-11-08  5:26           ` Li Zefan
2010-11-08  5:26         ` Li Zefan
2010-10-22 21:38     ` Matt Helsley
     [not found]       ` <20101022213819.GK10119-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2010-10-25  1:23         ` Li Zefan
2010-10-25  1:23           ` Li Zefan
2010-10-28 23:57         ` Paul Menage
2010-10-28 23:57       ` Paul Menage
2010-10-22  8:10   ` [PATCH 3/7] cgroups: Allow to unbind subsystem from a cgroup hierarachy Li Zefan
2010-10-22  8:11   ` [PATCH 4/7] cgroups: Mark some subsystems bindable Li Zefan
2010-10-22  8:11   ` [PATCH 5/7] cgroups: Make freezer subsystem bindable Li Zefan
2010-10-22  8:12   ` [PATCH 6/7] cgroups: Warn if a bindable subsystem calls css_get() Li Zefan
2010-10-22  8:12   ` [PATCH 7/7] cgroups: Update documentation for bindable subsystems Li Zefan
2010-10-22 12:50   ` [PATCH 0/7] cgroups: Allow to bind/unbind subsystems to/from non-trival hierarchy Peter Zijlstra
2010-10-22  8:11 ` [PATCH 5/7] cgroups: Make freezer subsystem bindable Li Zefan
     [not found]   ` <4CC1473D.9070201-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2010-10-22 20:57     ` Matt Helsley
2010-10-22 20:57       ` Matt Helsley
2010-10-22 21:46       ` Matt Helsley
2010-10-29  0:06         ` Paul Menage
     [not found]         ` <20101022214650.GL10119-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2010-10-29  0:06           ` Paul Menage
     [not found]       ` <20101022205755.GJ10119-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2010-10-22 21:46         ` Matt Helsley
2010-10-22 21:57         ` Matt Helsley
2010-10-25  1:15         ` Li Zefan
2010-10-22 21:57       ` Matt Helsley
2010-10-25  1:15       ` Li Zefan
2010-10-22  8:12 ` [PATCH 6/7] cgroups: Warn if a bindable subsystem calls css_get() Li Zefan
     [not found]   ` <4CC14756.5010504-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2010-10-29  0:05     ` Paul Menage
2010-10-29  0:05   ` Paul Menage
2010-10-22  8:12 ` [PATCH 7/7] cgroups: Update documentation for bindable subsystems Li Zefan
     [not found]   ` <4CC14769.2000406-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2010-10-25  0:36     ` KAMEZAWA Hiroyuki
2010-10-29  0:13     ` Paul Menage
2010-10-25  0:36   ` KAMEZAWA Hiroyuki
     [not found]     ` <20101025093617.7de750c0.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2010-10-25  0:52       ` Li Zefan
2010-10-25  0:52     ` Li Zefan
     [not found]       ` <4CC4D4B9.3020807-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2010-10-25  0:56         ` Li Zefan
2010-10-25  0:56       ` Li Zefan
2010-10-29  0:13   ` Paul Menage
     [not found]     ` <AANLkTinbtLkF=haFeDkzecEWv_FE9jG4TefiptSnZcPi-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-10-29  0:15       ` Paul Menage
2010-11-08  5:27       ` Li Zefan
2010-10-29  0:15     ` Paul Menage
2010-11-08  5:27     ` Li Zefan
2010-10-22 12:50 ` [PATCH 0/7] cgroups: Allow to bind/unbind subsystems to/from non-trival hierarchy Peter Zijlstra
2010-10-25  1:07   ` Li Zefan
     [not found]     ` <4CC4D84C.5000705-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2010-10-28 23:33       ` Paul Menage
2010-10-28 23:33     ` Paul Menage
2010-10-25  1:07   ` Li Zefan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4CC146D4.7030009@cn.fujitsu.com \
    --to=lizf-bthxqxjhjhxqfuhtdcdx3a@public.gmane.org \
    --cc=akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org \
    --cc=containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org \
    --cc=eranian-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org \
    --cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.