public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Li Zefan <lizf@cn.fujitsu.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Menage <menage@google.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Hiroyuki KAMEZAWA <kamezawa.hiroyu@jp.fujitsu.com>,
	Matt Helsley <matthltc@us.ibm.com>,
	Stephane Eranian <eranian@google.com>,
	LKML <linux-kernel@vger.kernel.org>,
	containers@lists.linux-foundation.org
Subject: [PATCH v2 3/6] cgroups: Allow to unbind subsystem from a cgroup hierarchy
Date: Wed, 15 Dec 2010 17:35:33 +0800	[thread overview]
Message-ID: <4D088BE5.1000708@cn.fujitsu.com> (raw)
In-Reply-To: <4D088BB5.30903@cn.fujitsu.com>

This allows us to unbind a cgroup subsystem from a hierarchy
which has sub-cgroups in it.

If a subsystem is to support unbinding, when pinning a cgroup
via css refcnt, it should use __css_tryget() instead of css_get().

Usage:

 # mount -t cgroup -o cpuset,cpuacct xxx /mnt
 # mkdir /mnt/tmp
 # echo $$ > /mnt/tmp/tasks

 (remove it from the hierarchy)
 # mount -o remount,cpuset xxx /mnt

Changelog v2:

- Allow a cgroup subsystem to use css refcnt.
- Add more code comments.
- Use rcu_assign_pointer() in hierarchy_update_css_sets().
- Split can_bind flag to bindable and unbindable flags.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
---
 include/linux/cgroup.h |   17 ++++++
 kernel/cgroup.c        |  139 +++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 138 insertions(+), 18 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index d8c4e22..17579b2 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -110,6 +110,18 @@ static inline bool css_is_removed(struct cgroup_subsys_state *css)
 }
 
 /*
+ * For a subsystem which supports unbinding, call this to get css
+ * refcnt. Called with rcu_read_lock or cgroup_mutex held.
+ */
+
+static inline bool __css_tryget(struct cgroup_subsys_state *css)
+{
+	if (test_bit(CSS_ROOT, &css->flags))
+		return true;
+	return atomic_inc_not_zero(&css->refcnt);
+}
+
+/*
  * Call css_tryget() to take a reference on a css if your existing
  * (known-valid) reference isn't already ref-counted. Returns false if
  * the css has been destroyed.
@@ -495,6 +507,11 @@ struct cgroup_subsys {
 	 * which has child cgroups.
 	 */
 	bool bindable:1;
+	/*
+	 * Indicate if this subsystem can be removed from a cgroup hierarchy
+	 * which has child cgroups.
+	 */
+	bool unbindable:1;
 
 #define MAX_CGROUP_TYPE_NAMELEN 32
 	const char *name;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index caac80f..463575d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1055,12 +1055,61 @@ static int hierarchy_attach_css(struct cgroup *cgrp, void *data)
 }
 
 /*
- * After attaching new css objects to the cgroup, we need to entangle
- * them into the existing css_sets.
+ * Reset those css objects whose refcnts are cleared.
  */
-static int hierarchy_update_css_sets(struct cgroup *cgrp, void *data)
+static int hierarchy_reset_css_refs(struct cgroup *cgrp, void *data)
+{
+	unsigned long removed_bits = (unsigned long)data;
+	int i;
+
+	for_each_set_bit(i, &removed_bits, CGROUP_SUBSYS_COUNT) {
+		if (atomic_read(&cgrp->subsys[i]->refcnt) == 0)
+			atomic_set(&cgrp->subsys[i]->refcnt, 1);
+	}
+	return 0;
+}
+
+/*
+ * Clear all the css objects' refcnt to 0. If there's a refcnt > 1,
+ * return failure.
+ */
+static int hierarchy_clear_css_refs(struct cgroup *cgrp, void *data)
+{
+	unsigned long removed_bits = (unsigned long)data;
+	int i;
+
+	for_each_set_bit(i, &removed_bits, CGROUP_SUBSYS_COUNT) {
+		struct cgroup_subsys_state *css = cgrp->subsys[i];
+
+		if (atomic_cmpxchg(&css->refcnt, 1, 0) != 1)
+			goto failed;
+	}
+	return 0;
+failed:
+	hierarchy_reset_css_refs(cgrp, data);
+	return -EBUSY;
+}
+
+/*
+ * We're removing some subsystems from cgroup hierarchy, and here we
+ * remove and destroy the css objects from each cgroup.
+ */
+static int hierarchy_remove_css(struct cgroup *cgrp, void *data)
+{
+	unsigned long removed_bits = (unsigned long)data;
+	int i;
+
+	for_each_set_bit(i, &removed_bits, CGROUP_SUBSYS_COUNT) {
+		subsys[i]->destroy(subsys[i], cgrp);
+		cgrp->subsys[i] = NULL;
+	}
+
+	return 0;
+}
+
+static int hierarchy_update_css_sets(struct cgroup *cgrp,
+				     unsigned long bits, bool add)
 {
-	unsigned long added_bits = (unsigned long)data;
 	int i;
 	struct cg_cgroup_link *link;
 
@@ -1069,8 +1118,14 @@ static int hierarchy_update_css_sets(struct cgroup *cgrp, void *data)
 		struct css_set *cg = link->cg;
 		struct hlist_head *hhead;
 
-		for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT)
-			rcu_assign_pointer(cg->subsys[i], cgrp->subsys[i]);
+		for_each_set_bit(i, &bits, CGROUP_SUBSYS_COUNT) {
+			if (add)
+				rcu_assign_pointer(cg->subsys[i],
+						   cgrp->subsys[i]);
+			else
+				rcu_assign_pointer(cg->subsys[i],
+						   dummytop->subsys[i]);
+		}
 
 		/* rehash */
 		hlist_del(&cg->hlist);
@@ -1083,6 +1138,30 @@ static int hierarchy_update_css_sets(struct cgroup *cgrp, void *data)
 }
 
 /*
+ * After attaching new css objects to the cgroup, we need to entangle
+ * them into the existing css_sets.
+ */
+static int hierarchy_add_to_css_sets(struct cgroup *cgrp, void *data)
+{
+	unsigned long added_bits = (unsigned long)data;
+
+	hierarchy_update_css_sets(cgrp, added_bits, true);
+	return 0;
+}
+
+/*
+ * Before detaching and destroying css objects from the cgroup, we
+ * should detangle them from the existing css_sets.
+ */
+static int hierarchy_remove_from_css_sets(struct cgroup *cgrp, void *data)
+{
+	unsigned long removed_bits = (unsigned long)data;
+
+	hierarchy_update_css_sets(cgrp, removed_bits, false);
+	return 0;
+}
+
+/*
  * Re-populate each cgroup directory.
  *
  * Note root cgroup's inode mutex is held.
@@ -1127,18 +1206,17 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 		}
 	}
 
-	/* Removing will be supported later */
-	if (root->number_of_cgroups > 1 && removed_bits)
-		return -EBUSY;
-
 	/*
 	 * For non-trivial hierarchy, check that added subsystems
-	 * are all bindable
+	 * are all bindable and removed subsystems are all unbindable
 	 */
 	if (root->number_of_cgroups > 1) {
 		for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT)
 			if (!subsys[i]->bindable)
 				return -EBUSY;
+		for_each_set_bit(i, &removed_bits, CGROUP_SUBSYS_COUNT)
+			if (!subsys[i]->unbindable)
+				return -EBUSY;
 	}
 
 	/* Attach css objects to the top cgroup */
@@ -1154,9 +1232,14 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 	err = cgroup_walk_hierarchy(hierarchy_attach_css,
 				    (void *)added_bits, cgrp);
 	if (err)
-		goto failed;
+		goto out;
+
+	err = cgroup_walk_hierarchy(hierarchy_clear_css_refs,
+				    (void *)removed_bits, cgrp);
+	if (err)
+		goto out_remove_css;
 
-	cgroup_walk_hierarchy(hierarchy_update_css_sets,
+	cgroup_walk_hierarchy(hierarchy_add_to_css_sets,
 			      (void *)added_bits, cgrp);
 
 	/* Process each subsystem */
@@ -1176,11 +1259,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 		} else if (bit & removed_bits) {
 			/* We're removing this subsystem */
 			BUG_ON(ss == NULL);
-			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
-			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
 			mutex_lock(&ss->hierarchy_mutex);
-			dummytop->subsys[i]->cgroup = dummytop;
-			cgrp->subsys[i] = NULL;
 			if (ss->bind)
 				ss->bind(ss, dummytop);
 			subsys[i]->root = &rootnode;
@@ -1206,11 +1285,35 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 		}
 	}
 	root->subsys_bits = root->actual_subsys_bits = final_bits;
+
+	for_each_set_bit(i, &removed_bits, CGROUP_SUBSYS_COUNT) {
+		BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
+		BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
+
+		dummytop->subsys[i]->cgroup = dummytop;
+		cgrp->subsys[i] = NULL;
+	}
+
+	cgroup_walk_hierarchy(hierarchy_remove_from_css_sets,
+			      (void *)removed_bits, cgrp);
+
+	/*
+	 * There might be some pointers to the cgroup_subsys_state
+	 * that we are going to destroy.
+	 */
+	synchronize_rcu();
+
+	cgroup_walk_hierarchy(hierarchy_remove_css,
+			      (void *)removed_bits, cgrp);
+
 	synchronize_rcu();
 
 	return 0;
 
-failed:
+out_remove_css:
+	cgroup_walk_hierarchy(hierarchy_remove_css,
+			      (void *)added_bits, cgrp);
+out:
 	for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT)
 		cgrp->subsys[i] = NULL;
 
-- 
1.6.3


  parent reply	other threads:[~2010-12-15  9:35 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-12-15  9:34 [PATCH v2 0/6] cgroups: Bindable cgroup subsystems Li Zefan
2010-12-15  9:35 ` [PATCH v2 1/6] cgroups: Shrink struct cgroup_subsys Li Zefan
2010-12-15  9:35 ` [PATCH v2 2/6] cgroups: Allow to bind a subsystem to a cgroup hierarchy Li Zefan
2010-12-15  9:35 ` Li Zefan [this message]
2010-12-15  9:36 ` [PATCH v2 4/6] cgroups: Mark some subsystems bindable/unbindable Li Zefan
2010-12-15  9:36 ` [PATCH v2 5/6] cgroups: Triger BUG if a bindable subsystem calls css_get() Li Zefan
2010-12-15  9:36 ` [PATCH v2 6/6] cgroups: Update documentation for bindable subsystems Li Zefan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4D088BE5.1000708@cn.fujitsu.com \
    --to=lizf@cn.fujitsu.com \
    --cc=akpm@linux-foundation.org \
    --cc=containers@lists.linux-foundation.org \
    --cc=eranian@google.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matthltc@us.ibm.com \
    --cc=menage@google.com \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox