All of lore.kernel.org
 help / color / mirror / Atom feed
From: Li Zefan <lizf@cn.fujitsu.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Menage <menage@google.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Hiroyuki KAMEZAWA <kamezawa.hiroyu@jp.fujitsu.com>,
	Matt Helsley <matthltc@us.ibm.com>,
	Stephane Eranian <eranian@google.com>,
	LKML <linux-kernel@vger.kernel.org>,
	containers@lists.linux-foundation.org
Subject: [PATCH v2 3/6] cgroups: Allow to unbind subsystem from a cgroup hierarchy
Date: Wed, 15 Dec 2010 17:35:33 +0800	[thread overview]
Message-ID: <4D088BE5.1000708@cn.fujitsu.com> (raw)
In-Reply-To: <4D088BB5.30903@cn.fujitsu.com>

This allows us to unbind a cgroup subsystem from a hierarchy
which has sub-cgroups in it.

If a subsystem is to support unbinding, when pinning a cgroup
via css refcnt, it should use __css_tryget() instead of css_get().

Usage:

 # mount -t cgroup -o cpuset,cpuacct xxx /mnt
 # mkdir /mnt/tmp
 # echo $$ > /mnt/tmp/tasks

 (remove cpuacct from the hierarchy by remounting with only cpuset)
 # mount -o remount,cpuset xxx /mnt

Changelog v2:

- Allow a cgroup subsystem to use css refcnt.
- Add more code comments.
- Use rcu_assign_pointer() in hierarchy_update_css_sets().
- Split the can_bind flag into bindable and unbindable flags.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
---
 include/linux/cgroup.h |   17 ++++++
 kernel/cgroup.c        |  139 +++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 138 insertions(+), 18 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index d8c4e22..17579b2 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -110,6 +110,18 @@ static inline bool css_is_removed(struct cgroup_subsys_state *css)
 }
 
 /*
+ * For a subsystem which supports unbinding, call this to get css
+ * refcnt. Called with rcu_read_lock or cgroup_mutex held.
+ */
+
+static inline bool __css_tryget(struct cgroup_subsys_state *css)
+{
+	if (test_bit(CSS_ROOT, &css->flags))
+		return true;	/* CSS_ROOT set: succeed without taking a ref */
+	return atomic_inc_not_zero(&css->refcnt);
+}
+
+/*
  * Call css_tryget() to take a reference on a css if your existing
  * (known-valid) reference isn't already ref-counted. Returns false if
  * the css has been destroyed.
@@ -495,6 +507,11 @@ struct cgroup_subsys {
 	 * which has child cgroups.
 	 */
 	bool bindable:1;
+	/*
+	 * Indicate if this subsystem can be removed from a cgroup hierarchy
+	 * which has child cgroups.
+	 */
+	bool unbindable:1;
 
 #define MAX_CGROUP_TYPE_NAMELEN 32
 	const char *name;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index caac80f..463575d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1055,12 +1055,61 @@ static int hierarchy_attach_css(struct cgroup *cgrp, void *data)
 }
 
 /*
- * After attaching new css objects to the cgroup, we need to entangle
- * them into the existing css_sets.
+ * Reset those css objects whose refcnts are cleared.
  */
-static int hierarchy_update_css_sets(struct cgroup *cgrp, void *data)
+static int hierarchy_reset_css_refs(struct cgroup *cgrp, void *data)
+{
+	unsigned long removed_bits = (unsigned long)data;
+	int i;
+
+	/* A zero refcnt marks a css cleared by hierarchy_clear_css_refs(). */
+	for_each_set_bit(i, &removed_bits, CGROUP_SUBSYS_COUNT)
+		if (atomic_read(&cgrp->subsys[i]->refcnt) == 0)
+			atomic_set(&cgrp->subsys[i]->refcnt, 1);
+	return 0;
+}
+
+/*
+ * Drop each removed css's refcnt from 1 to 0. If any refcnt is not 1,
+ * reset this cgroup's zeroed refcnts back to 1 and return -EBUSY.
+ */
+static int hierarchy_clear_css_refs(struct cgroup *cgrp, void *data)
+{
+	unsigned long removed_bits = (unsigned long)data;
+	int i;
+
+	for_each_set_bit(i, &removed_bits, CGROUP_SUBSYS_COUNT) {
+		struct cgroup_subsys_state *css = cgrp->subsys[i];
+
+		if (atomic_cmpxchg(&css->refcnt, 1, 0) != 1)
+			goto failed;
+	}
+	return 0;
+failed:
+	hierarchy_reset_css_refs(cgrp, data);
+	return -EBUSY;
+}
+
+/*
+ * Call ->destroy() on, and clear, the cgroup's css pointer for every
+ * subsystem in @data: the removed ones, or added ones on rollback.
+ */
+static int hierarchy_remove_css(struct cgroup *cgrp, void *data)
+{
+	unsigned long removed_bits = (unsigned long)data;
+	int i;
+
+	for_each_set_bit(i, &removed_bits, CGROUP_SUBSYS_COUNT) {
+		subsys[i]->destroy(subsys[i], cgrp);
+		cgrp->subsys[i] = NULL;
+	}
+
+	return 0;
+}
+
+static int hierarchy_update_css_sets(struct cgroup *cgrp,
+				     unsigned long bits, bool add)
 {
-	unsigned long added_bits = (unsigned long)data;
 	int i;
 	struct cg_cgroup_link *link;
 
@@ -1069,8 +1118,14 @@ static int hierarchy_update_css_sets(struct cgroup *cgrp, void *data)
 		struct css_set *cg = link->cg;
 		struct hlist_head *hhead;
 
-		for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT)
-			rcu_assign_pointer(cg->subsys[i], cgrp->subsys[i]);
+		for_each_set_bit(i, &bits, CGROUP_SUBSYS_COUNT) {
+			if (add)
+				rcu_assign_pointer(cg->subsys[i],
+						   cgrp->subsys[i]);
+			else
+				rcu_assign_pointer(cg->subsys[i],
+						   dummytop->subsys[i]);
+		}
 
 		/* rehash */
 		hlist_del(&cg->hlist);
@@ -1083,6 +1138,30 @@ static int hierarchy_update_css_sets(struct cgroup *cgrp, void *data)
 }
 
 /*
+ * After attaching new css objects to the cgroup, we need to entangle
+ * them into the existing css_sets.
+ */
+static int hierarchy_add_to_css_sets(struct cgroup *cgrp, void *data)
+{
+	unsigned long added_bits = (unsigned long)data;
+
+	hierarchy_update_css_sets(cgrp, added_bits, true);
+	return 0;
+}
+
+/*
+ * Before detaching and destroying css objects from the cgroup, we
+ * should detangle them from the existing css_sets.
+ */
+static int hierarchy_remove_from_css_sets(struct cgroup *cgrp, void *data)
+{
+	unsigned long removed_bits = (unsigned long)data;
+
+	hierarchy_update_css_sets(cgrp, removed_bits, false);
+	return 0;
+}
+
+/*
  * Re-populate each cgroup directory.
  *
  * Note root cgroup's inode mutex is held.
@@ -1127,18 +1206,17 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 		}
 	}
 
-	/* Removing will be supported later */
-	if (root->number_of_cgroups > 1 && removed_bits)
-		return -EBUSY;
-
 	/*
 	 * For non-trivial hierarchy, check that added subsystems
-	 * are all bindable
+	 * are all bindable and removed subsystems are all unbindable
 	 */
 	if (root->number_of_cgroups > 1) {
 		for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT)
 			if (!subsys[i]->bindable)
 				return -EBUSY;
+		for_each_set_bit(i, &removed_bits, CGROUP_SUBSYS_COUNT)
+			if (!subsys[i]->unbindable)
+				return -EBUSY;
 	}
 
 	/* Attach css objects to the top cgroup */
@@ -1154,9 +1232,14 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 	err = cgroup_walk_hierarchy(hierarchy_attach_css,
 				    (void *)added_bits, cgrp);
 	if (err)
-		goto failed;
+		goto out;
+
+	err = cgroup_walk_hierarchy(hierarchy_clear_css_refs,
+				    (void *)removed_bits, cgrp);
+	if (err)
+		goto out_remove_css;
 
-	cgroup_walk_hierarchy(hierarchy_update_css_sets,
+	cgroup_walk_hierarchy(hierarchy_add_to_css_sets,
 			      (void *)added_bits, cgrp);
 
 	/* Process each subsystem */
@@ -1176,11 +1259,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 		} else if (bit & removed_bits) {
 			/* We're removing this subsystem */
 			BUG_ON(ss == NULL);
-			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
-			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
 			mutex_lock(&ss->hierarchy_mutex);
-			dummytop->subsys[i]->cgroup = dummytop;
-			cgrp->subsys[i] = NULL;
 			if (ss->bind)
 				ss->bind(ss, dummytop);
 			subsys[i]->root = &rootnode;
@@ -1206,11 +1285,35 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 		}
 	}
 	root->subsys_bits = root->actual_subsys_bits = final_bits;
+
+	for_each_set_bit(i, &removed_bits, CGROUP_SUBSYS_COUNT) {
+		BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
+		BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
+
+		dummytop->subsys[i]->cgroup = dummytop;
+		cgrp->subsys[i] = NULL;
+	}
+
+	cgroup_walk_hierarchy(hierarchy_remove_from_css_sets,
+			      (void *)removed_bits, cgrp);
+
+	/*
+	 * There might be some pointers to the cgroup_subsys_state
+	 * that we are going to destroy.
+	 */
+	synchronize_rcu();
+
+	cgroup_walk_hierarchy(hierarchy_remove_css,
+			      (void *)removed_bits, cgrp);
+
 	synchronize_rcu();
 
 	return 0;
 
-failed:
+out_remove_css:
+	cgroup_walk_hierarchy(hierarchy_remove_css,
+			      (void *)added_bits, cgrp);
+out:
 	for_each_set_bit(i, &added_bits, CGROUP_SUBSYS_COUNT)
 		cgrp->subsys[i] = NULL;
 
-- 
1.6.3


  parent reply	other threads:[~2010-12-15  9:35 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-12-15  9:34 [PATCH v2 0/6] cgroups: Bindable cgroup subsystems Li Zefan
2010-12-15  9:35 ` [PATCH v2 1/6] cgroups: Shrink struct cgroup_subsys Li Zefan
2010-12-15  9:35 ` [PATCH v2 2/6] cgroups: Allow to bind a subsystem to a cgroup hierarchy Li Zefan
     [not found] ` <4D088BB5.30903-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2010-12-15  9:35   ` [PATCH v2 1/6] cgroups: Shrink struct cgroup_subsys Li Zefan
2010-12-15  9:35   ` [PATCH v2 2/6] cgroups: Allow to bind a subsystem to a cgroup hierarchy Li Zefan
2010-12-15  9:35   ` [PATCH v2 3/6] cgroups: Allow to unbind subsystem from " Li Zefan
2010-12-15  9:36   ` [PATCH v2 4/6] cgroups: Mark some subsystems bindable/unbindable Li Zefan
2010-12-15  9:36   ` [PATCH v2 5/6] cgroups: Triger BUG if a bindable subsystem calls css_get() Li Zefan
2010-12-15  9:36   ` [PATCH v2 6/6] cgroups: Update documentation for bindable subsystems Li Zefan
2010-12-15  9:35 ` Li Zefan [this message]
2010-12-15  9:36 ` [PATCH v2 4/6] cgroups: Mark some subsystems bindable/unbindable Li Zefan
2010-12-15  9:36 ` [PATCH v2 5/6] cgroups: Triger BUG if a bindable subsystem calls css_get() Li Zefan
2010-12-15  9:36 ` [PATCH v2 6/6] cgroups: Update documentation for bindable subsystems Li Zefan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4D088BE5.1000708@cn.fujitsu.com \
    --to=lizf@cn.fujitsu.com \
    --cc=akpm@linux-foundation.org \
    --cc=containers@lists.linux-foundation.org \
    --cc=eranian@google.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matthltc@us.ibm.com \
    --cc=menage@google.com \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.