All of lore.kernel.org
 help / color / mirror / Atom feed
From: menage@google.com
To: akpm@linux-foundation.org, kamezawa.hiroyu@jp.fujitsu.com,
	lizf@cn.fujitsu.com
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH 3/3] CGroups: Add css_tryget()
Date: Tue, 16 Dec 2008 03:30:58 -0800	[thread overview]
Message-ID: <20081216113653.252690000@menage.corp.google.com> (raw)
In-Reply-To: 20081216113055.713856000@menage.corp.google.com

[-- Attachment #1: cgroup_refcnt.patch --]
[-- Type: text/plain, Size: 6618 bytes --]

This patch adds css_tryget(), that obtains a counted reference on a
CSS.  It is used in situations where the caller has a "weak" reference
to the CSS, i.e. one that does not protect the cgroup from removal via
a reference count, but would instead be cleaned up by a destroy()
callback.

css_tryget() will return true on success, or false if the cgroup is
being removed.

This is similar to Kamezawa Hiroyuki's patch from a week or two ago,
but with the difference that in the event of css_tryget() racing with
a cgroup_rmdir(), css_tryget() will only return false if the cgroup
really does get removed.

This implementation is done by biasing css->refcnt, so that a refcnt
of 1 means "releasable" and 0 means "released or releasing". In the
event of a race, css_tryget() distinguishes between "released" and
"releasing" by checking for the CSS_REMOVED flag in css->flags.

Signed-off-by: Paul Menage <menage@google.com>

---
 include/linux/cgroup.h |   38 +++++++++++++++++++++++++-----
 kernel/cgroup.c        |   61 ++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 88 insertions(+), 11 deletions(-)

Index: hierarchy_lock-mmotm-2008-12-09/include/linux/cgroup.h
===================================================================
--- hierarchy_lock-mmotm-2008-12-09.orig/include/linux/cgroup.h
+++ hierarchy_lock-mmotm-2008-12-09/include/linux/cgroup.h
@@ -52,9 +52,9 @@ struct cgroup_subsys_state {
 	 * hierarchy structure */
 	struct cgroup *cgroup;
 
-	/* State maintained by the cgroup system to allow
-	 * subsystems to be "busy". Should be accessed via css_get()
-	 * and css_put() */
+	/* State maintained by the cgroup system to allow subsystems
+	 * to be "busy". Should be accessed via css_get(),
+	 * css_tryget() and and css_put(). */
 
 	atomic_t refcnt;
 
@@ -64,11 +64,14 @@ struct cgroup_subsys_state {
 /* bits in struct cgroup_subsys_state flags field */
 enum {
 	CSS_ROOT, /* This CSS is the root of the subsystem */
+	CSS_REMOVED, /* This CSS is dead */
 };
 
 /*
- * Call css_get() to hold a reference on the cgroup;
- *
+ * Call css_get() to hold a reference on the css; it can be used
+ * for a reference obtained via:
+ * - an existing ref-counted reference to the css
+ * - task->cgroups for a locked task
  */
 
 static inline void css_get(struct cgroup_subsys_state *css)
@@ -77,9 +80,32 @@ static inline void css_get(struct cgroup
 	if (!test_bit(CSS_ROOT, &css->flags))
 		atomic_inc(&css->refcnt);
 }
+
+static inline bool css_is_removed(struct cgroup_subsys_state *css)
+{
+	return test_bit(CSS_REMOVED, &css->flags);
+}
+
+/*
+ * Call css_tryget() to take a reference on a css if your existing
+ * (known-valid) reference isn't already ref-counted. Returns false if
+ * the css has been destroyed.
+ */
+
+static inline bool css_tryget(struct cgroup_subsys_state *css)
+{
+	if (test_bit(CSS_ROOT, &css->flags))
+		return true;
+	while (!atomic_inc_not_zero(&css->refcnt)) {
+		if (test_bit(CSS_REMOVED, &css->flags))
+			return false;
+	}
+	return true;
+}
+
 /*
  * css_put() should be called to release a reference taken by
- * css_get()
+ * css_get() or css_tryget()
  */
 
 extern void __css_put(struct cgroup_subsys_state *css);
Index: hierarchy_lock-mmotm-2008-12-09/kernel/cgroup.c
===================================================================
--- hierarchy_lock-mmotm-2008-12-09.orig/kernel/cgroup.c
+++ hierarchy_lock-mmotm-2008-12-09/kernel/cgroup.c
@@ -2321,7 +2321,7 @@ static void init_cgroup_css(struct cgrou
 			       struct cgroup *cgrp)
 {
 	css->cgroup = cgrp;
-	atomic_set(&css->refcnt, 0);
+	atomic_set(&css->refcnt, 1);
 	css->flags = 0;
 	if (cgrp == dummytop)
 		set_bit(CSS_ROOT, &css->flags);
@@ -2453,7 +2453,7 @@ static int cgroup_has_css_refs(struct cg
 {
 	/* Check the reference count on each subsystem. Since we
 	 * already established that there are no tasks in the
-	 * cgroup, if the css refcount is also 0, then there should
+	 * cgroup, if the css refcount is also 1, then there should
 	 * be no outstanding references, so the subsystem is safe to
 	 * destroy. We scan across all subsystems rather than using
 	 * the per-hierarchy linked list of mounted subsystems since
@@ -2474,12 +2474,62 @@ static int cgroup_has_css_refs(struct cg
 		 * matter, since it can only happen if the cgroup
 		 * has been deleted and hence no longer needs the
 		 * release agent to be called anyway. */
-		if (css && atomic_read(&css->refcnt))
+		if (css && (atomic_read(&css->refcnt) > 1))
 			return 1;
 	}
 	return 0;
 }
 
+/*
+ * Atomically mark all (or else none) of the cgroup's CSS objects as
+ * CSS_REMOVED. Return true on success, or false if the cgroup has
+ * busy subsystems. Call with cgroup_mutex held
+ */
+
+static int cgroup_clear_css_refs(struct cgroup *cgrp)
+{
+	struct cgroup_subsys *ss;
+	unsigned long flags;
+	bool failed = false;
+	local_irq_save(flags);
+	for_each_subsys(cgrp->root, ss) {
+		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+		int refcnt;
+		do {
+			/* We can only remove a CSS with a refcnt==1 */
+			refcnt = atomic_read(&css->refcnt);
+			if (refcnt > 1) {
+				failed = true;
+				goto done;
+			}
+			BUG_ON(!refcnt);
+			/*
+			 * Drop the refcnt to 0 while we check other
+			 * subsystems. This will cause any racing
+			 * css_tryget() to spin until we set the
+			 * CSS_REMOVED bits or abort
+			 */
+		} while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt);
+	}
+ done:
+	for_each_subsys(cgrp->root, ss) {
+		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+		if (failed) {
+			/*
+			 * Restore old refcnt if we previously managed
+			 * to clear it from 1 to 0
+			 */
+			if (!atomic_read(&css->refcnt))
+				atomic_set(&css->refcnt, 1);
+		} else {
+			/* Commit the fact that the CSS is removed */
+			set_bit(CSS_REMOVED, &css->flags);
+		}
+	}
+	local_irq_restore(flags);
+	return !failed;
+}
+
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 {
 	struct cgroup *cgrp = dentry->d_fsdata;
@@ -2510,7 +2560,7 @@ static int cgroup_rmdir(struct inode *un
 
 	if (atomic_read(&cgrp->count)
 	    || !list_empty(&cgrp->children)
-	    || cgroup_has_css_refs(cgrp)) {
+	    || !cgroup_clear_css_refs(cgrp)) {
 		mutex_unlock(&cgroup_mutex);
 		return -EBUSY;
 	}
@@ -3065,7 +3115,8 @@ void __css_put(struct cgroup_subsys_stat
 {
 	struct cgroup *cgrp = css->cgroup;
 	rcu_read_lock();
-	if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) {
+	if ((atomic_dec_return(&css->refcnt) == 1) &&
+	    notify_on_release(cgrp)) {
 		set_bit(CGRP_RELEASABLE, &cgrp->flags);
 		check_for_release(cgrp);
 	}

--

WARNING: multiple messages have this Message-ID (diff)
From: menage@google.com
To: akpm@linux-foundation.org, kamezawa.hiroyu@jp.fujitsu.com,
	lizf@cn.fujitsu.com
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH 3/3] CGroups: Add css_tryget()
Date: Tue, 16 Dec 2008 03:30:58 -0800	[thread overview]
Message-ID: <20081216113653.252690000@menage.corp.google.com> (raw)
In-Reply-To: 20081216113055.713856000@menage.corp.google.com

[-- Attachment #1: cgroup_refcnt.patch --]
[-- Type: text/plain, Size: 6843 bytes --]

This patch adds css_tryget(), that obtains a counted reference on a
CSS.  It is used in situations where the caller has a "weak" reference
to the CSS, i.e. one that does not protect the cgroup from removal via
a reference count, but would instead be cleaned up by a destroy()
callback.

css_tryget() will return true on success, or false if the cgroup is
being removed.

This is similar to Kamezawa Hiroyuki's patch from a week or two ago,
but with the difference that in the event of css_tryget() racing with
a cgroup_rmdir(), css_tryget() will only return false if the cgroup
really does get removed.

This implementation is done by biasing css->refcnt, so that a refcnt
of 1 means "releasable" and 0 means "released or releasing". In the
event of a race, css_tryget() distinguishes between "released" and
"releasing" by checking for the CSS_REMOVED flag in css->flags.

Signed-off-by: Paul Menage <menage@google.com>

---
 include/linux/cgroup.h |   38 +++++++++++++++++++++++++-----
 kernel/cgroup.c        |   61 ++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 88 insertions(+), 11 deletions(-)

Index: hierarchy_lock-mmotm-2008-12-09/include/linux/cgroup.h
===================================================================
--- hierarchy_lock-mmotm-2008-12-09.orig/include/linux/cgroup.h
+++ hierarchy_lock-mmotm-2008-12-09/include/linux/cgroup.h
@@ -52,9 +52,9 @@ struct cgroup_subsys_state {
 	 * hierarchy structure */
 	struct cgroup *cgroup;
 
-	/* State maintained by the cgroup system to allow
-	 * subsystems to be "busy". Should be accessed via css_get()
-	 * and css_put() */
+	/* State maintained by the cgroup system to allow subsystems
+	 * to be "busy". Should be accessed via css_get(),
+	 * css_tryget() and and css_put(). */
 
 	atomic_t refcnt;
 
@@ -64,11 +64,14 @@ struct cgroup_subsys_state {
 /* bits in struct cgroup_subsys_state flags field */
 enum {
 	CSS_ROOT, /* This CSS is the root of the subsystem */
+	CSS_REMOVED, /* This CSS is dead */
 };
 
 /*
- * Call css_get() to hold a reference on the cgroup;
- *
+ * Call css_get() to hold a reference on the css; it can be used
+ * for a reference obtained via:
+ * - an existing ref-counted reference to the css
+ * - task->cgroups for a locked task
  */
 
 static inline void css_get(struct cgroup_subsys_state *css)
@@ -77,9 +80,32 @@ static inline void css_get(struct cgroup
 	if (!test_bit(CSS_ROOT, &css->flags))
 		atomic_inc(&css->refcnt);
 }
+
+static inline bool css_is_removed(struct cgroup_subsys_state *css)
+{
+	return test_bit(CSS_REMOVED, &css->flags);
+}
+
+/*
+ * Call css_tryget() to take a reference on a css if your existing
+ * (known-valid) reference isn't already ref-counted. Returns false if
+ * the css has been destroyed.
+ */
+
+static inline bool css_tryget(struct cgroup_subsys_state *css)
+{
+	if (test_bit(CSS_ROOT, &css->flags))
+		return true;
+	while (!atomic_inc_not_zero(&css->refcnt)) {
+		if (test_bit(CSS_REMOVED, &css->flags))
+			return false;
+	}
+	return true;
+}
+
 /*
  * css_put() should be called to release a reference taken by
- * css_get()
+ * css_get() or css_tryget()
  */
 
 extern void __css_put(struct cgroup_subsys_state *css);
Index: hierarchy_lock-mmotm-2008-12-09/kernel/cgroup.c
===================================================================
--- hierarchy_lock-mmotm-2008-12-09.orig/kernel/cgroup.c
+++ hierarchy_lock-mmotm-2008-12-09/kernel/cgroup.c
@@ -2321,7 +2321,7 @@ static void init_cgroup_css(struct cgrou
 			       struct cgroup *cgrp)
 {
 	css->cgroup = cgrp;
-	atomic_set(&css->refcnt, 0);
+	atomic_set(&css->refcnt, 1);
 	css->flags = 0;
 	if (cgrp == dummytop)
 		set_bit(CSS_ROOT, &css->flags);
@@ -2453,7 +2453,7 @@ static int cgroup_has_css_refs(struct cg
 {
 	/* Check the reference count on each subsystem. Since we
 	 * already established that there are no tasks in the
-	 * cgroup, if the css refcount is also 0, then there should
+	 * cgroup, if the css refcount is also 1, then there should
 	 * be no outstanding references, so the subsystem is safe to
 	 * destroy. We scan across all subsystems rather than using
 	 * the per-hierarchy linked list of mounted subsystems since
@@ -2474,12 +2474,62 @@ static int cgroup_has_css_refs(struct cg
 		 * matter, since it can only happen if the cgroup
 		 * has been deleted and hence no longer needs the
 		 * release agent to be called anyway. */
-		if (css && atomic_read(&css->refcnt))
+		if (css && (atomic_read(&css->refcnt) > 1))
 			return 1;
 	}
 	return 0;
 }
 
+/*
+ * Atomically mark all (or else none) of the cgroup's CSS objects as
+ * CSS_REMOVED. Return true on success, or false if the cgroup has
+ * busy subsystems. Call with cgroup_mutex held
+ */
+
+static int cgroup_clear_css_refs(struct cgroup *cgrp)
+{
+	struct cgroup_subsys *ss;
+	unsigned long flags;
+	bool failed = false;
+	local_irq_save(flags);
+	for_each_subsys(cgrp->root, ss) {
+		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+		int refcnt;
+		do {
+			/* We can only remove a CSS with a refcnt==1 */
+			refcnt = atomic_read(&css->refcnt);
+			if (refcnt > 1) {
+				failed = true;
+				goto done;
+			}
+			BUG_ON(!refcnt);
+			/*
+			 * Drop the refcnt to 0 while we check other
+			 * subsystems. This will cause any racing
+			 * css_tryget() to spin until we set the
+			 * CSS_REMOVED bits or abort
+			 */
+		} while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt);
+	}
+ done:
+	for_each_subsys(cgrp->root, ss) {
+		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+		if (failed) {
+			/*
+			 * Restore old refcnt if we previously managed
+			 * to clear it from 1 to 0
+			 */
+			if (!atomic_read(&css->refcnt))
+				atomic_set(&css->refcnt, 1);
+		} else {
+			/* Commit the fact that the CSS is removed */
+			set_bit(CSS_REMOVED, &css->flags);
+		}
+	}
+	local_irq_restore(flags);
+	return !failed;
+}
+
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 {
 	struct cgroup *cgrp = dentry->d_fsdata;
@@ -2510,7 +2560,7 @@ static int cgroup_rmdir(struct inode *un
 
 	if (atomic_read(&cgrp->count)
 	    || !list_empty(&cgrp->children)
-	    || cgroup_has_css_refs(cgrp)) {
+	    || !cgroup_clear_css_refs(cgrp)) {
 		mutex_unlock(&cgroup_mutex);
 		return -EBUSY;
 	}
@@ -3065,7 +3115,8 @@ void __css_put(struct cgroup_subsys_stat
 {
 	struct cgroup *cgrp = css->cgroup;
 	rcu_read_lock();
-	if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) {
+	if ((atomic_dec_return(&css->refcnt) == 1) &&
+	    notify_on_release(cgrp)) {
 		set_bit(CGRP_RELEASABLE, &cgrp->flags);
 		check_for_release(cgrp);
 	}

--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2008-12-16 11:38 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-12-16 11:30 [PATCH 0/3] CGroups: Hierarchy locking/refcount changes menage
2008-12-16 11:30 ` menage
2008-12-16 11:30 ` [PATCH 1/3] CGroups: Add a per-subsystem hierarchy_mutex menage
2008-12-16 11:30   ` menage
2008-12-17  5:16   ` KAMEZAWA Hiroyuki
2008-12-17  5:16     ` KAMEZAWA Hiroyuki
2008-12-16 11:30 ` [PATCH 2/3] CGroups: Use hierarchy_mutex in memory controller menage
2008-12-16 11:30   ` menage
2008-12-17  5:17   ` KAMEZAWA Hiroyuki
2008-12-17  5:17     ` KAMEZAWA Hiroyuki
2008-12-16 11:30 ` menage [this message]
2008-12-16 11:30   ` [PATCH 3/3] CGroups: Add css_tryget() menage
2008-12-17  5:19   ` KAMEZAWA Hiroyuki
2008-12-17  5:19     ` KAMEZAWA Hiroyuki
2008-12-17 22:07   ` Andrew Morton
2008-12-17 22:07     ` Andrew Morton
2008-12-17 22:48     ` Paul Menage
2008-12-17 22:48       ` Paul Menage
2008-12-17 22:03 ` [PATCH 0/3] CGroups: Hierarchy locking/refcount changes Andrew Morton
2008-12-17 22:03   ` Andrew Morton
2008-12-19  2:06   ` Paul Menage
2008-12-19  2:06     ` Paul Menage

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20081216113653.252690000@menage.corp.google.com \
    --to=menage@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lizf@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.