All of lore.kernel.org
 help / color / mirror / Atom feed
From: Colin Cross <ccross-z5hGa2qSFaRBDgjK7y7TUQ@public.gmane.org>
Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org,
	Colin Cross <ccross-z5hGa2qSFaRBDgjK7y7TUQ@public.gmane.org>
Subject: [PATCH] cgroup: Remove call to synchronize_rcu in cgroup_attach_task
Date: Tue, 23 Nov 2010 17:43:38 -0800	[thread overview]
Message-ID: <1290563018-2804-1-git-send-email-ccross@android.com> (raw)
In-Reply-To: <AANLkTi=4-OgPUugnUBaqSU3oC=3wxTjAsOB_Ais3Or+i-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

synchronize_rcu can be very expensive, averaging 100 ms in
some cases.  In cgroup_attach_task, it is used to prevent
a task->cgroups pointer dereferenced in an RCU read side
critical section from being invalidated by delaying the call
to put_css_set until after an RCU grace period.

To avoid the call to synchronize_rcu, make the put_css_set
call rcu-safe by moving the deletion of the css_set links
into rcu-protected free_css_set_rcu.

The calls to check_for_release in free_css_set_rcu now occur
in softirq context, so convert all uses of the
release_list_lock spinlock to irq safe versions.

The decrement of the cgroup refcount is no longer
synchronous with the call to put_css_set, which can result
in the cgroup refcount staying positive after the last call
to cgroup_attach_task returns.  To allow the cgroup to be
deleted with cgroup_rmdir synchronously after
cgroup_attach_task, introduce a second refcount,
rmdir_count, that is decremented synchronously in
put_css_set.  If cgroup_rmdir is called on a cgroup for
hich rmdir_count is zero but count is nonzero, reuse the
rmdir waitqueue to block the rmdir until the rcu callback
is called.

Signed-off-by: Colin Cross <ccross-z5hGa2qSFaRBDgjK7y7TUQ@public.gmane.org>
---

This patch is similar to what you described.  The main differences are
that I used a new atomic to handle the rmdir case, and I converted
check_for_release to be callable in softirq context rather than schedule
work in free_css_set_rcu.  Your css_set scanning in rmdir sounds better,
I'll take another look at that.  Is there any problem with disabling irqs
with spin_lock_irqsave in check_for_release?

 include/linux/cgroup.h |    6 ++
 kernel/cgroup.c        |  124 ++++++++++++++++++++++++++++--------------------
 2 files changed, 78 insertions(+), 52 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ed4ba11..3b6e73d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -202,6 +202,12 @@ struct cgroup {
 	atomic_t count;
 
 	/*
+	 * separate refcount for rmdir on a cgroup.  When rmdir_count is 0,
+	 * rmdir should block until count is 0.
+	 */
+	atomic_t rmdir_count;
+
+	/*
 	 * We link our 'sibling' struct into our parent's 'children'.
 	 * Our children link their 'sibling' into our 'children'.
 	 */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 66a416b..fa3c0ac 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -267,6 +267,33 @@ static void cgroup_release_agent(struct work_struct *work);
 static DECLARE_WORK(release_agent_work, cgroup_release_agent);
 static void check_for_release(struct cgroup *cgrp);
 
+/*
+ * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
+ * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
+ * reference to css->refcnt. In general, this refcnt is expected to goes down
+ * to zero, soon.
+ *
+ * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
+ */
+DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
+
+static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
+{
+	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
+		wake_up_all(&cgroup_rmdir_waitq);
+}
+
+void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
+{
+	css_get(css);
+}
+
+void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
+{
+	cgroup_wakeup_rmdir_waiter(css->cgroup);
+	css_put(css);
+}
+
 /* Link structure for associating css_set objects with cgroups */
 struct cg_cgroup_link {
 	/*
@@ -329,6 +356,22 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
 static void free_css_set_rcu(struct rcu_head *obj)
 {
 	struct css_set *cg = container_of(obj, struct css_set, rcu_head);
+	struct cg_cgroup_link *link;
+	struct cg_cgroup_link *saved_link;
+
+	/* Nothing else can have a reference to cg, no need for css_set_lock */
+	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
+				 cg_link_list) {
+		struct cgroup *cgrp = link->cgrp;
+		list_del(&link->cg_link_list);
+		list_del(&link->cgrp_link_list);
+		if (atomic_dec_and_test(&cgrp->count)) {
+			check_for_release(cgrp);
+			cgroup_wakeup_rmdir_waiter(cgrp);
+		}
+		kfree(link);
+	}
+
 	kfree(cg);
 }
 
@@ -355,23 +398,20 @@ static void __put_css_set(struct css_set *cg, int taskexit)
 		return;
 	}
 
-	/* This css_set is dead. unlink it and release cgroup refcounts */
 	hlist_del(&cg->hlist);
 	css_set_count--;
 
+	/* This css_set is now unreachable from the css_set_table, but RCU
+	 * read-side critical sections may still have a reference to it.
+	 * Decrement the cgroup rmdir_count so that rmdir's on an empty
+	 * cgroup can block until the free_css_set_rcu callback */
 	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
 				 cg_link_list) {
 		struct cgroup *cgrp = link->cgrp;
-		list_del(&link->cg_link_list);
-		list_del(&link->cgrp_link_list);
-		if (atomic_dec_and_test(&cgrp->count) &&
-		    notify_on_release(cgrp)) {
-			if (taskexit)
-				set_bit(CGRP_RELEASABLE, &cgrp->flags);
-			check_for_release(cgrp);
-		}
-
-		kfree(link);
+		if (taskexit)
+			set_bit(CGRP_RELEASABLE, &cgrp->flags);
+		atomic_dec(&cgrp->rmdir_count);
+		smp_mb();
 	}
 
 	write_unlock(&css_set_lock);
@@ -571,6 +611,8 @@ static void link_css_set(struct list_head *tmp_cg_links,
 				cgrp_link_list);
 	link->cg = cg;
 	link->cgrp = cgrp;
+	atomic_inc(&cgrp->rmdir_count);
+	smp_mb(); /* make sure rmdir_count increments first */
 	atomic_inc(&cgrp->count);
 	list_move(&link->cgrp_link_list, &cgrp->css_sets);
 	/*
@@ -725,9 +767,9 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
  * cgroup_attach_task(), which overwrites one tasks cgroup pointer with
  * another.  It does so using cgroup_mutex, however there are
  * several performance critical places that need to reference
- * task->cgroup without the expense of grabbing a system global
+ * task->cgroups without the expense of grabbing a system global
  * mutex.  Therefore except as noted below, when dereferencing or, as
- * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use
+ * in cgroup_attach_task(), modifying a task's cgroups pointer we use
  * task_lock(), which acts on a spinlock (task->alloc_lock) already in
  * the task_struct routinely used for such matters.
  *
@@ -909,33 +951,6 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
 }
 
 /*
- * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
- * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
- * reference to css->refcnt. In general, this refcnt is expected to goes down
- * to zero, soon.
- *
- * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
- */
-DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
-
-static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
-{
-	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
-		wake_up_all(&cgroup_rmdir_waitq);
-}
-
-void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
-{
-	css_get(css);
-}
-
-void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
-{
-	cgroup_wakeup_rmdir_waiter(css->cgroup);
-	css_put(css);
-}
-
-/*
  * Call with cgroup_mutex held. Drops reference counts on modules, including
  * any duplicate ones that parse_cgroupfs_options took. If this function
  * returns an error, no reference counts are touched.
@@ -1802,7 +1817,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 			ss->attach(ss, cgrp, oldcgrp, tsk, false);
 	}
 	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
-	synchronize_rcu();
+	/* put_css_set will not destroy cg until after an RCU grace period */
 	put_css_set(cg);
 
 	/*
@@ -3566,11 +3581,12 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	DEFINE_WAIT(wait);
 	struct cgroup_event *event, *tmp;
 	int ret;
+	unsigned long flags;
 
 	/* the vfs holds both inode->i_mutex already */
 again:
 	mutex_lock(&cgroup_mutex);
-	if (atomic_read(&cgrp->count) != 0) {
+	if (atomic_read(&cgrp->rmdir_count) != 0) {
 		mutex_unlock(&cgroup_mutex);
 		return -EBUSY;
 	}
@@ -3603,13 +3619,13 @@ again:
 
 	mutex_lock(&cgroup_mutex);
 	parent = cgrp->parent;
-	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
+	if (atomic_read(&cgrp->rmdir_count) || !list_empty(&cgrp->children)) {
 		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 		mutex_unlock(&cgroup_mutex);
 		return -EBUSY;
 	}
 	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
-	if (!cgroup_clear_css_refs(cgrp)) {
+	if (atomic_read(&cgrp->count) != 0 || !cgroup_clear_css_refs(cgrp)) {
 		mutex_unlock(&cgroup_mutex);
 		/*
 		 * Because someone may call cgroup_wakeup_rmdir_waiter() before
@@ -3627,11 +3643,11 @@ again:
 	finish_wait(&cgroup_rmdir_waitq, &wait);
 	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 
-	spin_lock(&release_list_lock);
+	spin_lock_irqsave(&release_list_lock, flags);
 	set_bit(CGRP_REMOVED, &cgrp->flags);
 	if (!list_empty(&cgrp->release_list))
 		list_del(&cgrp->release_list);
-	spin_unlock(&release_list_lock);
+	spin_unlock_irqrestore(&release_list_lock, flags);
 
 	cgroup_lock_hierarchy(cgrp->root);
 	/* delete this cgroup from parent->children */
@@ -4389,6 +4405,8 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
 
 static void check_for_release(struct cgroup *cgrp)
 {
+	unsigned long flags;
+
 	/* All of these checks rely on RCU to keep the cgroup
 	 * structure alive */
 	if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
@@ -4397,13 +4415,13 @@ static void check_for_release(struct cgroup *cgrp)
 		 * already queued for a userspace notification, queue
 		 * it now */
 		int need_schedule_work = 0;
-		spin_lock(&release_list_lock);
+		spin_lock_irqsave(&release_list_lock, flags);
 		if (!cgroup_is_removed(cgrp) &&
 		    list_empty(&cgrp->release_list)) {
 			list_add(&cgrp->release_list, &release_list);
 			need_schedule_work = 1;
 		}
-		spin_unlock(&release_list_lock);
+		spin_unlock_irqrestore(&release_list_lock, flags);
 		if (need_schedule_work)
 			schedule_work(&release_agent_work);
 	}
@@ -4453,9 +4471,11 @@ EXPORT_SYMBOL_GPL(__css_put);
  */
 static void cgroup_release_agent(struct work_struct *work)
 {
+	unsigned long flags;
+
 	BUG_ON(work != &release_agent_work);
 	mutex_lock(&cgroup_mutex);
-	spin_lock(&release_list_lock);
+	spin_lock_irqsave(&release_list_lock, flags);
 	while (!list_empty(&release_list)) {
 		char *argv[3], *envp[3];
 		int i;
@@ -4464,7 +4484,7 @@ static void cgroup_release_agent(struct work_struct *work)
 						    struct cgroup,
 						    release_list);
 		list_del_init(&cgrp->release_list);
-		spin_unlock(&release_list_lock);
+		spin_unlock_irqrestore(&release_list_lock, flags);
 		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
 		if (!pathbuf)
 			goto continue_free;
@@ -4494,9 +4514,9 @@ static void cgroup_release_agent(struct work_struct *work)
  continue_free:
 		kfree(pathbuf);
 		kfree(agentbuf);
-		spin_lock(&release_list_lock);
+		spin_lock_irqsave(&release_list_lock, flags);
 	}
-	spin_unlock(&release_list_lock);
+	spin_unlock_irqrestore(&release_list_lock, flags);
 	mutex_unlock(&cgroup_mutex);
 }
 
-- 
1.7.3.1

WARNING: multiple messages have this Message-ID (diff)
From: Colin Cross <ccross@android.com>
To: Paul Menage <menage@google.com>
Cc: Colin Cross <ccross@android.com>, Paul Menage <menage@google.com>,
	Li Zefan <lizf@cn.fujitsu.com>,
	containers@lists.linux-foundation.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH] cgroup: Remove call to synchronize_rcu in cgroup_attach_task
Date: Tue, 23 Nov 2010 17:43:38 -0800	[thread overview]
Message-ID: <1290563018-2804-1-git-send-email-ccross@android.com> (raw)
In-Reply-To: <AANLkTi=4-OgPUugnUBaqSU3oC=3wxTjAsOB_Ais3Or+i@mail.gmail.com>

synchronize_rcu can be very expensive, averaging 100 ms in
some cases.  In cgroup_attach_task, it is used to prevent
a task->cgroups pointer dereferenced in an RCU read side
critical section from being invalidated by delaying the call
to put_css_set until after an RCU grace period.

To avoid the call to synchronize_rcu, make the put_css_set
call rcu-safe by moving the deletion of the css_set links
into rcu-protected free_css_set_rcu.

The calls to check_for_release in free_css_set_rcu now occur
in softirq context, so convert all uses of the
release_list_lock spinlock to irq safe versions.

The decrement of the cgroup refcount is no longer
synchronous with the call to put_css_set, which can result
in the cgroup refcount staying positive after the last call
to cgroup_attach_task returns.  To allow the cgroup to be
deleted with cgroup_rmdir synchronously after
cgroup_attach_task, introduce a second refcount,
rmdir_count, that is decremented synchronously in
put_css_set.  If cgroup_rmdir is called on a cgroup for
hich rmdir_count is zero but count is nonzero, reuse the
rmdir waitqueue to block the rmdir until the rcu callback
is called.

Signed-off-by: Colin Cross <ccross@android.com>
---

This patch is similar to what you described.  The main differences are
that I used a new atomic to handle the rmdir case, and I converted
check_for_release to be callable in softirq context rather than schedule
work in free_css_set_rcu.  Your css_set scanning in rmdir sounds better,
I'll take another look at that.  Is there any problem with disabling irqs
with spin_lock_irqsave in check_for_release?

 include/linux/cgroup.h |    6 ++
 kernel/cgroup.c        |  124 ++++++++++++++++++++++++++++--------------------
 2 files changed, 78 insertions(+), 52 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ed4ba11..3b6e73d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -202,6 +202,12 @@ struct cgroup {
 	atomic_t count;
 
 	/*
+	 * separate refcount for rmdir on a cgroup.  When rmdir_count is 0,
+	 * rmdir should block until count is 0.
+	 */
+	atomic_t rmdir_count;
+
+	/*
 	 * We link our 'sibling' struct into our parent's 'children'.
 	 * Our children link their 'sibling' into our 'children'.
 	 */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 66a416b..fa3c0ac 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -267,6 +267,33 @@ static void cgroup_release_agent(struct work_struct *work);
 static DECLARE_WORK(release_agent_work, cgroup_release_agent);
 static void check_for_release(struct cgroup *cgrp);
 
+/*
+ * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
+ * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
+ * reference to css->refcnt. In general, this refcnt is expected to goes down
+ * to zero, soon.
+ *
+ * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
+ */
+DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
+
+static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
+{
+	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
+		wake_up_all(&cgroup_rmdir_waitq);
+}
+
+void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
+{
+	css_get(css);
+}
+
+void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
+{
+	cgroup_wakeup_rmdir_waiter(css->cgroup);
+	css_put(css);
+}
+
 /* Link structure for associating css_set objects with cgroups */
 struct cg_cgroup_link {
 	/*
@@ -329,6 +356,22 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
 static void free_css_set_rcu(struct rcu_head *obj)
 {
 	struct css_set *cg = container_of(obj, struct css_set, rcu_head);
+	struct cg_cgroup_link *link;
+	struct cg_cgroup_link *saved_link;
+
+	/* Nothing else can have a reference to cg, no need for css_set_lock */
+	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
+				 cg_link_list) {
+		struct cgroup *cgrp = link->cgrp;
+		list_del(&link->cg_link_list);
+		list_del(&link->cgrp_link_list);
+		if (atomic_dec_and_test(&cgrp->count)) {
+			check_for_release(cgrp);
+			cgroup_wakeup_rmdir_waiter(cgrp);
+		}
+		kfree(link);
+	}
+
 	kfree(cg);
 }
 
@@ -355,23 +398,20 @@ static void __put_css_set(struct css_set *cg, int taskexit)
 		return;
 	}
 
-	/* This css_set is dead. unlink it and release cgroup refcounts */
 	hlist_del(&cg->hlist);
 	css_set_count--;
 
+	/* This css_set is now unreachable from the css_set_table, but RCU
+	 * read-side critical sections may still have a reference to it.
+	 * Decrement the cgroup rmdir_count so that rmdir's on an empty
+	 * cgroup can block until the free_css_set_rcu callback */
 	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
 				 cg_link_list) {
 		struct cgroup *cgrp = link->cgrp;
-		list_del(&link->cg_link_list);
-		list_del(&link->cgrp_link_list);
-		if (atomic_dec_and_test(&cgrp->count) &&
-		    notify_on_release(cgrp)) {
-			if (taskexit)
-				set_bit(CGRP_RELEASABLE, &cgrp->flags);
-			check_for_release(cgrp);
-		}
-
-		kfree(link);
+		if (taskexit)
+			set_bit(CGRP_RELEASABLE, &cgrp->flags);
+		atomic_dec(&cgrp->rmdir_count);
+		smp_mb();
 	}
 
 	write_unlock(&css_set_lock);
@@ -571,6 +611,8 @@ static void link_css_set(struct list_head *tmp_cg_links,
 				cgrp_link_list);
 	link->cg = cg;
 	link->cgrp = cgrp;
+	atomic_inc(&cgrp->rmdir_count);
+	smp_mb(); /* make sure rmdir_count increments first */
 	atomic_inc(&cgrp->count);
 	list_move(&link->cgrp_link_list, &cgrp->css_sets);
 	/*
@@ -725,9 +767,9 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
  * cgroup_attach_task(), which overwrites one tasks cgroup pointer with
  * another.  It does so using cgroup_mutex, however there are
  * several performance critical places that need to reference
- * task->cgroup without the expense of grabbing a system global
+ * task->cgroups without the expense of grabbing a system global
  * mutex.  Therefore except as noted below, when dereferencing or, as
- * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use
+ * in cgroup_attach_task(), modifying a task's cgroups pointer we use
  * task_lock(), which acts on a spinlock (task->alloc_lock) already in
  * the task_struct routinely used for such matters.
  *
@@ -909,33 +951,6 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
 }
 
 /*
- * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
- * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
- * reference to css->refcnt. In general, this refcnt is expected to goes down
- * to zero, soon.
- *
- * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
- */
-DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
-
-static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
-{
-	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
-		wake_up_all(&cgroup_rmdir_waitq);
-}
-
-void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
-{
-	css_get(css);
-}
-
-void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
-{
-	cgroup_wakeup_rmdir_waiter(css->cgroup);
-	css_put(css);
-}
-
-/*
  * Call with cgroup_mutex held. Drops reference counts on modules, including
  * any duplicate ones that parse_cgroupfs_options took. If this function
  * returns an error, no reference counts are touched.
@@ -1802,7 +1817,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 			ss->attach(ss, cgrp, oldcgrp, tsk, false);
 	}
 	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
-	synchronize_rcu();
+	/* put_css_set will not destroy cg until after an RCU grace period */
 	put_css_set(cg);
 
 	/*
@@ -3566,11 +3581,12 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	DEFINE_WAIT(wait);
 	struct cgroup_event *event, *tmp;
 	int ret;
+	unsigned long flags;
 
 	/* the vfs holds both inode->i_mutex already */
 again:
 	mutex_lock(&cgroup_mutex);
-	if (atomic_read(&cgrp->count) != 0) {
+	if (atomic_read(&cgrp->rmdir_count) != 0) {
 		mutex_unlock(&cgroup_mutex);
 		return -EBUSY;
 	}
@@ -3603,13 +3619,13 @@ again:
 
 	mutex_lock(&cgroup_mutex);
 	parent = cgrp->parent;
-	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
+	if (atomic_read(&cgrp->rmdir_count) || !list_empty(&cgrp->children)) {
 		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 		mutex_unlock(&cgroup_mutex);
 		return -EBUSY;
 	}
 	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
-	if (!cgroup_clear_css_refs(cgrp)) {
+	if (atomic_read(&cgrp->count) != 0 || !cgroup_clear_css_refs(cgrp)) {
 		mutex_unlock(&cgroup_mutex);
 		/*
 		 * Because someone may call cgroup_wakeup_rmdir_waiter() before
@@ -3627,11 +3643,11 @@ again:
 	finish_wait(&cgroup_rmdir_waitq, &wait);
 	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 
-	spin_lock(&release_list_lock);
+	spin_lock_irqsave(&release_list_lock, flags);
 	set_bit(CGRP_REMOVED, &cgrp->flags);
 	if (!list_empty(&cgrp->release_list))
 		list_del(&cgrp->release_list);
-	spin_unlock(&release_list_lock);
+	spin_unlock_irqrestore(&release_list_lock, flags);
 
 	cgroup_lock_hierarchy(cgrp->root);
 	/* delete this cgroup from parent->children */
@@ -4389,6 +4405,8 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
 
 static void check_for_release(struct cgroup *cgrp)
 {
+	unsigned long flags;
+
 	/* All of these checks rely on RCU to keep the cgroup
 	 * structure alive */
 	if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
@@ -4397,13 +4415,13 @@ static void check_for_release(struct cgroup *cgrp)
 		 * already queued for a userspace notification, queue
 		 * it now */
 		int need_schedule_work = 0;
-		spin_lock(&release_list_lock);
+		spin_lock_irqsave(&release_list_lock, flags);
 		if (!cgroup_is_removed(cgrp) &&
 		    list_empty(&cgrp->release_list)) {
 			list_add(&cgrp->release_list, &release_list);
 			need_schedule_work = 1;
 		}
-		spin_unlock(&release_list_lock);
+		spin_unlock_irqrestore(&release_list_lock, flags);
 		if (need_schedule_work)
 			schedule_work(&release_agent_work);
 	}
@@ -4453,9 +4471,11 @@ EXPORT_SYMBOL_GPL(__css_put);
  */
 static void cgroup_release_agent(struct work_struct *work)
 {
+	unsigned long flags;
+
 	BUG_ON(work != &release_agent_work);
 	mutex_lock(&cgroup_mutex);
-	spin_lock(&release_list_lock);
+	spin_lock_irqsave(&release_list_lock, flags);
 	while (!list_empty(&release_list)) {
 		char *argv[3], *envp[3];
 		int i;
@@ -4464,7 +4484,7 @@ static void cgroup_release_agent(struct work_struct *work)
 						    struct cgroup,
 						    release_list);
 		list_del_init(&cgrp->release_list);
-		spin_unlock(&release_list_lock);
+		spin_unlock_irqrestore(&release_list_lock, flags);
 		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
 		if (!pathbuf)
 			goto continue_free;
@@ -4494,9 +4514,9 @@ static void cgroup_release_agent(struct work_struct *work)
  continue_free:
 		kfree(pathbuf);
 		kfree(agentbuf);
-		spin_lock(&release_list_lock);
+		spin_lock_irqsave(&release_list_lock, flags);
 	}
-	spin_unlock(&release_list_lock);
+	spin_unlock_irqrestore(&release_list_lock, flags);
 	mutex_unlock(&cgroup_mutex);
 }
 
-- 
1.7.3.1


  parent reply	other threads:[~2010-11-24  1:43 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-11-21  2:00 [PATCH] cgroup: Remove RCU from task->cgroups Colin Cross
     [not found] ` <1290304824-22722-1-git-send-email-ccross-z5hGa2qSFaRBDgjK7y7TUQ@public.gmane.org>
2010-11-21 23:02   ` Colin Cross
2010-11-21 23:02 ` Colin Cross
     [not found]   ` <AANLkTikx6d0_VFtZ4zWQucRCf=vFt7N2M6=0jpnKasEE-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-11-22  4:06     ` [PATCH] cgroup: Convert synchronize_rcu to call_rcu in cgroup_attach_task Colin Cross
2010-11-22  4:06   ` Colin Cross
     [not found]     ` <1290398767-15230-1-git-send-email-ccross-z5hGa2qSFaRBDgjK7y7TUQ@public.gmane.org>
2010-11-23  8:14       ` Li Zefan
2010-11-24  1:24       ` Paul Menage
2010-11-23  8:14     ` Li Zefan
2010-11-23  8:58       ` Colin Cross
2010-11-23 20:22         ` Colin Cross
     [not found]         ` <AANLkTimjpW6NZ6fEiVi0VzjkpQGVob4=VHsohXUiDQkJ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-11-23 20:22           ` Colin Cross
     [not found]       ` <4CEB77E0.10202-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2010-11-23  8:58         ` Colin Cross
2010-11-24  1:24     ` Paul Menage
2010-11-24  2:06       ` Li Zefan
2010-11-24  2:10         ` Colin Cross
2010-11-24  5:37           ` [PATCH 1/2] cgroup: Set CGRP_RELEASABLE when adding to a cgroup Colin Cross
2010-11-24 23:54             ` Paul Menage
     [not found]               ` <AANLkTimFqJ+qPidS_81DKd7ExSxDG7GNi0gjcUEEq_7j-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-11-25  0:11                 ` Colin Cross
2010-11-25  0:11                   ` Colin Cross
2010-11-25  0:18                   ` Colin Cross
     [not found]                   ` <AANLkTimwvP2Ey1gJ6AbbFNtDKjGZt4cwqL=08nGBa_PT-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-11-25  0:18                     ` Colin Cross
2010-11-25  0:21                     ` Paul Menage
2010-11-25  0:21                   ` Paul Menage
     [not found]                     ` <AANLkTimJA52-GTM=AzS+tkOugrsi6Keh0_j87vK1BkGv-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-12-03  3:07                       ` Colin Cross
2010-12-03  3:07                     ` Colin Cross
2010-12-17  0:54                       ` Paul Menage
     [not found]                         ` <AANLkTinZarXbEyb1xfJWjG4gN2qhTVTXTdso4Cym5M9T-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-12-17  1:12                           ` Colin Cross
2010-12-17  1:12                         ` Colin Cross
     [not found]                       ` <AANLkTim67fLN+PYz-P0TM0QRmvQKP80tyXSNKNSZhFZ2-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-12-17  0:54                         ` Paul Menage
2011-01-28  1:17             ` Bryan Huntsman
2011-01-28  1:30               ` Paul Menage
     [not found]                 ` <AANLkTin7B51maXHRH+FNmZ14bmWmEp9P2=2QTNqgq_Fi-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2011-01-28  1:48                   ` Michael Bohan
2011-01-28  1:48                 ` Michael Bohan
     [not found]               ` <4D42192C.9000701-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org>
2011-01-28  1:30                 ` Paul Menage
     [not found]             ` <1290577024-12347-1-git-send-email-ccross-z5hGa2qSFaRBDgjK7y7TUQ@public.gmane.org>
2010-11-24 23:54               ` Paul Menage
2011-01-28  1:17               ` Bryan Huntsman
2010-11-24  5:37           ` [PATCH 2/2] cgroup: Remove call to synchronize_rcu in cgroup_attach_task Colin Cross
     [not found]             ` <1290577024-12347-2-git-send-email-ccross-z5hGa2qSFaRBDgjK7y7TUQ@public.gmane.org>
2011-01-28  1:17               ` Bryan Huntsman
2011-01-28  1:17             ` Bryan Huntsman
     [not found]           ` <AANLkTi=6nwDCdzDz7E2EaAw2pf3KUVjmKMRqGfz5zVhP-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-11-24  5:37             ` [PATCH 1/2] cgroup: Set CGRP_RELEASABLE when adding to a cgroup Colin Cross
2010-11-24  5:37             ` [PATCH 2/2] cgroup: Remove call to synchronize_rcu in cgroup_attach_task Colin Cross
2010-11-24 18:58         ` [PATCH] cgroup: Convert synchronize_rcu to call_rcu " Paul Menage
     [not found]         ` <4CEC7329.7070909-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2010-11-24  2:10           ` Colin Cross
2010-11-24 18:58           ` Paul Menage
     [not found]       ` <AANLkTi=4-OgPUugnUBaqSU3oC=3wxTjAsOB_Ais3Or+i-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-11-24  1:43         ` Colin Cross [this message]
2010-11-24  1:43           ` [PATCH] cgroup: Remove call to synchronize_rcu " Colin Cross
2010-11-24  2:29           ` Colin Cross
2011-01-22  1:17           ` Bryan Huntsman
2011-01-22  2:04             ` Colin Cross
     [not found]             ` <4D3A3024.9040402-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org>
2011-01-22  2:04               ` Colin Cross
2011-01-28  1:17           ` Bryan Huntsman
     [not found]           ` <1290563018-2804-1-git-send-email-ccross-z5hGa2qSFaRBDgjK7y7TUQ@public.gmane.org>
2010-11-24  2:29             ` Colin Cross
2011-01-22  1:17             ` Bryan Huntsman
2011-01-28  1:17             ` Bryan Huntsman
2010-11-24  2:06         ` [PATCH] cgroup: Convert synchronize_rcu to call_rcu " Li Zefan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1290563018-2804-1-git-send-email-ccross@android.com \
    --to=ccross-z5hga2qsfarbdgjk7y7tuq@public.gmane.org \
    --cc=containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org \
    --cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.