public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Andrea Righi <righi.andrea@gmail.com>
To: Mike Galbraith <efault@gmx.de>
Cc: Alexey Vlasov <renton@renton.name>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	linux-kernel@vger.kernel.org
Subject: Re: Attaching a process to cgroups
Date: Mon, 23 Jul 2012 22:41:59 +0200	[thread overview]
Message-ID: <20120723204159.GB6379@thinkpad> (raw)
In-Reply-To: <1340266982.29752.37.camel@marge.simpson.net>

On Thu, Jun 21, 2012 at 10:23:02AM +0200, Mike Galbraith wrote:
> On Thu, 2012-06-21 at 11:54 +0400, Alexey Vlasov wrote: 
> > On Wed, Jun 20, 2012 at 02:28:18PM +0200, Mike Galbraith wrote:
> > > 
> > > kernel/cgroup.c::cgroup_attach_task()
> > > {
> > > ...
> > > 	synchronize_rcu();
> > > ...
> > > }
> > 
> > So nothing can be done here? (I mean if only I knew how to fix it I
> > wouldn't ask about it ;)
> 
> Sure, kill the obnoxious thing, it's sitting right in the middle of the
> userspace interface.
> 
> I banged on it a while back (wrt explosive android patches), extracted
> RCU from the userspace interface.  It seemed to work great, much faster,
> couldn't make it explode.  I wouldn't bet anything I wasn't willing to
> immediately part with that the result was really really safe though ;-)
> 
> -Mike

JFYI,

I'm testing the following patch on a bunch of hosts and I wasn't able to
make any of them explode, even running a multi-threaded
cgroup-intensive workload, but probably I was just lucky (or unlucky,
depending on the point of view).

It is basically the same Not-signed-off-by work posted by Mike a while
ago: https://lkml.org/lkml/2011/4/12/599.

In addition, I totally removed the synchronize_rcu() call from
cgroup_attach_task() and added the call_rcu -> schedule_work removal
also for css_set. The latter looks unnecessary to me from a logical
point of view — or maybe I'm missing something, because I can't explain
why, with it in place, I can't trigger any BUG / oops.

Mike, did you make any progress from your old patch?

Thanks,
-Andrea

---
 include/linux/cgroup.h |    2 ++
 kernel/cgroup.c        |   91 +++++++++++++++++++++++++++++-------------------
 2 files changed, 58 insertions(+), 35 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index d3f5fba..2bab2d6 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -212,6 +212,7 @@ struct cgroup {
 
 	/* For RCU-protected deletion */
 	struct rcu_head rcu_head;
+	struct work_struct work;
 
 	/* List of events which userspace want to receive */
 	struct list_head event_list;
@@ -260,6 +261,7 @@ struct css_set {
 
 	/* For RCU-protected deletion */
 	struct rcu_head rcu_head;
+	struct work_struct work;
 };
 
 /*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b303dfc..aa7cc06 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -396,6 +396,21 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
  * compiled into their kernel but not actually in use */
 static int use_task_css_set_links __read_mostly;
 
+static void free_css_set_work(struct work_struct *work)
+{
+	struct css_set *cg = container_of(work, struct css_set, work);
+
+	kfree(cg);
+}
+
+static void free_css_set_rcu(struct rcu_head *obj)
+{
+	struct css_set *cg = container_of(obj, struct css_set, rcu_head);
+
+	INIT_WORK(&cg->work, free_css_set_work);
+	schedule_work(&cg->work);
+}
+
 static void __put_css_set(struct css_set *cg, int taskexit)
 {
 	struct cg_cgroup_link *link;
@@ -433,7 +448,7 @@ static void __put_css_set(struct css_set *cg, int taskexit)
 	}
 
 	write_unlock(&css_set_lock);
-	kfree_rcu(cg, rcu_head);
+	call_rcu(&cg->rcu_head, free_css_set_rcu);
 }
 
 /*
@@ -875,44 +890,52 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp)
 	return ret;
 }
 
-static void cgroup_diput(struct dentry *dentry, struct inode *inode)
+static void free_cgroup_work(struct work_struct *work)
 {
-	/* is dentry a directory ? if so, kfree() associated cgroup */
-	if (S_ISDIR(inode->i_mode)) {
-		struct cgroup *cgrp = dentry->d_fsdata;
-		struct cgroup_subsys *ss;
-		BUG_ON(!(cgroup_is_removed(cgrp)));
-		/* It's possible for external users to be holding css
-		 * reference counts on a cgroup; css_put() needs to
-		 * be able to access the cgroup after decrementing
-		 * the reference count in order to know if it needs to
-		 * queue the cgroup to be handled by the release
-		 * agent */
-		synchronize_rcu();
+	struct cgroup *cgrp = container_of(work, struct cgroup, work);
+	struct cgroup_subsys *ss;
 
-		mutex_lock(&cgroup_mutex);
-		/*
-		 * Release the subsystem state objects.
-		 */
-		for_each_subsys(cgrp->root, ss)
-			ss->destroy(cgrp);
+	mutex_lock(&cgroup_mutex);
+	/*
+	 * Release the subsystem state objects.
+	 */
+	for_each_subsys(cgrp->root, ss)
+		ss->destroy(cgrp);
 
-		cgrp->root->number_of_cgroups--;
-		mutex_unlock(&cgroup_mutex);
+	cgrp->root->number_of_cgroups--;
+	mutex_unlock(&cgroup_mutex);
 
-		/*
-		 * Drop the active superblock reference that we took when we
-		 * created the cgroup
-		 */
-		deactivate_super(cgrp->root->sb);
+	/*
+	 * Drop the active superblock reference that we took when we
+	 * created the cgroup
+	 */
+	deactivate_super(cgrp->root->sb);
 
-		/*
-		 * if we're getting rid of the cgroup, refcount should ensure
-		 * that there are no pidlists left.
-		 */
-		BUG_ON(!list_empty(&cgrp->pidlists));
+	/*
+	 * if we're getting rid of the cgroup, refcount should ensure
+	 * that there are no pidlists left.
+	 */
+	BUG_ON(!list_empty(&cgrp->pidlists));
+
+	kfree(cgrp);
+}
+
+static void free_cgroup_rcu(struct rcu_head *obj)
+{
+	struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);
+
+	INIT_WORK(&cgrp->work, free_cgroup_work);
+	schedule_work(&cgrp->work);
+}
+
+static void cgroup_diput(struct dentry *dentry, struct inode *inode)
+{
+	/* is dentry a directory ? if so, kfree() associated cgroup */
+	if (S_ISDIR(inode->i_mode)) {
+		struct cgroup *cgrp = dentry->d_fsdata;
 
-		kfree_rcu(cgrp, rcu_head);
+		BUG_ON(!(cgroup_is_removed(cgrp)));
+		call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
 	} else {
 		struct cfent *cfe = __d_cfe(dentry);
 		struct cgroup *cgrp = dentry->d_parent->d_fsdata;
@@ -1990,8 +2013,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 			ss->attach(cgrp, &tset);
 	}
 
-	synchronize_rcu();
-
 	/*
 	 * wake up rmdir() waiter. the rmdir should fail since the cgroup
 	 * is no longer empty.

  parent reply	other threads:[~2012-07-23 20:42 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-06-19 18:58 Attaching a process to cgroups Alexey Vlasov
2012-06-20  3:34 ` Daisuke Nishimura
2012-06-20 11:08   ` Alexey Vlasov
2012-06-20 12:28 ` Mike Galbraith
2012-06-21  7:54   ` Alexey Vlasov
2012-06-21  8:23     ` Mike Galbraith
2012-06-21  8:26       ` Mike Galbraith
2012-06-26 18:06       ` Paul E. McKenney
2012-06-27  7:23         ` Mike Galbraith
2012-06-27 17:10           ` Paul E. McKenney
2012-06-28  2:40             ` Mike Galbraith
2012-07-23 20:41       ` Andrea Righi [this message]
2012-07-24  1:19         ` Mike Galbraith
2012-07-25 13:36   ` Alexey Vlasov
2012-07-25 13:57     ` Mike Galbraith
2012-07-26 13:02       ` Alexey Vlasov
2012-07-26 14:44         ` Mike Galbraith
2012-08-08 16:40       ` Alexey Vlasov
2012-08-08 16:51         ` Paul E. McKenney
2012-08-10  9:53           ` Alexey Vlasov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120723204159.GB6379@thinkpad \
    --to=righi.andrea@gmail.com \
    --cc=efault@gmx.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=renton@renton.name \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox