All of lore.kernel.org
 help / color / mirror / Atom feed
From: Li Zefan <lizf@cn.fujitsu.com>
To: Peter Zijlstra <a.p.zijlstra@chello.nl>, Ingo Molnar <mingo@elte.hu>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Paul Menage <menage@google.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	LKML <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH] sched: fix another race when reading /proc/sched_debug
Date: Tue, 16 Dec 2008 14:59:16 +0800	[thread overview]
Message-ID: <494751C4.6050709@cn.fujitsu.com> (raw)
In-Reply-To: <4947413B.5090400@cn.fujitsu.com>

How about this :

====================

I fixed an oops with the following commit:

| commit 24eb089950ce44603b30a3145a2c8520e2b55bb1
| Author: Li Zefan <lizf@cn.fujitsu.com>
| Date:   Thu Nov 6 12:53:32 2008 -0800
|
|    cgroups: fix invalid cgrp->dentry before cgroup has been completely removed
|
|    This fixes an oops when reading /proc/sched_debug.

The above commit fixed a race that reading /proc/sched_debug may access
NULL cgrp->dentry if a cgroup is being removed (via cgroup_rmdir), but
hasn't been destroyed (via cgroup_diput).

But I found there's another different race, in that reading sched_debug
may access a cgroup which is being created or has been destroyed, and thus
dereference NULL cgrp->dentry!

We fix the former issue by checking if the cgroup is being created, and
fix the latter issue by synchronize free cgroup with rcu.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
---
 include/linux/cgroup.h |   12 +++++++++++-
 kernel/cgroup.c        |   14 ++++++++------
 kernel/sched_debug.c   |   18 ++++++++++++++----
 3 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1164963..23854ec 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -91,6 +91,8 @@ static inline void css_put(struct cgroup_subsys_state *css)
 
 /* bits in struct cgroup flags field */
 enum {
+	/* Control Group is completely created */
+	CGRP_CREATED,
 	/* Control Group is dead */
 	CGRP_REMOVED,
 	/* Control Group has previously had a child cgroup or a task,
@@ -303,7 +305,15 @@ int cgroup_add_files(struct cgroup *cgrp,
 			const struct cftype cft[],
 			int count);
 
-int cgroup_is_removed(const struct cgroup *cgrp);
+static inline int cgroup_is_removed(const struct cgroup *cgrp)
+{
+	return test_bit(CGRP_REMOVED, &cgrp->flags);
+}
+
+static inline int cgroup_is_created(const struct cgroup *cgrp)
+{
+	return test_bit(CGRP_CREATED, &cgrp->flags);
+}
 
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index fe00b3b..364e8a3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -118,12 +118,6 @@ static int root_count;
 static int need_forkexit_callback __read_mostly;
 static int need_mm_owner_callback __read_mostly;
 
-/* convenient tests for these bits */
-inline int cgroup_is_removed(const struct cgroup *cgrp)
-{
-	return test_bit(CGRP_REMOVED, &cgrp->flags);
-}
-
 /* bits in struct cgroupfs_root flags field */
 enum {
 	ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
@@ -624,6 +618,12 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 		 * created the cgroup */
 		deactivate_super(cgrp->root->sb);
 
+		/*
+		 * Some subsystems (cpu cgroup) might still be able to
+		 * accessing the cgroup in rcu section.
+		 */
+		synchronize_rcu();
+
 		kfree(cgrp);
 	}
 	iput(inode);
@@ -2391,6 +2391,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	err = cgroup_populate_dir(cgrp);
 	/* If err < 0, we have a half-filled directory - oh well  ;)  */
 
+	set_bit(CGRP_CREATED, &cgrp->flags);
+
 	mutex_unlock(&cgroup_mutex);
 	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
 
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 26ed8e3..ae35d1a 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -127,8 +127,13 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	if (tg)
 		cgroup = tg->css.cgroup;
 
-	if (cgroup)
-		cgroup_path(cgroup, path, sizeof(path));
+	/*
+	 * The task_group is dead or we race with cgroup creating.
+	 */
+	if (!cgroup || !cgroup_is_created(cgroup) || !cgroup_is_removed(cgroup))
+		return;
+
+	cgroup_path(cgroup, path, sizeof(path));
 
 	SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
 #else
@@ -181,8 +186,13 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 	if (tg)
 		cgroup = tg->css.cgroup;
 
-	if (cgroup)
-		cgroup_path(cgroup, path, sizeof(path));
+	/*
+	 * The task_group is dead or we race with cgroup creating.
+	 */
+	if (!cgroup || !cgroup_is_created(cgroup) || !cgroup_is_removed(cgroup))
+		return;
+
+	cgroup_path(cgroup, path, sizeof(path));
 
 	SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
 #else


  reply	other threads:[~2008-12-16  7:00 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-12-12  9:53 [PATCH] sched: fix another race when reading /proc/sched_debug Li Zefan
2008-12-12 10:00 ` Ingo Molnar
2008-12-14  2:54   ` Li Zefan
2008-12-14 12:48     ` Peter Zijlstra
2008-12-15  1:25       ` Li Zefan
2008-12-15  8:13         ` Peter Zijlstra
2008-12-15  9:51           ` Li Zefan
2008-12-15 10:43             ` Peter Zijlstra
2008-12-15 11:08               ` KAMEZAWA Hiroyuki
2008-12-16  5:48                 ` Li Zefan
2008-12-16  6:59                   ` Li Zefan [this message]
2008-12-16  9:41               ` Paul Menage
2008-12-16 12:42                 ` Paul Menage
2008-12-16 12:55                   ` Li Zefan
2008-12-16 18:35                     ` Paul Menage
     [not found]       ` <6599ad830812141347k5d7e7e08vfc17855ea0ac981c@mail.gmail.com>
2008-12-15  1:39         ` Li Zefan
2008-12-15  1:50           ` KAMEZAWA Hiroyuki
2008-12-15  2:11             ` Li Zefan
2008-12-16  9:23             ` Paul Menage
2008-12-16  9:39               ` Li Zefan
2008-12-19  4:37       ` Balbir Singh
2008-12-19 14:06         ` Paul Menage
2008-12-16  8:01     ` Li Zefan
2008-12-16 12:23       ` Ingo Molnar
2008-12-12 11:38 ` Bharata B Rao
2008-12-13  8:22   ` Li Zefan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=494751C4.6050709@cn.fujitsu.com \
    --to=lizf@cn.fujitsu.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=menage@google.com \
    --cc=mingo@elte.hu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.