* [PATCH] cgroup: fix dentry still in use bug when dropping css refs after umount
@ 2012-06-30 7:07 Li Zefan
2012-06-30 14:34 ` Masanari Iida
[not found] ` <4FEEA5CB.8070809-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
0 siblings, 2 replies; 13+ messages in thread
From: Li Zefan @ 2012-06-30 7:07 UTC (permalink / raw)
To: Tejun Heo
Cc: shyju pv, Sanil kumar, Masanari Iida, LKML, Cgroups,
viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn
To avoid rmdir hang, now we won't drain css references at rmdir, but
rmdir will proceed regardless of css refs. We still have to wait till
all css refs have been released before destroying cgroup, and this is
achieved by holding an extra dentry refcnt for each css, which leads to
this bug:
BUG: Dentry ffff8801164db490{i=491b,n=/} still in use (1) [unmount of cgroup cgroup]
------------[ cut here ]------------
kernel BUG at fs/dcache.c:965!
...
Call Trace:
[<ffffffff811ec19a>] shrink_dcache_for_umount+0x4a/0x90
[<ffffffff811cc3c7>] generic_shutdown_super+0x37/0x160
[<ffffffff811cc5d9>] kill_anon_super+0x19/0x40
[<ffffffff811cc63a>] kill_litter_super+0x3a/0x50
[<ffffffff810fb65a>] cgroup_kill_sb+0x20a/0x280
[<ffffffff811ccdc5>] deactivate_locked_super+0x65/0xb0
[<ffffffff811ce339>] deactivate_super+0x89/0xe0
[<ffffffff810f7734>] cgroup_d_release+0x34/0x40
[<ffffffff811eb258>] d_free+0x58/0xc0
[<ffffffff811ee5f0>] dput+0x220/0x350
[<ffffffff810f7429>] css_dput_fn+0x19/0x30
[<ffffffff8107b4d9>] process_one_work+0x239/0x800
[<ffffffff8107eba3>] worker_thread+0x2e3/0x710
[<ffffffff81087e46>] kthread+0xd6/0xf0
If there's a css ref dangling after umount, when the ref goes down
to 0, dput will drop the cgroup's dentry and then drop the root
dentry. But kill_sb will be called inbetween, as dropping cgroup dentry
unpinned sb, and now vfs will find the root dentry's refcnt is not 0.
To fix this, we make css pin cgroup instead of cgroup dentry.
Reported-by: Shyju P V <shyju.pv-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Reported-by: Masanari Iida <standby24x7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Signed-off-by: Li Zefan <lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
include/linux/cgroup.h | 8 +++-
kernel/cgroup.c | 99 ++++++++++++++++++++++--------------------------
2 files changed, 52 insertions(+), 55 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index d3f5fba..a642f2e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -17,6 +17,7 @@
#include <linux/rwsem.h>
#include <linux/idr.h>
#include <linux/workqueue.h>
+#include <linux/kref.h>
#ifdef CONFIG_CGROUPS
@@ -78,8 +79,8 @@ struct cgroup_subsys_state {
/* ID for this css, if possible */
struct css_id __rcu *id;
- /* Used to put @cgroup->dentry on the last css_put() */
- struct work_struct dput_work;
+ /* Used to put @cgroup->css_refs on the last css_put() */
+ struct work_struct put_work;
};
/* bits in struct cgroup_subsys_state flags field */
@@ -170,6 +171,9 @@ struct cgroup {
*/
atomic_t count;
+ /* We won't destroy the cgroup when there are css refs */
+ struct kref css_refs;
+
/*
* We link our 'sibling' struct into our parent's 'children'.
* Our children link their 'sibling' into our 'children'.
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2097684..5b28938 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -875,12 +875,42 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp)
return ret;
}
+static void cgroup_release(struct kref *ref)
+{
+ struct cgroup_subsys *ss;
+ struct cgroup *cgrp = container_of(ref, struct cgroup, css_refs);
+
+ mutex_lock(&cgroup_mutex);
+ /*
+ * Release the subsystem state objects.
+ */
+ for_each_subsys(cgrp->root, ss)
+ ss->destroy(cgrp);
+
+ cgrp->root->number_of_cgroups--;
+ mutex_unlock(&cgroup_mutex);
+
+ /*
+ * Drop the active superblock reference that we took when we
+ * created the cgroup
+ */
+ deactivate_super(cgrp->root->sb);
+
+ /*
+ * if we're getting rid of the cgroup, refcount should ensure
+ * that there are no pidlists left.
+ */
+ BUG_ON(!list_empty(&cgrp->pidlists));
+
+ kfree_rcu(cgrp, rcu_head);
+}
+
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
{
/* is dentry a directory ? if so, kfree() associated cgroup */
if (S_ISDIR(inode->i_mode)) {
struct cgroup *cgrp = dentry->d_fsdata;
- struct cgroup_subsys *ss;
+
BUG_ON(!(cgroup_is_removed(cgrp)));
/* It's possible for external users to be holding css
* reference counts on a cgroup; css_put() needs to
@@ -890,32 +920,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
* agent */
synchronize_rcu();
- mutex_lock(&cgroup_mutex);
- /*
- * Release the subsystem state objects.
- */
- for_each_subsys(cgrp->root, ss)
- ss->destroy(cgrp);
-
- cgrp->root->number_of_cgroups--;
- mutex_unlock(&cgroup_mutex);
-
- /*
- * We want to drop the active superblock reference from the
- * cgroup creation after all the dentry refs are gone -
- * kill_sb gets mighty unhappy otherwise. Mark
- * dentry->d_fsdata with cgroup_diput() to tell
- * cgroup_d_release() to call deactivate_super().
- */
- dentry->d_fsdata = cgroup_diput;
-
- /*
- * if we're getting rid of the cgroup, refcount should ensure
- * that there are no pidlists left.
- */
- BUG_ON(!list_empty(&cgrp->pidlists));
-
- kfree_rcu(cgrp, rcu_head);
+ kref_put(&cgrp->css_refs, cgroup_release);
} else {
struct cfent *cfe = __d_cfe(dentry);
struct cgroup *cgrp = dentry->d_parent->d_fsdata;
@@ -933,13 +938,6 @@ static int cgroup_delete(const struct dentry *d)
return 1;
}
-static void cgroup_d_release(struct dentry *dentry)
-{
- /* did cgroup_diput() tell me to deactivate super? */
- if (dentry->d_fsdata == cgroup_diput)
- deactivate_super(dentry->d_sb);
-}
-
static void remove_dir(struct dentry *d)
{
struct dentry *parent = dget(d->d_parent);
@@ -1415,6 +1413,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
mutex_init(&cgrp->pidlist_mutex);
INIT_LIST_HEAD(&cgrp->event_list);
spin_lock_init(&cgrp->event_list_lock);
+ kref_init(&cgrp->css_refs);
}
static void init_cgroup_root(struct cgroupfs_root *root)
@@ -1547,7 +1546,6 @@ static int cgroup_get_rootdir(struct super_block *sb)
static const struct dentry_operations cgroup_dops = {
.d_iput = cgroup_diput,
.d_delete = cgroup_delete,
- .d_release = cgroup_d_release,
};
struct inode *inode =
@@ -3890,12 +3888,12 @@ static int cgroup_populate_dir(struct cgroup *cgrp)
return 0;
}
-static void css_dput_fn(struct work_struct *work)
+static void css_put_fn(struct work_struct *work)
{
struct cgroup_subsys_state *css =
- container_of(work, struct cgroup_subsys_state, dput_work);
+ container_of(work, struct cgroup_subsys_state, put_work);
- dput(css->cgroup->dentry);
+ kref_put(&css->cgroup->css_refs, cgroup_release);
}
static void init_cgroup_css(struct cgroup_subsys_state *css,
@@ -3912,14 +3910,14 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
cgrp->subsys[ss->subsys_id] = css;
/*
- * If !clear_css_refs, css holds an extra ref to @cgrp->dentry
- * which is put on the last css_put(). dput() requires process
- * context, which css_put() may be called without. @css->dput_work
- * will be used to invoke dput() asynchronously from css_put().
+ * If !clear_css_refs, css holds a ref to @cgrp->css_refs
+ * which is put on the last css_put().
*/
- INIT_WORK(&css->dput_work, css_dput_fn);
+ INIT_WORK(&css->put_work, css_put_fn);
if (ss->__DEPRECATED_clear_css_refs)
set_bit(CSS_CLEAR_CSS_REFS, &css->flags);
+ else
+ kref_get(&cgrp->css_refs);
}
static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
@@ -4022,11 +4020,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
if (err < 0)
goto err_remove;
- /* If !clear_css_refs, each css holds a ref to the cgroup's dentry */
- for_each_subsys(root, ss)
- if (!ss->__DEPRECATED_clear_css_refs)
- dget(dentry);
-
/* The cgroup directory was pre-locked for us */
BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
@@ -4127,11 +4120,11 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
* veto removal on any invocation. This behavior is deprecated and will be
* removed as soon as the existing user (memcg) is updated.
*
- * If clear is not set, each css holds an extra reference to the cgroup's
- * dentry and cgroup removal proceeds regardless of css refs.
+ * If clear is not set, each css holds a reference to cgroup->css_refs
+ * and cgroup removal proceeds regardless of css refs.
* ->pre_destroy() will be called at least once and is not allowed to fail.
- * On the last put of each css, whenever that may be, the extra dentry ref
- * is put so that dentry destruction happens only after all css's are
+ * On the last put of each css, whenever that may be, cgroup->css_refs
+ * is put so that cgroup destruction happens only after all css's are
* released.
*/
static int cgroup_clear_css_refs(struct cgroup *cgrp)
@@ -5002,7 +4995,7 @@ void __css_put(struct cgroup_subsys_state *css)
break;
case 0:
if (!test_bit(CSS_CLEAR_CSS_REFS, &css->flags))
- schedule_work(&css->dput_work);
+ schedule_work(&css->put_work);
break;
}
rcu_read_unlock();
--
1.7.9.7
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] cgroup: fix dentry still in use bug when dropping css refs after umount
2012-06-30 7:07 [PATCH] cgroup: fix dentry still in use bug when dropping css refs after umount Li Zefan
@ 2012-06-30 14:34 ` Masanari Iida
[not found] ` <4FEEA5CB.8070809-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
1 sibling, 0 replies; 13+ messages in thread
From: Masanari Iida @ 2012-06-30 14:34 UTC (permalink / raw)
To: Li Zefan; +Cc: Tejun Heo, shyju pv, Sanil kumar, LKML, Cgroups, viro
Hello
Apply the patch on top of 3.5-rc4, and confirmed
the symptom has fixed by your patch.
Thank you!
Tested-by: Masanari Iida <standby24x7@gmail.com>
Masanari
On Sat, Jun 30, 2012 at 4:07 PM, Li Zefan <lizefan@huawei.com> wrote:
> To avoid rmdir hang, now we won't drain css references at rmdir, but
> rmdir will proceed regardless of css refs. We still have to wait till
> all css refs have been released before destroying cgroup, and this is
> achieved by holding an extra dentry refcnt for each css, which leads to
> this bug:
>
> BUG: Dentry ffff8801164db490{i=491b,n=/} still in use (1) [unmount of cgroup cgroup]
> ------------[ cut here ]------------
> kernel BUG at fs/dcache.c:965!
> ...
> Call Trace:
> [<ffffffff811ec19a>] shrink_dcache_for_umount+0x4a/0x90
> [<ffffffff811cc3c7>] generic_shutdown_super+0x37/0x160
> [<ffffffff811cc5d9>] kill_anon_super+0x19/0x40
> [<ffffffff811cc63a>] kill_litter_super+0x3a/0x50
> [<ffffffff810fb65a>] cgroup_kill_sb+0x20a/0x280
> [<ffffffff811ccdc5>] deactivate_locked_super+0x65/0xb0
> [<ffffffff811ce339>] deactivate_super+0x89/0xe0
> [<ffffffff810f7734>] cgroup_d_release+0x34/0x40
> [<ffffffff811eb258>] d_free+0x58/0xc0
> [<ffffffff811ee5f0>] dput+0x220/0x350
> [<ffffffff810f7429>] css_dput_fn+0x19/0x30
> [<ffffffff8107b4d9>] process_one_work+0x239/0x800
> [<ffffffff8107eba3>] worker_thread+0x2e3/0x710
> [<ffffffff81087e46>] kthread+0xd6/0xf0
>
> If there's a css ref dangling after umount, when the ref goes down
> to 0, dput will drop the cgroup's dentry and then drop the root
> dentry. But kill_sb will be called inbetween, as dropping cgroup dentry
> unpinned sb, and now vfs will find the root dentry's refcnt is not 0.
>
> To fix this, we make css pin cgroup instead of cgroup dentry.
>
> Reported-by: Shyju P V <shyju.pv@huawei.com>
> Reported-by: Masanari Iida <standby24x7@gmail.com>
> Signed-off-by: Li Zefan <lizefan@huawei.com>
> ---
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] cgroup: fix dentry still in use bug when dropping css refs after umount
[not found] ` <4FEEA5CB.8070809-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
@ 2012-07-03 17:03 ` Tejun Heo
[not found] ` <20120703170317.GB555-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Tejun Heo @ 2012-07-03 17:03 UTC (permalink / raw)
To: Li Zefan
Cc: shyju pv, Sanil kumar, Masanari Iida, LKML, Cgroups,
viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn
Hello, Li.
On Sat, Jun 30, 2012 at 03:07:55PM +0800, Li Zefan wrote:
> If there's a css ref dangling after umount, when the ref goes down
> to 0, dput will drop the cgroup's dentry and then drop the root
> dentry. But kill_sb will be called inbetween, as dropping cgroup dentry
> unpinned sb, and now vfs will find the root dentry's refcnt is not 0.
Ah, okay, the window is between dput of child and parent. Now I can
reproduce. :) Dunno why your case didn't work here.
> @@ -78,8 +79,8 @@ struct cgroup_subsys_state {
> /* ID for this css, if possible */
> struct css_id __rcu *id;
>
> - /* Used to put @cgroup->dentry on the last css_put() */
> - struct work_struct dput_work;
> + /* Used to put @cgroup->css_refs on the last css_put() */
> + struct work_struct put_work;
> };
>
> /* bits in struct cgroup_subsys_state flags field */
> @@ -170,6 +171,9 @@ struct cgroup {
> */
> atomic_t count;
>
> + /* We won't destroy the cgroup when there are css refs */
> + struct kref css_refs;
> +
> /*
> * We link our 'sibling' struct into our parent's 'children'.
> * Our children link their 'sibling' into our 'children'.
Hmm... adding another layer of refcnting. Now we end up with three
layers of refcnting - the active usage via cgroup->count, cgroup
lifetime via cgroup->dentry->d_count and this extended lifecycle
refcnt. In addition, we now have to deal with partially destroyed
cgroups - e.g. running cgroup_path() will oops on lingering cgroups.
Hating this tangling of dentries and cgroups more and more. :(
So, the reason why the d_release() change caused this problem was
because it decoupled super deactivation from rmdir. Before, dentries
were holding onto super the same but as rmdir also holds onto super
via normal vfs access, the whole recurssion was protected. ie.
1. rmdir(2) - gets s_active
2. cgroup waits for css drain
3. dput() happens on cgroup synchronously. This ends up
calling deactivate_super() via d_iput() and then may
recursively put parent dentries but that's safe thanks to
s_active ref from rmdir(2).
4. rmdir(2) is done, super deactivated.
After super deactivation is moved to d_release(), the last dput may
happen outside rmdir(2) or any other vfs syscall, so between dput of
child and root, which doesn't hold s_active, super can try to die.
Ugh....... I don't know. I'll think more about it.
Thanks a lot.
--
tejun
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] cgroup: fix dentry still in use bug when dropping css refs after umount
[not found] ` <20120703170317.GB555-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
@ 2012-07-03 22:52 ` Tejun Heo
[not found] ` <20120703225218.GF555-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
[not found] ` <4ff55c60.27da440a.65ec.ffff83dfSMTPIN_ADDED@mx.google.com>
2012-07-04 5:56 ` [PATCH] cgroup: fix dentry still in use bug when dropping css refs after umount Li Zefan
1 sibling, 2 replies; 13+ messages in thread
From: Tejun Heo @ 2012-07-03 22:52 UTC (permalink / raw)
To: Li Zefan
Cc: shyju pv, Sanil kumar, Masanari Iida, LKML, Cgroups,
viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
levinsasha928-Re5JQEeQqe8AvxtiuMwx3w
Hello,
Shyju, can you please test the following patch?
Sasha, can you please re-test your test case with this patch? I don't
think fa980ca87d "cgroup: superblock can't be released with active
dentries" did anything useful. cgroup always calls d_iput() and
d_release() in succession from the same context. It may just have
stretched the timing a bit to hide the race.
Li, this patch reverts fa980ca87d and wraps dput() in css_dput_fn()
with s_active refs. Positive dentry ref means that we have active ref
on sb, so wrapping the final dput() with extra s_active ref should
avoid premature super destruction. I think we're horridly broken for
root cgroup tho - and it has been broken for very long time. I think
it's mostly hidden because most (all?) controllers short-circuit root
cgroup. Eh, well....
Thanks.
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b214fc2..15462a0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -901,13 +901,10 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
mutex_unlock(&cgroup_mutex);
/*
- * We want to drop the active superblock reference from the
- * cgroup creation after all the dentry refs are gone -
- * kill_sb gets mighty unhappy otherwise. Mark
- * dentry->d_fsdata with cgroup_diput() to tell
- * cgroup_d_release() to call deactivate_super().
+ * Drop the active superblock reference that we took when we
+ * created the cgroup
*/
- dentry->d_fsdata = cgroup_diput;
+ deactivate_super(cgrp->root->sb);
/*
* if we're getting rid of the cgroup, refcount should ensure
@@ -933,13 +930,6 @@ static int cgroup_delete(const struct dentry *d)
return 1;
}
-static void cgroup_d_release(struct dentry *dentry)
-{
- /* did cgroup_diput() tell me to deactivate super? */
- if (dentry->d_fsdata == cgroup_diput)
- deactivate_super(dentry->d_sb);
-}
-
static void remove_dir(struct dentry *d)
{
struct dentry *parent = dget(d->d_parent);
@@ -1547,7 +1537,6 @@ static int cgroup_get_rootdir(struct super_block *sb)
static const struct dentry_operations cgroup_dops = {
.d_iput = cgroup_diput,
.d_delete = cgroup_delete,
- .d_release = cgroup_d_release,
};
struct inode *inode =
@@ -3894,8 +3883,12 @@ static void css_dput_fn(struct work_struct *work)
{
struct cgroup_subsys_state *css =
container_of(work, struct cgroup_subsys_state, dput_work);
+ struct dentry *dentry = css->cgroup->dentry;
+ struct super_block *sb = dentry->d_sb;
- dput(css->cgroup->dentry);
+ atomic_inc(&sb->s_active);
+ dput(dentry);
+ deactivate_super(sb);
}
static void init_cgroup_css(struct cgroup_subsys_state *css,
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] cgroup: fix dentry still in use bug when dropping css refs after umount
[not found] ` <20120703170317.GB555-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
2012-07-03 22:52 ` Tejun Heo
@ 2012-07-04 5:56 ` Li Zefan
1 sibling, 0 replies; 13+ messages in thread
From: Li Zefan @ 2012-07-04 5:56 UTC (permalink / raw)
To: Tejun Heo
Cc: shyju pv, Sanil kumar, Masanari Iida, LKML, Cgroups,
viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn
>> @@ -78,8 +79,8 @@ struct cgroup_subsys_state {
>> /* ID for this css, if possible */
>> struct css_id __rcu *id;
>>
>> - /* Used to put @cgroup->dentry on the last css_put() */
>> - struct work_struct dput_work;
>> + /* Used to put @cgroup->css_refs on the last css_put() */
>> + struct work_struct put_work;
>> };
>>
>> /* bits in struct cgroup_subsys_state flags field */
>> @@ -170,6 +171,9 @@ struct cgroup {
>> */
>> atomic_t count;
>>
>> + /* We won't destroy the cgroup when there are css refs */
>> + struct kref css_refs;
>> +
>> /*
>> * We link our 'sibling' struct into our parent's 'children'.
>> * Our children link their 'sibling' into our 'children'.
>
> Hmm... adding another layer of refcnting. Now we end up with three
> layers of refcnting - the active usage via cgroup->count, cgroup
> lifetime via cgroup->dentry->d_count and this extended lifecycle
> refcnt. In addition, we now have to deal with partially destroyed
> cgroups - e.g. running cgroup_path() will oops on lingering cgroups.
> Hating this tangling of dentries and cgroups more and more. :(
>
yeah, the fix you sent looks better.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] cgroup: fix dentry still in use bug when dropping css refs after umount
[not found] ` <20120703225218.GF555-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
@ 2012-07-04 6:19 ` Li Zefan
[not found] ` <4FF3E063.5010604-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Li Zefan @ 2012-07-04 6:19 UTC (permalink / raw)
To: Tejun Heo
Cc: shyju pv, Sanil kumar, Masanari Iida, LKML, Cgroups,
viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
levinsasha928-Re5JQEeQqe8AvxtiuMwx3w
On 2012/7/4 6:52, Tejun Heo wrote:
> Hello,
>
> Shyju, can you please test the following patch?
>
> Sasha, can you please re-test your test case with this patch? I don't
> think fa980ca87d "cgroup: superblock can't be released with active
> dentries" did anything useful. cgroup always calls d_iput() and
> d_release() in succession from the same context. It may just have
> stretched the timing a bit to hide the race.
>
> Li, this patch reverts fa980ca87d and wraps dput() in css_dput_fn()
> with s_active refs. Positive dentry ref means that we have active ref
> on sb, so wrapping the final dput() with extra s_active ref should
> avoid premature super destruction.
> I think we're horridly broken for
> root cgroup tho - and it has been broken for very long time. I think
> it's mostly hidden because most (all?) controllers short-circuit root
> cgroup. Eh, well....
>
Could you elaborate a bit on what's broken for root cgroup?
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 1/2] Revert "cgroup: superblock can't be released with active dentries"
[not found] ` <4ff55c60.27da440a.65ec.ffff83dfSMTPIN_ADDED-ATjtLOhZ0NVl57MIdRCFDg@public.gmane.org>
@ 2012-07-07 23:46 ` 'Tejun Heo'
[not found] ` <20120707234634.GC16783-RcKxWJ4Cfj1J2suj2OqeGauc2jM2gXBXkQQo+JxHRPFibQn6LdNjmg@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: 'Tejun Heo' @ 2012-07-07 23:46 UTC (permalink / raw)
To: Shyju PV
Cc: 'Li Zefan', 'Sanil kumar',
'Masanari Iida', 'LKML', 'Cgroups',
viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
levinsasha928-Re5JQEeQqe8AvxtiuMwx3w,
nagamani.mantha-hv44wF8Li93QT0dZR+AlfA
From 7db5b3ca0ecdb2e8fad52a4770e4e320e61c77a6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Date: Sat, 7 Jul 2012 15:55:47 -0700
This reverts commit fa980ca87d15bb8a1317853f257a505990f3ffde. The
commit was an attempt to fix a race condition where a cgroup hierarchy
may be unmounted with positive dentry reference on root cgroup. While
the commit made the race condition slightly more difficult to trigger,
the race was still there and could be reliably triggered using a
different test case.
Revert the incorrect fix. The next commit will describe the race and
fix it correctly.
Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
LKML-Reference: <4FEEA5CB.8070809-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Reported-by: shyju pv <shyju.pv-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Cc: Sasha Levin <levinsasha928-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Acked-by: Li Zefan <lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
Sorry about the delay. Was on vacation. Will soon push these two
patches to Linus.
(Shyju, didn't realize I was replying to private reply, resending)
Thanks.
kernel/cgroup.c | 17 +++--------------
1 files changed, 3 insertions(+), 14 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2097684..5f134a0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -901,13 +901,10 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
mutex_unlock(&cgroup_mutex);
/*
- * We want to drop the active superblock reference from the
- * cgroup creation after all the dentry refs are gone -
- * kill_sb gets mighty unhappy otherwise. Mark
- * dentry->d_fsdata with cgroup_diput() to tell
- * cgroup_d_release() to call deactivate_super().
+ * Drop the active superblock reference that we took when we
+ * created the cgroup
*/
- dentry->d_fsdata = cgroup_diput;
+ deactivate_super(cgrp->root->sb);
/*
* if we're getting rid of the cgroup, refcount should ensure
@@ -933,13 +930,6 @@ static int cgroup_delete(const struct dentry *d)
return 1;
}
-static void cgroup_d_release(struct dentry *dentry)
-{
- /* did cgroup_diput() tell me to deactivate super? */
- if (dentry->d_fsdata == cgroup_diput)
- deactivate_super(dentry->d_sb);
-}
-
static void remove_dir(struct dentry *d)
{
struct dentry *parent = dget(d->d_parent);
@@ -1547,7 +1537,6 @@ static int cgroup_get_rootdir(struct super_block *sb)
static const struct dentry_operations cgroup_dops = {
.d_iput = cgroup_diput,
.d_delete = cgroup_delete,
- .d_release = cgroup_d_release,
};
struct inode *inode =
--
1.7.7.3
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 2/2] cgroup: fix cgroup hierarchy umount race
[not found] ` <20120707234634.GC16783-RcKxWJ4Cfj1J2suj2OqeGauc2jM2gXBXkQQo+JxHRPFibQn6LdNjmg@public.gmane.org>
@ 2012-07-07 23:46 ` 'Tejun Heo'
[not found] ` <20120707234659.GD16783-RcKxWJ4Cfj1J2suj2OqeGauc2jM2gXBXkQQo+JxHRPFibQn6LdNjmg@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: 'Tejun Heo' @ 2012-07-07 23:46 UTC (permalink / raw)
To: Shyju PV
Cc: 'Li Zefan', 'Sanil kumar',
'Masanari Iida', 'LKML', 'Cgroups',
viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
levinsasha928-Re5JQEeQqe8AvxtiuMwx3w,
nagamani.mantha-hv44wF8Li93QT0dZR+AlfA
From 5db9a4d99b0157a513944e9a44d29c9cec2e91dc Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Date: Sat, 7 Jul 2012 16:08:18 -0700
48ddbe1946 "cgroup: make css->refcnt clearing on cgroup removal
optional" allowed a css to linger after the associated cgroup is
removed. As a css holds a reference on the cgroup's dentry, it means
that cgroup dentries may linger for a while.
Destroying a superblock which has dentries with positive refcnts is a
critical bug and triggers BUG() in vfs code. As each cgroup dentry
holds an s_active reference, any lingering cgroup has both its dentry
and the superblock pinned and thus preventing premature release of
superblock.
Unfortunately, after 48ddbe1946, there's a small window while
releasing a cgroup which is directly under the root of the hierarchy.
When a cgroup directory is released, vfs layer first deletes the
corresponding dentry and then invokes dput() on the parent, which may
recurse further, so when a cgroup directly below root cgroup is
released, the cgroup is first destroyed - which releases the s_active
it was holding - and then the dentry for the root cgroup is dput().
This creates a window where the root dentry's refcnt isn't zero but
superblock's s_active is. If umount happens before or during this
window, vfs will see the root dentry with non-zero refcnt and trigger
BUG().
Before 48ddbe1946, this problem didn't exist because the last dentry
reference was guaranteed to be put synchronously from rmdir(2)
invocation which holds s_active around the whole process.
Fix it by holding an extra superblock->s_active reference across
dput() from css release, which is the dput() path added by 48ddbe1946
and the only one which doesn't hold an extra s_active ref across the
final cgroup dput().
Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
LKML-Reference: <4FEEA5CB.8070809-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Reported-by: shyju pv <shyju.pv-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Tested-by: shyju pv <shyju.pv-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Cc: Sasha Levin <levinsasha928-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Acked-by: Li Zefan <lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
kernel/cgroup.c | 6 +++++-
1 files changed, 5 insertions(+), 1 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5f134a0..b303dfc 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3883,8 +3883,12 @@ static void css_dput_fn(struct work_struct *work)
{
struct cgroup_subsys_state *css =
container_of(work, struct cgroup_subsys_state, dput_work);
+ struct dentry *dentry = css->cgroup->dentry;
+ struct super_block *sb = dentry->d_sb;
- dput(css->cgroup->dentry);
+ atomic_inc(&sb->s_active);
+ dput(dentry);
+ deactivate_super(sb);
}
static void init_cgroup_css(struct cgroup_subsys_state *css,
--
1.7.7.3
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] cgroup: fix dentry still in use bug when dropping css refs after umount
[not found] ` <4FF3E063.5010604-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
@ 2012-07-08 6:35 ` Tejun Heo
[not found] ` <20120708063536.GB19021-RcKxWJ4Cfj1J2suj2OqeGauc2jM2gXBXkQQo+JxHRPFibQn6LdNjmg@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Tejun Heo @ 2012-07-08 6:35 UTC (permalink / raw)
To: Li Zefan
Cc: shyju pv, Sanil kumar, Masanari Iida, LKML, Cgroups,
viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
levinsasha928-Re5JQEeQqe8AvxtiuMwx3w
Hello, Li.
On Wed, Jul 04, 2012 at 02:19:15PM +0800, Li Zefan wrote:
> > I think we're horridly broken for
> > root cgroup tho - and it has been broken for very long time. I think
> > it's mostly hidden because most (all?) controllers short-circuit root
> > cgroup. Eh, well....
> >
>
> Could you elaborate a bit on what's broken for root cgroup?
If someone holds css ref of a root cgroup, AFAICS nothing is
preventing the cgroup hierarchy from being unmounted and root cgroup
destroyed.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] cgroup: fix dentry still in use bug when dropping css refs after umount
[not found] ` <20120708063536.GB19021-RcKxWJ4Cfj1J2suj2OqeGauc2jM2gXBXkQQo+JxHRPFibQn6LdNjmg@public.gmane.org>
@ 2012-07-10 2:11 ` Li Zefan
[not found] ` <4FFB8F39.9030209-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Li Zefan @ 2012-07-10 2:11 UTC (permalink / raw)
To: Tejun Heo
Cc: shyju pv, Sanil kumar, Masanari Iida, LKML, Cgroups,
viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
levinsasha928-Re5JQEeQqe8AvxtiuMwx3w
On 2012/7/8 14:35, Tejun Heo wrote:
> Hello, Li.
>
> On Wed, Jul 04, 2012 at 02:19:15PM +0800, Li Zefan wrote:
>>> I think we're horridly broken for
>>> root cgroup tho - and it has been broken for very long time. I think
>>> it's mostly hidden because most (all?) controllers short-circuit root
>>> cgroup. Eh, well....
>>>
>>
>> Could you elaborate a bit on what's broken for root cgroup?
>
> If someone holds css ref of a root cgroup, AFAICS nothing is
> preventing the cgroup hierarchy from being unmounted and root cgroup
> destroyed.
>
Right, but that should be safe. The css objects of the root cgroup are
allocated at boot, and won't be destroyed at umount.
Furthermore when a cgroup hierarchy is going to be unmounted, those css's
will be made to point to a cgroup named dummytop in rebind_subsystems(),
and there's a syncronize_rcu() in the end of the function, so accessing
css->cgroup is always safe.
In this case, dummytop->dentry is NULL, and that's safe too, because
cgroup_path() is aware of this case.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2] cgroup: fix cgroup hierarchy umount race
[not found] ` <20120707234659.GD16783-RcKxWJ4Cfj1J2suj2OqeGauc2jM2gXBXkQQo+JxHRPFibQn6LdNjmg@public.gmane.org>
@ 2012-07-14 12:08 ` Al Viro
[not found] ` <20120714120852.GK22927-3bDd1+5oDREiFSDQTTA3OLVCufUGDwFn@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Al Viro @ 2012-07-14 12:08 UTC (permalink / raw)
To: 'Tejun Heo'
Cc: Shyju PV, 'Li Zefan', 'Sanil kumar',
'Masanari Iida', 'LKML', 'Cgroups',
levinsasha928-Re5JQEeQqe8AvxtiuMwx3w,
nagamani.mantha-hv44wF8Li93QT0dZR+AlfA
On Sat, Jul 07, 2012 at 04:46:59PM -0700, 'Tejun Heo' wrote:
> Fix it by holding an extra superblock->s_active reference across
> dput() from css release, which is the dput() path added by 48ddbe1946
> and the only one which doesn't hold an extra s_active ref across the
> final cgroup dput().
> @@ -3883,8 +3883,12 @@ static void css_dput_fn(struct work_struct *work)
> {
> struct cgroup_subsys_state *css =
> container_of(work, struct cgroup_subsys_state, dput_work);
> + struct dentry *dentry = css->cgroup->dentry;
> + struct super_block *sb = dentry->d_sb;
>
> - dput(css->cgroup->dentry);
> + atomic_inc(&sb->s_active);
> + dput(dentry);
> + deactivate_super(sb);
> }
While we are at it, what guarantees that css->dput_work will complete before
css->cgroup or the object containing css get freed under us?
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2] cgroup: fix cgroup hierarchy umount race
[not found] ` <20120714120852.GK22927-3bDd1+5oDREiFSDQTTA3OLVCufUGDwFn@public.gmane.org>
@ 2012-07-16 16:44 ` 'Tejun Heo'
0 siblings, 0 replies; 13+ messages in thread
From: 'Tejun Heo' @ 2012-07-16 16:44 UTC (permalink / raw)
To: Al Viro
Cc: Shyju PV, 'Li Zefan', 'Sanil kumar',
'Masanari Iida', 'LKML', 'Cgroups',
levinsasha928-Re5JQEeQqe8AvxtiuMwx3w,
nagamani.mantha-hv44wF8Li93QT0dZR+AlfA
Hello, Al.
On Sat, Jul 14, 2012 at 01:08:52PM +0100, Al Viro wrote:
> On Sat, Jul 07, 2012 at 04:46:59PM -0700, 'Tejun Heo' wrote:
> > Fix it by holding an extra superblock->s_active reference across
> > dput() from css release, which is the dput() path added by 48ddbe1946
> > and the only one which doesn't hold an extra s_active ref across the
> > final cgroup dput().
>
> > @@ -3883,8 +3883,12 @@ static void css_dput_fn(struct work_struct *work)
> > {
> > struct cgroup_subsys_state *css =
> > container_of(work, struct cgroup_subsys_state, dput_work);
> > + struct dentry *dentry = css->cgroup->dentry;
> > + struct super_block *sb = dentry->d_sb;
> >
> > - dput(css->cgroup->dentry);
> > + atomic_inc(&sb->s_active);
> > + dput(dentry);
> > + deactivate_super(sb);
> > }
>
> While we are at it, what guarantees that css->dput_work will complete before
> css->cgroup or the object containing css get freed under us?
css's are tied to the cgroup. They are created on cgroup creation and
destroyed together with cgroup, which is controlled by the dentry
refcnt. css refcnts are relaying reference to cgroup dentry refcnt.
The reason why css needs this finer grained refcnts instead of
directly using cgroup dentry refcnt is that for some subsystems cgroup
removal tries to drain all css refcnts before proceeding with removal.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] cgroup: fix dentry still in use bug when dropping css refs after umount
[not found] ` <4FFB8F39.9030209-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
@ 2012-07-16 16:45 ` Tejun Heo
0 siblings, 0 replies; 13+ messages in thread
From: Tejun Heo @ 2012-07-16 16:45 UTC (permalink / raw)
To: Li Zefan
Cc: shyju pv, Sanil kumar, Masanari Iida, LKML, Cgroups,
viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn,
levinsasha928-Re5JQEeQqe8AvxtiuMwx3w
Hello,
On Tue, Jul 10, 2012 at 10:11:05AM +0800, Li Zefan wrote:
> Right, but that should be safe. The css objects of the root cgroup are
> allocated at boot, and won't be destroyed at umount.
>
> Furthermore when a cgroup hierarchy is going to be unmounted, those css's
> will be made to point to a cgroup named dummytop in rebind_subsystems(),
> and there's a syncronize_rcu() in the end of the function, so accessing
> css->cgroup is always safe.
>
> In this case, dummytop->dentry is NULL, and that's safe too, because
> cgroup_path() is aware of this case.
I see. Thanks for the explanation.
--
tejun
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2012-07-16 16:45 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-06-30 7:07 [PATCH] cgroup: fix dentry still in use bug when dropping css refs after umount Li Zefan
2012-06-30 14:34 ` Masanari Iida
[not found] ` <4FEEA5CB.8070809-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
2012-07-03 17:03 ` Tejun Heo
[not found] ` <20120703170317.GB555-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
2012-07-03 22:52 ` Tejun Heo
[not found] ` <20120703225218.GF555-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
2012-07-04 6:19 ` Li Zefan
[not found] ` <4FF3E063.5010604-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
2012-07-08 6:35 ` Tejun Heo
[not found] ` <20120708063536.GB19021-RcKxWJ4Cfj1J2suj2OqeGauc2jM2gXBXkQQo+JxHRPFibQn6LdNjmg@public.gmane.org>
2012-07-10 2:11 ` Li Zefan
[not found] ` <4FFB8F39.9030209-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
2012-07-16 16:45 ` Tejun Heo
[not found] ` <4ff55c60.27da440a.65ec.ffff83dfSMTPIN_ADDED@mx.google.com>
[not found] ` <4ff55c60.27da440a.65ec.ffff83dfSMTPIN_ADDED-ATjtLOhZ0NVl57MIdRCFDg@public.gmane.org>
2012-07-07 23:46 ` [PATCH 1/2] Revert "cgroup: superblock can't be released with active dentries" 'Tejun Heo'
[not found] ` <20120707234634.GC16783-RcKxWJ4Cfj1J2suj2OqeGauc2jM2gXBXkQQo+JxHRPFibQn6LdNjmg@public.gmane.org>
2012-07-07 23:46 ` [PATCH 2/2] cgroup: fix cgroup hierarchy umount race 'Tejun Heo'
[not found] ` <20120707234659.GD16783-RcKxWJ4Cfj1J2suj2OqeGauc2jM2gXBXkQQo+JxHRPFibQn6LdNjmg@public.gmane.org>
2012-07-14 12:08 ` Al Viro
[not found] ` <20120714120852.GK22927-3bDd1+5oDREiFSDQTTA3OLVCufUGDwFn@public.gmane.org>
2012-07-16 16:44 ` 'Tejun Heo'
2012-07-04 5:56 ` [PATCH] cgroup: fix dentry still in use bug when dropping css refs after umount Li Zefan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).