* [PATCH 2/9] cgroup: rename cgroup->dummy_css to ->self and move it to the top
2014-05-09 21:13 [PATCHSET cgroup/for-3.16] cgroup: use css->refcnt for cgroup reference counting Tejun Heo
@ 2014-05-09 21:13 ` Tejun Heo
2014-05-09 21:13 ` [PATCH 4/9] cgroup: move check_for_release(parent) call to the end of cgroup_destroy_locked() Tejun Heo
` (5 subsequent siblings)
6 siblings, 0 replies; 23+ messages in thread
From: Tejun Heo @ 2014-05-09 21:13 UTC (permalink / raw)
To: lizefan; +Cc: cgroups, linux-kernel, Tejun Heo
cgroup->dummy_css is used as the placeholder css when performing css
oriented operations on the cgroup. We're gonna shift more cgroup
management to this css. Let's rename it to ->self and move it to the
top.
This is pure rename and field relocation.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
include/linux/cgroup.h | 6 +++---
kernel/cgroup.c | 20 ++++++++++----------
2 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index aa7353d..164851e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -143,6 +143,9 @@ enum {
};
struct cgroup {
+ /* self css with NULL ->ss, points back to this cgroup */
+ struct cgroup_subsys_state self;
+
unsigned long flags; /* "unsigned long" so bitops work */
/*
@@ -224,9 +227,6 @@ struct cgroup {
struct list_head pidlists;
struct mutex pidlist_mutex;
- /* dummy css with NULL ->ss, points back to this cgroup */
- struct cgroup_subsys_state dummy_css;
-
/* For css percpu_ref killing and RCU-protected deletion */
struct rcu_head rcu_head;
struct work_struct destroy_work;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2755f33..b3708a8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -220,7 +220,7 @@ static void cgroup_idr_remove(struct idr *idr, int id)
/**
* cgroup_css - obtain a cgroup's css for the specified subsystem
* @cgrp: the cgroup of interest
- * @ss: the subsystem of interest (%NULL returns the dummy_css)
+ * @ss: the subsystem of interest (%NULL returns @cgrp->self)
*
* Return @cgrp's css (cgroup_subsys_state) associated with @ss. This
* function must be called either under cgroup_mutex or rcu_read_lock() and
@@ -235,13 +235,13 @@ static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
return rcu_dereference_check(cgrp->subsys[ss->id],
lockdep_is_held(&cgroup_mutex));
else
- return &cgrp->dummy_css;
+ return &cgrp->self;
}
/**
* cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
* @cgrp: the cgroup of interest
- * @ss: the subsystem of interest (%NULL returns the dummy_css)
+ * @ss: the subsystem of interest (%NULL returns @cgrp->self)
*
* Similar to cgroup_css() but returns the effctive css, which is defined
* as the matching css of the nearest ancestor including self which has @ss
@@ -254,7 +254,7 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
lockdep_assert_held(&cgroup_mutex);
if (!ss)
- return &cgrp->dummy_css;
+ return &cgrp->self;
if (!(cgrp->root->subsys_mask & (1 << ss->id)))
return NULL;
@@ -288,7 +288,7 @@ struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
if (cft->ss)
return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
else
- return &cgrp->dummy_css;
+ return &cgrp->self;
}
EXPORT_SYMBOL_GPL(of_css);
@@ -1551,7 +1551,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
INIT_LIST_HEAD(&cgrp->release_list);
INIT_LIST_HEAD(&cgrp->pidlists);
mutex_init(&cgrp->pidlist_mutex);
- cgrp->dummy_css.cgroup = cgrp;
+ cgrp->self.cgroup = cgrp;
for_each_subsys(ss, ssid)
INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
@@ -3453,7 +3453,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
* ->can_attach() fails.
*/
do {
- css_task_iter_start(&from->dummy_css, &it);
+ css_task_iter_start(&from->self, &it);
task = css_task_iter_next(&it);
if (task)
get_task_struct(task);
@@ -3718,7 +3718,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
if (!array)
return -ENOMEM;
/* now, populate the array */
- css_task_iter_start(&cgrp->dummy_css, &it);
+ css_task_iter_start(&cgrp->self, &it);
while ((tsk = css_task_iter_next(&it))) {
if (unlikely(n == length))
break;
@@ -3792,7 +3792,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
}
rcu_read_unlock();
- css_task_iter_start(&cgrp->dummy_css, &it);
+ css_task_iter_start(&cgrp->self, &it);
while ((tsk = css_task_iter_next(&it))) {
switch (tsk->state) {
case TASK_RUNNING:
@@ -4273,7 +4273,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
init_cgroup_housekeeping(cgrp);
cgrp->parent = parent;
- cgrp->dummy_css.parent = &parent->dummy_css;
+ cgrp->self.parent = &parent->self;
cgrp->root = root;
if (notify_on_release(parent))
--
1.9.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 4/9] cgroup: move check_for_release(parent) call to the end of cgroup_destroy_locked()
2014-05-09 21:13 [PATCHSET cgroup/for-3.16] cgroup: use css->refcnt for cgroup reference counting Tejun Heo
2014-05-09 21:13 ` [PATCH 2/9] cgroup: rename cgroup->dummy_css to ->self and move it to the top Tejun Heo
@ 2014-05-09 21:13 ` Tejun Heo
[not found] ` <1399670015-23463-1-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
` (4 subsequent siblings)
6 siblings, 0 replies; 23+ messages in thread
From: Tejun Heo @ 2014-05-09 21:13 UTC (permalink / raw)
To: lizefan; +Cc: cgroups, linux-kernel, Tejun Heo
Currently, check_for_release() on the parent of a destroyed cgroup is
invoked from cgroup_destroy_css_killed(). This is because this is
where the destroyed cgroup can be removed from the parent's children
list. check_for_release() tests the emptiness of the list directly,
so invoking it before removing the cgroup from the list makes it think
that the parent still has children even when it no longer does.
This patch updates check_for_release() to use
cgroup_has_live_children() instead of directly testing ->children
emptiness and moves check_for_release(parent) earlier to the end of
cgroup_destroy_locked(). As cgroup_has_live_children() ignores
cgroups marked DEAD, check_for_release() functions correctly as long
as it's called after asserting DEAD.
This makes release notification slightly more timely and more
importantly enables further simplification of cgroup destruction path.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
kernel/cgroup.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d369f19..45149b7 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4541,6 +4541,9 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
*/
kernfs_remove(cgrp->kn);
+ set_bit(CGRP_RELEASABLE, &cgrp->parent->flags);
+ check_for_release(cgrp->parent);
+
return 0;
};
@@ -4555,17 +4558,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
*/
static void cgroup_destroy_css_killed(struct cgroup *cgrp)
{
- struct cgroup *parent = cgrp->parent;
-
lockdep_assert_held(&cgroup_mutex);
/* delete this cgroup from parent->children */
list_del_rcu(&cgrp->sibling);
cgroup_put(cgrp);
-
- set_bit(CGRP_RELEASABLE, &parent->flags);
- check_for_release(parent);
}
static int cgroup_rmdir(struct kernfs_node *kn)
@@ -5005,7 +5003,7 @@ void cgroup_exit(struct task_struct *tsk)
static void check_for_release(struct cgroup *cgrp)
{
if (cgroup_is_releasable(cgrp) &&
- list_empty(&cgrp->cset_links) && list_empty(&cgrp->children)) {
+ list_empty(&cgrp->cset_links) && !cgroup_has_live_children(cgrp)) {
/*
* Control Group is currently removeable. If it's not
* already queued for a userspace notification, queue
--
1.9.0
^ permalink raw reply related [flat|nested] 23+ messages in thread[parent not found: <1399670015-23463-1-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>]
* [PATCH 1/9] cgroup: use restart_syscall() for mount retries
[not found] ` <1399670015-23463-1-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
@ 2014-05-09 21:13 ` Tejun Heo
[not found] ` <1399670015-23463-2-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2014-05-09 21:13 ` [PATCH 3/9] cgroup: separate out cgroup_has_live_children() from cgroup_destroy_locked() Tejun Heo
` (4 subsequent siblings)
5 siblings, 1 reply; 23+ messages in thread
From: Tejun Heo @ 2014-05-09 21:13 UTC (permalink / raw)
To: lizefan-hv44wF8Li93QT0dZR+AlfA
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, Tejun Heo
cgroup_mount() uses dumb delay-and-retry logic to wait for cgroup_root
which is being destroyed. The retry currently loops inside
cgroup_mount() proper. This patch makes it return with
restart_syscall() instead so that retry travels out to userland
boundary.
This slightly simplifies the logic and more importantly makes the
retry logic behave better when the wait for some reason becomes
lengthy or infinite by allowing the operation to be suspended or
terminated from userland.
Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
kernel/cgroup.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3083c5a..2755f33 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1681,7 +1681,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
ret = parse_cgroupfs_options(data, &opts);
if (ret)
goto out_unlock;
-retry:
+
/* look for a matching existing root */
if (!opts.subsys_mask && !opts.none && !opts.name) {
cgrp_dfl_root_visible = true;
@@ -1740,8 +1740,7 @@ retry:
if (!atomic_inc_not_zero(&root->cgrp.refcnt)) {
mutex_unlock(&cgroup_mutex);
msleep(10);
- mutex_lock(&cgroup_mutex);
- goto retry;
+ return ERR_PTR(restart_syscall());
}
ret = 0;
--
1.9.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 3/9] cgroup: separate out cgroup_has_live_children() from cgroup_destroy_locked()
[not found] ` <1399670015-23463-1-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2014-05-09 21:13 ` [PATCH 1/9] cgroup: use restart_syscall() for mount retries Tejun Heo
@ 2014-05-09 21:13 ` Tejun Heo
2014-05-09 21:13 ` [PATCH 5/9] cgroup: move cgroup->sibling unlinking to cgroup_put() Tejun Heo
` (3 subsequent siblings)
5 siblings, 0 replies; 23+ messages in thread
From: Tejun Heo @ 2014-05-09 21:13 UTC (permalink / raw)
To: lizefan-hv44wF8Li93QT0dZR+AlfA
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, Tejun Heo
We're expecting another user.
Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
kernel/cgroup.c | 26 ++++++++++++++++----------
1 file changed, 16 insertions(+), 10 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b3708a8..d369f19 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3294,6 +3294,21 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
return css_parent(pos);
}
+static bool cgroup_has_live_children(struct cgroup *cgrp)
+{
+ struct cgroup *child;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(child, &cgrp->children, sibling) {
+ if (!cgroup_is_dead(child)) {
+ rcu_read_unlock();
+ return true;
+ }
+ }
+ rcu_read_unlock();
+ return false;
+}
+
/**
* css_advance_task_iter - advance a task itererator to the next css_set
* @it: the iterator to advance
@@ -4464,7 +4479,6 @@ static void kill_css(struct cgroup_subsys_state *css)
static int cgroup_destroy_locked(struct cgroup *cgrp)
__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
{
- struct cgroup *child;
struct cgroup_subsys_state *css;
bool empty;
int ssid;
@@ -4486,15 +4500,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
* emptiness as dead children linger on it while being destroyed;
* otherwise, "rmdir parent/child parent" may fail with -EBUSY.
*/
- empty = true;
- rcu_read_lock();
- list_for_each_entry_rcu(child, &cgrp->children, sibling) {
- empty = cgroup_is_dead(child);
- if (!empty)
- break;
- }
- rcu_read_unlock();
- if (!empty)
+ if (cgroup_has_live_children(cgrp))
return -EBUSY;
/*
--
1.9.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 5/9] cgroup: move cgroup->sibling unlinking to cgroup_put()
[not found] ` <1399670015-23463-1-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2014-05-09 21:13 ` [PATCH 1/9] cgroup: use restart_syscall() for mount retries Tejun Heo
2014-05-09 21:13 ` [PATCH 3/9] cgroup: separate out cgroup_has_live_children() from cgroup_destroy_locked() Tejun Heo
@ 2014-05-09 21:13 ` Tejun Heo
2014-05-09 21:13 ` [PATCH 8/9] cgroup: enable refcnting for root csses Tejun Heo
` (2 subsequent siblings)
5 siblings, 0 replies; 23+ messages in thread
From: Tejun Heo @ 2014-05-09 21:13 UTC (permalink / raw)
To: lizefan-hv44wF8Li93QT0dZR+AlfA
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, Tejun Heo
Move cgroup->sibling unlinking from cgroup_destroy_css_killed() to
cgroup_put(). This is later but still before the RCU grace period, so
it doesn't break css_next_child() although there now is a larger
window in which a dead cgroup is visible during css iteration. As css
iteration always could have included offline csses, this doesn't
affect correctness; however, it does make css_next_child() fall back
to reiterating mode more often. This also makes cgroup_put() directly
take cgroup_mutex, which limits where it can be called from. These
are not immediately problematic and will be dealt with later.
This change enables simplification of cgroup destruction path.
Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
kernel/cgroup.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 45149b7..87ea2ce 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1056,6 +1056,11 @@ static void cgroup_put(struct cgroup *cgrp)
if (WARN_ON_ONCE(cgrp->parent && !cgroup_is_dead(cgrp)))
return;
+ /* delete this cgroup from parent->children */
+ mutex_lock(&cgroup_mutex);
+ list_del_rcu(&cgrp->sibling);
+ mutex_unlock(&cgroup_mutex);
+
cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
cgrp->id = -1;
@@ -4560,9 +4565,6 @@ static void cgroup_destroy_css_killed(struct cgroup *cgrp)
{
lockdep_assert_held(&cgroup_mutex);
- /* delete this cgroup from parent->children */
- list_del_rcu(&cgrp->sibling);
-
cgroup_put(cgrp);
}
--
1.9.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 8/9] cgroup: enable refcnting for root csses
[not found] ` <1399670015-23463-1-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
` (2 preceding siblings ...)
2014-05-09 21:13 ` [PATCH 5/9] cgroup: move cgroup->sibling unlinking to cgroup_put() Tejun Heo
@ 2014-05-09 21:13 ` Tejun Heo
2014-05-14 3:15 ` [PATCHSET cgroup/for-3.16] cgroup: use css->refcnt for cgroup reference counting Li Zefan
2014-05-14 17:04 ` Tejun Heo
5 siblings, 0 replies; 23+ messages in thread
From: Tejun Heo @ 2014-05-09 21:13 UTC (permalink / raw)
To: lizefan-hv44wF8Li93QT0dZR+AlfA
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, Tejun Heo
Currently, css_get(), css_tryget() and css_tryget_online() are noops
for root csses as an optimization; however, we're planning to use css
refcnts to keep track of cgroup lifetime too and root cgroups also need to
be reference counted. Since css has been converted to percpu_refcnt,
the overhead of refcnting is minuscule and this optimization isn't too
meaningful anymore. Furthermore, controllers which optimize the root
cgroup often never even invoke these functions in their hot paths.
This patch enables refcnting for root csses too. This makes CSS_ROOT
flag unused and removes it.
Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
include/linux/cgroup.h | 10 ++--------
kernel/cgroup.c | 6 +++---
2 files changed, 5 insertions(+), 11 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 160fcc6..286e39e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -77,7 +77,6 @@ struct cgroup_subsys_state {
/* bits in struct cgroup_subsys_state flags field */
enum {
- CSS_ROOT = (1 << 0), /* this CSS is the root of the subsystem */
CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */
};
@@ -89,9 +88,7 @@ enum {
*/
static inline void css_get(struct cgroup_subsys_state *css)
{
- /* We don't need to reference count the root state */
- if (!(css->flags & CSS_ROOT))
- percpu_ref_get(&css->refcnt);
+ percpu_ref_get(&css->refcnt);
}
/**
@@ -106,8 +103,6 @@ static inline void css_get(struct cgroup_subsys_state *css)
*/
static inline bool css_tryget_online(struct cgroup_subsys_state *css)
{
- if (css->flags & CSS_ROOT)
- return true;
return percpu_ref_tryget_live(&css->refcnt);
}
@@ -119,8 +114,7 @@ static inline bool css_tryget_online(struct cgroup_subsys_state *css)
*/
static inline void css_put(struct cgroup_subsys_state *css)
{
- if (!(css->flags & CSS_ROOT))
- percpu_ref_put(&css->refcnt);
+ percpu_ref_put(&css->refcnt);
}
/* bits in struct cgroup flags field */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b382383..5a31e61 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4157,8 +4157,6 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
if (cgrp->parent) {
css->parent = cgroup_css(cgrp->parent, ss);
css_get(css->parent);
- } else {
- css->flags |= CSS_ROOT;
}
BUG_ON(cgroup_css(cgrp, ss));
@@ -4581,9 +4579,10 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
BUG_ON(IS_ERR(css));
init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
if (early) {
- /* idr_alloc() can't be called safely during early init */
+ /* allocation can't be done safely during early init */
css->id = 1;
} else {
+ BUG_ON(percpu_ref_init(&css->refcnt, css_release));
css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
BUG_ON(css->id < 0);
}
@@ -4670,6 +4669,7 @@ int __init cgroup_init(void)
struct cgroup_subsys_state *css =
init_css_set.subsys[ss->id];
+ BUG_ON(percpu_ref_init(&css->refcnt, css_release));
css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
GFP_KERNEL);
BUG_ON(css->id < 0);
--
1.9.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCHSET cgroup/for-3.16] cgroup: use css->refcnt for cgroup reference counting
[not found] ` <1399670015-23463-1-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
` (3 preceding siblings ...)
2014-05-09 21:13 ` [PATCH 8/9] cgroup: enable refcnting for root csses Tejun Heo
@ 2014-05-14 3:15 ` Li Zefan
2014-05-14 17:04 ` Tejun Heo
5 siblings, 0 replies; 23+ messages in thread
From: Li Zefan @ 2014-05-14 3:15 UTC (permalink / raw)
To: Tejun Heo
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
On 2014/5/10 5:13, Tejun Heo wrote:
> Hello,
>
> Currently, cgroup and css (cgroup_subsys_state) are separately
> reference counted. cgroup->refcnt is an atomic_t and css->refcnt is a
> percpu_ref. css is becoming the primary structural block and used
> widely in various operations. cgroup already has a css embedded in it
> (cgroup->dummy_css) to use as its proxy in such operations; however,
> there still are quite a few differences between cgroup and css
> handling limiting how cgroup->dummy_css can be used.
>
> This patchset makes cgroup use the embedded css's refcnt for reference
> counting. This closes one of the gaps between an cgroup embedded css,
> which is renamed to cgroup->self early in the patchset, and a normal
> css and will allow more unified handling of cgroups and csses. In
> addition, this makes cgroup refcnting use percpu_ref too, which is a
> lot more scalable than an atomic_t.
>
> Ultimately, it'd make things far simpler to assign a proper
> cgroup_subsys to the cgroup embedded csses and handle them the same as
> other csses; however, we can't yet do it thanks to multiple
> hierarchies as we end up with multiple csses of the same subsystem for
> the same task, but in the very long term, if multiple hierarchies can
> be removed, that's where it's headed.
>
> This patchset contains the following nine patches.
>
> 0001-cgroup-use-restart_syscall-for-mount-retries.patch
> 0002-cgroup-rename-cgroup-dummy_css-to-self-and-move-it-t.patch
> 0003-cgroup-separate-out-cgroup_has_live_children-from-cg.patch
> 0004-cgroup-move-check_for_release-parent-call-to-the-end.patch
> 0005-cgroup-move-cgroup-sibling-unlinking-to-cgroup_put.patch
> 0006-cgroup-remove-cgroup_destory_css_killed.patch
> 0007-cgroup-bounce-css-release-through-css-destroy_work.patch
> 0008-cgroup-enable-refcnting-for-root-csses.patch
> 0009-cgroup-use-cgroup-self.refcnt-for-cgroup-refcnting.patch
>
> 0001-0003 are prep patches.
>
> 0004-0006 remove cgroup_destroy_css_killed(). This brings cgroup's
> destruction path closer to css's so that they can be merged.
>
> 0007-0009 make cgroup use the embedded css's refcnt.
>
> This patchset is on top of
>
> b9a63d0116e8 ("Merge branch 'for-3.16' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu into for-3.16")
> + [1] [PATCHSET v2 cgroup/for-3.16] cgroup: post unified hierarchy fixes and updates
> + [2] (REFRESHED) [PATCHSET cgroup/for-3.16] cgroup: implement cftype->write()
> + [3] (REFRESHED) [PATCHSET cgroup/for-3.16] cgroup: remove cgroup_tree_mutex
>
> and available in the following git branch.
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git review-use-css-ref
>
> diffstat follows. Thanks.
>
> include/linux/cgroup.h | 25 ----
> kernel/cgroup.c | 284 ++++++++++++++++++++++---------------------------
> 2 files changed, 136 insertions(+), 173 deletions(-)
>
With the memory leak fixed:
Acked-by: Li Zefan <lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [PATCHSET cgroup/for-3.16] cgroup: use css->refcnt for cgroup reference counting
[not found] ` <1399670015-23463-1-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
` (4 preceding siblings ...)
2014-05-14 3:15 ` [PATCHSET cgroup/for-3.16] cgroup: use css->refcnt for cgroup reference counting Li Zefan
@ 2014-05-14 17:04 ` Tejun Heo
5 siblings, 0 replies; 23+ messages in thread
From: Tejun Heo @ 2014-05-14 17:04 UTC (permalink / raw)
To: lizefan-hv44wF8Li93QT0dZR+AlfA
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
On Fri, May 09, 2014 at 05:13:26PM -0400, Tejun Heo wrote:
> Hello,
>
> Currently, cgroup and css (cgroup_subsys_state) are separately
> reference counted. cgroup->refcnt is an atomic_t and css->refcnt is a
> percpu_ref. css is becoming the primary structural block and used
> widely in various operations. cgroup already has a css embedded in it
> (cgroup->dummy_css) to use as its proxy in such operations; however,
> there still are quite a few differences between cgroup and css
> handling limiting how cgroup->dummy_css can be used.
>
> This patchset makes cgroup use the embedded css's refcnt for reference
> counting. This closes one of the gaps between an cgroup embedded css,
> which is renamed to cgroup->self early in the patchset, and a normal
> css and will allow more unified handling of cgroups and csses. In
> addition, this makes cgroup refcnting use percpu_ref too, which is a
> lot more scalable than an atomic_t.
>
> Ultimately, it'd make things far simpler to assign a proper
> cgroup_subsys to the cgroup embedded csses and handle them the same as
> other csses; however, we can't yet do it thanks to multiple
> hierarchies as we end up with multiple csses of the same subsystem for
> the same task, but in the very long term, if multiple hierarchies can
> be removed, that's where it's headed.
>
> This patchset contains the following nine patches.
>
> 0001-cgroup-use-restart_syscall-for-mount-retries.patch
> 0002-cgroup-rename-cgroup-dummy_css-to-self-and-move-it-t.patch
> 0003-cgroup-separate-out-cgroup_has_live_children-from-cg.patch
> 0004-cgroup-move-check_for_release-parent-call-to-the-end.patch
> 0005-cgroup-move-cgroup-sibling-unlinking-to-cgroup_put.patch
> 0006-cgroup-remove-cgroup_destory_css_killed.patch
> 0007-cgroup-bounce-css-release-through-css-destroy_work.patch
> 0008-cgroup-enable-refcnting-for-root-csses.patch
> 0009-cgroup-use-cgroup-self.refcnt-for-cgroup-refcnting.patch
Applied to cgroup/for-3.16.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 6/9] cgroup: remove cgroup_destory_css_killed()
2014-05-09 21:13 [PATCHSET cgroup/for-3.16] cgroup: use css->refcnt for cgroup reference counting Tejun Heo
` (2 preceding siblings ...)
[not found] ` <1399670015-23463-1-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
@ 2014-05-09 21:13 ` Tejun Heo
2014-05-09 21:13 ` [PATCH 7/9] cgroup: bounce css release through css->destroy_work Tejun Heo
` (2 subsequent siblings)
6 siblings, 0 replies; 23+ messages in thread
From: Tejun Heo @ 2014-05-09 21:13 UTC (permalink / raw)
To: lizefan; +Cc: cgroups, linux-kernel, Tejun Heo
cgroup_destroy_css_killed() is cgroup destruction stage which happens
after all csses are offlined. After the recent updates, it no longer
does anything other than putting the base reference. This patch
removes the function and makes cgroup_destroy_locked() put the base
ref at the end instead.
This also makes cgroup->nr_css unnecessary. Removed.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
include/linux/cgroup.h | 3 ---
kernel/cgroup.c | 62 +++++---------------------------------------------
2 files changed, 6 insertions(+), 59 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 164851e..160fcc6 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -158,9 +158,6 @@ struct cgroup {
*/
int id;
- /* the number of attached css's */
- int nr_css;
-
/*
* If this cgroup contains any tasks, it contributes one to
* populated_cnt. All children with non-zero popuplated_cnt of
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 87ea2ce..bb67acb 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -178,7 +178,6 @@ static struct cftype cgroup_base_files[];
static void cgroup_put(struct cgroup *cgrp);
static int rebind_subsystems(struct cgroup_root *dst_root,
unsigned int ss_mask);
-static void cgroup_destroy_css_killed(struct cgroup *cgrp);
static int cgroup_destroy_locked(struct cgroup *cgrp);
static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss);
static void kill_css(struct cgroup_subsys_state *css);
@@ -4168,7 +4167,6 @@ static int online_css(struct cgroup_subsys_state *css)
ret = ss->css_online(css);
if (!ret) {
css->flags |= CSS_ONLINE;
- css->cgroup->nr_css++;
rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
}
return ret;
@@ -4188,7 +4186,6 @@ static void offline_css(struct cgroup_subsys_state *css)
ss->css_offline(css);
css->flags &= ~CSS_ONLINE;
- css->cgroup->nr_css--;
RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
wake_up_all(&css->cgroup->offline_waitq);
@@ -4373,39 +4370,18 @@ out_destroy:
/*
* This is called when the refcnt of a css is confirmed to be killed.
- * css_tryget_online() is now guaranteed to fail.
+ * css_tryget_online() is now guaranteed to fail. Tell the subsystem to
+ * initate destruction and put the css ref from kill_css().
*/
static void css_killed_work_fn(struct work_struct *work)
{
struct cgroup_subsys_state *css =
container_of(work, struct cgroup_subsys_state, destroy_work);
- struct cgroup *cgrp = css->cgroup;
mutex_lock(&cgroup_mutex);
-
- /*
- * css_tryget_online() is guaranteed to fail now. Tell subsystems
- * to initate destruction.
- */
offline_css(css);
-
- /*
- * If @cgrp is marked dead, it's waiting for refs of all css's to
- * be disabled before proceeding to the second phase of cgroup
- * destruction. If we are the last one, kick it off.
- */
- if (!cgrp->nr_css && cgroup_is_dead(cgrp))
- cgroup_destroy_css_killed(cgrp);
-
mutex_unlock(&cgroup_mutex);
- /*
- * Put the css refs from kill_css(). Each css holds an extra
- * reference to the cgroup's dentry and cgroup removal proceeds
- * regardless of css refs. On the last put of each css, whenever
- * that may be, the extra dentry ref is put so that dentry
- * destruction happens only after all css's are released.
- */
css_put(css);
}
@@ -4517,11 +4493,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
*/
set_bit(CGRP_DEAD, &cgrp->flags);
- /*
- * Initiate massacre of all css's. cgroup_destroy_css_killed()
- * will be invoked to perform the rest of destruction once the
- * percpu refs of all css's are confirmed to be killed.
- */
+ /* initiate massacre of all css's */
for_each_css(css, ssid, cgrp)
kill_css(css);
@@ -4532,15 +4504,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
raw_spin_unlock(&release_list_lock);
/*
- * If @cgrp has css's attached, the second stage of cgroup
- * destruction is kicked off from css_killed_work_fn() after the
- * refs of all attached css's are killed. If @cgrp doesn't have
- * any css, we kick it off here.
- */
- if (!cgrp->nr_css)
- cgroup_destroy_css_killed(cgrp);
-
- /*
* Remove @cgrp directory along with the base files. @cgrp has an
* extra ref on its kn.
*/
@@ -4549,25 +4512,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
set_bit(CGRP_RELEASABLE, &cgrp->parent->flags);
check_for_release(cgrp->parent);
+ /* put the base reference */
+ cgroup_put(cgrp);
+
return 0;
};
-/**
- * cgroup_destroy_css_killed - the second step of cgroup destruction
- * @cgrp: the cgroup whose csses have just finished offlining
- *
- * This function is invoked from a work item for a cgroup which is being
- * destroyed after all css's are offlined and performs the rest of
- * destruction. This is the second step of destruction described in the
- * comment above cgroup_destroy_locked().
- */
-static void cgroup_destroy_css_killed(struct cgroup *cgrp)
-{
- lockdep_assert_held(&cgroup_mutex);
-
- cgroup_put(cgrp);
-}
-
static int cgroup_rmdir(struct kernfs_node *kn)
{
struct cgroup *cgrp;
--
1.9.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 7/9] cgroup: bounce css release through css->destroy_work
2014-05-09 21:13 [PATCHSET cgroup/for-3.16] cgroup: use css->refcnt for cgroup reference counting Tejun Heo
` (3 preceding siblings ...)
2014-05-09 21:13 ` [PATCH 6/9] cgroup: remove cgroup_destory_css_killed() Tejun Heo
@ 2014-05-09 21:13 ` Tejun Heo
2014-05-09 21:13 ` [PATCH 9/9] cgroup: use cgroup->self.refcnt for cgroup refcnting Tejun Heo
2014-05-13 16:59 ` [PATCHSET cgroup/for-3.16] cgroup: use css->refcnt for cgroup reference counting Tejun Heo
6 siblings, 0 replies; 23+ messages in thread
From: Tejun Heo @ 2014-05-09 21:13 UTC (permalink / raw)
To: lizefan; +Cc: cgroups, linux-kernel, Tejun Heo
css release is planned to do more and would require process context.
Bounce it through css->destroy_work.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
kernel/cgroup.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index bb67acb..b382383 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4125,10 +4125,10 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head)
queue_work(cgroup_destroy_wq, &css->destroy_work);
}
-static void css_release(struct percpu_ref *ref)
+static void css_release_work_fn(struct work_struct *work)
{
struct cgroup_subsys_state *css =
- container_of(ref, struct cgroup_subsys_state, refcnt);
+ container_of(work, struct cgroup_subsys_state, destroy_work);
struct cgroup_subsys *ss = css->ss;
cgroup_idr_remove(&ss->css_idr, css->id);
@@ -4136,6 +4136,15 @@ static void css_release(struct percpu_ref *ref)
call_rcu(&css->rcu_head, css_free_rcu_fn);
}
+static void css_release(struct percpu_ref *ref)
+{
+ struct cgroup_subsys_state *css =
+ container_of(ref, struct cgroup_subsys_state, refcnt);
+
+ INIT_WORK(&css->destroy_work, css_release_work_fn);
+ queue_work(cgroup_destroy_wq, &css->destroy_work);
+}
+
static void init_and_link_css(struct cgroup_subsys_state *css,
struct cgroup_subsys *ss, struct cgroup *cgrp)
{
--
1.9.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 9/9] cgroup: use cgroup->self.refcnt for cgroup refcnting
2014-05-09 21:13 [PATCHSET cgroup/for-3.16] cgroup: use css->refcnt for cgroup reference counting Tejun Heo
` (4 preceding siblings ...)
2014-05-09 21:13 ` [PATCH 7/9] cgroup: bounce css release through css->destroy_work Tejun Heo
@ 2014-05-09 21:13 ` Tejun Heo
[not found] ` <1399670015-23463-10-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2014-05-13 16:59 ` [PATCHSET cgroup/for-3.16] cgroup: use css->refcnt for cgroup reference counting Tejun Heo
6 siblings, 1 reply; 23+ messages in thread
From: Tejun Heo @ 2014-05-09 21:13 UTC (permalink / raw)
To: lizefan; +Cc: cgroups, linux-kernel, Tejun Heo
Currently cgroup implements refcnting separately using atomic_t
cgroup->refcnt. The destruction paths of cgroup and css are rather
complex and bear a lot of similarities including the use of RCU and
bouncing to a work item.
This patch makes cgroup use the refcnt of self css for refcnting
instead of using its own. This makes cgroup refcnting use css's
percpu refcnt and share the destruction mechanism.
* css_release_work_fn() and css_free_work_fn() are updated to handle
both csses and cgroups. This is a bit messy but should do until we
can make cgroup->self a full css, which currently can't be done
thanks to multiple hierarchies.
* cgroup_destroy_locked() now performs
percpu_ref_kill(&cgrp->self.refcnt) instead of cgroup_put(cgrp).
* Negative refcnt sanity check in cgroup_get() is no longer necessary
as percpu_ref already handles it.
* Similarly, as a cgroup which hasn't been killed will never be
released regardless of its refcnt value and percpu_ref has sanity
check on kill, cgroup_is_dead() sanity check in cgroup_put() is no
longer necessary.
* As whether a refcnt reached zero or not can only be decided after
the reference count is killed, cgroup_root->cgrp's refcnting can no
longer be used to decide whether to kill the root or not. Let's
make cgroup_kill_sb() explicitly initiate destruction if the root
doesn't have any children. This makes sense anyway as an unmounted
cgroup hierarchy without any children should be destroyed.
While this is a bit messy, this will allow pushing more bookkeeping
towards cgroup->self and thus handling cgroups and csses in a more
uniform way. In the very long term, it should be possible to
introduce a base subsystem and convert the self css to a proper one,
making things a whole lot simpler and more unified.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
include/linux/cgroup.h | 6 --
kernel/cgroup.c | 146 +++++++++++++++++++++++++++----------------------
2 files changed, 80 insertions(+), 72 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 286e39e..76dadd77 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -160,8 +160,6 @@ struct cgroup {
*/
int populated_cnt;
- atomic_t refcnt;
-
/*
* We link our 'sibling' struct into our parent's 'children'.
* Our children link their 'sibling' into our 'children'.
@@ -218,10 +216,6 @@ struct cgroup {
struct list_head pidlists;
struct mutex pidlist_mutex;
- /* For css percpu_ref killing and RCU-protected deletion */
- struct rcu_head rcu_head;
- struct work_struct destroy_work;
-
/* used to wait for offlining of csses */
wait_queue_head_t offline_waitq;
};
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5a31e61..64ff413 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -176,10 +176,12 @@ static int need_forkexit_callback __read_mostly;
static struct cftype cgroup_base_files[];
static void cgroup_put(struct cgroup *cgrp);
+static bool cgroup_has_live_children(struct cgroup *cgrp);
static int rebind_subsystems(struct cgroup_root *dst_root,
unsigned int ss_mask);
static int cgroup_destroy_locked(struct cgroup *cgrp);
static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss);
+static void css_release(struct percpu_ref *ref);
static void kill_css(struct cgroup_subsys_state *css);
static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
bool is_add);
@@ -1008,62 +1010,15 @@ static umode_t cgroup_file_mode(const struct cftype *cft)
return mode;
}
-static void cgroup_free_fn(struct work_struct *work)
-{
- struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
-
- atomic_dec(&cgrp->root->nr_cgrps);
- cgroup_pidlist_destroy_all(cgrp);
-
- if (cgrp->parent) {
- /*
- * We get a ref to the parent, and put the ref when this
- * cgroup is being freed, so it's guaranteed that the
- * parent won't be destroyed before its children.
- */
- cgroup_put(cgrp->parent);
- kernfs_put(cgrp->kn);
- kfree(cgrp);
- } else {
- /*
- * This is root cgroup's refcnt reaching zero, which
- * indicates that the root should be released.
- */
- cgroup_destroy_root(cgrp->root);
- }
-}
-
-static void cgroup_free_rcu(struct rcu_head *head)
-{
- struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
-
- INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
- queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
-}
-
static void cgroup_get(struct cgroup *cgrp)
{
WARN_ON_ONCE(cgroup_is_dead(cgrp));
- WARN_ON_ONCE(atomic_read(&cgrp->refcnt) <= 0);
- atomic_inc(&cgrp->refcnt);
+ css_get(&cgrp->self);
}
static void cgroup_put(struct cgroup *cgrp)
{
- if (!atomic_dec_and_test(&cgrp->refcnt))
- return;
- if (WARN_ON_ONCE(cgrp->parent && !cgroup_is_dead(cgrp)))
- return;
-
- /* delete this cgroup from parent->children */
- mutex_lock(&cgroup_mutex);
- list_del_rcu(&cgrp->sibling);
- mutex_unlock(&cgroup_mutex);
-
- cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
- cgrp->id = -1;
-
- call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
+ css_put(&cgrp->self);
}
/**
@@ -1548,7 +1503,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
struct cgroup_subsys *ss;
int ssid;
- atomic_set(&cgrp->refcnt, 1);
INIT_LIST_HEAD(&cgrp->sibling);
INIT_LIST_HEAD(&cgrp->children);
INIT_LIST_HEAD(&cgrp->cset_links);
@@ -1597,6 +1551,10 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
goto out;
root_cgrp->id = ret;
+ ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release);
+ if (ret)
+ goto out;
+
/*
* We're accessing css_set_count without locking css_set_rwsem here,
* but that's OK - it can only be increased by someone holding
@@ -1605,11 +1563,11 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
*/
ret = allocate_cgrp_cset_links(css_set_count, &tmp_links);
if (ret)
- goto out;
+ goto cancel_ref;
ret = cgroup_init_root_id(root);
if (ret)
- goto out;
+ goto cancel_ref;
root->kf_root = kernfs_create_root(&cgroup_kf_syscall_ops,
KERNFS_ROOT_CREATE_DEACTIVATED,
@@ -1657,6 +1615,8 @@ destroy_root:
root->kf_root = NULL;
exit_root_id:
cgroup_exit_root_id(root);
+cancel_ref:
+ percpu_ref_cancel_init(&root_cgrp->self.refcnt);
out:
free_cgrp_cset_links(&tmp_links);
return ret;
@@ -1735,13 +1695,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
}
/*
- * A root's lifetime is governed by its root cgroup. Zero
- * ref indicate that the root is being destroyed. Wait for
- * destruction to complete so that the subsystems are free.
- * We can use wait_queue for the wait but this path is
- * super cold. Let's just sleep for a bit and retry.
+ * A root's lifetime is governed by its root cgroup.
+ * tryget_live failure indicate that the root is being
+ * destroyed. Wait for destruction to complete so that the
+ * subsystems are free. We can use wait_queue for the wait
+ * but this path is super cold. Let's just sleep for a bit
+ * and retry.
*/
- if (!atomic_inc_not_zero(&root->cgrp.refcnt)) {
+ if (!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
mutex_unlock(&cgroup_mutex);
msleep(10);
return ERR_PTR(restart_syscall());
@@ -1793,7 +1754,16 @@ static void cgroup_kill_sb(struct super_block *sb)
struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
struct cgroup_root *root = cgroup_root_from_kf(kf_root);
- cgroup_put(&root->cgrp);
+ /*
+ * If @root doesn't have any mounts or children, start killing it.
+ * This prevents new mounts by disabling percpu_ref_tryget_live().
+ * cgroup_mount() may wait for @root's release.
+ */
+ if (cgroup_has_live_children(&root->cgrp))
+ cgroup_put(&root->cgrp);
+ else
+ percpu_ref_kill(&root->cgrp.self.refcnt);
+
kernfs_kill_sb(sb);
}
@@ -4109,11 +4079,37 @@ static void css_free_work_fn(struct work_struct *work)
container_of(work, struct cgroup_subsys_state, destroy_work);
struct cgroup *cgrp = css->cgroup;
- if (css->parent)
- css_put(css->parent);
+ if (css->ss) {
+ /* css free path */
+ if (css->parent)
+ css_put(css->parent);
- css->ss->css_free(css);
- cgroup_put(cgrp);
+ css->ss->css_free(css);
+ cgroup_put(cgrp);
+ } else {
+ /* cgroup free path */
+ atomic_dec(&cgrp->root->nr_cgrps);
+ cgroup_pidlist_destroy_all(cgrp);
+
+ if (cgrp->parent) {
+ /*
+ * We get a ref to the parent, and put the ref when
+ * this cgroup is being freed, so it's guaranteed
+ * that the parent won't be destroyed before its
+ * children.
+ */
+ cgroup_put(cgrp->parent);
+ kernfs_put(cgrp->kn);
+ kfree(cgrp);
+ } else {
+ /*
+ * This is root cgroup's refcnt reaching zero,
+ * which indicates that the root should be
+ * released.
+ */
+ cgroup_destroy_root(cgrp->root);
+ }
+ }
}
static void css_free_rcu_fn(struct rcu_head *rcu_head)
@@ -4130,8 +4126,20 @@ static void css_release_work_fn(struct work_struct *work)
struct cgroup_subsys_state *css =
container_of(work, struct cgroup_subsys_state, destroy_work);
struct cgroup_subsys *ss = css->ss;
+ struct cgroup *cgrp = css->cgroup;
- cgroup_idr_remove(&ss->css_idr, css->id);
+ if (ss) {
+ /* css release path */
+ cgroup_idr_remove(&ss->css_idr, css->id);
+ } else {
+ /* cgroup release path */
+ mutex_lock(&cgroup_mutex);
+ list_del_rcu(&cgrp->sibling);
+ mutex_unlock(&cgroup_mutex);
+
+ cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
+ cgrp->id = -1;
+ }
call_rcu(&css->rcu_head, css_free_rcu_fn);
}
@@ -4284,6 +4292,10 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
goto out_unlock;
}
+ ret = percpu_ref_init(&cgrp->self.refcnt, css_release);
+ if (ret)
+ goto out_free_cgrp;
+
/*
* Temporarily set the pointer to NULL, so idr_find() won't return
* a half-baked cgroup.
@@ -4291,7 +4303,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_NOWAIT);
if (cgrp->id < 0) {
ret = -ENOMEM;
- goto out_free_cgrp;
+ goto out_cancel_ref;
}
init_cgroup_housekeeping(cgrp);
@@ -4364,6 +4376,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
out_free_id:
cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
+out_cancel_ref:
+ percpu_ref_cancel_init(&cgrp->self.refcnt);
out_free_cgrp:
kfree(cgrp);
out_unlock:
@@ -4520,7 +4534,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
check_for_release(cgrp->parent);
/* put the base reference */
- cgroup_put(cgrp);
+ percpu_ref_kill(&cgrp->self.refcnt);
return 0;
};
--
1.9.0
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCHSET cgroup/for-3.16] cgroup: use css->refcnt for cgroup reference counting
2014-05-09 21:13 [PATCHSET cgroup/for-3.16] cgroup: use css->refcnt for cgroup reference counting Tejun Heo
` (5 preceding siblings ...)
2014-05-09 21:13 ` [PATCH 9/9] cgroup: use cgroup->self.refcnt for cgroup refcnting Tejun Heo
@ 2014-05-13 16:59 ` Tejun Heo
6 siblings, 0 replies; 23+ messages in thread
From: Tejun Heo @ 2014-05-13 16:59 UTC (permalink / raw)
To: lizefan; +Cc: cgroups, linux-kernel
On Fri, May 09, 2014 at 05:13:26PM -0400, Tejun Heo wrote:
> and available in the following git branch.
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git review-use-css-ref
Rebased on top of cgroup/for-3.16.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 23+ messages in thread