From: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
To: lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org,
	cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	koverstreet-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	cl-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org,
	Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Subject: [PATCH 10/11] cgroup: split cgroup destruction into two steps
Date: Wed, 12 Jun 2013 14:03:15 -0700	[thread overview]
Message-ID: <1371070996-20613-11-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1371070996-20613-1-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Split cgroup_destroy_locked() into two steps and put the latter half
into cgroup_offline_fn() which is executed from a work item.  The
latter half is responsible for offlining the css's, removing the
cgroup from internal lists, and propagating release notification to
the parent.  The separation is to allow using percpu refcnt for css.
Note that this allows for other cgroup operations to happen between
the first and second halves of destruction, including creating a new
cgroup with the same name.  As the target cgroup is marked DEAD in the
first half and cgroup internals don't care about the names of cgroups,
this should be fine.  A comment explaining this will be added by the
next patch which implements the actual percpu refcnting.
As RCU freeing is guaranteed to happen after the second step of
destruction, we can use the same work item for both.  This patch
renames cgroup->free_work to ->destroy_work and uses it for both
purposes.  INIT_WORK() is now performed right before queueing the work
item.
Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
 include/linux/cgroup.h |  2 +-
 kernel/cgroup.c        | 38 +++++++++++++++++++++++++++-----------
 2 files changed, 28 insertions(+), 12 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 5428738..58ee0f4 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -233,7 +233,7 @@ struct cgroup {
 
 	/* For RCU-protected deletion */
 	struct rcu_head rcu_head;
-	struct work_struct free_work;
+	struct work_struct destroy_work;
 
 	/* List of events which userspace want to receive */
 	struct list_head event_list;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a43bc9d..83e3183 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -208,6 +208,7 @@ static struct cgroup_name root_cgroup_name = { .name = "/" };
  */
 static int need_forkexit_callback __read_mostly;
 
+static void cgroup_offline_fn(struct work_struct *work);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
 			      struct cftype cfts[], bool is_add);
@@ -830,7 +831,7 @@ static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
 
 static void cgroup_free_fn(struct work_struct *work)
 {
-	struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
+	struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
 	struct cgroup_subsys *ss;
 
 	mutex_lock(&cgroup_mutex);
@@ -875,7 +876,8 @@ static void cgroup_free_rcu(struct rcu_head *head)
 {
 	struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
 
-	schedule_work(&cgrp->free_work);
+	INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
+	schedule_work(&cgrp->destroy_work);
 }
 
 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
@@ -1407,7 +1409,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->allcg_node);
 	INIT_LIST_HEAD(&cgrp->release_list);
 	INIT_LIST_HEAD(&cgrp->pidlists);
-	INIT_WORK(&cgrp->free_work, cgroup_free_fn);
 	mutex_init(&cgrp->pidlist_mutex);
 	INIT_LIST_HEAD(&cgrp->event_list);
 	spin_lock_init(&cgrp->event_list_lock);
@@ -2994,12 +2995,13 @@ struct cgroup *cgroup_next_sibling(struct cgroup *pos)
 	/*
 	 * @pos could already have been removed.  Once a cgroup is removed,
 	 * its ->sibling.next is no longer updated when its next sibling
-	 * changes.  As CGRP_DEAD is set on removal which is fully
-	 * serialized, if we see it unasserted, it's guaranteed that the
-	 * next sibling hasn't finished its grace period even if it's
-	 * already removed, and thus safe to dereference from this RCU
-	 * critical section.  If ->sibling.next is inaccessible,
-	 * cgroup_is_dead() is guaranteed to be visible as %true here.
+	 * changes.  As CGRP_DEAD assertion is serialized and happens
+	 * before the cgroup is taken off the ->sibling list, if we see it
+	 * unasserted, it's guaranteed that the next sibling hasn't
+	 * finished its grace period even if it's already removed, and thus
+	 * safe to dereference from this RCU critical section.  If
+	 * ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed
+	 * to be visible as %true here.
 	 */
 	if (likely(!cgroup_is_dead(pos))) {
 		next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
@@ -4362,7 +4364,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
 {
 	struct dentry *d = cgrp->dentry;
-	struct cgroup *parent = cgrp->parent;
 	struct cgroup_event *event, *tmp;
 	struct cgroup_subsys *ss;
 	bool empty;
@@ -4426,6 +4427,21 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	}
 	spin_unlock(&cgrp->event_list_lock);
 
+	INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn);
+	schedule_work(&cgrp->destroy_work);
+
+	return 0;
+};
+
+static void cgroup_offline_fn(struct work_struct *work)
+{
+	struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
+	struct cgroup *parent = cgrp->parent;
+	struct dentry *d = cgrp->dentry;
+	struct cgroup_subsys *ss;
+
+	mutex_lock(&cgroup_mutex);
+
 	/* tell subsystems to initate destruction */
 	for_each_subsys(cgrp->root, ss)
 		offline_css(ss, cgrp);
@@ -4449,7 +4465,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	set_bit(CGRP_RELEASABLE, &parent->flags);
 	check_for_release(parent);
 
-	return 0;
+	mutex_unlock(&cgroup_mutex);
 }
 
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
-- 
1.8.2.1
next prev parent reply	other threads:[~2013-06-12 21:03 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-06-12 21:03 [PATCHSET cgroup/for-3.11] cgroup: convert cgroup_subsys_state refcnt to percpu_ref Tejun Heo
2013-06-12 21:03 ` [PATCH 04/11] cgroup: use kzalloc() and list_del_init() Tejun Heo
     [not found]   ` <1371070996-20613-5-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2013-06-13  2:36     ` Li Zefan
     [not found]       ` <51B93038.9010202-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
2013-06-13  2:38         ` Kent Overstreet
2013-06-13  2:41           ` Tejun Heo
     [not found]             ` <CAOS58YPv_uKeTqZSNF=sXTEnLn=LTbsdpBPM5K_ykXoVT-+CpA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2013-06-13  2:43               ` Kent Overstreet
2013-06-13  2:48                 ` Tejun Heo
     [not found]                   ` <20130613024859.GA7432-9pTldWuhBndy/B6EtB590w@public.gmane.org>
2013-06-13  2:52                     ` Kent Overstreet
2013-06-13  2:56                       ` Tejun Heo
     [not found]                         ` <20130613025623.GB7432-9pTldWuhBndy/B6EtB590w@public.gmane.org>
2013-06-13  3:05                           ` Tejun Heo
2013-06-13  3:13           ` Li Zefan
2013-06-13  2:39         ` Tejun Heo
2013-06-12 21:03 ` [PATCH 06/11] cgroup: rename CGRP_REMOVED to CGRP_DEAD Tejun Heo
2013-06-12 21:03 ` [PATCH 08/11] cgroup: remove cgroup->count and use Tejun Heo
     [not found] ` <1371070996-20613-1-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2013-06-12 21:03   ` [PATCH 01/11] cgroup: remove now unused css_depth() Tejun Heo
2013-06-12 21:03   ` [PATCH 02/11] cgroup: consistently use @cset for struct css_set variables Tejun Heo
2013-06-12 21:03   ` [PATCH 03/11] cgroup: bring some sanity to naming around cg_cgroup_link Tejun Heo
     [not found]     ` <1371070996-20613-4-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2013-06-13  2:34       ` Li Zefan
2013-06-12 21:03   ` [PATCH 05/11] cgroup: clean up css_[try]get() and css_put() Tejun Heo
     [not found]     ` <1371070996-20613-6-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2013-06-13  2:38       ` Li Zefan
2013-06-12 21:03   ` [PATCH 07/11] cgroup: drop unnecessary RCU dancing from __put_css_set() Tejun Heo
2013-06-12 21:03   ` [PATCH 09/11] cgroup: reorder the operations in cgroup_destroy_locked() Tejun Heo
2013-06-12 21:03   ` Tejun Heo [this message]
2013-06-12 21:03 ` [PATCH 11/11] cgroup: use percpu refcnt for cgroup_subsys_states Tejun Heo
  -- strict thread matches above, loose matches on Subject: below --
2013-06-13  4:04 [PATCHSET v2 cgroup/for-3.11] cgroup: convert cgroup_subsys_state refcnt to percpu_ref Tejun Heo
2013-06-13  4:04 ` [PATCH 10/11] cgroup: split cgroup destruction into two steps Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox
  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):
  git send-email \
    --in-reply-to=1371070996-20613-11-git-send-email-tj@kernel.org \
    --to=tj-dgejt+ai2ygdnm+yrofe0a@public.gmane.org \
    --cc=cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=cl-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org \
    --cc=containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org \
    --cc=koverstreet-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org \
    --cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org \
    /path/to/YOUR_REPLY
  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).