From: Tejun Heo <tj@kernel.org>
To: lizefan@huawei.com, paul@paulmenage.org, glommer@parallels.com
Cc: containers@lists.linux-foundation.org,
	linux-kernel@vger.kernel.org, mhocko@suse.cz,
	Tejun Heo <tj@kernel.org>, cgroups@vger.kernel.org
Subject: [PATCH 2/3] cpuset: replace cpuset->stack_list with cpuset_for_each_descendant_pre()
Date: Thu,  3 Jan 2013 13:44:05 -0800
Message-ID: <1357249446-25075-3-git-send-email-tj@kernel.org>
In-Reply-To: <1357249446-25075-1-git-send-email-tj@kernel.org>

Implement cpuset_for_each_descendant_pre() and use it to replace the
cpuset-specific tree walking that relied on cpuset->stack_list.
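
For illustration, every converted walk follows the same shape.  The
following is a minimal sketch distilled from the hunks below, where
root_cs, should_prune() and visit() are hypothetical stand-ins for
each caller's cpuset and per-walk logic, not helpers added by this
patch:

	struct cpuset *cp;
	struct cgroup *pos_cgrp;

	rcu_read_lock();
	cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) {
		if (should_prune(cp)) {
			/* advance the cursor past @cp's whole subtree */
			pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
			continue;
		}
		visit(cp);	/* @cp is visited before its descendants */
	}
	rcu_read_unlock();

Because the cursor is now a local variable rather than the shared
cpuset->stack_list field embedded in each cpuset, concurrent walkers
only need the RCU read lock and cannot corrupt each other's state.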

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
---
 kernel/cpuset.c | 123 ++++++++++++++++++++++----------------------------------
 1 file changed, 48 insertions(+), 75 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 58aa99b..b2f8dad 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -103,9 +103,6 @@ struct cpuset {
 	/* for custom sched domain */
 	int relax_domain_level;
 
-	/* used for walking a cpuset hierarchy */
-	struct list_head stack_list;
-
 	struct work_struct hotplug_work;
 };
 
@@ -207,6 +204,20 @@ static struct cpuset top_cpuset = {
 	cgroup_for_each_child((pos_cgrp), (parent_cs)->css.cgroup)	\
 		if (is_cpuset_online(((child_cs) = cgroup_cs((pos_cgrp)))))
 
+/**
+ * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
+ * @des_cs: loop cursor pointing to the current descendant
+ * @pos_cgrp: used for iteration
+ * @root_cs: target cpuset whose descendants to walk
+ *
+ * Walk @des_cs through the online descendants of @root_cs.  Must be used
+ * with RCU read locked.  The caller may modify @pos_cgrp by calling
+ * cgroup_rightmost_descendant() to skip a subtree.
+ */
+#define cpuset_for_each_descendant_pre(des_cs, pos_cgrp, root_cs)	\
+	cgroup_for_each_descendant_pre((pos_cgrp), (root_cs)->css.cgroup) \
+		if (is_cpuset_online(((des_cs) = cgroup_cs((pos_cgrp)))))
+
 /*
  * There are two global mutexes guarding cpuset structures - cpuset_mutex
  * and callback_mutex.  The latter may nest inside the former.  We also
@@ -507,31 +518,24 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
 	return;
 }
 
-static void
-update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
+static void update_domain_attr_tree(struct sched_domain_attr *dattr,
+				    struct cpuset *root_cs)
 {
-	LIST_HEAD(q);
-
-	list_add(&c->stack_list, &q);
-	while (!list_empty(&q)) {
-		struct cpuset *cp;
-		struct cgroup *cont;
-		struct cpuset *child;
-
-		cp = list_first_entry(&q, struct cpuset, stack_list);
-		list_del(q.next);
+	struct cpuset *cp;
+	struct cgroup *pos_cgrp;
 
-		if (cpumask_empty(cp->cpus_allowed))
+	rcu_read_lock();
+	cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) {
+		/* skip the whole subtree if @cp doesn't have any CPU */
+		if (cpumask_empty(cp->cpus_allowed)) {
+			pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
 			continue;
+		}
 
 		if (is_sched_load_balance(cp))
 			update_domain_attr(dattr, cp);
-
-		rcu_read_lock();
-		cpuset_for_each_child(child, cont, cp)
-			list_add_tail(&child->stack_list, &q);
-		rcu_read_unlock();
 	}
+	rcu_read_unlock();
 }
 
 /*
@@ -591,7 +595,6 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
 static int generate_sched_domains(cpumask_var_t **domains,
 			struct sched_domain_attr **attributes)
 {
-	LIST_HEAD(q);		/* queue of cpusets to be scanned */
-	struct cpuset *cp;	/* scans q */
+	struct cpuset *cp;	/* top-down scan of cpusets */
 	struct cpuset **csa;	/* array of all cpuset ptrs */
 	int csn;		/* how many cpuset ptrs in csa so far */
@@ -600,6 +603,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
 	struct sched_domain_attr *dattr;  /* attributes for custom domains */
 	int ndoms = 0;		/* number of sched domains in result */
 	int nslot;		/* next empty doms[] struct cpumask slot */
+	struct cgroup *pos_cgrp;
 
 	doms = NULL;
 	dattr = NULL;
@@ -627,33 +631,27 @@ static int generate_sched_domains(cpumask_var_t **domains,
 		goto done;
 	csn = 0;
 
-	list_add(&top_cpuset.stack_list, &q);
-	while (!list_empty(&q)) {
-		struct cgroup *cont;
-		struct cpuset *child;   /* scans child cpusets of cp */
-
-		cp = list_first_entry(&q, struct cpuset, stack_list);
-		list_del(q.next);
-
-		if (cpumask_empty(cp->cpus_allowed))
-			continue;
-
+	rcu_read_lock();
+	cpuset_for_each_descendant_pre(cp, pos_cgrp, &top_cpuset) {
 		/*
-		 * All child cpusets contain a subset of the parent's cpus, so
-		 * just skip them, and then we call update_domain_attr_tree()
-		 * to calc relax_domain_level of the corresponding sched
-		 * domain.
+		 * Continue traversing beyond @cp iff @cp has some CPUs and
+		 * isn't load balancing.  The former is obvious.  The
+		 * latter: All child cpusets contain a subset of the
+		 * parent's cpus, so just skip them, and then we call
+		 * update_domain_attr_tree() to calc relax_domain_level of
+		 * the corresponding sched domain.
 		 */
-		if (is_sched_load_balance(cp)) {
-			csa[csn++] = cp;
+		if (!cpumask_empty(cp->cpus_allowed) &&
+		    !is_sched_load_balance(cp))
 			continue;
-		}
 
-		rcu_read_lock();
-		cpuset_for_each_child(child, cont, cp)
-			list_add_tail(&child->stack_list, &q);
-		rcu_read_unlock();
-  	}
+		if (is_sched_load_balance(cp))
+			csa[csn++] = cp;
+
+		/* skip @cp's subtree */
+		pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
+	}
+	rcu_read_unlock();
 
 	for (i = 0; i < csn; i++)
 		csa[i]->pn = i;
@@ -2068,31 +2066,6 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
 	move_member_tasks_to_cpuset(cs, parent);
 }
 
-/*
- * Helper function to traverse cpusets.
- * It can be used to walk the cpuset tree from top to bottom, completing
- * one layer before dropping down to the next (thus always processing a
- * node before any of its children).
- */
-static struct cpuset *cpuset_next(struct list_head *queue)
-{
-	struct cpuset *cp;
-	struct cpuset *child;	/* scans child cpusets of cp */
-	struct cgroup *cont;
-
-	if (list_empty(queue))
-		return NULL;
-
-	cp = list_first_entry(queue, struct cpuset, stack_list);
-	list_del(queue->next);
-	rcu_read_lock();
-	cpuset_for_each_child(child, cont, cp)
-		list_add_tail(&child->stack_list, queue);
-	rcu_read_unlock();
-
-	return cp;
-}
-
 /**
  * cpuset_propagate_hotplug_workfn - propagate CPU/memory hotplug to a cpuset
  * @cs: cpuset in interest
@@ -2229,12 +2202,12 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 	/* if cpus or mems went down, we need to propagate to descendants */
 	if (cpus_offlined || mems_offlined) {
 		struct cpuset *cs;
-		LIST_HEAD(queue);
+		struct cgroup *pos_cgrp;
 
-		list_add_tail(&top_cpuset.stack_list, &queue);
-		while ((cs = cpuset_next(&queue)))
-			if (cs != &top_cpuset)
-				schedule_cpuset_propagate_hotplug(cs);
+		rcu_read_lock();
+		cpuset_for_each_descendant_pre(cs, pos_cgrp, &top_cpuset)
+			schedule_cpuset_propagate_hotplug(cs);
+		rcu_read_unlock();
 	}
 
 	mutex_unlock(&cpuset_mutex);
-- 
1.8.0.2


Thread overview: 9+ messages
2013-01-03 21:44 [PATCHSET] cpuset: drop cpuset->stack_list and ->parent, take#2 Tejun Heo
     [not found] ` <1357249446-25075-1-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2013-01-03 21:44   ` [PATCH 1/3] cpuset: implement cgroup_rightmost_descendant() Tejun Heo
2013-01-03 21:44   ` Tejun Heo [this message]
2013-01-03 21:44   ` [PATCH 3/3] cpuset: remove cpuset->parent Tejun Heo
2013-01-06  9:27   ` [PATCHSET] cpuset: drop cpuset->stack_list and ->parent, take#2 Li Zefan
     [not found]     ` <50E94392.2050509-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
2013-01-07 16:47       ` Tejun Heo
     [not found]         ` <20130107164707.GI3926-Gd/HAXX7CRxy/B6EtB590w@public.gmane.org>
2013-01-07 16:52           ` Tejun Heo
  -- strict thread matches above, loose matches on Subject: below --
2012-11-28 22:26 [PATCHSET cgroup/for-3.8] cpuset: drop cpuset->stack_list and ->parent Tejun Heo
     [not found] ` <1354141621-11906-1-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2012-11-28 22:27   ` [PATCH 2/3] cpuset: replace cpuset->stack_list with cpuset_for_each_descendant_pre() Tejun Heo
     [not found]     ` <1354141621-11906-3-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2012-12-03 16:18       ` Michal Hocko
