public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [RFT][PATCH] sched, cgroup: Optimize load_balance_fair()
@ 2011-07-13 11:36 Peter Zijlstra
  2011-07-13 17:13 ` Paul Turner
  2011-07-21 18:28 ` [tip:sched/core] " tip-bot for Peter Zijlstra
  0 siblings, 2 replies; 8+ messages in thread
From: Peter Zijlstra @ 2011-07-13 11:36 UTC (permalink / raw)
  To: Paul Turner, Nikhil Rao, Srivatsa Vaddagiri
  Cc: linux-kernel, Ingo Molnar, Mike Galbraith

Subject: sched, cgroup: Optimize load_balance_fair()
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed Jul 13 13:09:25 CEST 2011

Use for_each_leaf_cfs_rq() instead of list_for_each_entry_rcu(), this
achieves that load_balance_fair() only iterates those task_groups that
actually have tasks on busiest, and that we iterate bottom-up, trying to
move light groups before the heavier ones.

No idea if it will actually work out to be beneficial in practice, does
anybody have a cgroup workload that might show a difference one way or
the other?

[ Also move update_h_load to sched_fair.c, losing #ifdef-ery ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/sched.c      |   32 --------------------------------
 kernel/sched_fair.c |   40 +++++++++++++++++++++++++++++++++++-----
 2 files changed, 35 insertions(+), 37 deletions(-)

Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -1568,38 +1568,6 @@ static unsigned long cpu_avg_load_per_ta
 	return rq->avg_load_per_task;
 }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-
-/*
- * Compute the cpu's hierarchical load factor for each task group.
- * This needs to be done in a top-down fashion because the load of a child
- * group is a fraction of its parents load.
- */
-static int tg_load_down(struct task_group *tg, void *data)
-{
-	unsigned long load;
-	long cpu = (long)data;
-
-	if (!tg->parent) {
-		load = cpu_rq(cpu)->load.weight;
-	} else {
-		load = tg->parent->cfs_rq[cpu]->h_load;
-		load *= tg->se[cpu]->load.weight;
-		load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
-	}
-
-	tg->cfs_rq[cpu]->h_load = load;
-
-	return 0;
-}
-
-static void update_h_load(long cpu)
-{
-	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
-}
-
-#endif
-
 #ifdef CONFIG_PREEMPT
 
 static void double_rq_lock(struct rq *rq1, struct rq *rq2);
Index: linux-2.6/kernel/sched_fair.c
===================================================================
--- linux-2.6.orig/kernel/sched_fair.c
+++ linux-2.6/kernel/sched_fair.c
@@ -2232,11 +2232,43 @@ static void update_shares(int cpu)
 	struct rq *rq = cpu_rq(cpu);
 
 	rcu_read_lock();
+	/*
+	 * Iterates the task_group tree in a bottom up fashion, see
+	 * list_add_leaf_cfs_rq() for details.
+	 */
 	for_each_leaf_cfs_rq(rq, cfs_rq)
 		update_shares_cpu(cfs_rq->tg, cpu);
 	rcu_read_unlock();
 }
 
+/*
+ * Compute the cpu's hierarchical load factor for each task group.
+ * This needs to be done in a top-down fashion because the load of a child
+ * group is a fraction of its parents load.
+ */
+static int tg_load_down(struct task_group *tg, void *data)
+{
+	unsigned long load;
+	long cpu = (long)data;
+
+	if (!tg->parent) {
+		load = cpu_rq(cpu)->load.weight;
+	} else {
+		load = tg->parent->cfs_rq[cpu]->h_load;
+		load *= tg->se[cpu]->load.weight;
+		load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
+	}
+
+	tg->cfs_rq[cpu]->h_load = load;
+
+	return 0;
+}
+
+static void update_h_load(long cpu)
+{
+	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
@@ -2244,14 +2276,12 @@ load_balance_fair(struct rq *this_rq, in
 		  int *all_pinned)
 {
 	long rem_load_move = max_load_move;
-	int busiest_cpu = cpu_of(busiest);
-	struct task_group *tg;
+	struct cfs_rq *busiest_cfs_rq;
 
 	rcu_read_lock();
-	update_h_load(busiest_cpu);
+	update_h_load(cpu_of(busiest));
 
-	list_for_each_entry_rcu(tg, &task_groups, list) {
-		struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
+	for_each_leaf_cfs_rq(busiest, busiest_cfs_rq) {
 		unsigned long busiest_h_load = busiest_cfs_rq->h_load;
 		unsigned long busiest_weight = busiest_cfs_rq->load.weight;
 		u64 rem_load, moved_load;


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFT][PATCH] sched, cgroup: Optimize load_balance_fair()
  2011-07-13 11:36 [RFT][PATCH] sched, cgroup: Optimize load_balance_fair() Peter Zijlstra
@ 2011-07-13 17:13 ` Paul Turner
  2011-07-13 21:01   ` Peter Zijlstra
  2011-07-13 21:02   ` Peter Zijlstra
  2011-07-21 18:28 ` [tip:sched/core] " tip-bot for Peter Zijlstra
  1 sibling, 2 replies; 8+ messages in thread
From: Paul Turner @ 2011-07-13 17:13 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Nikhil Rao, Srivatsa Vaddagiri, linux-kernel, Ingo Molnar,
	Mike Galbraith

Nice! The continued usage of task_groups had been irking me for a
while but I haven't had the time to scratch the itch :).

On Wed, Jul 13, 2011 at 4:36 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> Subject: sched, cgroup: Optimize load_balance_fair()
> From: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Date: Wed Jul 13 13:09:25 CEST 2011
>
> Use for_each_leaf_cfs_rq() instead of list_for_each_entry_rcu(), this
> achieves that load_balance_fair() only iterates those task_groups that
> actually have tasks on busiest, and that we iterate bottom-up, trying to
> move light groups before the heavier ones.
>
> No idea if it will actually work out to be beneficial in practice, does
> anybody have a cgroup workload that might show a difference one way or
> the other?
>
> [ Also move update_h_load to sched_fair.c, loosing #ifdef-ery ]
>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
>  kernel/sched.c      |   32 --------------------------------
>  kernel/sched_fair.c |   40 +++++++++++++++++++++++++++++++++++-----
>  2 files changed, 35 insertions(+), 37 deletions(-)
>
> Index: linux-2.6/kernel/sched.c
> ===================================================================
> --- linux-2.6.orig/kernel/sched.c
> +++ linux-2.6/kernel/sched.c
> @@ -1568,38 +1568,6 @@ static unsigned long cpu_avg_load_per_ta
>        return rq->avg_load_per_task;
>  }
>
> -#ifdef CONFIG_FAIR_GROUP_SCHED
> -
> -/*
> - * Compute the cpu's hierarchical load factor for each task group.
> - * This needs to be done in a top-down fashion because the load of a child
> - * group is a fraction of its parents load.
> - */
> -static int tg_load_down(struct task_group *tg, void *data)
> -{
> -       unsigned long load;
> -       long cpu = (long)data;
> -
> -       if (!tg->parent) {
> -               load = cpu_rq(cpu)->load.weight;
> -       } else {
> -               load = tg->parent->cfs_rq[cpu]->h_load;
> -               load *= tg->se[cpu]->load.weight;
> -               load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
> -       }
> -
> -       tg->cfs_rq[cpu]->h_load = load;
> -
> -       return 0;
> -}
> -
> -static void update_h_load(long cpu)
> -{
> -       walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
> -}
> -
> -#endif
> -
>  #ifdef CONFIG_PREEMPT
>
>  static void double_rq_lock(struct rq *rq1, struct rq *rq2);
> Index: linux-2.6/kernel/sched_fair.c
> ===================================================================
> --- linux-2.6.orig/kernel/sched_fair.c
> +++ linux-2.6/kernel/sched_fair.c
> @@ -2232,11 +2232,43 @@ static void update_shares(int cpu)
>        struct rq *rq = cpu_rq(cpu);
>
>        rcu_read_lock();
> +       /*
> +        * Iterates the task_group tree in a bottom up fashion, see
> +        * list_add_leaf_cfs_rq() for details.
> +        */
>        for_each_leaf_cfs_rq(rq, cfs_rq)
>                update_shares_cpu(cfs_rq->tg, cpu);
>        rcu_read_unlock();
>  }
>
> +/*
> + * Compute the cpu's hierarchical load factor for each task group.
> + * This needs to be done in a top-down fashion because the load of a child
> + * group is a fraction of its parents load.
> + */
> +static int tg_load_down(struct task_group *tg, void *data)
> +{
> +       unsigned long load;
> +       long cpu = (long)data;
> +
> +       if (!tg->parent) {
> +               load = cpu_rq(cpu)->load.weight;
> +       } else {
> +               load = tg->parent->cfs_rq[cpu]->h_load;
> +               load *= tg->se[cpu]->load.weight;
> +               load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
> +       }
> +
> +       tg->cfs_rq[cpu]->h_load = load;
> +
> +       return 0;
> +}
> +
> +static void update_h_load(long cpu)
> +{
> +       walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
> +}

With a list_for_each_entry_reverse_rcu() this could also only operate
on the local hierarchy and avoid the tg tree walk.

> +
>  static unsigned long
>  load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
>                  unsigned long max_load_move,
> @@ -2244,14 +2276,12 @@ load_balance_fair(struct rq *this_rq, in
>                  int *all_pinned)
>  {
>        long rem_load_move = max_load_move;
> -       int busiest_cpu = cpu_of(busiest);
> -       struct task_group *tg;
> +       struct cfs_rq *busiest_cfs_rq;
>
>        rcu_read_lock();
> -       update_h_load(busiest_cpu);
> +       update_h_load(cpu_of(busiest));
>
> -       list_for_each_entry_rcu(tg, &task_groups, list) {
> -               struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
> +       for_each_leaf_cfs_rq(busiest, busiest_cfs_rq) {
>                unsigned long busiest_h_load = busiest_cfs_rq->h_load;
>                unsigned long busiest_weight = busiest_cfs_rq->load.weight;
>                u64 rem_load, moved_load;
>
>

Reviewed-by: Paul Turner <pjt@google.com>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFT][PATCH] sched, cgroup: Optimize load_balance_fair()
  2011-07-13 17:13 ` Paul Turner
@ 2011-07-13 21:01   ` Peter Zijlstra
  2011-07-14  0:46     ` Paul E. McKenney
  2011-07-13 21:02   ` Peter Zijlstra
  1 sibling, 1 reply; 8+ messages in thread
From: Peter Zijlstra @ 2011-07-13 21:01 UTC (permalink / raw)
  To: Paul Turner
  Cc: Nikhil Rao, Srivatsa Vaddagiri, linux-kernel, Ingo Molnar,
	Mike Galbraith

On Wed, 2011-07-13 at 10:13 -0700, Paul Turner wrote:
> > +static void update_h_load(long cpu)
> > +{
> > +       walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
> > +}
> 
> With a list_for_each_entry_reverse_rcu() this could also only operate
> on the local hierarchy and avoid the tg tree walk. 

Ah, sadly that primitive cannot exist; rcu list primitives only keep
the fwd link.

Although I guess we could 'fix' that.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFT][PATCH] sched, cgroup: Optimize load_balance_fair()
  2011-07-13 17:13 ` Paul Turner
  2011-07-13 21:01   ` Peter Zijlstra
@ 2011-07-13 21:02   ` Peter Zijlstra
  2011-07-13 21:14     ` Paul Turner
  1 sibling, 1 reply; 8+ messages in thread
From: Peter Zijlstra @ 2011-07-13 21:02 UTC (permalink / raw)
  To: Paul Turner
  Cc: Nikhil Rao, Srivatsa Vaddagiri, linux-kernel, Ingo Molnar,
	Mike Galbraith

On Wed, 2011-07-13 at 10:13 -0700, Paul Turner wrote:
> Nice! The continued usage of task_groups had been irking me for a
> while but I haven't had the time to scratch the itch :).
> 
> On Wed, Jul 13, 2011 at 4:36 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> > Subject: sched, cgroup: Optimize load_balance_fair()
> > From: Peter Zijlstra <a.p.zijlstra@chello.nl>
> > Date: Wed Jul 13 13:09:25 CEST 2011
> >
> > Use for_each_leaf_cfs_rq() instead of list_for_each_entry_rcu(), this
> > achieves that load_balance_fair() only iterates those task_groups that
> > actually have tasks on busiest, and that we iterate bottom-up, trying to
> > move light groups before the heavier ones.
> >
> > No idea if it will actually work out to be beneficial in practice, does
> > anybody have a cgroup workload that might show a difference one way or
> > the other?
> >
> > [ Also move update_h_load to sched_fair.c, loosing #ifdef-ery ]
> >
> > Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

> Reviewed-by: Paul Turner <pjt@google.com>

So you think I should just merge it and see if any cgroup workload
dislikes it?

OK, I guess I can do that..

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFT][PATCH] sched, cgroup: Optimize load_balance_fair()
  2011-07-13 21:02   ` Peter Zijlstra
@ 2011-07-13 21:14     ` Paul Turner
  2011-07-13 21:18       ` Peter Zijlstra
  0 siblings, 1 reply; 8+ messages in thread
From: Paul Turner @ 2011-07-13 21:14 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Nikhil Rao, Srivatsa Vaddagiri, linux-kernel, Ingo Molnar,
	Mike Galbraith

On Wed, Jul 13, 2011 at 2:02 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Wed, 2011-07-13 at 10:13 -0700, Paul Turner wrote:
>> Nice! The continued usage of task_groups had been irking me for a
>> while but I haven't had the time to scratch the itch :).
>>
>> On Wed, Jul 13, 2011 at 4:36 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>> > Subject: sched, cgroup: Optimize load_balance_fair()
>> > From: Peter Zijlstra <a.p.zijlstra@chello.nl>
>> > Date: Wed Jul 13 13:09:25 CEST 2011
>> >
>> > Use for_each_leaf_cfs_rq() instead of list_for_each_entry_rcu(), this
>> > achieves that load_balance_fair() only iterates those task_groups that
>> > actually have tasks on busiest, and that we iterate bottom-up, trying to
>> > move light groups before the heavier ones.
>> >
>> > No idea if it will actually work out to be beneficial in practice, does
>> > anybody have a cgroup workload that might show a difference one way or
>> > the other?
>> >
>> > [ Also move update_h_load to sched_fair.c, loosing #ifdef-ery ]
>> >
>> > Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
>
>> Reviewed-by: Paul Turner <pjt@google.com>
>
> So you think I should just merge it and see if any cgroup workload
> dislikes it?
>
> OK, I guess I can do that..
>

The task_list order was completely arbitrary anyway and had some
unfair bias against the groups most recently created.

I experimented a while back with a change that built a (task) h_load
ordered heap and chose which group to balance based on that.  It
improved sum(run_delay) on a few workloads and didn't seem to regress
anything.  (Unfortunately this was quite a while back and I don't have
the data available anymore.)

Using for_each_leaf_cfs_rq approximates that which leaves my
expectations somewhere between "can't hurt -- it's already arbitrary"
and "might help".  Moreover, if there are cases that result in bad
behavior under this new ordering then their group creation order can
likely be permuted to create the same problem under the former
task_groups ordering making it a bug we have to deal with either way.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFT][PATCH] sched, cgroup: Optimize load_balance_fair()
  2011-07-13 21:14     ` Paul Turner
@ 2011-07-13 21:18       ` Peter Zijlstra
  0 siblings, 0 replies; 8+ messages in thread
From: Peter Zijlstra @ 2011-07-13 21:18 UTC (permalink / raw)
  To: Paul Turner
  Cc: Nikhil Rao, Srivatsa Vaddagiri, linux-kernel, Ingo Molnar,
	Mike Galbraith

On Wed, 2011-07-13 at 14:14 -0700, Paul Turner wrote:
> 
> The task_list order was completely arbitrary anyway and had some
> unfair bias against the groups most recently created.

fair enough I guess.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFT][PATCH] sched, cgroup: Optimize load_balance_fair()
  2011-07-13 21:01   ` Peter Zijlstra
@ 2011-07-14  0:46     ` Paul E. McKenney
  0 siblings, 0 replies; 8+ messages in thread
From: Paul E. McKenney @ 2011-07-14  0:46 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Paul Turner, Nikhil Rao, Srivatsa Vaddagiri, linux-kernel,
	Ingo Molnar, Mike Galbraith

On Wed, Jul 13, 2011 at 11:01:03PM +0200, Peter Zijlstra wrote:
> On Wed, 2011-07-13 at 10:13 -0700, Paul Turner wrote:
> > > +static void update_h_load(long cpu)
> > > +{
> > > +       walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
> > > +}
> > 
> > With a list_for_each_entry_reverse_rcu() this could also only operate
> > on the local hierarchy and avoid the tg tree walk. 
> 
> Ah, sadly that primitive cannot exist, rcu list primitives only keeps
> the fwd link.
> 
> Although I guess we could 'fix' that.

We could, at least in theory -- make list_del_rcu() not poison the
->prev link.  Or, given that there are use cases that absolutely cannot
tolerate following ->prev links, have a list_del_rcu_both() or something
so that list_del_rcu() keeps its current error checking.  Oddly enough,
__list_add_rcu() doesn't need to change because the rcu_assign_pointer()
for the predecessor's ->next pointer covers the successor's ->prev
pointer as well.  OK, a comment is clearly needed...

Of course, in a two-way-RCU doubly linked list, p->next->prev is not
necessarily equal to p.

But how deep/wide is the tree and how many cache misses are expected?
Would this solve a real problem?

							Thanx, Paul

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [tip:sched/core] sched, cgroup: Optimize load_balance_fair()
  2011-07-13 11:36 [RFT][PATCH] sched, cgroup: Optimize load_balance_fair() Peter Zijlstra
  2011-07-13 17:13 ` Paul Turner
@ 2011-07-21 18:28 ` tip-bot for Peter Zijlstra
  1 sibling, 0 replies; 8+ messages in thread
From: tip-bot for Peter Zijlstra @ 2011-07-21 18:28 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, a.p.zijlstra, pjt, tglx, mingo

Commit-ID:  9763b67fb9f3050c6da739105888327587c30c4d
Gitweb:     http://git.kernel.org/tip/9763b67fb9f3050c6da739105888327587c30c4d
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Wed, 13 Jul 2011 13:09:25 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Thu, 21 Jul 2011 18:01:46 +0200

sched, cgroup: Optimize load_balance_fair()

Use for_each_leaf_cfs_rq() instead of list_for_each_entry_rcu(), this
achieves that load_balance_fair() only iterates those task_groups that
actually have tasks on busiest, and that we iterate bottom-up, trying to
move light groups before the heavier ones.

No idea if it will actually work out to be beneficial in practice, does
anybody have a cgroup workload that might show a difference one way or
the other?

[ Also move update_h_load to sched_fair.c, losing #ifdef-ery ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Paul Turner <pjt@google.com>
Link: http://lkml.kernel.org/r/1310557009.2586.28.camel@twins
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c      |   32 --------------------------------
 kernel/sched_fair.c |   40 +++++++++++++++++++++++++++++++++++-----
 2 files changed, 35 insertions(+), 37 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index b0e7ad7..474f341 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1568,38 +1568,6 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 	return rq->avg_load_per_task;
 }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-
-/*
- * Compute the cpu's hierarchical load factor for each task group.
- * This needs to be done in a top-down fashion because the load of a child
- * group is a fraction of its parents load.
- */
-static int tg_load_down(struct task_group *tg, void *data)
-{
-	unsigned long load;
-	long cpu = (long)data;
-
-	if (!tg->parent) {
-		load = cpu_rq(cpu)->load.weight;
-	} else {
-		load = tg->parent->cfs_rq[cpu]->h_load;
-		load *= tg->se[cpu]->load.weight;
-		load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
-	}
-
-	tg->cfs_rq[cpu]->h_load = load;
-
-	return 0;
-}
-
-static void update_h_load(long cpu)
-{
-	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
-}
-
-#endif
-
 #ifdef CONFIG_PREEMPT
 
 static void double_rq_lock(struct rq *rq1, struct rq *rq2);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6cdff84..180bcf1 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2232,11 +2232,43 @@ static void update_shares(int cpu)
 	struct rq *rq = cpu_rq(cpu);
 
 	rcu_read_lock();
+	/*
+	 * Iterates the task_group tree in a bottom up fashion, see
+	 * list_add_leaf_cfs_rq() for details.
+	 */
 	for_each_leaf_cfs_rq(rq, cfs_rq)
 		update_shares_cpu(cfs_rq->tg, cpu);
 	rcu_read_unlock();
 }
 
+/*
+ * Compute the cpu's hierarchical load factor for each task group.
+ * This needs to be done in a top-down fashion because the load of a child
+ * group is a fraction of its parents load.
+ */
+static int tg_load_down(struct task_group *tg, void *data)
+{
+	unsigned long load;
+	long cpu = (long)data;
+
+	if (!tg->parent) {
+		load = cpu_rq(cpu)->load.weight;
+	} else {
+		load = tg->parent->cfs_rq[cpu]->h_load;
+		load *= tg->se[cpu]->load.weight;
+		load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
+	}
+
+	tg->cfs_rq[cpu]->h_load = load;
+
+	return 0;
+}
+
+static void update_h_load(long cpu)
+{
+	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
@@ -2244,14 +2276,12 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  int *all_pinned)
 {
 	long rem_load_move = max_load_move;
-	int busiest_cpu = cpu_of(busiest);
-	struct task_group *tg;
+	struct cfs_rq *busiest_cfs_rq;
 
 	rcu_read_lock();
-	update_h_load(busiest_cpu);
+	update_h_load(cpu_of(busiest));
 
-	list_for_each_entry_rcu(tg, &task_groups, list) {
-		struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
+	for_each_leaf_cfs_rq(busiest, busiest_cfs_rq) {
 		unsigned long busiest_h_load = busiest_cfs_rq->h_load;
 		unsigned long busiest_weight = busiest_cfs_rq->load.weight;
 		u64 rem_load, moved_load;

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2011-07-21 18:29 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-07-13 11:36 [RFT][PATCH] sched, cgroup: Optimize load_balance_fair() Peter Zijlstra
2011-07-13 17:13 ` Paul Turner
2011-07-13 21:01   ` Peter Zijlstra
2011-07-14  0:46     ` Paul E. McKenney
2011-07-13 21:02   ` Peter Zijlstra
2011-07-13 21:14     ` Paul Turner
2011-07-13 21:18       ` Peter Zijlstra
2011-07-21 18:28 ` [tip:sched/core] " tip-bot for Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox