linux-kernel.vger.kernel.org archive mirror
* [PATCH] sched: Prefer sibling only if local group is under-utilized
@ 2017-03-22 17:57 Srikar Dronamraju
  2017-03-23  7:42 ` Peter Zijlstra
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Srikar Dronamraju @ 2017-03-22 17:57 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra
  Cc: Vincent Guittot, Mike Galbraith, Thomas Gleixner, LKML,
	Srikar Dronamraju

If the child domain prefers tasks to go to siblings, the local group can
end up pulling tasks to itself even when it is almost as loaded as the
source group.

Consider a 4-core, SMT-2 machine running a 5-thread ebizzy workload.
Every time the local group has capacity and the source group has at
least 2 threads, the local group tries to pull a task. This causes the
threads to constantly move between different cores. The effect is even
more pronounced when the cores have more threads, e.g. POWER8 in SMT-8
mode.

Fix this by allowing the local group to pull a task only if the source
group has more tasks than the local group.
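
In condition form, the change looks like this (a minimal sketch with
illustrative names, not the kernel code itself; the real change is in
update_sd_lb_stats() below):

    /* Let the local group pull from the source group only when the
     * source clearly runs more tasks than the local group does. */
    static bool allow_pull(bool prefer_sibling, bool local_has_capacity,
                           unsigned int src_nr_running,
                           unsigned int local_nr_running)
    {
            if (!prefer_sibling || !local_has_capacity)
                    return false;
            /* Old check: src_nr_running > 1, which pulled even from an
             * almost equally loaded source group. */
            return src_nr_running > local_nr_running + 1;
    }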

Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
Here are the relevant perf stat numbers from a 22-core, SMT-8 POWER8 machine.

Without patch:
 Performance counter stats for 'ebizzy -t 22 -S 100' (5 runs):

             1,440      context-switches          #    0.001 K/sec                    ( +-  1.26% )
               366      cpu-migrations            #    0.000 K/sec                    ( +-  5.58% )
             3,933      page-faults               #    0.002 K/sec                    ( +- 11.08% )

 Performance counter stats for 'ebizzy -t 48 -S 100' (5 runs):

             6,287      context-switches          #    0.001 K/sec                    ( +-  3.65% )
             3,776      cpu-migrations            #    0.001 K/sec                    ( +-  4.84% )
             5,702      page-faults               #    0.001 K/sec                    ( +-  9.36% )

 Performance counter stats for 'ebizzy -t 96 -S 100' (5 runs):

             8,776      context-switches          #    0.001 K/sec                    ( +-  0.73% )
             2,790      cpu-migrations            #    0.000 K/sec                    ( +-  0.98% )
            10,540      page-faults               #    0.001 K/sec                    ( +-  3.12% )

With patch:
 Performance counter stats for 'ebizzy -t 22 -S 100' (5 runs):

             1,133      context-switches          #    0.001 K/sec                    ( +-  4.72% )
               123      cpu-migrations            #    0.000 K/sec                    ( +-  3.42% )
             3,858      page-faults               #    0.002 K/sec                    ( +-  8.52% )

 Performance counter stats for 'ebizzy -t 48 -S 100' (5 runs):

             2,169      context-switches          #    0.000 K/sec                    ( +-  6.19% )
               189      cpu-migrations            #    0.000 K/sec                    ( +- 12.75% )
             5,917      page-faults               #    0.001 K/sec                    ( +-  8.09% )

 Performance counter stats for 'ebizzy -t 96 -S 100' (5 runs):

             5,333      context-switches          #    0.001 K/sec                    ( +-  5.91% )
               506      cpu-migrations            #    0.000 K/sec                    ( +-  3.35% )
            10,792      page-faults               #    0.001 K/sec                    ( +-  7.75% )
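
For reference, numbers like the above come from repeated perf stat runs;
a hypothetical invocation (the exact flags used are not given in this
thread) would be:

    perf stat -r 5 -e context-switches,cpu-migrations,page-faults \
            ebizzy -t 96 -S 100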
---
 kernel/sched/fair.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6559d19..a6dd010 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7496,6 +7496,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 {
 	struct sched_domain *child = env->sd->child;
 	struct sched_group *sg = env->sd->groups;
+	struct sg_lb_stats *local = &sds->local_stat;
 	struct sg_lb_stats tmp_sgs;
 	int load_idx, prefer_sibling = 0;
 	bool overload = false;
@@ -7512,7 +7513,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 		local_group = cpumask_test_cpu(env->dst_cpu, sched_group_cpus(sg));
 		if (local_group) {
 			sds->local = sg;
-			sgs = &sds->local_stat;
+			sgs = local;
 
 			if (env->idle != CPU_NEWLY_IDLE ||
 			    time_after_eq(jiffies, sg->sgc->next_update))
@@ -7536,8 +7537,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 		 * the tasks on the system).
 		 */
 		if (prefer_sibling && sds->local &&
-		    group_has_capacity(env, &sds->local_stat) &&
-		    (sgs->sum_nr_running > 1)) {
+		    group_has_capacity(env, local) &&
+		    (sgs->sum_nr_running > local->sum_nr_running + 1)) {
 			sgs->group_no_capacity = 1;
 			sgs->group_type = group_classify(sg, sgs);
 		}
-- 
1.8.3.1


* Re: [PATCH] sched: Prefer sibling only if local group is under-utilized
  2017-03-22 17:57 [PATCH] sched: Prefer sibling only if local group is under-utilized Srikar Dronamraju
@ 2017-03-23  7:42 ` Peter Zijlstra
  2017-03-23  8:37 ` Vincent Guittot
  2017-03-27 10:26 ` [tip:sched/core] sched/fair: " tip-bot for Srikar Dronamraju
  2 siblings, 0 replies; 4+ messages in thread
From: Peter Zijlstra @ 2017-03-23  7:42 UTC (permalink / raw)
  To: Srikar Dronamraju
  Cc: Ingo Molnar, Vincent Guittot, Mike Galbraith, Thomas Gleixner,
	LKML

On Wed, Mar 22, 2017 at 11:27:50PM +0530, Srikar Dronamraju wrote:

> @@ -7536,8 +7537,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
>  		 * the tasks on the system).
>  		 */
>  		if (prefer_sibling && sds->local &&
> -		    group_has_capacity(env, &sds->local_stat) &&
> -		    (sgs->sum_nr_running > 1)) {
> +		    group_has_capacity(env, local) &&
> +		    (sgs->sum_nr_running > local->sum_nr_running + 1)) {
>  			sgs->group_no_capacity = 1;
>  			sgs->group_type = group_classify(sg, sgs);
>  		}

Ah, yes that makes sense!

Thanks


* Re: [PATCH] sched: Prefer sibling only if local group is under-utilized
  2017-03-22 17:57 [PATCH] sched: Prefer sibling only if local group is under-utilized Srikar Dronamraju
  2017-03-23  7:42 ` Peter Zijlstra
@ 2017-03-23  8:37 ` Vincent Guittot
  2017-03-27 10:26 ` [tip:sched/core] sched/fair: " tip-bot for Srikar Dronamraju
  2 siblings, 0 replies; 4+ messages in thread
From: Vincent Guittot @ 2017-03-23  8:37 UTC (permalink / raw)
  To: Srikar Dronamraju
  Cc: Ingo Molnar, Peter Zijlstra, Mike Galbraith, Thomas Gleixner,
	LKML

On 22 March 2017 at 18:57, Srikar Dronamraju <srikar@linux.vnet.ibm.com> wrote:
> If the child domain prefers tasks to go to siblings, the local group can
> end up pulling tasks to itself even when it is almost as loaded as the
> source group.
>
> Consider a 4-core, SMT-2 machine running a 5-thread ebizzy workload.
> Every time the local group has capacity and the source group has at
> least 2 threads, the local group tries to pull a task. This causes the
> threads to constantly move between different cores. The effect is even
> more pronounced when the cores have more threads, e.g. POWER8 in SMT-8
> mode.
>
> Fix this by allowing the local group to pull a task only if the source
> group has more tasks than the local group.
>
> Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>

Acked-by: Vincent Guittot <vincent.guittot@linaro.org>

> ---
> Here are the relevant perf stat numbers from a 22-core, SMT-8 POWER8 machine.
>
> Without patch:
>  Performance counter stats for 'ebizzy -t 22 -S 100' (5 runs):
>
>              1,440      context-switches          #    0.001 K/sec                    ( +-  1.26% )
>                366      cpu-migrations            #    0.000 K/sec                    ( +-  5.58% )
>              3,933      page-faults               #    0.002 K/sec                    ( +- 11.08% )
>
>  Performance counter stats for 'ebizzy -t 48 -S 100' (5 runs):
>
>              6,287      context-switches          #    0.001 K/sec                    ( +-  3.65% )
>              3,776      cpu-migrations            #    0.001 K/sec                    ( +-  4.84% )
>              5,702      page-faults               #    0.001 K/sec                    ( +-  9.36% )
>
>  Performance counter stats for 'ebizzy -t 96 -S 100' (5 runs):
>
>              8,776      context-switches          #    0.001 K/sec                    ( +-  0.73% )
>              2,790      cpu-migrations            #    0.000 K/sec                    ( +-  0.98% )
>             10,540      page-faults               #    0.001 K/sec                    ( +-  3.12% )
>
> With patch:
>  Performance counter stats for 'ebizzy -t 22 -S 100' (5 runs):
>
>              1,133      context-switches          #    0.001 K/sec                    ( +-  4.72% )
>                123      cpu-migrations            #    0.000 K/sec                    ( +-  3.42% )
>              3,858      page-faults               #    0.002 K/sec                    ( +-  8.52% )
>
>  Performance counter stats for 'ebizzy -t 48 -S 100' (5 runs):
>
>              2,169      context-switches          #    0.000 K/sec                    ( +-  6.19% )
>                189      cpu-migrations            #    0.000 K/sec                    ( +- 12.75% )
>              5,917      page-faults               #    0.001 K/sec                    ( +-  8.09% )
>
>  Performance counter stats for 'ebizzy -t 96 -S 100' (5 runs):
>
>              5,333      context-switches          #    0.001 K/sec                    ( +-  5.91% )
>                506      cpu-migrations            #    0.000 K/sec                    ( +-  3.35% )
>             10,792      page-faults               #    0.001 K/sec                    ( +-  7.75% )
> ---
>  kernel/sched/fair.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 6559d19..a6dd010 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -7496,6 +7496,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
>  {
>         struct sched_domain *child = env->sd->child;
>         struct sched_group *sg = env->sd->groups;
> +       struct sg_lb_stats *local = &sds->local_stat;
>         struct sg_lb_stats tmp_sgs;
>         int load_idx, prefer_sibling = 0;
>         bool overload = false;
> @@ -7512,7 +7513,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
>                 local_group = cpumask_test_cpu(env->dst_cpu, sched_group_cpus(sg));
>                 if (local_group) {
>                         sds->local = sg;
> -                       sgs = &sds->local_stat;
> +                       sgs = local;
>
>                         if (env->idle != CPU_NEWLY_IDLE ||
>                             time_after_eq(jiffies, sg->sgc->next_update))
> @@ -7536,8 +7537,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
>                  * the tasks on the system).
>                  */
>                 if (prefer_sibling && sds->local &&
> -                   group_has_capacity(env, &sds->local_stat) &&
> -                   (sgs->sum_nr_running > 1)) {
> +                   group_has_capacity(env, local) &&
> +                   (sgs->sum_nr_running > local->sum_nr_running + 1)) {
>                         sgs->group_no_capacity = 1;
>                         sgs->group_type = group_classify(sg, sgs);
>                 }

> --
> 1.8.3.1
>


* [tip:sched/core] sched/fair: Prefer sibling only if local group is under-utilized
  2017-03-22 17:57 [PATCH] sched: Prefer sibling only if local group is under-utilized Srikar Dronamraju
  2017-03-23  7:42 ` Peter Zijlstra
  2017-03-23  8:37 ` Vincent Guittot
@ 2017-03-27 10:26 ` tip-bot for Srikar Dronamraju
  2 siblings, 0 replies; 4+ messages in thread
From: tip-bot for Srikar Dronamraju @ 2017-03-27 10:26 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, mingo, tglx, efault, peterz, vincent.guittot,
	torvalds, srikar, hpa

Commit-ID:  05b40e057734811ce452344fb3690d09965a7b6a
Gitweb:     http://git.kernel.org/tip/05b40e057734811ce452344fb3690d09965a7b6a
Author:     Srikar Dronamraju <srikar@linux.vnet.ibm.com>
AuthorDate: Wed, 22 Mar 2017 23:27:50 +0530
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Mon, 27 Mar 2017 10:22:26 +0200

sched/fair: Prefer sibling only if local group is under-utilized

If the child domain prefers tasks to go to siblings, the local group can
end up pulling tasks to itself even when it is almost as loaded as the
source group.

Consider a 4-core, SMT-2 machine running a 5-thread ebizzy workload.
Every time the local group has capacity and the source group has at
least 2 threads, the local group tries to pull a task. This causes the
threads to constantly move between different cores. The effect is even
more pronounced when the cores have more threads, e.g. POWER8 in SMT-8
mode.

Fix this by allowing the local group to pull a task only if the source
group has more tasks than the local group.

Here are the relevant perf stat numbers from a 22-core, SMT-8 POWER8 machine.

Without patch:
 Performance counter stats for 'ebizzy -t 22 -S 100' (5 runs):

             1,440      context-switches          #    0.001 K/sec                    ( +-  1.26% )
               366      cpu-migrations            #    0.000 K/sec                    ( +-  5.58% )
             3,933      page-faults               #    0.002 K/sec                    ( +- 11.08% )

 Performance counter stats for 'ebizzy -t 48 -S 100' (5 runs):

             6,287      context-switches          #    0.001 K/sec                    ( +-  3.65% )
             3,776      cpu-migrations            #    0.001 K/sec                    ( +-  4.84% )
             5,702      page-faults               #    0.001 K/sec                    ( +-  9.36% )

 Performance counter stats for 'ebizzy -t 96 -S 100' (5 runs):

             8,776      context-switches          #    0.001 K/sec                    ( +-  0.73% )
             2,790      cpu-migrations            #    0.000 K/sec                    ( +-  0.98% )
            10,540      page-faults               #    0.001 K/sec                    ( +-  3.12% )

With patch:

 Performance counter stats for 'ebizzy -t 22 -S 100' (5 runs):

             1,133      context-switches          #    0.001 K/sec                    ( +-  4.72% )
               123      cpu-migrations            #    0.000 K/sec                    ( +-  3.42% )
             3,858      page-faults               #    0.002 K/sec                    ( +-  8.52% )

 Performance counter stats for 'ebizzy -t 48 -S 100' (5 runs):

             2,169      context-switches          #    0.000 K/sec                    ( +-  6.19% )
               189      cpu-migrations            #    0.000 K/sec                    ( +- 12.75% )
             5,917      page-faults               #    0.001 K/sec                    ( +-  8.09% )

 Performance counter stats for 'ebizzy -t 96 -S 100' (5 runs):

             5,333      context-switches          #    0.001 K/sec                    ( +-  5.91% )
               506      cpu-migrations            #    0.000 K/sec                    ( +-  3.35% )
            10,792      page-faults               #    0.001 K/sec                    ( +-  7.75% )

These numbers show that CPU migrations drop significantly with these
workloads (e.g. from 3,776 to 189 in the 48-thread run).

Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Link: http://lkml.kernel.org/r/1490205470-10249-1-git-send-email-srikar@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 03adf9f..31453d5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7565,6 +7565,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 {
 	struct sched_domain *child = env->sd->child;
 	struct sched_group *sg = env->sd->groups;
+	struct sg_lb_stats *local = &sds->local_stat;
 	struct sg_lb_stats tmp_sgs;
 	int load_idx, prefer_sibling = 0;
 	bool overload = false;
@@ -7581,7 +7582,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 		local_group = cpumask_test_cpu(env->dst_cpu, sched_group_cpus(sg));
 		if (local_group) {
 			sds->local = sg;
-			sgs = &sds->local_stat;
+			sgs = local;
 
 			if (env->idle != CPU_NEWLY_IDLE ||
 			    time_after_eq(jiffies, sg->sgc->next_update))
@@ -7605,8 +7606,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 		 * the tasks on the system).
 		 */
 		if (prefer_sibling && sds->local &&
-		    group_has_capacity(env, &sds->local_stat) &&
-		    (sgs->sum_nr_running > 1)) {
+		    group_has_capacity(env, local) &&
+		    (sgs->sum_nr_running > local->sum_nr_running + 1)) {
 			sgs->group_no_capacity = 1;
 			sgs->group_type = group_classify(sg, sgs);
 		}

