[rfc patch] sched/fair: Use instantaneous load for fork/exec balancing

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Mike Galbraith <umgwanakikbuti@gmail.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Yuyang Du <yuyang.du@intel.com>, LKML <linux-kernel@vger.kernel.org>
Subject: [rfc patch] sched/fair: Use instantaneous load for fork/exec balancing
Date: Tue, 14 Jun 2016 09:58:31 +0200	[thread overview]
Message-ID: <1465891111.1694.13.camel@gmail.com> (raw)

SUSE's regression testing noticed that...

0905f04eb21f sched/fair: Fix new task's load avg removed from source CPU in wake_up_new_task()

...introduced a hackbench regression, and indeed it does.  I think this
regression has more to do with randomness than anything else, but in
general...

While averaging calms down load balancing, helping to keep migrations
down to a dull roar, it's not completely wonderful when it comes to
things that live in the here and now, hackbench being one such.

time sh -c 'for i in `seq 1000`; do hackbench -p -P > /dev/null; done'

real    0m55.397s
user    0m8.320s
sys     5m40.789s

echo LB_INSTANTANEOUS_LOAD > /sys/kernel/debug/sched_features

real    0m48.049s
user    0m6.510s
sys     5m6.291s

Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
---
 kernel/sched/fair.c     |   54 ++++++++++++++++++++++++------------------------
 kernel/sched/features.h |    1 
 kernel/sched/sched.h    |    6 +++++
 3 files changed, 35 insertions(+), 26 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -738,7 +738,7 @@ void post_init_entity_util_avg(struct sc
 	}
 }
 
-static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq);
+static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq, int avg);
 static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq);
 #else
 void init_entity_runnable_average(struct sched_entity *se)
@@ -1229,9 +1229,9 @@ bool should_numa_migrate_memory(struct t
 	       group_faults_cpu(ng, src_nid) * group_faults(p, dst_nid) * 4;
 }
 
-static unsigned long weighted_cpuload(const int cpu);
-static unsigned long source_load(int cpu, int type);
-static unsigned long target_load(int cpu, int type);
+static unsigned long weighted_cpuload(const int cpu, int avg);
+static unsigned long source_load(int cpu, int type, int avg);
+static unsigned long target_load(int cpu, int type, int avg);
 static unsigned long capacity_of(int cpu);
 static long effective_load(struct task_group *tg, int cpu, long wl, long wg);
 
@@ -1261,7 +1261,7 @@ static void update_numa_stats(struct num
 		struct rq *rq = cpu_rq(cpu);
 
 		ns->nr_running += rq->nr_running;
-		ns->load += weighted_cpuload(cpu);
+		ns->load += weighted_cpuload(cpu, LOAD_AVERAGE);
 		ns->compute_capacity += capacity_of(cpu);
 
 		cpus++;
@@ -3102,8 +3102,10 @@ void remove_entity_load_avg(struct sched
 	atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg);
 }
 
-static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq)
+static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq, int avg)
 {
+	if (sched_feat(LB_INSTANTANEOUS_LOAD) && avg == LOAD_INSTANT)
+		return cfs_rq->load.weight;
 	return cfs_rq->runnable_load_avg;
 }
 
@@ -4701,9 +4703,9 @@ static void cpu_load_update(struct rq *t
 }
 
 /* Used instead of source_load when we know the type == 0 */
-static unsigned long weighted_cpuload(const int cpu)
+static unsigned long weighted_cpuload(const int cpu, int avg)
 {
-	return cfs_rq_runnable_load_avg(&cpu_rq(cpu)->cfs);
+	return cfs_rq_runnable_load_avg(&cpu_rq(cpu)->cfs, avg);
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
@@ -4748,7 +4750,7 @@ static void cpu_load_update_idle(struct
 	/*
 	 * bail if there's load or we're actually up-to-date.
 	 */
-	if (weighted_cpuload(cpu_of(this_rq)))
+	if (weighted_cpuload(cpu_of(this_rq), LOAD_AVERAGE))
 		return;
 
 	cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), 0);
@@ -4769,7 +4771,7 @@ void cpu_load_update_nohz_start(void)
 	 * concurrently we'll exit nohz. And cpu_load write can race with
 	 * cpu_load_update_idle() but both updater would be writing the same.
 	 */
-	this_rq->cpu_load[0] = weighted_cpuload(cpu_of(this_rq));
+	this_rq->cpu_load[0] = weighted_cpuload(cpu_of(this_rq), LOAD_AVERAGE);
 }
 
 /*
@@ -4784,7 +4786,7 @@ void cpu_load_update_nohz_stop(void)
 	if (curr_jiffies == this_rq->last_load_update_tick)
 		return;
 
-	load = weighted_cpuload(cpu_of(this_rq));
+	load = weighted_cpuload(cpu_of(this_rq), LOAD_AVERAGE);
 	raw_spin_lock(&this_rq->lock);
 	update_rq_clock(this_rq);
 	cpu_load_update_nohz(this_rq, curr_jiffies, load);
@@ -4810,7 +4812,7 @@ static void cpu_load_update_periodic(str
  */
 void cpu_load_update_active(struct rq *this_rq)
 {
-	unsigned long load = weighted_cpuload(cpu_of(this_rq));
+	unsigned long load = weighted_cpuload(cpu_of(this_rq), LOAD_AVERAGE);
 
 	if (tick_nohz_tick_stopped())
 		cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), load);
@@ -4825,10 +4827,10 @@ void cpu_load_update_active(struct rq *t
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static unsigned long source_load(int cpu, int type)
+static unsigned long source_load(int cpu, int type, int avg)
 {
 	struct rq *rq = cpu_rq(cpu);
-	unsigned long total = weighted_cpuload(cpu);
+	unsigned long total = weighted_cpuload(cpu, avg);
 
 	if (type == 0 || !sched_feat(LB_BIAS))
 		return total;
@@ -4840,10 +4842,10 @@ static unsigned long source_load(int cpu
  * Return a high guess at the load of a migration-target cpu weighted
  * according to the scheduling class and "nice" value.
  */
-static unsigned long target_load(int cpu, int type)
+static unsigned long target_load(int cpu, int type, int avg)
 {
 	struct rq *rq = cpu_rq(cpu);
-	unsigned long total = weighted_cpuload(cpu);
+	unsigned long total = weighted_cpuload(cpu, avg);
 
 	if (type == 0 || !sched_feat(LB_BIAS))
 		return total;
@@ -4865,7 +4867,7 @@ static unsigned long cpu_avg_load_per_ta
 {
 	struct rq *rq = cpu_rq(cpu);
 	unsigned long nr_running = READ_ONCE(rq->cfs.h_nr_running);
-	unsigned long load_avg = weighted_cpuload(cpu);
+	unsigned long load_avg = weighted_cpuload(cpu, LOAD_AVERAGE);
 
 	if (nr_running)
 		return load_avg / nr_running;
@@ -5047,8 +5049,8 @@ static int wake_affine(struct sched_doma
 	idx	  = sd->wake_idx;
 	this_cpu  = smp_processor_id();
 	prev_cpu  = task_cpu(p);
-	load	  = source_load(prev_cpu, idx);
-	this_load = target_load(this_cpu, idx);
+	load	  = source_load(prev_cpu, idx, LOAD_AVERAGE);
+	this_load = target_load(this_cpu, idx, LOAD_AVERAGE);
 
 	/*
 	 * If sync wakeup then subtract the (maximum possible)
@@ -5136,9 +5138,9 @@ find_idlest_group(struct sched_domain *s
 		for_each_cpu(i, sched_group_cpus(group)) {
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
-				load = source_load(i, load_idx);
+				load = source_load(i, load_idx, LOAD_INSTANT);
 			else
-				load = target_load(i, load_idx);
+				load = target_load(i, load_idx, LOAD_INSTANT);
 
 			avg_load += load;
 		}
@@ -5197,7 +5199,7 @@ find_idlest_cpu(struct sched_group *grou
 				shallowest_idle_cpu = i;
 			}
 		} else if (shallowest_idle_cpu == -1) {
-			load = weighted_cpuload(i);
+			load = weighted_cpuload(i, LOAD_INSTANT);
 			if (load < min_load || (load == min_load && i == this_cpu)) {
 				min_load = load;
 				least_loaded_cpu = i;
@@ -6982,9 +6984,9 @@ static inline void update_sg_lb_stats(st
 
 		/* Bias balancing toward cpus of our domain */
 		if (local_group)
-			load = target_load(i, load_idx);
+			load = target_load(i, load_idx, LOAD_AVERAGE);
 		else
-			load = source_load(i, load_idx);
+			load = source_load(i, load_idx, LOAD_AVERAGE);
 
 		sgs->group_load += load;
 		sgs->group_util += cpu_util(i);
@@ -6998,7 +7000,7 @@ static inline void update_sg_lb_stats(st
 		sgs->nr_numa_running += rq->nr_numa_running;
 		sgs->nr_preferred_running += rq->nr_preferred_running;
 #endif
-		sgs->sum_weighted_load += weighted_cpuload(i);
+		sgs->sum_weighted_load += weighted_cpuload(i, LOAD_AVERAGE);
 		/*
 		 * No need to call idle_cpu() if nr_running is not 0
 		 */
@@ -7510,7 +7512,7 @@ static struct rq *find_busiest_queue(str
 
 		capacity = capacity_of(i);
 
-		wl = weighted_cpuload(i);
+		wl = weighted_cpuload(i, LOAD_AVERAGE);
 
 		/*
 		 * When comparing with imbalance, use weighted_cpuload()
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -39,6 +39,7 @@ SCHED_FEAT(WAKEUP_PREEMPTION, true)
 SCHED_FEAT(HRTICK, false)
 SCHED_FEAT(DOUBLE_TICK, false)
 SCHED_FEAT(LB_BIAS, true)
+SCHED_FEAT(LB_INSTANTANEOUS_LOAD, false)
 
 /*
  * Decrement CPU capacity based on time not spent running tasks
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1630,6 +1630,12 @@ static inline void double_rq_unlock(stru
 		__release(rq2->lock);
 }
 
+/*
+ * Tell load balancing functions whether we want instant or average load
+ */
+#define LOAD_INSTANT	0
+#define LOAD_AVERAGE	1
+
 #else /* CONFIG_SMP */
 
 /*

next             reply	other threads:[~2016-06-14  7:58 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-14  7:58 Mike Galbraith [this message]
2016-06-14 14:14 ` [rfc patch] sched/fair: Use instantaneous load for fork/exec balancing Dietmar Eggemann
2016-06-14 16:40   ` Mike Galbraith
2016-06-15 15:32     ` Dietmar Eggemann
2016-06-15 16:03       ` Mike Galbraith
2016-06-15 19:03         ` Dietmar Eggemann
2016-06-16  3:33           ` Mike Galbraith
2016-06-16  9:01             ` Dietmar Eggemann
2016-07-04 15:04       ` Matt Fleming
2016-07-04 17:43         ` Mike Galbraith
2016-07-06 11:45           ` Matt Fleming
2016-07-06 12:21             ` Mike Galbraith
2016-07-11  8:58         ` Dietmar Eggemann
2016-07-12 11:14           ` Matt Fleming
2016-06-14 22:42 ` Yuyang Du
2016-06-15  7:01   ` Mike Galbraith
2016-06-16 11:46     ` [patch] sched/fair: Use instantaneous load in wakeup paths Mike Galbraith
2016-06-16 12:04       ` Mike Galbraith
2016-06-16 12:41         ` Mike Galbraith
2016-06-17  6:21           ` Mike Galbraith
2016-06-17 10:55             ` Dietmar Eggemann
2016-06-17 13:57               ` Mike Galbraith

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1465891111.1694.13.camel@gmail.com \
    --to=umgwanakikbuti@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=yuyang.du@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.