From: Mike Galbraith <umgwanakikbuti@gmail.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Yuyang Du <yuyang.du@intel.com>, LKML <linux-kernel@vger.kernel.org>
Subject: [rfc patch] sched/fair: Use instantaneous load for fork/exec balancing
Date: Tue, 14 Jun 2016 09:58:31 +0200 [thread overview]
Message-ID: <1465891111.1694.13.camel@gmail.com> (raw)
SUSE's regression testing noticed that commit
0905f04eb21f ("sched/fair: Fix new task's load avg removed from source CPU in wake_up_new_task()")
introduced a hackbench regression, and indeed it did. I think this
regression has more to do with randomness than anything else, but in
general...
While averaging calms down load balancing, helping to keep migrations
down to a dull roar, it's not completely wonderful when it comes to
things that live in the here and now, hackbench being one such.
Baseline (LB_INSTANTANEOUS_LOAD disabled, the default):
time sh -c 'for i in `seq 1000`; do hackbench -p -P > /dev/null; done'
real 0m55.397s
user 0m8.320s
sys 5m40.789s
Same command rerun after enabling the feature:
echo LB_INSTANTANEOUS_LOAD > /sys/kernel/debug/sched_features
real 0m48.049s
user 0m6.510s
sys 5m6.291s
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
---
kernel/sched/fair.c | 54 ++++++++++++++++++++++++------------------------
kernel/sched/features.h | 1
kernel/sched/sched.h | 6 +++++
3 files changed, 35 insertions(+), 26 deletions(-)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -738,7 +738,7 @@ void post_init_entity_util_avg(struct sc
}
}
-static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq);
+static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq, int avg);
static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq);
#else
void init_entity_runnable_average(struct sched_entity *se)
@@ -1229,9 +1229,9 @@ bool should_numa_migrate_memory(struct t
group_faults_cpu(ng, src_nid) * group_faults(p, dst_nid) * 4;
}
-static unsigned long weighted_cpuload(const int cpu);
-static unsigned long source_load(int cpu, int type);
-static unsigned long target_load(int cpu, int type);
+static unsigned long weighted_cpuload(const int cpu, int avg);
+static unsigned long source_load(int cpu, int type, int avg);
+static unsigned long target_load(int cpu, int type, int avg);
static unsigned long capacity_of(int cpu);
static long effective_load(struct task_group *tg, int cpu, long wl, long wg);
@@ -1261,7 +1261,7 @@ static void update_numa_stats(struct num
struct rq *rq = cpu_rq(cpu);
ns->nr_running += rq->nr_running;
- ns->load += weighted_cpuload(cpu);
+ ns->load += weighted_cpuload(cpu, LOAD_AVERAGE);
ns->compute_capacity += capacity_of(cpu);
cpus++;
@@ -3102,8 +3102,10 @@ void remove_entity_load_avg(struct sched
atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg);
}
-static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq)
+static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq, int avg)
{
+ if (sched_feat(LB_INSTANTANEOUS_LOAD) && avg == LOAD_INSTANT)
+ return cfs_rq->load.weight;
return cfs_rq->runnable_load_avg;
}
@@ -4701,9 +4703,9 @@ static void cpu_load_update(struct rq *t
}
/* Used instead of source_load when we know the type == 0 */
-static unsigned long weighted_cpuload(const int cpu)
+static unsigned long weighted_cpuload(const int cpu, int avg)
{
- return cfs_rq_runnable_load_avg(&cpu_rq(cpu)->cfs);
+ return cfs_rq_runnable_load_avg(&cpu_rq(cpu)->cfs, avg);
}
#ifdef CONFIG_NO_HZ_COMMON
@@ -4748,7 +4750,7 @@ static void cpu_load_update_idle(struct
/*
* bail if there's load or we're actually up-to-date.
*/
- if (weighted_cpuload(cpu_of(this_rq)))
+ if (weighted_cpuload(cpu_of(this_rq), LOAD_AVERAGE))
return;
cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), 0);
@@ -4769,7 +4771,7 @@ void cpu_load_update_nohz_start(void)
* concurrently we'll exit nohz. And cpu_load write can race with
* cpu_load_update_idle() but both updater would be writing the same.
*/
- this_rq->cpu_load[0] = weighted_cpuload(cpu_of(this_rq));
+ this_rq->cpu_load[0] = weighted_cpuload(cpu_of(this_rq), LOAD_AVERAGE);
}
/*
@@ -4784,7 +4786,7 @@ void cpu_load_update_nohz_stop(void)
if (curr_jiffies == this_rq->last_load_update_tick)
return;
- load = weighted_cpuload(cpu_of(this_rq));
+ load = weighted_cpuload(cpu_of(this_rq), LOAD_AVERAGE);
raw_spin_lock(&this_rq->lock);
update_rq_clock(this_rq);
cpu_load_update_nohz(this_rq, curr_jiffies, load);
@@ -4810,7 +4812,7 @@ static void cpu_load_update_periodic(str
*/
void cpu_load_update_active(struct rq *this_rq)
{
- unsigned long load = weighted_cpuload(cpu_of(this_rq));
+ unsigned long load = weighted_cpuload(cpu_of(this_rq), LOAD_AVERAGE);
if (tick_nohz_tick_stopped())
cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), load);
@@ -4825,10 +4827,10 @@ void cpu_load_update_active(struct rq *t
* We want to under-estimate the load of migration sources, to
* balance conservatively.
*/
-static unsigned long source_load(int cpu, int type)
+static unsigned long source_load(int cpu, int type, int avg)
{
struct rq *rq = cpu_rq(cpu);
- unsigned long total = weighted_cpuload(cpu);
+ unsigned long total = weighted_cpuload(cpu, avg);
if (type == 0 || !sched_feat(LB_BIAS))
return total;
@@ -4840,10 +4842,10 @@ static unsigned long source_load(int cpu
* Return a high guess at the load of a migration-target cpu weighted
* according to the scheduling class and "nice" value.
*/
-static unsigned long target_load(int cpu, int type)
+static unsigned long target_load(int cpu, int type, int avg)
{
struct rq *rq = cpu_rq(cpu);
- unsigned long total = weighted_cpuload(cpu);
+ unsigned long total = weighted_cpuload(cpu, avg);
if (type == 0 || !sched_feat(LB_BIAS))
return total;
@@ -4865,7 +4867,7 @@ static unsigned long cpu_avg_load_per_ta
{
struct rq *rq = cpu_rq(cpu);
unsigned long nr_running = READ_ONCE(rq->cfs.h_nr_running);
- unsigned long load_avg = weighted_cpuload(cpu);
+ unsigned long load_avg = weighted_cpuload(cpu, LOAD_AVERAGE);
if (nr_running)
return load_avg / nr_running;
@@ -5047,8 +5049,8 @@ static int wake_affine(struct sched_doma
idx = sd->wake_idx;
this_cpu = smp_processor_id();
prev_cpu = task_cpu(p);
- load = source_load(prev_cpu, idx);
- this_load = target_load(this_cpu, idx);
+ load = source_load(prev_cpu, idx, LOAD_AVERAGE);
+ this_load = target_load(this_cpu, idx, LOAD_AVERAGE);
/*
* If sync wakeup then subtract the (maximum possible)
@@ -5136,9 +5138,9 @@ find_idlest_group(struct sched_domain *s
for_each_cpu(i, sched_group_cpus(group)) {
/* Bias balancing toward cpus of our domain */
if (local_group)
- load = source_load(i, load_idx);
+ load = source_load(i, load_idx, LOAD_INSTANT);
else
- load = target_load(i, load_idx);
+ load = target_load(i, load_idx, LOAD_INSTANT);
avg_load += load;
}
@@ -5197,7 +5199,7 @@ find_idlest_cpu(struct sched_group *grou
shallowest_idle_cpu = i;
}
} else if (shallowest_idle_cpu == -1) {
- load = weighted_cpuload(i);
+ load = weighted_cpuload(i, LOAD_INSTANT);
if (load < min_load || (load == min_load && i == this_cpu)) {
min_load = load;
least_loaded_cpu = i;
@@ -6982,9 +6984,9 @@ static inline void update_sg_lb_stats(st
/* Bias balancing toward cpus of our domain */
if (local_group)
- load = target_load(i, load_idx);
+ load = target_load(i, load_idx, LOAD_AVERAGE);
else
- load = source_load(i, load_idx);
+ load = source_load(i, load_idx, LOAD_AVERAGE);
sgs->group_load += load;
sgs->group_util += cpu_util(i);
@@ -6998,7 +7000,7 @@ static inline void update_sg_lb_stats(st
sgs->nr_numa_running += rq->nr_numa_running;
sgs->nr_preferred_running += rq->nr_preferred_running;
#endif
- sgs->sum_weighted_load += weighted_cpuload(i);
+ sgs->sum_weighted_load += weighted_cpuload(i, LOAD_AVERAGE);
/*
* No need to call idle_cpu() if nr_running is not 0
*/
@@ -7510,7 +7512,7 @@ static struct rq *find_busiest_queue(str
capacity = capacity_of(i);
- wl = weighted_cpuload(i);
+ wl = weighted_cpuload(i, LOAD_AVERAGE);
/*
* When comparing with imbalance, use weighted_cpuload()
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -39,6 +39,7 @@ SCHED_FEAT(WAKEUP_PREEMPTION, true)
SCHED_FEAT(HRTICK, false)
SCHED_FEAT(DOUBLE_TICK, false)
SCHED_FEAT(LB_BIAS, true)
+SCHED_FEAT(LB_INSTANTANEOUS_LOAD, false)
/*
* Decrement CPU capacity based on time not spent running tasks
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1630,6 +1630,12 @@ static inline void double_rq_unlock(stru
__release(rq2->lock);
}
+/*
+ * Tell load balancing functions whether we want instant or average load
+ */
+#define LOAD_INSTANT 0
+#define LOAD_AVERAGE 1
+
#else /* CONFIG_SMP */
/*
next reply other threads:[~2016-06-14 7:58 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-06-14 7:58 Mike Galbraith [this message]
2016-06-14 14:14 ` [rfc patch] sched/fair: Use instantaneous load for fork/exec balancing Dietmar Eggemann
2016-06-14 16:40 ` Mike Galbraith
2016-06-15 15:32 ` Dietmar Eggemann
2016-06-15 16:03 ` Mike Galbraith
2016-06-15 19:03 ` Dietmar Eggemann
2016-06-16 3:33 ` Mike Galbraith
2016-06-16 9:01 ` Dietmar Eggemann
2016-07-04 15:04 ` Matt Fleming
2016-07-04 17:43 ` Mike Galbraith
2016-07-06 11:45 ` Matt Fleming
2016-07-06 12:21 ` Mike Galbraith
2016-07-11 8:58 ` Dietmar Eggemann
2016-07-12 11:14 ` Matt Fleming
2016-06-14 22:42 ` Yuyang Du
2016-06-15 7:01 ` Mike Galbraith
2016-06-16 11:46 ` [patch] sched/fair: Use instantaneous load in wakeup paths Mike Galbraith
2016-06-16 12:04 ` Mike Galbraith
2016-06-16 12:41 ` Mike Galbraith
2016-06-17 6:21 ` Mike Galbraith
2016-06-17 10:55 ` Dietmar Eggemann
2016-06-17 13:57 ` Mike Galbraith
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1465891111.1694.13.camel@gmail.com \
--to=umgwanakikbuti@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=peterz@infradead.org \
--cc=yuyang.du@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.