From: Mike Galbraith <efault@gmx.de>
To: Uladzislau Rezki <urezki@gmail.com>
Cc: Atish Patra <atish.patra@oracle.com>,
Peter Zijlstra <peterz@infradead.org>,
Joel Fernandes <joelaf@google.com>,
LKML <linux-kernel@vger.kernel.org>,
Brendan Jackman <brendan.jackman@arm.com>,
Josef Bacik <jbacik@fb.com>, Ingo Molnar <mingo@redhat.com>
Subject: Re: [PATCH RFC 1/2] sched: Minimize the idle cpu selection race window.
Date: Sun, 26 Nov 2017 21:58:34 +0100 [thread overview]
Message-ID: <1511729914.22158.29.camel@gmx.de> (raw)
In-Reply-To: <1511549190.8029.233.camel@gmx.de>
On Fri, 2017-11-24 at 19:46 +0100, Mike Galbraith wrote:
>
> My view is you're barking up the wrong tree: you're making the idle
> data SIS is using more accurate, but I question the benefit. That it
> makes an imperfect placement decision occasionally due to raciness is
> nearly meaningless compared to the cost of frequent bounce.
While playing with SIS (yet again), I put together the hack below, which
I think illustrates why the occasional races are nearly meaningless.
Box = i4790 desktop.
master masterx
TCP_STREAM-1 Avg: 70495 71295
TCP_STREAM-2 Avg: 54388 66202
TCP_STREAM-4 Avg: 19316 21413
TCP_STREAM-8 Avg: 9678 8894
TCP_STREAM-16 Avg: 4286 4360
TCP_MAERTS-1 Avg: 69238 71799
TCP_MAERTS-2 Avg: 50729 65612
TCP_MAERTS-4 Avg: 19095 21984
TCP_MAERTS-8 Avg: 9405 8888
TCP_MAERTS-16 Avg: 4891 4371
TCP_RR-1 Avg: 198617 203291
TCP_RR-2 Avg: 152862 191761
TCP_RR-4 Avg: 112241 117888
TCP_RR-8 Avg: 104453 113260
TCP_RR-16 Avg: 50897 55280
UDP_RR-1 Avg: 250738 264214
UDP_RR-2 Avg: 196250 253352
UDP_RR-4 Avg: 152862 158819
UDP_RR-8 Avg: 143781 154071
UDP_RR-16 Avg: 68605 76492
tbench 1 2 4 8 16
master 772 1207 1829 3516 3440
masterx 811 1466 1959 3737 3670
hackbench -l 10000
5.917 5.990 5.957 avg 5.954 NO_SIS_DEBOUNCE
5.886 5.808 5.826 avg 5.840 SIS_DEBOUNCE
echo 0 > tracing_on
echo 1 > events/sched/sched_migrate_task/enable
start endless tbench 2
for i in `seq 3`
do
echo > trace
echo 1 > tracing_on
sleep 10
echo 0 > tracing_on
cat trace|grep tbench|wc -l
done
kde desktop idling
NO_SIS_DEBOUNCE 261 208 199
SIS_DEBOUNCE 8 6 0
add firefox playing youtube documentary
NO_SIS_DEBOUNCE 10906 10094 10774
SIS_DEBOUNCE 34 34 34
tbench 2 throughput as firefox runs
NO_SIS_DEBOUNCE 1129.63 MB/sec
SIS_DEBOUNCE 1462.53 MB/sec
Advisory: welding goggles.
---
include/linux/sched.h | 3 ++-
kernel/sched/fair.c | 41 +++++++++++++++++++++++++++++++++++++++--
kernel/sched/features.h | 1 +
3 files changed, 42 insertions(+), 3 deletions(-)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -541,7 +541,6 @@ struct task_struct {
unsigned int ptrace;
#ifdef CONFIG_SMP
- struct llist_node wake_entry;
int on_cpu;
#ifdef CONFIG_THREAD_INFO_IN_TASK
/* Current CPU: */
@@ -549,8 +548,10 @@ struct task_struct {
#endif
unsigned int wakee_flips;
unsigned long wakee_flip_decay_ts;
+ unsigned long wakee_placed;
struct task_struct *last_wakee;
+ struct llist_node wake_entry;
int wake_cpu;
#endif
int on_rq;
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6174,6 +6174,9 @@ static int select_idle_sibling(struct ta
if ((unsigned)i < nr_cpumask_bits)
return i;
+ if (sched_feat(SIS_DEBOUNCE))
+ p->wakee_placed = jiffies;
+
return target;
}
@@ -6258,6 +6261,22 @@ static int wake_cap(struct task_struct *
return min_cap * 1024 < task_util(p) * capacity_margin;
}
+static bool task_placed(struct task_struct *p)
+{
+ return p->wakee_placed == jiffies;
+}
+
+static bool task_llc_affine_and_cold(struct task_struct *p, int cpu, int prev)
+{
+ int cold = sysctl_sched_migration_cost;
+
+ if (!cpus_share_cache(cpu, prev))
+ return false;
+ if (cold > 0 && rq_clock_task(cpu_rq(prev)) - p->se.exec_start > cold)
+ return true;
+ return false;
+}
+
/*
* select_task_rq_fair: Select target runqueue for the waking task in domains
* that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
@@ -6276,16 +6295,26 @@ select_task_rq_fair(struct task_struct *
struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
int cpu = smp_processor_id();
int new_cpu = prev_cpu;
- int want_affine = 0;
+ int want_affine = 0, want_debounce = 0;
int sync = wake_flags & WF_SYNC;
+ rcu_read_lock();
if (sd_flag & SD_BALANCE_WAKE) {
record_wakee(p);
+ want_debounce = sched_feat(SIS_DEBOUNCE);
+ if (task_placed(p))
+ goto out_unlock;
+ /* Balance cold tasks to reduce hot task bounce tendency. */
+ if (want_debounce && task_llc_affine_and_cold(p, cpu, prev_cpu)) {
+ sd_flag |= SD_SHARE_PKG_RESOURCES;
+ sd = highest_flag_domain(prev_cpu, SD_SHARE_PKG_RESOURCES);
+ p->wakee_placed = jiffies;
+ goto pick_cpu_cold;
+ }
want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
&& cpumask_test_cpu(cpu, &p->cpus_allowed);
}
- rcu_read_lock();
for_each_domain(cpu, tmp) {
if (!(tmp->flags & SD_LOAD_BALANCE))
break;
@@ -6315,6 +6344,7 @@ select_task_rq_fair(struct task_struct *
new_cpu = cpu;
}
+pick_cpu_cold:
if (sd && !(sd_flag & SD_BALANCE_FORK)) {
/*
* We're going to need the task's util for capacity_spare_wake
@@ -6329,9 +6359,13 @@ select_task_rq_fair(struct task_struct *
if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
+ if (want_debounce && new_cpu == prev_cpu)
+ p->wakee_placed = jiffies;
+
} else {
new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
}
+out_unlock:
rcu_read_unlock();
return new_cpu;
@@ -6952,6 +6986,9 @@ static int task_hot(struct task_struct *
if (sysctl_sched_migration_cost == 0)
return 0;
+ if (task_placed(p))
+ return 1;
+
delta = rq_clock_task(env->src_rq) - p->se.exec_start;
return delta < (s64)sysctl_sched_migration_cost;
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -57,6 +57,7 @@ SCHED_FEAT(TTWU_QUEUE, true)
*/
SCHED_FEAT(SIS_AVG_CPU, false)
SCHED_FEAT(SIS_PROP, true)
+SCHED_FEAT(SIS_DEBOUNCE, true)
/*
* Issue a WARN when we do multiple update_rq_clock() calls
next prev parent reply other threads:[~2017-11-26 20:59 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-10-31 5:27 [PATCH RFC 0/2] Fix race window during idle cpu selection Atish Patra
2017-10-31 5:27 ` [PATCH RFC 1/2] sched: Minimize the idle cpu selection race window Atish Patra
2017-10-31 8:20 ` Peter Zijlstra
2017-10-31 8:48 ` Mike Galbraith
2017-11-01 6:08 ` Atish Patra
2017-11-01 6:54 ` Mike Galbraith
2017-11-01 7:18 ` Mike Galbraith
2017-11-01 16:36 ` Atish Patra
2017-11-01 20:20 ` Mike Galbraith
2017-11-05 0:58 ` Joel Fernandes
2017-11-22 5:23 ` Atish Patra
2017-11-23 10:52 ` Uladzislau Rezki
2017-11-23 13:13 ` Mike Galbraith
2017-11-23 16:00 ` Josef Bacik
2017-11-23 17:40 ` Mike Galbraith
2017-11-23 21:11 ` Atish Patra
2017-11-24 10:26 ` Uladzislau Rezki
2017-11-24 18:46 ` Mike Galbraith
2017-11-26 20:58 ` Mike Galbraith [this message]
2017-11-28 9:34 ` Uladzislau Rezki
2017-11-28 10:49 ` Mike Galbraith
2017-11-29 10:41 ` Uladzislau Rezki
2017-11-29 18:15 ` Mike Galbraith
2017-11-30 12:30 ` Uladzislau Rezki
2017-10-31 5:27 ` [PATCH DEBUG 2/2] sched: Add a stat for " Atish Patra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1511729914.22158.29.camel@gmx.de \
--to=efault@gmx.de \
--cc=atish.patra@oracle.com \
--cc=brendan.jackman@arm.com \
--cc=jbacik@fb.com \
--cc=joelaf@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=urezki@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.