From: Peter Zijlstra <peterz@infradead.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>, Borislav Petkov <bp@alien8.de>,
Nikolay Ulyanitsky <lystor@gmail.com>,
Mike Galbraith <efault@gmx.de>,
linux-kernel@vger.kernel.org,
Andreas Herrmann <andreas.herrmann3@amd.com>,
Andrew Morton <akpm@linux-foundation.org>,
Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@kernel.org>,
Suresh Siddha <suresh.b.siddha@intel.com>
Subject: Re: 20% performance drop on PostgreSQL 9.2 from kernel 3.5.3 to 3.6-rc5 on AMD chipsets - bisected
Date: Mon, 24 Sep 2012 19:44:17 +0200 [thread overview]
Message-ID: <1348508657.11847.114.camel@twins> (raw)
In-Reply-To: <1348505683.11847.111.camel@twins>
On Mon, 2012-09-24 at 18:54 +0200, Peter Zijlstra wrote:
> But let me try and come up with the list thing, I think we've
> actually got that someplace as well.
OK, I'm sure the below can be written better, but my brain is gone for
the day...
---
include/linux/sched.h | 1 +
kernel/sched/core.c | 1 +
kernel/sched/fair.c | 102 +++++++++++++++++++++++++++++++++++---------------
3 files changed, 73 insertions(+), 31 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0beac68..d72ea68 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -888,6 +888,7 @@ struct sched_group {
atomic_t ref;
unsigned int group_weight;
+ int group_first;
struct sched_group_power *sgp;
/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b38f00e..1177eb1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5781,6 +5781,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
do {
sg->group_weight = cpumask_weight(sched_group_cpus(sg));
+ sg->group_first = cpumask_first(sched_group_cpus(sg));
sg = sg->next;
} while (sg != sd->groups);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b800a1..601bc38 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2634,50 +2634,90 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
*/
static int select_idle_sibling(struct task_struct *p, int target)
{
- int cpu = smp_processor_id();
- int prev_cpu = task_cpu(p);
- struct sched_domain *sd;
- struct sched_group *sg;
- int i;
+ struct sched_domain *sd_smt, *sd_llc;
+ struct sched_group *sg_smt, *sg_llc;
/*
- * If the task is going to be woken-up on this cpu and if it is
- * already idle, then it is the right target.
+ * If the target is idle, easy peasy, we're done.
*/
- if (target == cpu && idle_cpu(cpu))
- return cpu;
+ if (idle_cpu(target))
+ return target;
/*
- * If the task is going to be woken-up on the cpu where it previously
- * ran and if it is currently idle, then it the right target.
+ * Otherwise, see if there's an idle core in the cache domain.
*/
- if (target == prev_cpu && idle_cpu(prev_cpu))
- return prev_cpu;
+ sd_llc = rcu_dereference(per_cpu(sd_llc, target));
+ sg_llc = sd_llc->groups;
+ do {
+ int candidate = -1;
+
+ sd_smt = rcu_dereference(per_cpu(sd_llc, sg_llc->group_first));
+ for_each_lower_domain(sd_smt) {
+ if (sd_smt->flags & SD_SHARE_CPUPOWER) /* aka. SMT */
+ break;
+ }
+
+ if (!sd_smt) {
+ int cpu = sg_llc->group_first; /* Assume singleton group */
+
+ if (!idle_cpu(cpu))
+ goto next_llc;
+
+ if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
+ goto next_llc;
+
+ return cpu;
+ }
+
+ sg_smt = sd_smt->groups;
+ do {
+ int cpu = sg_smt->group_first; /* Assume singleton group */
+
+ if (!idle_cpu(cpu)) /* core is not idle, skip to next core */
+ goto next_llc;
+
+ if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
+ goto next_smt;
+
+ if (candidate < 0)
+ candidate = cpu;
+
+next_smt:
+ sg_smt = sg_smt->next;
+ } while (sg_smt != sd_smt->groups);
+
+ if (candidate >= 0)
+ return candidate;
+
+next_llc:
+ sg_llc = sg_llc->next;
+ } while (sg_llc != sd_llc->groups);
/*
- * Otherwise, iterate the domains and find an elegible idle cpu.
+ * Failing that, see if there's an idle SMT sibling.
*/
- sd = rcu_dereference(per_cpu(sd_llc, target));
- for_each_lower_domain(sd) {
- sg = sd->groups;
+ sd_smt = rcu_dereference(per_cpu(sd_llc, target));
+ for_each_lower_domain(sd_smt) {
+ if (sd_smt->flags & SD_SHARE_CPUPOWER) /* aka. SMT */
+ break;
+ }
+
+ if (sd_smt) {
+ sg_smt = sd_smt->groups;
do {
- if (!cpumask_intersects(sched_group_cpus(sg),
- tsk_cpus_allowed(p)))
- goto next;
+ int cpu = sg_smt->group_first; /* Assume singleton group */
- for_each_cpu(i, sched_group_cpus(sg)) {
- if (!idle_cpu(i))
- goto next;
- }
+ if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) &&
+ idle_cpu(cpu))
+ return cpu;
- target = cpumask_first_and(sched_group_cpus(sg),
- tsk_cpus_allowed(p));
- goto done;
-next:
- sg = sg->next;
- } while (sg != sd->groups);
+ sg_smt = sg_smt->next;
+ } while (sg_smt != sd_smt->groups);
}
-done:
+
+ /*
+ * OK, no idle siblings of any kind, take what we started with.
+ */
return target;
}
next prev parent reply other threads:[~2012-09-24 17:45 UTC|newest]
Thread overview: 115+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-09-14 7:47 20% performance drop on PostgreSQL 9.2 from kernel 3.5.3 to 3.6-rc5 on AMD chipsets Nikolay Ulyanitsky
2012-09-14 18:40 ` Borislav Petkov
2012-09-14 18:51 ` Borislav Petkov
2012-09-14 21:27 ` 20% performance drop on PostgreSQL 9.2 from kernel 3.5.3 to 3.6-rc5 on AMD chipsets - bisected Borislav Petkov
2012-09-14 21:40 ` Peter Zijlstra
2012-09-14 21:44 ` Linus Torvalds
2012-09-14 21:56 ` Peter Zijlstra
2012-09-14 21:59 ` Peter Zijlstra
2012-09-15 3:57 ` Mike Galbraith
2012-09-14 22:01 ` Linus Torvalds
2012-09-14 22:10 ` Peter Zijlstra
2012-09-14 22:20 ` Linus Torvalds
2012-09-14 22:14 ` Borislav Petkov
2012-09-14 21:45 ` Borislav Petkov
2012-09-14 21:42 ` Linus Torvalds
2012-09-15 3:33 ` Mike Galbraith
2012-09-15 16:16 ` Andi Kleen
2012-09-15 16:36 ` Mike Galbraith
2012-09-15 17:08 ` richard -rw- weinberger
2012-09-16 4:48 ` Mike Galbraith
2012-09-15 21:32 ` Alan Cox
2012-09-16 4:35 ` Mike Galbraith
2012-09-16 19:57 ` Linus Torvalds
2012-09-17 8:08 ` Mike Galbraith
2012-09-17 10:07 ` Ingo Molnar
2012-09-17 10:47 ` Mike Galbraith
2012-09-17 14:39 ` Andi Kleen
2012-09-19 12:35 ` Mike Galbraith
2012-09-19 14:54 ` Ingo Molnar
2012-09-19 15:23 ` Mike Galbraith
2012-09-24 15:00 ` Mel Gorman
2012-09-24 15:23 ` Nikolay Ulyanitsky
2012-09-24 15:53 ` Borislav Petkov
2012-09-24 15:30 ` Peter Zijlstra
2012-09-24 15:51 ` Mike Galbraith
2012-09-24 15:52 ` Linus Torvalds
2012-09-24 16:07 ` Peter Zijlstra
2012-09-24 16:33 ` Linus Torvalds
2012-09-24 16:54 ` Peter Zijlstra
2012-09-25 12:10 ` Hillf Danton
2012-09-24 16:12 ` Peter Zijlstra
2012-09-24 16:30 ` Linus Torvalds
2012-09-24 16:52 ` Borislav Petkov
2012-09-24 16:54 ` Peter Zijlstra
2012-09-24 17:44 ` Peter Zijlstra [this message]
2012-09-25 13:23 ` Mel Gorman
2012-09-25 14:36 ` Peter Zijlstra
2012-09-24 18:26 ` Mike Galbraith
2012-09-24 19:12 ` Linus Torvalds
2012-09-24 19:20 ` Borislav Petkov
2012-09-25 1:57 ` Mike Galbraith
2012-09-25 2:11 ` Linus Torvalds
2012-09-25 2:49 ` Mike Galbraith
2012-09-25 3:10 ` Linus Torvalds
2012-09-25 3:20 ` Mike Galbraith
2012-09-25 3:32 ` Linus Torvalds
2012-09-25 3:43 ` Mike Galbraith
2012-09-25 11:58 ` Peter Zijlstra
2012-09-25 13:17 ` Borislav Petkov
2012-09-25 17:00 ` Borislav Petkov
2012-09-25 17:21 ` Linus Torvalds
2012-09-25 18:42 ` Borislav Petkov
2012-09-25 19:08 ` Linus Torvalds
2012-09-26 2:23 ` Mike Galbraith
2012-09-26 17:17 ` Borislav Petkov
2012-09-26 2:00 ` Mike Galbraith
2012-09-26 2:22 ` Linus Torvalds
2012-09-26 2:42 ` Mike Galbraith
2012-09-26 17:15 ` Borislav Petkov
2012-09-26 16:32 ` Borislav Petkov
2012-09-26 18:19 ` Linus Torvalds
2012-09-26 21:37 ` Borislav Petkov
2012-09-27 5:09 ` Mike Galbraith
2012-09-27 5:18 ` Borislav Petkov
2012-09-27 5:44 ` Mike Galbraith
2012-09-27 5:47 ` Ingo Molnar
2012-09-27 5:59 ` Ingo Molnar
2012-09-27 6:34 ` Mike Galbraith
2012-09-27 6:41 ` Ingo Molnar
2012-09-27 6:54 ` Mike Galbraith
2012-09-27 7:10 ` Ingo Molnar
2012-09-27 16:25 ` Borislav Petkov
2012-09-27 17:44 ` Linus Torvalds
2012-09-27 18:05 ` Borislav Petkov
2012-09-27 18:19 ` Linus Torvalds
2012-09-27 18:29 ` Peter Zijlstra
2012-09-27 19:24 ` Borislav Petkov
2012-09-28 3:50 ` Mike Galbraith
2012-09-28 12:30 ` Borislav Petkov
2012-09-27 19:40 ` Linus Torvalds
2012-09-28 4:13 ` Mike Galbraith
2012-09-28 8:37 ` Peter Zijlstra
2012-09-27 7:17 ` david
2012-09-27 7:55 ` Mike Galbraith
2012-09-27 10:20 ` Borislav Petkov
2012-09-27 13:38 ` Mike Galbraith
2012-09-27 16:55 ` david
2012-09-27 4:32 ` Mike Galbraith
2012-09-27 8:21 ` Peter Zijlstra
2012-09-27 16:48 ` david
2012-09-27 17:38 ` Peter Zijlstra
2012-09-27 17:45 ` david
2012-09-27 18:09 ` Peter Zijlstra
2012-09-27 18:15 ` Linus Torvalds
2012-09-27 18:24 ` Borislav Petkov
2012-09-25 1:39 ` Mike Galbraith
2012-09-25 21:11 ` Suresh Siddha
2012-09-25 4:16 ` Mike Galbraith
2012-09-15 4:11 ` Mike Galbraith
[not found] ` <CA+55aFz1A7HbMYS9o-GTS5Zm=Xx8MUD7cR05GMVo--2E34jcgQ@mail.gmail.com>
2012-09-15 4:42 ` Mike Galbraith
2012-09-15 10:44 ` Borislav Petkov
2012-09-15 14:47 ` Mike Galbraith
2012-09-15 15:18 ` Borislav Petkov
2012-09-15 16:13 ` Mike Galbraith
2012-09-15 19:44 ` Borislav Petkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1348508657.11847.114.camel@twins \
--to=peterz@infradead.org \
--cc=akpm@linux-foundation.org \
--cc=andreas.herrmann3@amd.com \
--cc=bp@alien8.de \
--cc=efault@gmx.de \
--cc=linux-kernel@vger.kernel.org \
--cc=lystor@gmail.com \
--cc=mgorman@suse.de \
--cc=mingo@kernel.org \
--cc=suresh.b.siddha@intel.com \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.