From: Andrea Arcangeli <aarcange@redhat.com>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: Hillf Danton <dhillf@gmail.com>, Dan Smith <danms@us.ibm.com>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Linus Torvalds <torvalds@linux-foundation.org>,
Andrew Morton <akpm@linux-foundation.org>,
Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@elte.hu>,
Paul Turner <pjt@google.com>,
Suresh Siddha <suresh.b.siddha@intel.com>,
Mike Galbraith <efault@gmx.de>,
"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
Lai Jiangshan <laijs@cn.fujitsu.com>,
Bharata B Rao <bharata.rao@gmail.com>,
Lee Schermerhorn <Lee.Schermerhorn@hp.com>,
Rik van Riel <riel@redhat.com>,
Johannes Weiner <hannes@cmpxchg.org>
Subject: [PATCH 39/39] autonuma: NUMA scheduler SMT awareness
Date: Mon, 26 Mar 2012 19:46:26 +0200 [thread overview]
Message-ID: <1332783986-24195-40-git-send-email-aarcange@redhat.com> (raw)
In-Reply-To: <1332783986-24195-1-git-send-email-aarcange@redhat.com>
Add SMT awareness to the NUMA scheduler so that it will not move load
from fully idle SMT threads to semi-idle SMT threads.
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
---
include/linux/autonuma_flags.h | 10 ++++++++
kernel/sched/numa.c | 50 +++++++++++++++++++++++++++++++++++++--
mm/autonuma.c | 7 +++++
3 files changed, 64 insertions(+), 3 deletions(-)
diff --git a/include/linux/autonuma_flags.h b/include/linux/autonuma_flags.h
index 9c702fd..d6b34b0 100644
--- a/include/linux/autonuma_flags.h
+++ b/include/linux/autonuma_flags.h
@@ -8,6 +8,7 @@ enum autonuma_flag {
AUTONUMA_SCHED_LOAD_BALANCE_STRICT_FLAG,
AUTONUMA_SCHED_CLONE_RESET_FLAG,
AUTONUMA_SCHED_FORK_RESET_FLAG,
+ AUTONUMA_SCHED_SMT_FLAG,
AUTONUMA_SCAN_PMD_FLAG,
AUTONUMA_SCAN_USE_WORKING_SET_FLAG,
AUTONUMA_MIGRATE_DEFER_FLAG,
@@ -43,6 +44,15 @@ static bool inline autonuma_sched_fork_reset(void)
&autonuma_flags);
}
+static bool inline autonuma_sched_smt(void)
+{
+#ifdef CONFIG_SCHED_SMT
+ return !!test_bit(AUTONUMA_SCHED_SMT_FLAG, &autonuma_flags);
+#else
+ return 0;
+#endif
+}
+
static bool inline autonuma_scan_pmd(void)
{
return !!test_bit(AUTONUMA_SCAN_PMD_FLAG, &autonuma_flags);
diff --git a/kernel/sched/numa.c b/kernel/sched/numa.c
index d51e1ec..4211305 100644
--- a/kernel/sched/numa.c
+++ b/kernel/sched/numa.c
@@ -11,6 +11,30 @@
#include "sched.h"
+static inline bool idle_cpu_avg(int cpu, bool require_avg_idle)
+{
+ struct rq *rq = cpu_rq(cpu);
+ return idle_cpu(cpu) && (!require_avg_idle ||
+ rq->avg_idle > sysctl_sched_migration_cost);
+}
+
+/* A false require_avg_idle param makes it easier for smt_idle() to return true */
+static bool smt_idle(int _cpu, bool require_avg_idle)
+{
+#ifdef CONFIG_SCHED_SMT
+ int cpu;
+
+ for_each_cpu_and(cpu, topology_thread_cpumask(_cpu), cpu_online_mask) {
+ if (cpu == _cpu)
+ continue;
+ if (!idle_cpu_avg(cpu, require_avg_idle))
+ return false;
+ }
+#endif
+
+ return true;
+}
+
#define AUTONUMA_BALANCE_SCALE 1000
/*
@@ -47,6 +71,7 @@ void sched_autonuma_balance(void)
int cpu, nid, selected_cpu, selected_nid;
int cpu_nid = numa_node_id();
int this_cpu = smp_processor_id();
+ int this_smt_idle;
unsigned long p_w, p_t, m_w, m_t;
unsigned long weight_delta_max, weight;
struct cpumask *allowed;
@@ -96,6 +121,7 @@ void sched_autonuma_balance(void)
weight_current[nid] = p_w*AUTONUMA_BALANCE_SCALE/p_t;
}
+ this_smt_idle = smt_idle(this_cpu, false);
bitmap_zero(mm_mask, NR_CPUS);
for_each_online_node(nid) {
if (nid == cpu_nid)
@@ -103,11 +129,24 @@ void sched_autonuma_balance(void)
for_each_cpu_and(cpu, cpumask_of_node(nid), allowed) {
struct mm_struct *mm;
struct rq *rq = cpu_rq(cpu);
+ bool other_smt_idle;
if (!cpu_online(cpu))
continue;
weight_others[cpu] = LONG_MAX;
- if (idle_cpu(cpu) &&
- rq->avg_idle > sysctl_sched_migration_cost) {
+
+ other_smt_idle = smt_idle(cpu, true);
+ if (autonuma_sched_smt() &&
+ this_smt_idle && !other_smt_idle)
+ continue;
+
+ if (idle_cpu_avg(cpu, true)) {
+ if (autonuma_sched_smt() &&
+ !this_smt_idle && other_smt_idle) {
+ /* NUMA affinity override */
+ weight_others[cpu] = -2;
+ continue;
+ }
+
if (weight_current[nid] >
weight_current[cpu_nid] &&
weight_current_mm[nid] >
@@ -115,6 +154,11 @@ void sched_autonuma_balance(void)
weight_others[cpu] = -1;
continue;
}
+
+ if (autonuma_sched_smt() &&
+ this_smt_idle && cpu_rq(this_cpu)->nr_running <= 1)
+ continue;
+
mm = rq->curr->mm;
if (!mm)
continue;
@@ -169,7 +213,7 @@ void sched_autonuma_balance(void)
w_cpu_nid = weight_current_mm[cpu_nid];
}
if (w_nid > weight_others[cpu] &&
- w_nid > w_cpu_nid) {
+ (w_nid > w_cpu_nid || weight_others[cpu] == -2)) {
weight = w_nid -
weight_others[cpu] +
w_nid -
diff --git a/mm/autonuma.c b/mm/autonuma.c
index 7ca4992..4cce6a1 100644
--- a/mm/autonuma.c
+++ b/mm/autonuma.c
@@ -23,6 +23,7 @@ unsigned long autonuma_flags __read_mostly =
(1<<AUTONUMA_SCHED_LOAD_BALANCE_STRICT_FLAG)|
(1<<AUTONUMA_SCHED_CLONE_RESET_FLAG)|
(1<<AUTONUMA_SCHED_FORK_RESET_FLAG)|
+ (1<<AUTONUMA_SCHED_SMT_FLAG)|
#ifdef CONFIG_AUTONUMA_DEFAULT_ENABLED
(1<<AUTONUMA_FLAG)|
#endif
@@ -1089,6 +1090,9 @@ SYSFS_ENTRY(defer, AUTONUMA_MIGRATE_DEFER_FLAG);
SYSFS_ENTRY(load_balance_strict, AUTONUMA_SCHED_LOAD_BALANCE_STRICT_FLAG);
SYSFS_ENTRY(clone_reset, AUTONUMA_SCHED_CLONE_RESET_FLAG);
SYSFS_ENTRY(fork_reset, AUTONUMA_SCHED_FORK_RESET_FLAG);
+#ifdef CONFIG_SCHED_SMT
+SYSFS_ENTRY(smt, AUTONUMA_SCHED_SMT_FLAG);
+#endif
#undef SYSFS_ENTRY
@@ -1205,6 +1209,9 @@ static struct attribute *scheduler_attr[] = {
&clone_reset_attr.attr,
&fork_reset_attr.attr,
&load_balance_strict_attr.attr,
+#ifdef CONFIG_SCHED_SMT
+ &smt_attr.attr,
+#endif
NULL,
};
static struct attribute_group scheduler_attr_group = {
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2012-03-26 18:27 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-03-26 17:45 [PATCH 00/39] [RFC] AutoNUMA alpha10 Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 01/39] autonuma: make set_pmd_at always available Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 02/39] xen: document Xen is using an unused bit for the pagetables Andrea Arcangeli
2012-03-30 21:40 ` Konrad Rzeszutek Wilk
2012-03-26 17:45 ` [PATCH 03/39] autonuma: define _PAGE_NUMA_PTE and _PAGE_NUMA_PMD Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 04/39] autonuma: x86 pte_numa() and pmd_numa() Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 05/39] autonuma: generic " Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 06/39] autonuma: teach gup_fast about pte_numa Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 07/39] autonuma: introduce kthread_bind_node() Andrea Arcangeli
2012-03-26 18:32 ` Peter Zijlstra
2012-03-27 15:22 ` Andrea Arcangeli
2012-03-27 15:45 ` Peter Zijlstra
2012-03-27 16:04 ` Andrea Arcangeli
2012-03-27 16:19 ` Peter Zijlstra
2012-03-26 17:45 ` [PATCH 08/39] autonuma: mm_autonuma and sched_autonuma data structures Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 09/39] autonuma: define the autonuma flags Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 10/39] autonuma: core autonuma.h header Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 11/39] autonuma: CPU follow memory algorithm Andrea Arcangeli
2012-03-26 18:25 ` Peter Zijlstra
2012-03-26 19:28 ` Rik van Riel
2012-03-26 19:44 ` Andrea Arcangeli
2012-03-26 19:58 ` Linus Torvalds
2012-03-26 20:39 ` Andrea Arcangeli
2012-03-27 8:39 ` Peter Zijlstra
2012-03-27 14:37 ` Andrea Arcangeli
2012-03-27 16:15 ` Andrea Arcangeli
2012-03-28 11:26 ` Peter Zijlstra
2012-03-28 18:39 ` Andrea Arcangeli
2012-03-27 17:09 ` Ingo Molnar
2012-03-26 17:45 ` [PATCH 12/39] autonuma: add page structure fields Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 13/39] autonuma: knuma_migrated per NUMA node queues Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 14/39] autonuma: init knuma_migrated queues Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 15/39] autonuma: autonuma_enter/exit Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 16/39] autonuma: call autonuma_setup_new_exec() Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 17/39] autonuma: alloc/free/init sched_autonuma Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 18/39] autonuma: alloc/free/init mm_autonuma Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 19/39] mm: add unlikely to the mm allocation failure check Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 20/39] autonuma: avoid CFS select_task_rq_fair to return -1 Andrea Arcangeli
2012-03-26 19:36 ` Peter Zijlstra
2012-03-26 20:53 ` Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 21/39] autonuma: fix selecting task runqueue Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 22/39] autonuma: select_task_rq_fair cleanup new_cpu < 0 fix Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 23/39] autonuma: teach CFS about autonuma affinity Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 24/39] autonuma: fix finding idlest cpu Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 25/39] autonuma: fix selecting idle sibling Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 26/39] autonuma: select_idle_sibling cleanup target assignment Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 27/39] autonuma: core Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 28/39] autonuma: follow_page check for pte_numa/pmd_numa Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 29/39] autonuma: default mempolicy follow AutoNUMA Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 30/39] autonuma: call autonuma_split_huge_page() Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 31/39] autonuma: make khugepaged pte_numa aware Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 32/39] autonuma: retain page last_nid information in khugepaged Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 33/39] autonuma: numa hinting page faults entry points Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 34/39] autonuma: reset autonuma page data when pages are freed Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 35/39] autonuma: initialize page structure fields Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 36/39] autonuma: link mm/autonuma.o and kernel/sched/numa.o Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 37/39] autonuma: add CONFIG_AUTONUMA and CONFIG_AUTONUMA_DEFAULT_ENABLED Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 38/39] autonuma: boost khugepaged scanning rate Andrea Arcangeli
2012-03-26 17:46 ` Andrea Arcangeli [this message]
2012-03-26 18:57 ` [PATCH 39/39] autonuma: NUMA scheduler SMT awareness Peter Zijlstra
2012-03-27 0:00 ` Andrea Arcangeli
2012-03-28 13:51 ` Andrea Arcangeli
2012-04-03 20:35 ` [PATCH 00/39] [RFC] AutoNUMA alpha10 Srivatsa Vaddagiri
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1332783986-24195-40-git-send-email-aarcange@redhat.com \
--to=aarcange@redhat.com \
--cc=Lee.Schermerhorn@hp.com \
--cc=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=bharata.rao@gmail.com \
--cc=danms@us.ibm.com \
--cc=dhillf@gmail.com \
--cc=efault@gmx.de \
--cc=hannes@cmpxchg.org \
--cc=laijs@cn.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mingo@elte.hu \
--cc=paulmck@linux.vnet.ibm.com \
--cc=pjt@google.com \
--cc=riel@redhat.com \
--cc=suresh.b.siddha@intel.com \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).