linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Andrea Arcangeli <aarcange@redhat.com>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: Hillf Danton <dhillf@gmail.com>, Dan Smith <danms@us.ibm.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@elte.hu>,
	Paul Turner <pjt@google.com>,
	Suresh Siddha <suresh.b.siddha@intel.com>,
	Mike Galbraith <efault@gmx.de>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	Lai Jiangshan <laijs@cn.fujitsu.com>,
	Bharata B Rao <bharata.rao@gmail.com>,
	Lee Schermerhorn <Lee.Schermerhorn@hp.com>,
	Rik van Riel <riel@redhat.com>,
	Johannes Weiner <hannes@cmpxchg.org>
Subject: [PATCH 39/39] autonuma: NUMA scheduler SMT awareness
Date: Mon, 26 Mar 2012 19:46:26 +0200	[thread overview]
Message-ID: <1332783986-24195-40-git-send-email-aarcange@redhat.com> (raw)
In-Reply-To: <1332783986-24195-1-git-send-email-aarcange@redhat.com>

Add SMT awareness to the NUMA scheduler so that it will not move load
from fully idle SMT threads to semi-idle SMT threads.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
---
 include/linux/autonuma_flags.h |   10 ++++++++
 kernel/sched/numa.c            |   50 +++++++++++++++++++++++++++++++++++++--
 mm/autonuma.c                  |    7 +++++
 3 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/include/linux/autonuma_flags.h b/include/linux/autonuma_flags.h
index 9c702fd..d6b34b0 100644
--- a/include/linux/autonuma_flags.h
+++ b/include/linux/autonuma_flags.h
@@ -8,6 +8,7 @@ enum autonuma_flag {
 	AUTONUMA_SCHED_LOAD_BALANCE_STRICT_FLAG,
 	AUTONUMA_SCHED_CLONE_RESET_FLAG,
 	AUTONUMA_SCHED_FORK_RESET_FLAG,
+	AUTONUMA_SCHED_SMT_FLAG,
 	AUTONUMA_SCAN_PMD_FLAG,
 	AUTONUMA_SCAN_USE_WORKING_SET_FLAG,
 	AUTONUMA_MIGRATE_DEFER_FLAG,
@@ -43,6 +44,15 @@ static bool inline autonuma_sched_fork_reset(void)
 			  &autonuma_flags);
 }
 
+static bool inline autonuma_sched_smt(void)
+{
+#ifdef CONFIG_SCHED_SMT
+	return !!test_bit(AUTONUMA_SCHED_SMT_FLAG, &autonuma_flags);
+#else
+	return 0;
+#endif
+}
+
 static bool inline autonuma_scan_pmd(void)
 {
 	return !!test_bit(AUTONUMA_SCAN_PMD_FLAG, &autonuma_flags);
diff --git a/kernel/sched/numa.c b/kernel/sched/numa.c
index d51e1ec..4211305 100644
--- a/kernel/sched/numa.c
+++ b/kernel/sched/numa.c
@@ -11,6 +11,30 @@
 
 #include "sched.h"
 
+static inline bool idle_cpu_avg(int cpu, bool require_avg_idle)
+{
+	struct rq *rq = cpu_rq(cpu);
+	return idle_cpu(cpu) && (!require_avg_idle ||
+				 rq->avg_idle > sysctl_sched_migration_cost);
+}
+
+/* Passing require_avg_idle=false makes smt_idle() more likely to return true */
+static bool smt_idle(int _cpu, bool require_avg_idle)
+{
+#ifdef CONFIG_SCHED_SMT
+	int cpu;
+
+	for_each_cpu_and(cpu, topology_thread_cpumask(_cpu), cpu_online_mask) {
+		if (cpu == _cpu)
+			continue;
+		if (!idle_cpu_avg(cpu, require_avg_idle))
+			return false;
+	}
+#endif
+
+	return true;
+}
+
 #define AUTONUMA_BALANCE_SCALE 1000
 
 /*
@@ -47,6 +71,7 @@ void sched_autonuma_balance(void)
 	int cpu, nid, selected_cpu, selected_nid;
 	int cpu_nid = numa_node_id();
 	int this_cpu = smp_processor_id();
+	int this_smt_idle;
 	unsigned long p_w, p_t, m_w, m_t;
 	unsigned long weight_delta_max, weight;
 	struct cpumask *allowed;
@@ -96,6 +121,7 @@ void sched_autonuma_balance(void)
 		weight_current[nid] = p_w*AUTONUMA_BALANCE_SCALE/p_t;
 	}
 
+	this_smt_idle = smt_idle(this_cpu, false);
 	bitmap_zero(mm_mask, NR_CPUS);
 	for_each_online_node(nid) {
 		if (nid == cpu_nid)
@@ -103,11 +129,24 @@ void sched_autonuma_balance(void)
 		for_each_cpu_and(cpu, cpumask_of_node(nid), allowed) {
 			struct mm_struct *mm;
 			struct rq *rq = cpu_rq(cpu);
+			bool other_smt_idle;
 			if (!cpu_online(cpu))
 				continue;
 			weight_others[cpu] = LONG_MAX;
-			if (idle_cpu(cpu) &&
-			    rq->avg_idle > sysctl_sched_migration_cost) {
+
+			other_smt_idle = smt_idle(cpu, true);
+			if (autonuma_sched_smt() &&
+			    this_smt_idle && !other_smt_idle)
+				continue;
+
+			if (idle_cpu_avg(cpu, true)) {
+				if (autonuma_sched_smt() &&
+				    !this_smt_idle && other_smt_idle) {
+					/* NUMA affinity override */
+					weight_others[cpu] = -2;
+					continue;
+				}
+
 				if (weight_current[nid] >
 				    weight_current[cpu_nid] &&
 				    weight_current_mm[nid] >
@@ -115,6 +154,11 @@ void sched_autonuma_balance(void)
 					weight_others[cpu] = -1;
 				continue;
 			}
+
+			if (autonuma_sched_smt() &&
+			    this_smt_idle && cpu_rq(this_cpu)->nr_running <= 1)
+				continue;
+
 			mm = rq->curr->mm;
 			if (!mm)
 				continue;
@@ -169,7 +213,7 @@ void sched_autonuma_balance(void)
 				w_cpu_nid = weight_current_mm[cpu_nid];
 			}
 			if (w_nid > weight_others[cpu] &&
-			    w_nid > w_cpu_nid) {
+			    (w_nid > w_cpu_nid || weight_others[cpu] == -2)) {
 				weight = w_nid -
 					weight_others[cpu] +
 					w_nid -
diff --git a/mm/autonuma.c b/mm/autonuma.c
index 7ca4992..4cce6a1 100644
--- a/mm/autonuma.c
+++ b/mm/autonuma.c
@@ -23,6 +23,7 @@ unsigned long autonuma_flags __read_mostly =
 	(1<<AUTONUMA_SCHED_LOAD_BALANCE_STRICT_FLAG)|
 	(1<<AUTONUMA_SCHED_CLONE_RESET_FLAG)|
 	(1<<AUTONUMA_SCHED_FORK_RESET_FLAG)|
+	(1<<AUTONUMA_SCHED_SMT_FLAG)|
 #ifdef CONFIG_AUTONUMA_DEFAULT_ENABLED
 	(1<<AUTONUMA_FLAG)|
 #endif
@@ -1089,6 +1090,9 @@ SYSFS_ENTRY(defer, AUTONUMA_MIGRATE_DEFER_FLAG);
 SYSFS_ENTRY(load_balance_strict, AUTONUMA_SCHED_LOAD_BALANCE_STRICT_FLAG);
 SYSFS_ENTRY(clone_reset, AUTONUMA_SCHED_CLONE_RESET_FLAG);
 SYSFS_ENTRY(fork_reset, AUTONUMA_SCHED_FORK_RESET_FLAG);
+#ifdef CONFIG_SCHED_SMT
+SYSFS_ENTRY(smt, AUTONUMA_SCHED_SMT_FLAG);
+#endif
 
 #undef SYSFS_ENTRY
 
@@ -1205,6 +1209,9 @@ static struct attribute *scheduler_attr[] = {
 	&clone_reset_attr.attr,
 	&fork_reset_attr.attr,
 	&load_balance_strict_attr.attr,
+#ifdef CONFIG_SCHED_SMT
+	&smt_attr.attr,
+#endif
 	NULL,
 };
 static struct attribute_group scheduler_attr_group = {

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2012-03-26 18:27 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-03-26 17:45 [PATCH 00/39] [RFC] AutoNUMA alpha10 Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 01/39] autonuma: make set_pmd_at always available Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 02/39] xen: document Xen is using an unused bit for the pagetables Andrea Arcangeli
2012-03-30 21:40   ` Konrad Rzeszutek Wilk
2012-03-26 17:45 ` [PATCH 03/39] autonuma: define _PAGE_NUMA_PTE and _PAGE_NUMA_PMD Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 04/39] autonuma: x86 pte_numa() and pmd_numa() Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 05/39] autonuma: generic " Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 06/39] autonuma: teach gup_fast about pte_numa Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 07/39] autonuma: introduce kthread_bind_node() Andrea Arcangeli
2012-03-26 18:32   ` Peter Zijlstra
2012-03-27 15:22     ` Andrea Arcangeli
2012-03-27 15:45       ` Peter Zijlstra
2012-03-27 16:04         ` Andrea Arcangeli
2012-03-27 16:19           ` Peter Zijlstra
2012-03-26 17:45 ` [PATCH 08/39] autonuma: mm_autonuma and sched_autonuma data structures Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 09/39] autonuma: define the autonuma flags Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 10/39] autonuma: core autonuma.h header Andrea Arcangeli
2012-03-26 17:45 ` [PATCH 11/39] autonuma: CPU follow memory algorithm Andrea Arcangeli
2012-03-26 18:25   ` Peter Zijlstra
2012-03-26 19:28     ` Rik van Riel
2012-03-26 19:44       ` Andrea Arcangeli
2012-03-26 19:58         ` Linus Torvalds
2012-03-26 20:39           ` Andrea Arcangeli
2012-03-27  8:39             ` Peter Zijlstra
2012-03-27 14:37               ` Andrea Arcangeli
2012-03-27 16:15               ` Andrea Arcangeli
2012-03-28 11:26                 ` Peter Zijlstra
2012-03-28 18:39                   ` Andrea Arcangeli
2012-03-27 17:09               ` Ingo Molnar
2012-03-26 17:45 ` [PATCH 12/39] autonuma: add page structure fields Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 13/39] autonuma: knuma_migrated per NUMA node queues Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 14/39] autonuma: init knuma_migrated queues Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 15/39] autonuma: autonuma_enter/exit Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 16/39] autonuma: call autonuma_setup_new_exec() Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 17/39] autonuma: alloc/free/init sched_autonuma Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 18/39] autonuma: alloc/free/init mm_autonuma Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 19/39] mm: add unlikely to the mm allocation failure check Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 20/39] autonuma: avoid CFS select_task_rq_fair to return -1 Andrea Arcangeli
2012-03-26 19:36   ` Peter Zijlstra
2012-03-26 20:53     ` Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 21/39] autonuma: fix selecting task runqueue Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 22/39] autonuma: select_task_rq_fair cleanup new_cpu < 0 fix Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 23/39] autonuma: teach CFS about autonuma affinity Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 24/39] autonuma: fix finding idlest cpu Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 25/39] autonuma: fix selecting idle sibling Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 26/39] autonuma: select_idle_sibling cleanup target assignment Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 27/39] autonuma: core Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 28/39] autonuma: follow_page check for pte_numa/pmd_numa Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 29/39] autonuma: default mempolicy follow AutoNUMA Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 30/39] autonuma: call autonuma_split_huge_page() Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 31/39] autonuma: make khugepaged pte_numa aware Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 32/39] autonuma: retain page last_nid information in khugepaged Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 33/39] autonuma: numa hinting page faults entry points Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 34/39] autonuma: reset autonuma page data when pages are freed Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 35/39] autonuma: initialize page structure fields Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 36/39] autonuma: link mm/autonuma.o and kernel/sched/numa.o Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 37/39] autonuma: add CONFIG_AUTONUMA and CONFIG_AUTONUMA_DEFAULT_ENABLED Andrea Arcangeli
2012-03-26 17:46 ` [PATCH 38/39] autonuma: boost khugepaged scanning rate Andrea Arcangeli
2012-03-26 17:46 ` Andrea Arcangeli [this message]
2012-03-26 18:57   ` [PATCH 39/39] autonuma: NUMA scheduler SMT awareness Peter Zijlstra
2012-03-27  0:00     ` Andrea Arcangeli
2012-03-28 13:51       ` Andrea Arcangeli
2012-04-03 20:35 ` [PATCH 00/39] [RFC] AutoNUMA alpha10 Srivatsa Vaddagiri

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1332783986-24195-40-git-send-email-aarcange@redhat.com \
    --to=aarcange@redhat.com \
    --cc=Lee.Schermerhorn@hp.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=bharata.rao@gmail.com \
    --cc=danms@us.ibm.com \
    --cc=dhillf@gmail.com \
    --cc=efault@gmx.de \
    --cc=hannes@cmpxchg.org \
    --cc=laijs@cn.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@elte.hu \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=pjt@google.com \
    --cc=riel@redhat.com \
    --cc=suresh.b.siddha@intel.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).