[PATCH 08/10] sched: Converge NUMA migrations

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Ingo Molnar <mingo@kernel.org>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Paul Turner <pjt@google.com>,
	Lee Schermerhorn <Lee.Schermerhorn@hp.com>,
	Christoph Lameter <cl@linux.com>, Rik van Riel <riel@redhat.com>,
	Mel Gorman <mgorman@suse.de>,
	Andrew Morton <akpm@linux-foundation.org>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Hugh Dickins <hughd@google.com>
Subject: [PATCH 08/10] sched: Converge NUMA migrations
Date: Fri, 30 Nov 2012 20:58:39 +0100	[thread overview]
Message-ID: <1354305521-11583-9-git-send-email-mingo@kernel.org> (raw)
In-Reply-To: <1354305521-11583-1-git-send-email-mingo@kernel.org>

Consolidate the various convergence models and add a new one: when
a strongly converged NUMA task migrates, prefer to migrate it in
the direction of its preferred node.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c     | 59 +++++++++++++++++++++++++++++++++++--------------
 kernel/sched/features.h |  3 ++-
 2 files changed, 44 insertions(+), 18 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1f6104a..10cbfa3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4750,6 +4750,35 @@ done:
 	return target;
 }
 
+static bool numa_allow_migration(struct task_struct *p, int prev_cpu, int new_cpu)
+{
+#ifdef CONFIG_NUMA_BALANCING
+	if (sched_feat(NUMA_CONVERGE_MIGRATIONS)) {
+		/* Help in the direction of expected convergence: */
+		if (p->convergence_node >= 0 && (cpu_to_node(new_cpu) != p->convergence_node))
+			return false;
+
+		return true;
+	}
+
+	if (sched_feat(NUMA_BALANCE_ALL)) {
+ 		if (task_numa_shared(p) >= 0)
+			return false;
+
+		return true;
+	}
+
+	if (sched_feat(NUMA_BALANCE_INTERNODE)) {
+		if (task_numa_shared(p) >= 0) {
+ 			if (cpu_to_node(prev_cpu) != cpu_to_node(new_cpu))
+				return false;
+		}
+	}
+#endif
+	return true;
+}
+
+
 /*
  * sched_balance_self: balance the current task (running on cpu) in domains
  * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
@@ -4766,7 +4795,8 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 {
 	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
 	int cpu = smp_processor_id();
-	int prev_cpu = task_cpu(p);
+	int prev0_cpu = task_cpu(p);
+	int prev_cpu = prev0_cpu;
 	int new_cpu = cpu;
 	int want_affine = 0;
 	int sync = wake_flags & WF_SYNC;
@@ -4775,10 +4805,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 		return prev_cpu;
 
 #ifdef CONFIG_NUMA_BALANCING
-	/* We do NUMA balancing elsewhere: */
-	if (sched_feat(NUMA_BALANCE_ALL) && task_numa_shared(p) >= 0)
-		return prev_cpu;
-
 	if (sched_feat(WAKE_ON_IDEAL_CPU) && p->ideal_cpu >= 0)
 		return p->ideal_cpu;
 #endif
@@ -4857,8 +4883,8 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 unlock:
 	rcu_read_unlock();
 
-	if (sched_feat(NUMA_BALANCE_INTERNODE) && task_numa_shared(p) >= 0 && (cpu_to_node(prev_cpu) != cpu_to_node(new_cpu)))
-		return prev_cpu;
+	if (!numa_allow_migration(p, prev0_cpu, new_cpu))
+		return prev0_cpu;
 
 	return new_cpu;
 }
@@ -5401,8 +5427,11 @@ static bool can_migrate_running_task(struct task_struct *p, struct lb_env *env)
 static int can_migrate_task(struct task_struct *p, struct lb_env *env)
 {
 	/* We do NUMA balancing elsewhere: */
-	if (sched_feat(NUMA_BALANCE_ALL) && task_numa_shared(p) > 0 && env->failed <= env->sd->cache_nice_tries)
-		return false;
+
+	if (env->failed <= env->sd->cache_nice_tries) {
+		if (!numa_allow_migration(p, env->src_rq->cpu, env->dst_cpu))
+			return false;
+	}
 
 	if (!can_migrate_pinned_task(p, env))
 		return false;
@@ -5461,10 +5490,7 @@ static int move_one_task(struct lb_env *env)
 		if (!can_migrate_task(p, env))
 			continue;
 
-		if (sched_feat(NUMA_BALANCE_ALL) && task_numa_shared(p) >= 0)
-			continue;
-
-		if (sched_feat(NUMA_BALANCE_INTERNODE) && task_numa_shared(p) >= 0 && (cpu_to_node(env->src_rq->cpu) != cpu_to_node(env->dst_cpu)))
+		if (!numa_allow_migration(p, env->src_rq->cpu, env->dst_cpu))
 			continue;
 
 		move_task(p, env);
@@ -5527,10 +5553,7 @@ static int move_tasks(struct lb_env *env)
 		if (!can_migrate_task(p, env))
 			goto next;
 
-		if (sched_feat(NUMA_BALANCE_ALL) && task_numa_shared(p) >= 0)
-			continue;
-
-		if (sched_feat(NUMA_BALANCE_INTERNODE) && task_numa_shared(p) >= 0 && (cpu_to_node(env->src_rq->cpu) != cpu_to_node(env->dst_cpu)))
+		if (!numa_allow_migration(p, env->src_rq->cpu, env->dst_cpu))
 			goto next;
 
 		move_task(p, env);
@@ -6520,8 +6543,10 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		.iteration          = 0,
 	};
 
+#ifdef CONFIG_NUMA_BALANCING
 	if (sched_feat(NUMA_BALANCE_ALL))
 		return 1;
+#endif
 
 	cpumask_copy(cpus, cpu_active_mask);
 	max_lb_iterations = cpumask_weight(env.dst_grpmask);
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index fd9db0b..9075faf 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -76,9 +76,10 @@ SCHED_FEAT(WAKE_ON_IDEAL_CPU,		false)
 /* Do the working set probing faults: */
 SCHED_FEAT(NUMA,			true)
 SCHED_FEAT(NUMA_BALANCE_ALL,		false)
-SCHED_FEAT(NUMA_BALANCE_INTERNODE,		false)
+SCHED_FEAT(NUMA_BALANCE_INTERNODE,	false)
 SCHED_FEAT(NUMA_LB,			false)
 SCHED_FEAT(NUMA_GROUP_LB_COMPRESS,	true)
 SCHED_FEAT(NUMA_GROUP_LB_SPREAD,	true)
 SCHED_FEAT(MIGRATE_FAULT_STATS,		false)
+SCHED_FEAT(NUMA_CONVERGE_MIGRATIONS,	true)
 #endif
-- 
1.7.11.7

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

WARNING: multiple messages have this Message-ID (diff)

From: Ingo Molnar <mingo@kernel.org>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Paul Turner <pjt@google.com>,
	Lee Schermerhorn <Lee.Schermerhorn@hp.com>,
	Christoph Lameter <cl@linux.com>, Rik van Riel <riel@redhat.com>,
	Mel Gorman <mgorman@suse.de>,
	Andrew Morton <akpm@linux-foundation.org>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Hugh Dickins <hughd@google.com>
Subject: [PATCH 08/10] sched: Converge NUMA migrations
Date: Fri, 30 Nov 2012 20:58:39 +0100	[thread overview]
Message-ID: <1354305521-11583-9-git-send-email-mingo@kernel.org> (raw)
In-Reply-To: <1354305521-11583-1-git-send-email-mingo@kernel.org>

Consolidate the various convergence models and add a new one: when
a strongly converged NUMA task migrates, prefer to migrate it in
the direction of its preferred node.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c     | 59 +++++++++++++++++++++++++++++++++++--------------
 kernel/sched/features.h |  3 ++-
 2 files changed, 44 insertions(+), 18 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1f6104a..10cbfa3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4750,6 +4750,35 @@ done:
 	return target;
 }
 
+static bool numa_allow_migration(struct task_struct *p, int prev_cpu, int new_cpu)
+{
+#ifdef CONFIG_NUMA_BALANCING
+	if (sched_feat(NUMA_CONVERGE_MIGRATIONS)) {
+		/* Help in the direction of expected convergence: */
+		if (p->convergence_node >= 0 && (cpu_to_node(new_cpu) != p->convergence_node))
+			return false;
+
+		return true;
+	}
+
+	if (sched_feat(NUMA_BALANCE_ALL)) {
+ 		if (task_numa_shared(p) >= 0)
+			return false;
+
+		return true;
+	}
+
+	if (sched_feat(NUMA_BALANCE_INTERNODE)) {
+		if (task_numa_shared(p) >= 0) {
+ 			if (cpu_to_node(prev_cpu) != cpu_to_node(new_cpu))
+				return false;
+		}
+	}
+#endif
+	return true;
+}
+
+
 /*
  * sched_balance_self: balance the current task (running on cpu) in domains
  * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
@@ -4766,7 +4795,8 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 {
 	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
 	int cpu = smp_processor_id();
-	int prev_cpu = task_cpu(p);
+	int prev0_cpu = task_cpu(p);
+	int prev_cpu = prev0_cpu;
 	int new_cpu = cpu;
 	int want_affine = 0;
 	int sync = wake_flags & WF_SYNC;
@@ -4775,10 +4805,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 		return prev_cpu;
 
 #ifdef CONFIG_NUMA_BALANCING
-	/* We do NUMA balancing elsewhere: */
-	if (sched_feat(NUMA_BALANCE_ALL) && task_numa_shared(p) >= 0)
-		return prev_cpu;
-
 	if (sched_feat(WAKE_ON_IDEAL_CPU) && p->ideal_cpu >= 0)
 		return p->ideal_cpu;
 #endif
@@ -4857,8 +4883,8 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 unlock:
 	rcu_read_unlock();
 
-	if (sched_feat(NUMA_BALANCE_INTERNODE) && task_numa_shared(p) >= 0 && (cpu_to_node(prev_cpu) != cpu_to_node(new_cpu)))
-		return prev_cpu;
+	if (!numa_allow_migration(p, prev0_cpu, new_cpu))
+		return prev0_cpu;
 
 	return new_cpu;
 }
@@ -5401,8 +5427,11 @@ static bool can_migrate_running_task(struct task_struct *p, struct lb_env *env)
 static int can_migrate_task(struct task_struct *p, struct lb_env *env)
 {
 	/* We do NUMA balancing elsewhere: */
-	if (sched_feat(NUMA_BALANCE_ALL) && task_numa_shared(p) > 0 && env->failed <= env->sd->cache_nice_tries)
-		return false;
+
+	if (env->failed <= env->sd->cache_nice_tries) {
+		if (!numa_allow_migration(p, env->src_rq->cpu, env->dst_cpu))
+			return false;
+	}
 
 	if (!can_migrate_pinned_task(p, env))
 		return false;
@@ -5461,10 +5490,7 @@ static int move_one_task(struct lb_env *env)
 		if (!can_migrate_task(p, env))
 			continue;
 
-		if (sched_feat(NUMA_BALANCE_ALL) && task_numa_shared(p) >= 0)
-			continue;
-
-		if (sched_feat(NUMA_BALANCE_INTERNODE) && task_numa_shared(p) >= 0 && (cpu_to_node(env->src_rq->cpu) != cpu_to_node(env->dst_cpu)))
+		if (!numa_allow_migration(p, env->src_rq->cpu, env->dst_cpu))
 			continue;
 
 		move_task(p, env);
@@ -5527,10 +5553,7 @@ static int move_tasks(struct lb_env *env)
 		if (!can_migrate_task(p, env))
 			goto next;
 
-		if (sched_feat(NUMA_BALANCE_ALL) && task_numa_shared(p) >= 0)
-			continue;
-
-		if (sched_feat(NUMA_BALANCE_INTERNODE) && task_numa_shared(p) >= 0 && (cpu_to_node(env->src_rq->cpu) != cpu_to_node(env->dst_cpu)))
+		if (!numa_allow_migration(p, env->src_rq->cpu, env->dst_cpu))
 			goto next;
 
 		move_task(p, env);
@@ -6520,8 +6543,10 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		.iteration          = 0,
 	};
 
+#ifdef CONFIG_NUMA_BALANCING
 	if (sched_feat(NUMA_BALANCE_ALL))
 		return 1;
+#endif
 
 	cpumask_copy(cpus, cpu_active_mask);
 	max_lb_iterations = cpumask_weight(env.dst_grpmask);
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index fd9db0b..9075faf 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -76,9 +76,10 @@ SCHED_FEAT(WAKE_ON_IDEAL_CPU,		false)
 /* Do the working set probing faults: */
 SCHED_FEAT(NUMA,			true)
 SCHED_FEAT(NUMA_BALANCE_ALL,		false)
-SCHED_FEAT(NUMA_BALANCE_INTERNODE,		false)
+SCHED_FEAT(NUMA_BALANCE_INTERNODE,	false)
 SCHED_FEAT(NUMA_LB,			false)
 SCHED_FEAT(NUMA_GROUP_LB_COMPRESS,	true)
 SCHED_FEAT(NUMA_GROUP_LB_SPREAD,	true)
 SCHED_FEAT(MIGRATE_FAULT_STATS,		false)
+SCHED_FEAT(NUMA_CONVERGE_MIGRATIONS,	true)
 #endif
-- 
1.7.11.7

next prev parent reply	other threads:[~2012-11-30 19:59 UTC|newest]

Thread overview: 78+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-11-30 19:58 [PATCH 00/10] Latest numa/core release, v18 Ingo Molnar
2012-11-30 19:58 ` Ingo Molnar
2012-11-30 19:58 ` [PATCH 01/10] sched: Add "task flipping" support Ingo Molnar
2012-11-30 19:58   ` Ingo Molnar
2012-11-30 19:58 ` [PATCH 02/10] sched: Move the NUMA placement logic to a worklet Ingo Molnar
2012-11-30 19:58   ` Ingo Molnar
2012-11-30 19:58 ` [PATCH 03/10] numa, mempolicy: Improve CONFIG_NUMA_BALANCING=y OOM behavior Ingo Molnar
2012-11-30 19:58   ` Ingo Molnar
2012-11-30 19:58 ` [PATCH 04/10] mm, numa: Turn 4K pte NUMA faults into effective hugepage ones Ingo Molnar
2012-11-30 19:58   ` Ingo Molnar
2012-11-30 19:58 ` [PATCH 05/10] sched: Introduce directed NUMA convergence Ingo Molnar
2012-11-30 19:58   ` Ingo Molnar
2012-11-30 19:58 ` [PATCH 06/10] sched: Remove statistical NUMA scheduling Ingo Molnar
2012-11-30 19:58   ` Ingo Molnar
2012-11-30 19:58 ` [PATCH 07/10] sched: Track quality and strength of convergence Ingo Molnar
2012-11-30 19:58   ` Ingo Molnar
2012-11-30 19:58 ` Ingo Molnar [this message]
2012-11-30 19:58   ` [PATCH 08/10] sched: Converge NUMA migrations Ingo Molnar
2012-11-30 19:58 ` [PATCH 09/10] sched: Add convergence strength based adaptive NUMA page fault rate Ingo Molnar
2012-11-30 19:58   ` Ingo Molnar
2012-11-30 19:58 ` [PATCH 10/10] sched: Refine the 'shared tasks' memory interleaving logic Ingo Molnar
2012-11-30 19:58   ` Ingo Molnar
2012-11-30 20:37 ` [PATCH 00/10] Latest numa/core release, v18 Linus Torvalds
2012-11-30 20:37   ` Linus Torvalds
2012-12-01  9:49   ` [RFC PATCH] mm/migration: Don't lock anon vmas in rmap_walk_anon() Ingo Molnar
2012-12-01  9:49     ` Ingo Molnar
2012-12-01 12:26     ` [RFC PATCH] mm/migration: Remove anon vma locking from try_to_unmap() use Ingo Molnar
2012-12-01 12:26       ` Ingo Molnar
2012-12-01 18:38       ` Linus Torvalds
2012-12-01 18:38         ` Linus Torvalds
2012-12-01 18:41         ` Ingo Molnar
2012-12-01 18:41           ` Ingo Molnar
2012-12-01 18:50           ` Linus Torvalds
2012-12-01 18:50             ` Linus Torvalds
2012-12-01 20:10             ` [PATCH 1/2] mm/rmap: Convert the struct anon_vma::mutex to an rwsem Ingo Molnar
2012-12-01 20:10               ` Ingo Molnar
2012-12-01 20:19               ` Rik van Riel
2012-12-01 20:19                 ` Rik van Riel
2012-12-02 15:10                 ` Ingo Molnar
2012-12-02 15:10                   ` Ingo Molnar
2012-12-03 13:59               ` Mel Gorman
2012-12-03 13:59                 ` Mel Gorman
2012-12-01 20:15             ` [PATCH 2/2] mm/migration: Make rmap_walk_anon() and try_to_unmap_anon() more scalable Ingo Molnar
2012-12-01 20:15               ` Ingo Molnar
2012-12-01 20:33               ` Rik van Riel
2012-12-01 20:33                 ` Rik van Riel
2012-12-02 15:12                 ` [PATCH 2/2, v2] " Ingo Molnar
2012-12-02 15:12                   ` Ingo Molnar
2012-12-02 17:53                   ` Rik van Riel
2012-12-02 17:53                     ` Rik van Riel
2012-12-04 14:42                   ` Michel Lespinasse
2012-12-04 14:42                     ` Michel Lespinasse
2012-12-05  2:59                   ` Michel Lespinasse
2012-12-05  2:59                     ` Michel Lespinasse
2012-12-03 14:17               ` [PATCH 2/2] " Mel Gorman
2012-12-03 14:17                 ` Mel Gorman
2012-12-04 14:37                 ` Michel Lespinasse
2012-12-04 14:37                   ` Michel Lespinasse
2012-12-04 18:17                   ` Mel Gorman
2012-12-04 18:17                     ` Mel Gorman
2012-12-01 18:55         ` [RFC PATCH] mm/migration: Remove anon vma locking from try_to_unmap() use Rik van Riel
2012-12-01 18:55           ` Rik van Riel
2012-12-01 16:19     ` [RFC PATCH] mm/migration: Don't lock anon vmas in rmap_walk_anon() Rik van Riel
2012-12-01 16:19       ` Rik van Riel
2012-12-01 17:55     ` Linus Torvalds
2012-12-01 17:55       ` Linus Torvalds
2012-12-01 18:30       ` Ingo Molnar
2012-12-01 18:30         ` Ingo Molnar
2012-12-03 13:41   ` [PATCH 00/10] Latest numa/core release, v18 Mel Gorman
2012-12-03 13:41     ` Mel Gorman
2012-12-04 17:30     ` Thomas Gleixner
2012-12-04 17:30       ` Thomas Gleixner
2012-12-03 10:43 ` Mel Gorman
2012-12-03 10:43   ` Mel Gorman
2012-12-03 11:32 ` Mel Gorman
2012-12-03 11:32   ` Mel Gorman
2012-12-04 22:49 ` Mel Gorman
2012-12-04 22:49   ` Mel Gorman

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:1f6104a dfblob:10cbfa3 dfblob:fd9db0b dfblob:9075faf
dfblob:1f6104a dfblob:10cbfa3 dfblob:fd9db0b dfblob:9075faf )
 OR (
bs:"[PATCH 08/10] sched: Converge NUMA migrations" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1354305521-11583-9-git-send-email-mingo@kernel.org \
    --to=mingo@kernel.org \
    --cc=Lee.Schermerhorn@hp.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=cl@linux.com \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=pjt@google.com \
    --cc=riel@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.