From: Ingo Molnar <mingo@kernel.org>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Paul Turner <pjt@google.com>,
	Lee Schermerhorn <Lee.Schermerhorn@hp.com>,
	Christoph Lameter <cl@linux.com>, Rik van Riel <riel@redhat.com>,
	Mel Gorman <mgorman@suse.de>,
	Andrew Morton <akpm@linux-foundation.org>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Hugh Dickins <hughd@google.com>
Subject: [PATCH 9/9] numa, sched: Streamline and fix numa_allow_migration() use
Date: Fri,  7 Dec 2012 01:19:26 +0100	[thread overview]
Message-ID: <1354839566-15697-10-git-send-email-mingo@kernel.org> (raw)
In-Reply-To: <1354839566-15697-1-git-send-email-mingo@kernel.org>

There were a few inconsistencies in how numa_allow_migration() was
used; in particular, it did not always take into account
high-imbalance scenarios, where affinity preferences are generally
overridden.

To fix this, make the use of numa_allow_migration() more consistent
and also pass the load-balancing environment to the function, where
it can look at env->failed and env->sd->cache_nice_tries.

Also add a NUMA check to active load balancing (ALB).
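
As an illustration of the intended policy, below is a minimal,
standalone C sketch (not kernel code) of the NUMA_CONVERGE_MIGRATIONS
branch as changed by this patch; struct lb_env_sketch, struct
task_sketch and allow_migration() are hypothetical stand-ins for
lb_env, task_struct/task_numa_shared() and numa_allow_migration():

/*
 * Standalone sketch of the decision this patch makes consistent:
 * NUMA affinity only vetoes a migration while the balancer has not
 * yet failed more than cache_nice_tries times.
 */
#include <stdbool.h>
#include <stdio.h>

struct lb_env_sketch {
	unsigned int failed;		/* consecutive failed balance attempts */
	unsigned int cache_nice_tries;	/* stand-in for env->sd->cache_nice_tries */
};

struct task_sketch {
	int shared;			/* >= 0: task touches NUMA-shared memory */
	int convergence_node;		/* node the task converges to, -1 if none */
};

/* Simplified analogue of numa_allow_migration(p, prev_cpu, new_cpu, env): */
static bool allow_migration(const struct task_sketch *p,
			    int prev_node, int new_node,
			    const struct lb_env_sketch *env)
{
	/* Never migrate against an established convergence target: */
	if (p->convergence_node >= 0 && new_node != p->convergence_node)
		return false;

	/*
	 * While the imbalance is mild (failed <= cache_nice_tries), keep
	 * NUMA-shared tasks on their node.  Once balancing has failed
	 * often enough, the affinity preference is overridden.  A NULL
	 * env means "no balancing context", e.g. wakeup placement.
	 */
	if (!env || env->failed <= env->cache_nice_tries) {
		if (p->shared >= 0 && prev_node != new_node)
			return false;
	}

	return true;
}

int main(void)
{
	struct task_sketch p      = { .shared = 1, .convergence_node = -1 };
	struct lb_env_sketch mild = { .failed = 0, .cache_nice_tries = 2 };
	struct lb_env_sketch hard = { .failed = 5, .cache_nice_tries = 2 };

	/* Cross-node move of a shared task: vetoed while mildly imbalanced: */
	printf("mild imbalance: %d\n", allow_migration(&p, 0, 1, &mild));	/* 0 */
	/* ... but allowed once env->failed exceeds cache_nice_tries: */
	printf("high imbalance: %d\n", allow_migration(&p, 0, 1, &hard));	/* 1 */
	return 0;
}

With failed > cache_nice_tries the affinity veto is dropped, which is
the "high-imbalance overrides affinity" behaviour described above; the
same helper, now env-aware, also backs the new ALB check.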

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c | 103 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 57 insertions(+), 46 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c393fba..503ec29 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4792,6 +4792,39 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 
 #endif
 
+#define LBF_ALL_PINNED	0x01
+#define LBF_NEED_BREAK	0x02
+#define LBF_SOME_PINNED	0x04
+
+struct lb_env {
+	struct sched_domain	*sd;
+
+	struct rq		*src_rq;
+	int			src_cpu;
+
+	int			dst_cpu;
+	struct rq		*dst_rq;
+
+	struct cpumask		*dst_grpmask;
+	int			new_dst_cpu;
+	enum cpu_idle_type	idle;
+	long			imbalance;
+	/* The set of CPUs under consideration for load-balancing */
+	struct cpumask		*cpus;
+
+	unsigned int		flags;
+	unsigned int		failed;
+	unsigned int		iteration;
+
+	unsigned int		loop;
+	unsigned int		loop_break;
+	unsigned int		loop_max;
+
+	struct rq *		(*find_busiest_queue)(struct lb_env *,
+						      struct sched_group *);
+};
+
+
 static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 {
 	s64 this_load, load;
@@ -5011,30 +5044,35 @@ done:
 	return target;
 }
 
-static bool numa_allow_migration(struct task_struct *p, int prev_cpu, int new_cpu)
+static bool numa_allow_migration(struct task_struct *p, int prev_cpu, int new_cpu,
+				 struct lb_env *env)
 {
 #ifdef CONFIG_NUMA_BALANCING
+
 	if (sched_feat(NUMA_CONVERGE_MIGRATIONS)) {
 		/* Help in the direction of expected convergence: */
 		if (p->convergence_node >= 0 && (cpu_to_node(new_cpu) != p->convergence_node))
 			return false;
 
-		return true;
-	}
-
-	if (sched_feat(NUMA_BALANCE_ALL)) {
- 		if (task_numa_shared(p) >= 0)
-			return false;
-
-		return true;
+		if (!env || env->failed <= env->sd->cache_nice_tries) {
+			if (task_numa_shared(p) >= 0 &&
+					cpu_to_node(prev_cpu) != cpu_to_node(new_cpu))
+				return false;
+		}
 	}
 
 	if (sched_feat(NUMA_BALANCE_INTERNODE)) {
 		if (task_numa_shared(p) >= 0) {
- 			if (cpu_to_node(prev_cpu) != cpu_to_node(new_cpu))
+			if (cpu_to_node(prev_cpu) != cpu_to_node(new_cpu))
 				return false;
 		}
 	}
+
+	if (sched_feat(NUMA_BALANCE_ALL)) {
+		if (task_numa_shared(p) >= 0)
+			return false;
+	}
+
 #endif
 	return true;
 }
@@ -5148,7 +5186,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 		/* while loop will break here if sd == NULL */
 	}
 unlock:
-	if (!numa_allow_migration(p, prev0_cpu, new_cpu)) {
+	if (!numa_allow_migration(p, prev0_cpu, new_cpu, NULL)) {
 		if (cpumask_test_cpu(prev0_cpu, tsk_cpus_allowed(p)))
 			new_cpu = prev0_cpu;
 	}
@@ -5567,38 +5605,6 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
 
 static unsigned long __read_mostly max_load_balance_interval = HZ/10;
 
-#define LBF_ALL_PINNED	0x01
-#define LBF_NEED_BREAK	0x02
-#define LBF_SOME_PINNED	0x04
-
-struct lb_env {
-	struct sched_domain	*sd;
-
-	struct rq		*src_rq;
-	int			src_cpu;
-
-	int			dst_cpu;
-	struct rq		*dst_rq;
-
-	struct cpumask		*dst_grpmask;
-	int			new_dst_cpu;
-	enum cpu_idle_type	idle;
-	long			imbalance;
-	/* The set of CPUs under consideration for load-balancing */
-	struct cpumask		*cpus;
-
-	unsigned int		flags;
-	unsigned int		failed;
-	unsigned int		iteration;
-
-	unsigned int		loop;
-	unsigned int		loop_break;
-	unsigned int		loop_max;
-
-	struct rq *		(*find_busiest_queue)(struct lb_env *,
-						      struct sched_group *);
-};
-
 /*
  * move_task - move a task from one runqueue to another runqueue.
  * Both runqueues must be locked.
@@ -5699,7 +5705,7 @@ static int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	/* We do NUMA balancing elsewhere: */
 
 	if (env->failed <= env->sd->cache_nice_tries) {
-		if (!numa_allow_migration(p, env->src_rq->cpu, env->dst_cpu))
+		if (!numa_allow_migration(p, env->src_rq->cpu, env->dst_cpu, env))
 			return false;
 	}
 
@@ -5760,7 +5766,7 @@ static int move_one_task(struct lb_env *env)
 		if (!can_migrate_task(p, env))
 			continue;
 
-		if (!numa_allow_migration(p, env->src_rq->cpu, env->dst_cpu))
+		if (!numa_allow_migration(p, env->src_rq->cpu, env->dst_cpu, env))
 			continue;
 
 		move_task(p, env);
@@ -5823,7 +5829,7 @@ static int move_tasks(struct lb_env *env)
 		if (!can_migrate_task(p, env))
 			goto next;
 
-		if (!numa_allow_migration(p, env->src_rq->cpu, env->dst_cpu))
+		if (!numa_allow_migration(p, env->src_rq->cpu, env->dst_cpu, env))
 			goto next;
 
 		move_task(p, env);
@@ -6944,6 +6950,11 @@ more_balance:
 			goto out_pinned;
 		}
 
+		/* Is this active load-balancing NUMA-beneficial? */
+		if (!numa_allow_migration(busiest->curr, env.src_rq->cpu, env.dst_cpu, &env)) {
+			raw_spin_unlock_irqrestore(&busiest->lock, flags);
+			goto out;
+		}
 		/*
 		 * ->active_balance synchronizes accesses to
 		 * ->active_balance_work.  Once set, it's cleared
-- 
1.7.11.7


