linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: riel@redhat.com
To: linux-kernel@vger.kernel.org
Cc: peterz@infradead.org, mgorman@suse.de, chegu_vinod@hp.com,
	mingo@kernel.org, efault@gmx.de, vincent.guittot@linaro.org
Subject: [PATCH RFC 3/5] sched,numa: preparations for complex topology placement
Date: Wed,  8 Oct 2014 15:37:28 -0400	[thread overview]
Message-ID: <1412797050-8903-4-git-send-email-riel@redhat.com> (raw)
In-Reply-To: <1412797050-8903-1-git-send-email-riel@redhat.com>

From: Rik van Riel <riel@redhat.com>

Preparatory patch for adding NUMA placement on systems with
complex NUMA topology. Also fix a potential divide by zero
in group_weight().

Signed-off-by: Rik van Riel <riel@redhat.com>
---
 include/linux/topology.h |  1 +
 kernel/sched/core.c      |  2 +-
 kernel/sched/fair.c      | 57 +++++++++++++++++++++++++++++++-----------------
 3 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/include/linux/topology.h b/include/linux/topology.h
index bf40d46..f8dfad9 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -47,6 +47,7 @@
 		if (nr_cpus_node(node))
 
 int arch_update_cpu_topology(void);
+extern int sched_domains_numa_levels;
 extern int node_hops(int i, int j);
 
 enum numa_topology_type {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1898914..2528f97 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6074,7 +6074,7 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
 }
 
 #ifdef CONFIG_NUMA
-static int sched_domains_numa_levels;
+int sched_domains_numa_levels;
 enum numa_topology_type sched_numa_topology_type;
 static int *sched_domains_numa_distance;
 static int *sched_domains_numa_hops;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6d44052..8b3f884 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -930,9 +930,10 @@ static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
  * larger multiplier, in order to group tasks together that are almost
  * evenly spread out between numa nodes.
  */
-static inline unsigned long task_weight(struct task_struct *p, int nid)
+static inline unsigned long task_weight(struct task_struct *p, int nid,
+					int hops)
 {
-	unsigned long total_faults;
+	unsigned long faults, total_faults;
 
 	if (!p->numa_faults_memory)
 		return 0;
@@ -942,15 +943,25 @@ static inline unsigned long task_weight(struct task_struct *p, int nid)
 	if (!total_faults)
 		return 0;
 
-	return 1000 * task_faults(p, nid) / total_faults;
+	faults = task_faults(p, nid);
+	return 1000 * faults / total_faults;
 }
 
-static inline unsigned long group_weight(struct task_struct *p, int nid)
+static inline unsigned long group_weight(struct task_struct *p, int nid,
+					 int hops)
 {
-	if (!p->numa_group || !p->numa_group->total_faults)
+	unsigned long faults, total_faults;
+
+	if (!p->numa_group)
+		return 0;
+
+	total_faults = p->numa_group->total_faults;
+
+	if (!total_faults)
 		return 0;
 
-	return 1000 * group_faults(p, nid) / p->numa_group->total_faults;
+	faults = group_faults(p, nid);
+	return 1000 * faults / total_faults;
 }
 
 bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
@@ -1083,6 +1094,7 @@ struct task_numa_env {
 	struct numa_stats src_stats, dst_stats;
 
 	int imbalance_pct;
+	int hops;
 
 	struct task_struct *best_task;
 	long best_imp;
@@ -1162,6 +1174,7 @@ static void task_numa_compare(struct task_numa_env *env,
 	long load;
 	long imp = env->p->numa_group ? groupimp : taskimp;
 	long moveimp = imp;
+	int hops = env->hops;
 
 	rcu_read_lock();
 	cur = ACCESS_ONCE(dst_rq->curr);
@@ -1185,8 +1198,8 @@ static void task_numa_compare(struct task_numa_env *env,
 		 * in any group then look only at task weights.
 		 */
 		if (cur->numa_group == env->p->numa_group) {
-			imp = taskimp + task_weight(cur, env->src_nid) -
-			      task_weight(cur, env->dst_nid);
+			imp = taskimp + task_weight(cur, env->src_nid, hops) -
+			      task_weight(cur, env->dst_nid, hops);
 			/*
 			 * Add some hysteresis to prevent swapping the
 			 * tasks within a group over tiny differences.
@@ -1200,11 +1213,11 @@ static void task_numa_compare(struct task_numa_env *env,
 			 * instead.
 			 */
 			if (cur->numa_group)
-				imp += group_weight(cur, env->src_nid) -
-				       group_weight(cur, env->dst_nid);
+				imp += group_weight(cur, env->src_nid, hops) -
+				       group_weight(cur, env->dst_nid, hops);
 			else
-				imp += task_weight(cur, env->src_nid) -
-				       task_weight(cur, env->dst_nid);
+				imp += task_weight(cur, env->src_nid, hops) -
+				       task_weight(cur, env->dst_nid, hops);
 		}
 	}
 
@@ -1303,7 +1316,7 @@ static int task_numa_migrate(struct task_struct *p)
 	};
 	struct sched_domain *sd;
 	unsigned long taskweight, groupweight;
-	int nid, ret;
+	int nid, ret, hops;
 	long taskimp, groupimp;
 
 	/*
@@ -1331,12 +1344,13 @@ static int task_numa_migrate(struct task_struct *p)
 		return -EINVAL;
 	}
 
-	taskweight = task_weight(p, env.src_nid);
-	groupweight = group_weight(p, env.src_nid);
-	update_numa_stats(&env.src_stats, env.src_nid);
 	env.dst_nid = p->numa_preferred_nid;
-	taskimp = task_weight(p, env.dst_nid) - taskweight;
-	groupimp = group_weight(p, env.dst_nid) - groupweight;
+	hops = env.hops = node_hops(env.src_nid, env.dst_nid);
+	taskweight = task_weight(p, env.src_nid, hops);
+	groupweight = group_weight(p, env.src_nid, hops);
+	update_numa_stats(&env.src_stats, env.src_nid);
+	taskimp = task_weight(p, env.dst_nid, hops) - taskweight;
+	groupimp = group_weight(p, env.dst_nid, hops) - groupweight;
 	update_numa_stats(&env.dst_stats, env.dst_nid);
 
 	/* Try to find a spot on the preferred nid. */
@@ -1348,12 +1362,15 @@ static int task_numa_migrate(struct task_struct *p)
 			if (nid == env.src_nid || nid == p->numa_preferred_nid)
 				continue;
 
+			hops = node_hops(env.src_nid, env.dst_nid);
+
 			/* Only consider nodes where both task and groups benefit */
-			taskimp = task_weight(p, nid) - taskweight;
-			groupimp = group_weight(p, nid) - groupweight;
+			taskimp = task_weight(p, nid, hops) - taskweight;
+			groupimp = group_weight(p, nid, hops) - groupweight;
 			if (taskimp < 0 && groupimp < 0)
 				continue;
 
+			env.hops = hops;
 			env.dst_nid = nid;
 			update_numa_stats(&env.dst_stats, env.dst_nid);
 			task_numa_find_cpu(&env, taskimp, groupimp);
-- 
1.9.3


  parent reply	other threads:[~2014-10-08 19:39 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-10-08 19:37 [PATCH RFC 0/5] sched,numa: task placement with complex NUMA topologies riel
2014-10-08 19:37 ` [PATCH RFC 1/5] sched,numa: build table of node hop distance riel
2014-10-12 13:17   ` Peter Zijlstra
2014-10-12 13:28     ` Rik van Riel
2014-10-14  6:47       ` Peter Zijlstra
2014-10-14  7:49         ` Rik van Riel
2014-10-08 19:37 ` [PATCH RFC 2/5] sched,numa: classify the NUMA topology of a system riel
2014-10-12 14:30   ` Peter Zijlstra
2014-10-13  7:12     ` Rik van Riel
2014-10-08 19:37 ` riel [this message]
2014-10-12 14:37   ` [PATCH RFC 3/5] sched,numa: preparations for complex topology placement Peter Zijlstra
2014-10-13  7:12     ` Rik van Riel
2014-10-08 19:37 ` [PATCH RFC 4/5] sched,numa: calculate node scores in complex NUMA topologies riel
2014-10-12 14:53   ` Peter Zijlstra
2014-10-13  7:15     ` Rik van Riel
2014-10-08 19:37 ` [PATCH RFC 5/5] sched,numa: find the preferred nid with complex NUMA topology riel
2014-10-12 14:56   ` Peter Zijlstra
2014-10-13  7:17     ` Rik van Riel
     [not found] ` <4168C988EBDF2141B4E0B6475B6A73D126F58E4F@G6W2504.americas.hpqcorp.net>
     [not found]   ` <54367446.3020603@redhat.com>
2014-10-10 18:44     ` [PATCH RFC 0/5] sched,numa: task placement with complex NUMA topologies Vinod, Chegu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1412797050-8903-4-git-send-email-riel@redhat.com \
    --to=riel@redhat.com \
    --cc=chegu_vinod@hp.com \
    --cc=efault@gmx.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).