From: riel@redhat.com
To: linux-kernel@vger.kernel.org
Cc: peterz@infradead.org, mgorman@suse.de, chegu_vinod@hp.com,
mingo@kernel.org, efault@gmx.de, vincent.guittot@linaro.org
Subject: [PATCH RFC 3/5] sched,numa: preparations for complex topology placement
Date: Wed, 8 Oct 2014 15:37:28 -0400 [thread overview]
Message-ID: <1412797050-8903-4-git-send-email-riel@redhat.com> (raw)
In-Reply-To: <1412797050-8903-1-git-send-email-riel@redhat.com>
From: Rik van Riel <riel@redhat.com>
Preparatory patch for adding NUMA placement on systems with
complex NUMA topology. Also fix a potential divide by zero
in group_weight()
Signed-off-by: Rik van Riel <riel@redhat.com>
---
include/linux/topology.h | 1 +
kernel/sched/core.c | 2 +-
kernel/sched/fair.c | 57 +++++++++++++++++++++++++++++++-----------------
3 files changed, 39 insertions(+), 21 deletions(-)
diff --git a/include/linux/topology.h b/include/linux/topology.h
index bf40d46..f8dfad9 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -47,6 +47,7 @@
if (nr_cpus_node(node))
int arch_update_cpu_topology(void);
+extern int sched_domains_numa_levels;
extern int node_hops(int i, int j);
enum numa_topology_type {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1898914..2528f97 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6074,7 +6074,7 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
}
#ifdef CONFIG_NUMA
-static int sched_domains_numa_levels;
+int sched_domains_numa_levels;
enum numa_topology_type sched_numa_topology_type;
static int *sched_domains_numa_distance;
static int *sched_domains_numa_hops;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6d44052..8b3f884 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -930,9 +930,10 @@ static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
* larger multiplier, in order to group tasks together that are almost
* evenly spread out between numa nodes.
*/
-static inline unsigned long task_weight(struct task_struct *p, int nid)
+static inline unsigned long task_weight(struct task_struct *p, int nid,
+ int hops)
{
- unsigned long total_faults;
+ unsigned long faults, total_faults;
if (!p->numa_faults_memory)
return 0;
@@ -942,15 +943,25 @@ static inline unsigned long task_weight(struct task_struct *p, int nid)
if (!total_faults)
return 0;
- return 1000 * task_faults(p, nid) / total_faults;
+ faults = task_faults(p, nid);
+ return 1000 * faults / total_faults;
}
-static inline unsigned long group_weight(struct task_struct *p, int nid)
+static inline unsigned long group_weight(struct task_struct *p, int nid,
+ int hops)
{
- if (!p->numa_group || !p->numa_group->total_faults)
+ unsigned long faults, total_faults;
+
+ if (!p->numa_group)
+ return 0;
+
+ total_faults = p->numa_group->total_faults;
+
+ if (!total_faults)
return 0;
- return 1000 * group_faults(p, nid) / p->numa_group->total_faults;
+ faults = group_faults(p, nid);
+ return 1000 * faults / total_faults;
}
bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
@@ -1083,6 +1094,7 @@ struct task_numa_env {
struct numa_stats src_stats, dst_stats;
int imbalance_pct;
+ int hops;
struct task_struct *best_task;
long best_imp;
@@ -1162,6 +1174,7 @@ static void task_numa_compare(struct task_numa_env *env,
long load;
long imp = env->p->numa_group ? groupimp : taskimp;
long moveimp = imp;
+ int hops = env->hops;
rcu_read_lock();
cur = ACCESS_ONCE(dst_rq->curr);
@@ -1185,8 +1198,8 @@ static void task_numa_compare(struct task_numa_env *env,
* in any group then look only at task weights.
*/
if (cur->numa_group == env->p->numa_group) {
- imp = taskimp + task_weight(cur, env->src_nid) -
- task_weight(cur, env->dst_nid);
+ imp = taskimp + task_weight(cur, env->src_nid, hops) -
+ task_weight(cur, env->dst_nid, hops);
/*
* Add some hysteresis to prevent swapping the
* tasks within a group over tiny differences.
@@ -1200,11 +1213,11 @@ static void task_numa_compare(struct task_numa_env *env,
* instead.
*/
if (cur->numa_group)
- imp += group_weight(cur, env->src_nid) -
- group_weight(cur, env->dst_nid);
+ imp += group_weight(cur, env->src_nid, hops) -
+ group_weight(cur, env->dst_nid, hops);
else
- imp += task_weight(cur, env->src_nid) -
- task_weight(cur, env->dst_nid);
+ imp += task_weight(cur, env->src_nid, hops) -
+ task_weight(cur, env->dst_nid, hops);
}
}
@@ -1303,7 +1316,7 @@ static int task_numa_migrate(struct task_struct *p)
};
struct sched_domain *sd;
unsigned long taskweight, groupweight;
- int nid, ret;
+ int nid, ret, hops;
long taskimp, groupimp;
/*
@@ -1331,12 +1344,13 @@ static int task_numa_migrate(struct task_struct *p)
return -EINVAL;
}
- taskweight = task_weight(p, env.src_nid);
- groupweight = group_weight(p, env.src_nid);
- update_numa_stats(&env.src_stats, env.src_nid);
env.dst_nid = p->numa_preferred_nid;
- taskimp = task_weight(p, env.dst_nid) - taskweight;
- groupimp = group_weight(p, env.dst_nid) - groupweight;
+ hops = env.hops = node_hops(env.src_nid, env.dst_nid);
+ taskweight = task_weight(p, env.src_nid, hops);
+ groupweight = group_weight(p, env.src_nid, hops);
+ update_numa_stats(&env.src_stats, env.src_nid);
+ taskimp = task_weight(p, env.dst_nid, hops) - taskweight;
+ groupimp = group_weight(p, env.dst_nid, hops) - groupweight;
update_numa_stats(&env.dst_stats, env.dst_nid);
/* Try to find a spot on the preferred nid. */
@@ -1348,12 +1362,15 @@ static int task_numa_migrate(struct task_struct *p)
if (nid == env.src_nid || nid == p->numa_preferred_nid)
continue;
+ hops = node_hops(env.src_nid, env.dst_nid);
+
/* Only consider nodes where both task and groups benefit */
- taskimp = task_weight(p, nid) - taskweight;
- groupimp = group_weight(p, nid) - groupweight;
+ taskimp = task_weight(p, nid, hops) - taskweight;
+ groupimp = group_weight(p, nid, hops) - groupweight;
if (taskimp < 0 && groupimp < 0)
continue;
+ env.hops = hops;
env.dst_nid = nid;
update_numa_stats(&env.dst_stats, env.dst_nid);
task_numa_find_cpu(&env, taskimp, groupimp);
--
1.9.3
next prev parent reply other threads:[~2014-10-08 19:39 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-10-08 19:37 [PATCH RFC 0/5] sched,numa: task placement with complex NUMA topologies riel
2014-10-08 19:37 ` [PATCH RFC 1/5] sched,numa: build table of node hop distance riel
2014-10-12 13:17 ` Peter Zijlstra
2014-10-12 13:28 ` Rik van Riel
2014-10-14 6:47 ` Peter Zijlstra
2014-10-14 7:49 ` Rik van Riel
2014-10-08 19:37 ` [PATCH RFC 2/5] sched,numa: classify the NUMA topology of a system riel
2014-10-12 14:30 ` Peter Zijlstra
2014-10-13 7:12 ` Rik van Riel
2014-10-08 19:37 ` riel [this message]
2014-10-12 14:37 ` [PATCH RFC 3/5] sched,numa: preparations for complex topology placement Peter Zijlstra
2014-10-13 7:12 ` Rik van Riel
2014-10-08 19:37 ` [PATCH RFC 4/5] sched,numa: calculate node scores in complex NUMA topologies riel
2014-10-12 14:53 ` Peter Zijlstra
2014-10-13 7:15 ` Rik van Riel
2014-10-08 19:37 ` [PATCH RFC 5/5] sched,numa: find the preferred nid with complex NUMA topology riel
2014-10-12 14:56 ` Peter Zijlstra
2014-10-13 7:17 ` Rik van Riel
[not found] ` <4168C988EBDF2141B4E0B6475B6A73D126F58E4F@G6W2504.americas.hpqcorp.net>
[not found] ` <54367446.3020603@redhat.com>
2014-10-10 18:44 ` [PATCH RFC 0/5] sched,numa: task placement with complex NUMA topologies Vinod, Chegu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1412797050-8903-4-git-send-email-riel@redhat.com \
--to=riel@redhat.com \
--cc=chegu_vinod@hp.com \
--cc=efault@gmx.de \
--cc=linux-kernel@vger.kernel.org \
--cc=mgorman@suse.de \
--cc=mingo@kernel.org \
--cc=peterz@infradead.org \
--cc=vincent.guittot@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).