From: Tang Chen <tangchen@cn.fujitsu.com>
To: tj@kernel.org, mingo@redhat.com, akpm@linux-foundation.org,
rjw@rjwysocki.net, hpa@zytor.com, laijs@cn.fujitsu.com,
yasu.isimatu@gmail.com, isimatu.yasuaki@jp.fujitsu.com,
kamezawa.hiroyu@jp.fujitsu.com, izumi.taku@jp.fujitsu.com,
gongzhaogang@inspur.com
Cc: tangchen@cn.fujitsu.com, x86@kernel.org,
linux-acpi@vger.kernel.org, Gu Zheng <guz.fnst@cn.fujitsu.com>
Subject: [PATCH 1/5] x86, gfp: Cache best near node for memory allocation.
Date: Wed, 1 Jul 2015 12:45:55 +0800
Message-ID: <1435725959-18689-2-git-send-email-tangchen@cn.fujitsu.com>
In-Reply-To: <1435725959-18689-1-git-send-email-tangchen@cn.fujitsu.com>
From: Gu Zheng <guz.fnst@cn.fujitsu.com>
In the current code, init_cpu_to_node() maps every possible cpu to the best
near online node if the node it resides on is offline:
init_cpu_to_node()
{
        ......
        for_each_possible_cpu(cpu) {
                ......
                if (!node_online(node))
                        node = find_near_online_node(node);
                numa_set_node(cpu, node);
        }
}
This is done to prevent memory allocation failures when a cpu is online but
the node it resides on has no memory. But since the cpuid <-> nodeid mapping
will be fixed (made persistent) by this patch-set, doing the remapping in the
initialization phase no longer makes sense. Instead, the best near online
node for each cpu should be cached somewhere.
In this patch, a per-cpu cache named x86_cpu_to_near_online_node is
introduced to store this info, and alloc_pages_node() and
alloc_pages_exact_node() make use of it to fall back to the near online node
when the requested node is offline.
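For illustration only (not part of the patch): with this change in place, a
hypothetical caller like the one below does not need to check node_online()
itself; the fallback to the cached near online node happens inside
alloc_pages_node():

        /* Hypothetical caller; "cpu" is any cpu whose node may be offline. */
        int nid = cpu_to_node(cpu);
        struct page *page = alloc_pages_node(nid, GFP_KERNEL, 0);
        /* If nid is offline, the pages come from its cached near online node. */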
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
---
arch/x86/include/asm/topology.h | 2 ++
arch/x86/mm/numa.c | 57 ++++++++++++++++++++++++++---------------
include/linux/gfp.h | 12 ++++++++-
3 files changed, 50 insertions(+), 21 deletions(-)
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 0fb4648..e3e22b2 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -82,6 +82,8 @@ static inline const struct cpumask *cpumask_of_node(int node)
 }
 #endif
 
+extern int get_near_online_node(int node);
+
 extern void setup_node_to_cpumask_map(void);
 
 /*
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 4053bb5..13bd0d7 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -69,6 +69,7 @@ int numa_cpu_node(int cpu)
         return NUMA_NO_NODE;
 }
 
+cpumask_t node_to_cpuid_mask_map[MAX_NUMNODES];
 cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 EXPORT_SYMBOL(node_to_cpumask_map);
 
@@ -78,6 +79,31 @@ EXPORT_SYMBOL(node_to_cpumask_map);
 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
 
+/*
+ * Map cpu index to the best near online node. The best near online node
+ * is the backup node for memory allocation on offline node.
+ */
+DEFINE_PER_CPU(int, x86_cpu_to_near_online_node);
+EXPORT_PER_CPU_SYMBOL(x86_cpu_to_near_online_node);
+
+static int find_near_online_node(int node)
+{
+        int n, val;
+        int min_val = INT_MAX;
+        int best_node = -1;
+
+        for_each_online_node(n) {
+                val = node_distance(node, n);
+
+                if (val < min_val) {
+                        min_val = val;
+                        best_node = n;
+                }
+        }
+
+        return best_node;
+}
+
 void numa_set_node(int cpu, int node)
 {
         int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
@@ -95,7 +121,11 @@ void numa_set_node(int cpu, int node)
                 return;
         }
 #endif
+
+        per_cpu(x86_cpu_to_near_online_node, cpu) =
+                find_near_online_node(numa_cpu_node(cpu));
         per_cpu(x86_cpu_to_node_map, cpu) = node;
+        cpumask_set_cpu(cpu, &node_to_cpuid_mask_map[numa_cpu_node(cpu)]);
 
         set_cpu_numa_node(cpu, node);
 }
@@ -105,6 +135,13 @@ void numa_clear_node(int cpu)
         numa_set_node(cpu, NUMA_NO_NODE);
 }
 
+int get_near_online_node(int node)
+{
+        return per_cpu(x86_cpu_to_near_online_node,
+                       cpumask_first(&node_to_cpuid_mask_map[node]));
+}
+EXPORT_SYMBOL(get_near_online_node);
+
 /*
  * Allocate node_to_cpumask_map based on number of available nodes
  * Requires node_possible_map to be valid.
@@ -702,24 +739,6 @@ void __init x86_numa_init(void)
         numa_init(dummy_numa_init);
 }
 
-static __init int find_near_online_node(int node)
-{
-        int n, val;
-        int min_val = INT_MAX;
-        int best_node = -1;
-
-        for_each_online_node(n) {
-                val = node_distance(node, n);
-
-                if (val < min_val) {
-                        min_val = val;
-                        best_node = n;
-                }
-        }
-
-        return best_node;
-}
-
 /*
  * Setup early cpu_to_node.
  *
@@ -746,8 +765,6 @@ void __init init_cpu_to_node(void)
 
                 if (node == NUMA_NO_NODE)
                         continue;
-                if (!node_online(node))
-                        node = find_near_online_node(node);
                 numa_set_node(cpu, node);
         }
 }
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 6ba7cf2..4a18b21 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -307,13 +307,23 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
         if (nid < 0)
                 nid = numa_node_id();
 
+#if IS_ENABLED(CONFIG_X86) && IS_ENABLED(CONFIG_NUMA)
+        if (!node_online(nid))
+                nid = get_near_online_node(nid);
+#endif
+
         return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
 }
 
 static inline struct page *alloc_pages_exact_node(int nid, gfp_t gfp_mask,
                                                 unsigned int order)
 {
-        VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES || !node_online(nid));
+        VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
+
+#if IS_ENABLED(CONFIG_X86) && IS_ENABLED(CONFIG_NUMA)
+        if (!node_online(nid))
+                nid = get_near_online_node(nid);
+#endif
 
         return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
 }
--
1.9.3