[PATCH 16/16] x86: Unify NUMA initialization between 32 and 64bit

linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Tejun Heo <tj@kernel.org>
To: linux-kernel@vger.kernel.org, mingo@redhat.com,
	tglx@linutronix.de, hpa@zytor.com, x86@kernel.org,
	eric.dumazet@gmail.com, yinghai@kernel.org, brgerst@gmail.com,
	gorcunov@gmail.com, penberg@kernel.org, shaohui.zheng@intel.com
Cc: Tejun Heo <tj@kernel.org>
Subject: [PATCH 16/16] x86: Unify NUMA initialization between 32 and 64bit
Date: Tue, 28 Dec 2010 12:48:49 +0100	[thread overview]
Message-ID: <1293536929-31683-17-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1293536929-31683-1-git-send-email-tj@kernel.org>

Now that everything else is unified, NUMA initialization can be
unified too.

* numa_init_array() and init_cpu_to_node() are moved from numa_64 to
  numa.

* numa_32::initmem_init() is updated to call numa_init_array() and
  setup_arch() to call init_cpu_to_node() on 32bit too.

* x86_cpu_to_node_map is now initialized to NUMA_NO_NODE on 32bit too.
  This is safe now as numa_init_array() will initialize it early
  during boot.

This makes NUMA mapping fully initialized before setup_per_cpu_areas()
on 32bit too and thus makes the first percpu chunk which contains all
the static variables and some of dynamic area allocated with NUMA
affinity correctly considered.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
---
 arch/x86/include/asm/numa.h    |    4 ++
 arch/x86/include/asm/numa_64.h |    3 --
 arch/x86/kernel/setup.c        |    2 -
 arch/x86/mm/numa.c             |   76 +++++++++++++++++++++++++++++++++++++--
 arch/x86/mm/numa_32.c          |    1 +
 arch/x86/mm/numa_64.c          |   75 ---------------------------------------
 6 files changed, 77 insertions(+), 84 deletions(-)

diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 46f3e0d..927e12f 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -34,6 +34,8 @@ static inline void set_apicid_to_node(int apicid, s16 node)
 #ifdef CONFIG_NUMA
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
+extern void __init numa_init_array(void);
+extern void __init init_cpu_to_node(void);
 
 # ifdef CONFIG_DEBUG_PER_CPU_MAPS
 extern void __cpuinit numa_add_cpu(int cpu);
@@ -52,6 +54,8 @@ static inline void __cpuinit numa_remove_cpu(int cpu)
 #else	/* CONFIG_NUMA */
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
+static inline void numa_init_array(void)		{ }
+static inline void init_cpu_to_node(void)		{ }
 static inline void numa_add_cpu(int cpu)		{ }
 static inline void numa_remove_cpu(int cpu)		{ }
 #endif	/* CONFIG_NUMA */
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 6331190..db3baa2 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -13,7 +13,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 
 #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
 
-extern void numa_init_array(void);
 extern int numa_off;
 
 extern unsigned long numa_free_all_bootmem(void);
@@ -28,7 +27,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
  */
 #define NODE_MIN_SIZE (4*1024*1024)
 
-extern void __init init_cpu_to_node(void);
 extern int __cpuinit numa_cpu_node(int cpu);
 
 #ifdef CONFIG_NUMA_EMU
@@ -36,7 +34,6 @@ extern int __cpuinit numa_cpu_node(int cpu);
 #define FAKE_NODE_MIN_HASH_MASK	(~(FAKE_NODE_MIN_SIZE - 1UL))
 #endif /* CONFIG_NUMA_EMU */
 #else
-static inline void init_cpu_to_node(void)		{ }
 static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
 #endif
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0afb8c7..9d7cd90 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1030,9 +1030,7 @@ void __init setup_arch(char **cmdline_p)
 
 	prefill_possible_map();
 
-#ifdef CONFIG_X86_64
 	init_cpu_to_node();
-#endif
 
 	init_apic_mappings();
 	ioapic_and_gsi_init();
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index c52c267..dd93a41 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -16,11 +16,7 @@ EXPORT_SYMBOL(node_to_cpumask_map);
 /*
  * Map cpu index to node index
  */
-#ifdef CONFIG_X86_32
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, 0);
-#else
 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
-#endif
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
 
 void __cpuinit numa_set_node(int cpu, int node)
@@ -77,6 +73,78 @@ void __init setup_node_to_cpumask_map(void)
 	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
 }
 
+/*
+ * There are unfortunately some poorly designed mainboards around that
+ * only connect memory to a single CPU. This breaks the 1:1 cpu->node
+ * mapping. To avoid this fill in the mapping for all possible CPUs,
+ * as the number of CPUs is not known yet. We round robin the existing
+ * nodes.
+ */
+void __init numa_init_array(void)
+{
+	int rr, i;
+
+	rr = first_node(node_online_map);
+	for (i = 0; i < nr_cpu_ids; i++) {
+		if (early_cpu_to_node(i) != NUMA_NO_NODE)
+			continue;
+		numa_set_node(i, rr);
+		rr = next_node(rr, node_online_map);
+		if (rr == MAX_NUMNODES)
+			rr = first_node(node_online_map);
+	}
+}
+
+static __init int find_near_online_node(int node)
+{
+	int n, val;
+	int min_val = INT_MAX;
+	int best_node = -1;
+
+	for_each_online_node(n) {
+		val = node_distance(node, n);
+
+		if (val < min_val) {
+			min_val = val;
+			best_node = n;
+		}
+	}
+
+	return best_node;
+}
+
+/*
+ * Setup early cpu_to_node.
+ *
+ * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
+ * and apicid_to_node[] tables have valid entries for a CPU.
+ * This means we skip cpu_to_node[] initialisation for NUMA
+ * emulation and faking node case (when running a kernel compiled
+ * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
+ * is already initialized in a round robin manner at numa_init_array,
+ * prior to this call, and this initialization is good enough
+ * for the fake NUMA cases.
+ *
+ * Called before the per_cpu areas are setup.
+ */
+void __init init_cpu_to_node(void)
+{
+	int cpu;
+	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+
+	BUG_ON(cpu_to_apicid == NULL);
+
+	for_each_possible_cpu(cpu) {
+		int node = numa_cpu_node(cpu);
+
+		if (node == NUMA_NO_NODE)
+			continue;
+		if (!node_online(node))
+			node = find_near_online_node(node);
+		numa_set_node(cpu, node);
+	}
+}
+
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 
 int __cpu_to_node(int cpu)
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 8d91d22..505bb04 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -367,6 +367,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
 	 */
 
 	get_memcfg_numa();
+	numa_init_array();
 
 	kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
 
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 6013aca..b38700a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -225,28 +225,6 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 	node_set_online(nodeid);
 }
 
-/*
- * There are unfortunately some poorly designed mainboards around that
- * only connect memory to a single CPU. This breaks the 1:1 cpu->node
- * mapping. To avoid this fill in the mapping for all possible CPUs,
- * as the number of CPUs is not known yet. We round robin the existing
- * nodes.
- */
-void __init numa_init_array(void)
-{
-	int rr, i;
-
-	rr = first_node(node_online_map);
-	for (i = 0; i < nr_cpu_ids; i++) {
-		if (early_cpu_to_node(i) != NUMA_NO_NODE)
-			continue;
-		numa_set_node(i, rr);
-		rr = next_node(rr, node_online_map);
-		if (rr == MAX_NUMNODES)
-			rr = first_node(node_online_map);
-	}
-}
-
 #ifdef CONFIG_NUMA_EMU
 /* Numa emulation */
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
@@ -669,59 +647,6 @@ static __init int numa_setup(char *opt)
 }
 early_param("numa", numa_setup);
 
-#ifdef CONFIG_NUMA
-
-static __init int find_near_online_node(int node)
-{
-	int n, val;
-	int min_val = INT_MAX;
-	int best_node = -1;
-
-	for_each_online_node(n) {
-		val = node_distance(node, n);
-
-		if (val < min_val) {
-			min_val = val;
-			best_node = n;
-		}
-	}
-
-	return best_node;
-}
-
-/*
- * Setup early cpu_to_node.
- *
- * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
- * and apicid_to_node[] tables have valid entries for a CPU.
- * This means we skip cpu_to_node[] initialisation for NUMA
- * emulation and faking node case (when running a kernel compiled
- * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
- * is already initialized in a round robin manner at numa_init_array,
- * prior to this call, and this initialization is good enough
- * for the fake NUMA cases.
- *
- * Called before the per_cpu areas are setup.
- */
-void __init init_cpu_to_node(void)
-{
-	int cpu;
-	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-
-	BUG_ON(cpu_to_apicid == NULL);
-
-	for_each_possible_cpu(cpu) {
-		int node = numa_cpu_node(cpu);
-
-		if (node == NUMA_NO_NODE)
-			continue;
-		if (!node_online(node))
-			node = find_near_online_node(node);
-		numa_set_node(cpu, node);
-	}
-}
-#endif
-
 int __cpuinit numa_cpu_node(int cpu)
 {
 	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-- 
1.7.1

next prev parent reply	other threads:[~2010-12-28 11:49 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
2010-12-28 11:48 ` [PATCH 01/16] x86: Kill unused static boot_cpu_logical_apicid in smpboot.c Tejun Heo
2010-12-28 11:48 ` [PATCH 02/16] x86: Rename x86_32 MAX_APICID to MAX_LOCAL_APIC Tejun Heo
2010-12-28 11:48 ` [PATCH 03/16] x86: Make default_send_IPI_mask_sequence/allbutself_logical() 32bit only Tejun Heo
2010-12-28 11:48 ` [PATCH 04/16] x86: Replace cpu_2_logical_apicid[] with early percpu variable Tejun Heo
2010-12-28 11:48 ` [PATCH 05/16] x86: Always use x86_cpu_to_logical_apicid for cpu -> logical apic id Tejun Heo
2010-12-28 11:48 ` [PATCH 06/16] x86: Kill apic->cpu_to_logical_apicid() Tejun Heo
2010-12-28 11:48 ` [PATCH 07/16] x86: Add apic->x86_32_early_logical_apicid() Tejun Heo
2010-12-28 11:48 ` [PATCH 08/16] x86: Implement the default x86_32_early_logical_apicid() Tejun Heo
2010-12-28 11:48 ` [PATCH 09/16] x86: Implement x86_32_early_logical_apicid() for bigsmp_32 Tejun Heo
2010-12-28 11:48 ` [PATCH 10/16] x86: Implement x86_32_early_logical_apicid() for summit_32 Tejun Heo
2010-12-28 11:48 ` [PATCH 11/16] x86: Implement x86_32_early_logical_apicid() for numaq_32 Tejun Heo
2010-12-28 11:48 ` [PATCH 12/16] x86: Replace apic->apicid_to_node() with ->x86_32_numa_cpu_node() Tejun Heo
2010-12-28 11:48 ` [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit Tejun Heo
2010-12-28 20:35   ` David Rientjes
2010-12-29 10:52     ` Tejun Heo
2010-12-29 19:36       ` H. Peter Anvin
2010-12-29 22:05         ` H. Peter Anvin
2010-12-30 11:33           ` Tejun Heo
2010-12-28 11:48 ` [PATCH 14/16] x86: Unify CPU -> " Tejun Heo
2010-12-28 20:39   ` David Rientjes
2010-12-28 11:48 ` [PATCH 15/16] x86: Unify node_to_cpumask_map handling " Tejun Heo
2010-12-28 20:43   ` David Rientjes
2010-12-30 12:48     ` Tejun Heo
2010-12-30 14:58       ` Tejun Heo
2010-12-30 18:40         ` David Rientjes
2010-12-31 13:20           ` Tejun Heo
2010-12-31 23:09             ` David Rientjes
2010-12-30 18:43       ` David Rientjes
2010-12-28 11:48 ` Tejun Heo [this message]
  -- strict thread matches above, loose matches on Subject: below --
2011-01-23 13:37 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#5 Tejun Heo
2011-01-23 13:37 ` [PATCH 16/16] x86: Unify NUMA initialization between 32 and 64bit Tejun Heo
2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
2010-12-30 17:49 ` [PATCH 16/16] x86: Unify NUMA initialization between 32 and 64bit Tejun Heo
2010-11-27 15:21 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#2 Tejun Heo
2010-11-27 15:22 ` [PATCH 16/16] x86: Unify NUMA initialization between 32 and 64bit Tejun Heo

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:46f3e0d dfblob:927e12f dfblob:6331190 dfblob:db3baa2
dfblob:0afb8c7 dfblob:9d7cd90 dfblob:c52c267 dfblob:dd93a41
dfblob:8d91d22 dfblob:505bb04 dfblob:6013aca dfblob:b38700a )
 OR (
bs:"[PATCH 16/16] x86: Unify NUMA initialization between 32 and 64bit" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1293536929-31683-17-git-send-email-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=brgerst@gmail.com \
    --cc=eric.dumazet@gmail.com \
    --cc=gorcunov@gmail.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=penberg@kernel.org \
    --cc=shaohui.zheng@intel.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    --cc=yinghai@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).