* [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-11-27 15:21 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#2 Tejun Heo
@ 2010-11-27 15:22 ` Tejun Heo
  2010-12-09 21:43   ` Thomas Gleixner
  0 siblings, 1 reply; 37+ messages in thread
From: Tejun Heo @ 2010-11-27 15:22 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg
  Cc: Tejun Heo
The mapping between cpu/apicid and node is done via apicid_to_node[]
on 64bit and apicid_2_node[] + apic->numa_cpu_node() on 32bit.  This
difference makes it difficult to further unify 32 and 64bit NUMA
hanlding.
This patch unifies it by replacing both apicid_to_node[] and
apicid_2_node[] with __apicid_to_node[] array, which is accessed by
two accessors - set_apicid_to_node() and numa_cpu_node().  On 64bit,
numa_cpu_node() always consults __apicid_to_node[] directly while
32bit goes through apic->numa_cpu_node() method to allow apic
implementation to override it.
There are several places where using numa_cpu_node() is awkward and
the override doesn't matter.  In those places, __apicid_to_node[] are
used directly.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
---
 arch/x86/include/asm/mpspec.h  |    1 -
 arch/x86/include/asm/numa.h    |   31 +++++++++++++++++++++++++++++++
 arch/x86/include/asm/numa_32.h |    6 ++++++
 arch/x86/include/asm/numa_64.h |    5 ++---
 arch/x86/kernel/acpi/boot.c    |    3 +--
 arch/x86/kernel/apic/apic.c    |    6 +++++-
 arch/x86/kernel/cpu/amd.c      |   14 +++++++-------
 arch/x86/kernel/cpu/intel.c    |    3 +--
 arch/x86/kernel/smpboot.c      |    6 +-----
 arch/x86/mm/k8topology_64.c    |    2 +-
 arch/x86/mm/numa.c             |    6 +++++-
 arch/x86/mm/numa_32.c          |    6 ++++++
 arch/x86/mm/numa_64.c          |   18 +++++++++---------
 arch/x86/mm/srat_32.c          |    2 +-
 arch/x86/mm/srat_64.c          |   10 +++++-----
 15 files changed, 81 insertions(+), 38 deletions(-)
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 018ffc1..ae78732 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -24,7 +24,6 @@ extern int pic_mode;
 #define MAX_IRQ_SOURCES		256
 
 extern unsigned int def_to_bigsmp;
-extern u8 apicid_2_node[];
 
 #ifdef CONFIG_X86_NUMAQ
 extern int mp_bus_id_to_node[MAX_MP_BUSSES];
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 27da400..e40bf6f 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,5 +1,36 @@
+#ifndef _ASM_X86_NUMA_H
+#define _ASM_X86_NUMA_H
+
+#include <asm/apicdef.h>
+
+#ifdef CONFIG_NUMA
+/*
+ * __apicid_to_node[] stores the raw mapping between physical apicid
+ * and node and is used to initialize cpu_to_node mapping.
+ *
+ * The mapping may be overridden by apic->numa_cpu_node() on 32bit and
+ * thus should be accessed by the accessors - set_apicid_to_node() and
+ * numa_cpu_node().
+ *
+ * If the user knows that it doesn't care about 32bit APIC-specific
+ * overrides, __apicid_to_node[] may be used directly.
+ */
+extern s16 __apicid_to_node[MAX_LOCAL_APIC];
+
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+	__apicid_to_node[apicid] = node;
+}
+#else	/* CONFIG_NUMA */
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+}
+#endif	/* CONFIG_NUMA */
+
 #ifdef CONFIG_X86_32
 # include "numa_32.h"
 #else
 # include "numa_64.h"
 #endif
+
+#endif	/* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index a372290..d30eb6c 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -4,6 +4,12 @@
 extern int pxm_to_nid(int pxm);
 extern void numa_remove_cpu(int cpu);
 
+#ifdef CONFIG_NUMA
+extern int __cpuinit numa_cpu_node(int apicid);
+#else	/* CONFIG_NUMA */
+static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
+#endif	/* CONFIG_NUMA */
+
 #ifdef CONFIG_HIGHMEM
 extern void set_highmem_pages_init(void);
 #else
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 823e070..17171ee 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_NUMA_64_H
 
 #include <linux/nodemask.h>
-#include <asm/apicdef.h>
 
 struct bootnode {
 	u64 start;
@@ -17,8 +16,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern s16 apicid_to_node[MAX_LOCAL_APIC];
-
 extern unsigned long numa_free_all_bootmem(void);
 extern void setup_node_bootmem(int nodeid, unsigned long start,
 			       unsigned long end);
@@ -32,6 +29,7 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 #define NODE_MIN_SIZE (4*1024*1024)
 
 extern void __init init_cpu_to_node(void);
+extern int __cpuinit numa_cpu_node(int cpu);
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
 extern void __cpuinit numa_add_cpu(int cpu);
@@ -43,6 +41,7 @@ extern void __cpuinit numa_remove_cpu(int cpu);
 #endif /* CONFIG_NUMA_EMU */
 #else
 static inline void init_cpu_to_node(void)		{ }
+static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
 static inline void numa_add_cpu(int cpu, int node)	{ }
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 71232b9..edff4f5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -583,11 +583,10 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 	nid = acpi_get_node(handle);
 	if (nid == -1 || !node_online(nid))
 		return;
+	set_apicid_to_node(physid, nid);
 #ifdef CONFIG_X86_64
-	apicid_to_node[physid] = nid;
 	numa_set_node(cpu, nid);
 #else /* CONFIG_X86_32 */
-	apicid_2_node[physid] = nid;
 	cpu_to_node_map[cpu] = nid;
 #endif
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 597ef66..ee20fe7 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2016,7 +2016,11 @@ void default_init_apic_ldr(void)
 int default_numa_cpu_node(int cpu)
 {
 #ifdef CONFIG_NUMA
-	return apicid_2_node[early_per_cpu(x86_cpu_to_apicid, cpu)];
+	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+	if (apicid != BAD_APICID)
+		return __apicid_to_node[apicid];
+	return NUMA_NO_NODE;
 #else
 	return 0;
 #endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9e093f8..aa3c613 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -239,12 +239,12 @@ static int __cpuinit nearby_node(int apicid)
 	int i, node;
 
 	for (i = apicid - 1; i >= 0; i--) {
-		node = apicid_to_node[i];
+		node = __apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
 	for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
-		node = apicid_to_node[i];
+		node = __apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
@@ -339,10 +339,10 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 	int node;
 	unsigned apicid = c->apicid;
 
-	node = per_cpu(cpu_llc_id, cpu);
+	node = numa_cpu_node(cpu);
+	if (node == NUMA_NO_NODE)
+		node = per_cpu(cpu_llc_id, cpu);
 
-	if (apicid_to_node[apicid] != NUMA_NO_NODE)
-		node = apicid_to_node[apicid];
 	if (!node_online(node)) {
 		/* Two possibilities here:
 		   - The CPU is missing memory and no node was created.
@@ -357,8 +357,8 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 		int ht_nodeid = c->initial_apicid;
 
 		if (ht_nodeid >= 0 &&
-		    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
-			node = apicid_to_node[ht_nodeid];
+		    __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+			node = __apicid_to_node[ht_nodeid];
 		/* Pick a nearby node */
 		if (!node_online(node))
 			node = nearby_node(apicid);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d16c2c5..6052004 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -279,11 +279,10 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 	unsigned node;
 	int cpu = smp_processor_id();
-	int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
 
 	/* Don't do the funky fallback heuristics the AMD version employs
 	   for now. */
-	node = apicid_to_node[apicid];
+	node = numa_cpu_node(cpu);
 	if (node == NUMA_NO_NODE || !node_online(node)) {
 		/* reuse the value from init_cpu_to_node() */
 		node = cpu_to_node(cpu);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 963c44b..4b8b72d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -71,10 +71,6 @@
 #include <asm/smpboot_hooks.h>
 #include <asm/i8259.h>
 
-#ifdef CONFIG_X86_32
-u8 apicid_2_node[MAX_LOCAL_APIC];
-#endif
-
 /* State of each CPU */
 DEFINE_PER_CPU(int, cpu_state) = { 0 };
 
@@ -170,7 +166,7 @@ static void map_cpu_to_logical_apicid(void)
 	int cpu = smp_processor_id();
 	int node;
 
-	node = apic->numa_cpu_node(cpu);
+	node = numa_cpu_node(cpu);
 	if (!node_online(node))
 		node = first_online_node;
 
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 804a3b6..484d80c 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -228,7 +228,7 @@ int __init k8_scan_nodes(void)
 				nodes[i].start >> PAGE_SHIFT,
 				nodes[i].end >> PAGE_SHIFT);
 		for (j = apicid_base; j < cores + apicid_base; j++)
-			apicid_to_node[(i << bits) + j] = i;
+			set_apicid_to_node((i << bits) + j, i);
 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 	}
 
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 787c52c..63db99c 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -4,8 +4,12 @@
 #include <linux/bootmem.h>
 
 /*
- * Which logical CPUs are on which nodes
+ * apicid, cpu, node mappings
  */
+s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
 cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 EXPORT_SYMBOL(node_to_cpumask_map);
 
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 84a3e4c..9f27ae2 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -110,6 +110,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
 static unsigned long kva_start_pfn;
 static unsigned long kva_pages;
+
+int __cpuinit numa_cpu_node(int cpu)
+{
+	return apic->numa_cpu_node(cpu);
+}
+
 /*
  * FLAT - support for basic PC memory model with discontig enabled, essentially
  *        a single node with all available processors in it with a flat
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 7ffc9b7..47ca1b0 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -26,10 +26,6 @@ EXPORT_SYMBOL(node_data);
 
 struct memnode memnode;
 
-s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
-	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-
 int numa_off __initdata;
 static unsigned long __initdata nodemap_addr;
 static unsigned long __initdata nodemap_size;
@@ -721,12 +717,8 @@ void __init init_cpu_to_node(void)
 	BUG_ON(cpu_to_apicid == NULL);
 
 	for_each_possible_cpu(cpu) {
-		int node;
-		u16 apicid = cpu_to_apicid[cpu];
+		int node = numa_cpu_node(cpu);
 
-		if (apicid == BAD_APICID)
-			continue;
-		node = apicid_to_node[apicid];
 		if (node == NUMA_NO_NODE)
 			continue;
 		if (!node_online(node))
@@ -736,6 +728,14 @@ void __init init_cpu_to_node(void)
 }
 #endif
 
+int __cpuinit numa_cpu_node(int cpu)
+{
+	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+	if (apicid != BAD_APICID)
+		return __apicid_to_node[apicid];
+	return NUMA_NO_NODE;
+}
 
 void __cpuinit numa_set_node(int cpu, int node)
 {
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index e55e748..7fcae55 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -255,7 +255,7 @@ int __init get_memcfg_from_srat(void)
 			 num_memory_chunks);
 
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
+		set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
 
 	for (j = 0; j < num_memory_chunks; j++){
 		struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index a35cb9d..1af9c6e 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -79,7 +79,7 @@ static __init void bad_srat(void)
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		apicid_to_node[i] = NUMA_NO_NODE;
+		set_apicid_to_node(i, NUMA_NO_NODE);
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		nodes[i].start = nodes[i].end = 0;
 		nodes_add[i].start = nodes_add[i].end = 0;
@@ -134,7 +134,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 	}
 
 	apic_id = pa->apic_id;
-	apicid_to_node[apic_id] = node;
+	set_apicid_to_node(apic_id, node);
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
@@ -168,7 +168,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 		apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
 	else
 		apic_id = pa->apic_id;
-	apicid_to_node[apic_id] = node;
+	set_apicid_to_node(apic_id, node);
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
@@ -512,13 +512,13 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 		 * node, it must now point to the fake node ID.
 		 */
 		for (j = 0; j < MAX_LOCAL_APIC; j++)
-			if (apicid_to_node[j] == nid &&
+			if (__apicid_to_node[j] == nid &&
 			    fake_apicid_to_node[j] == NUMA_NO_NODE)
 				fake_apicid_to_node[j] = i;
 	}
 	for (i = 0; i < num_nodes; i++)
 		__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
-	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+	memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
 
 	nodes_clear(nodes_parsed);
 	for (i = 0; i < num_nodes; i++)
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-11-27 15:22 ` [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit Tejun Heo
@ 2010-12-09 21:43   ` Thomas Gleixner
  2010-12-10 20:45     ` Tejun Heo
  0 siblings, 1 reply; 37+ messages in thread
From: Thomas Gleixner @ 2010-12-09 21:43 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, mingo, hpa, x86, eric.dumazet, yinghai, brgerst,
	gorcunov, penberg
On Sat, 27 Nov 2010, Tejun Heo wrote:
> The mapping between cpu/apicid and node is done via apicid_to_node[]
> on 64bit and apicid_2_node[] + apic->numa_cpu_node() on 32bit.  This
> difference makes it difficult to further unify 32 and 64bit NUMA
> hanlding.
> 
> This patch unifies it by replacing both apicid_to_node[] and
> apicid_2_node[] with __apicid_to_node[] array, which is accessed by
> two accessors - set_apicid_to_node() and numa_cpu_node().  On 64bit,
> numa_cpu_node() always consults __apicid_to_node[] directly while
> 32bit goes through apic->numa_cpu_node() method to allow apic
> implementation to override it.
> 
> There are several places where using numa_cpu_node() is awkward and
> the override doesn't matter.  In those places, __apicid_to_node[] are
> used directly.
Why is it awkward? Anything outside the accessor functions or
specialized setup code using it is awkward, inconsistent and sloppy.
Thanks,
	tglx
^ permalink raw reply	[flat|nested] 37+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-09 21:43   ` Thomas Gleixner
@ 2010-12-10 20:45     ` Tejun Heo
  2010-12-10 20:54       ` Tejun Heo
  0 siblings, 1 reply; 37+ messages in thread
From: Tejun Heo @ 2010-12-10 20:45 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: linux-kernel, mingo, hpa, x86, eric.dumazet, yinghai, brgerst,
	gorcunov, penberg
Hello, Thomas.
On 12/09/2010 10:43 PM, Thomas Gleixner wrote:
>> There are several places where using numa_cpu_node() is awkward and
>> the override doesn't matter.  In those places, __apicid_to_node[] are
>> used directly.
> 
> Why is it awkward? Anything outside the accessor functions or
> specialized setup code using it is awkward, inconsistent and sloppy.
There are two places which index the mapping by apicid instead of cpu.
One is acpi_fake_nodes() - this sets up the table, so it doesn't
constitute violation of accessors.
The problematic one is the nearby detection workaround in
kernel/cpu/amd.c.  This is rather hacky and looks like added to deal
with early AMD NUMAs which had broken BIOS.  The intel counterpart
omits this workaround and simply ignore NUMA configuration in those
cases.
I can change srat_detect_node() and nearby_node() to index by cpu but
as I have no idea what kind of broken configurations this is supposed
to deal with, I'm concerned that this may lead to different outcome by
walking the table in a different order.  I can implement an apicid ->
numa node mapping function for this but this is something which is
inherently ugly, so maybe it's best to leave it ugly.
For now, how about adding a big fat comment explaining the ugliness in
amd.c?
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 37+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-10 20:45     ` Tejun Heo
@ 2010-12-10 20:54       ` Tejun Heo
  2010-12-10 21:17         ` Yinghai Lu
  0 siblings, 1 reply; 37+ messages in thread
From: Tejun Heo @ 2010-12-10 20:54 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: linux-kernel, mingo, hpa, x86, eric.dumazet, yinghai, brgerst,
	gorcunov, penberg
On 12/10/2010 09:45 PM, Tejun Heo wrote:
> I can change srat_detect_node() and nearby_node() to index by cpu but
> as I have no idea what kind of broken configurations this is supposed
> to deal with, I'm concerned that this may lead to different outcome by
> walking the table in a different order.  I can implement an apicid ->
> numa node mapping function for this but this is something which is
> inherently ugly, so maybe it's best to leave it ugly.
Oh, right, another problem.  It's possible that apicid <-> numa
mapping exists when apicid <-> cpu doesn't.  Again, this is a corner
case which might not matter but I have no idea what kind of brokeness
is being worked around and it would also be difficult to test whether
the change is okay.
If someone can tell me that this workaround can go away, it would be
awesome.
-- 
tejun
^ permalink raw reply	[flat|nested] 37+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-10 20:54       ` Tejun Heo
@ 2010-12-10 21:17         ` Yinghai Lu
  0 siblings, 0 replies; 37+ messages in thread
From: Yinghai Lu @ 2010-12-10 21:17 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Thomas Gleixner, linux-kernel, mingo, hpa, x86, eric.dumazet,
	brgerst, gorcunov, penberg
On 12/10/2010 12:54 PM, Tejun Heo wrote:
> On 12/10/2010 09:45 PM, Tejun Heo wrote:
>> I can change srat_detect_node() and nearby_node() to index by cpu but
>> as I have no idea what kind of broken configurations this is supposed
>> to deal with, I'm concerned that this may lead to different outcome by
>> walking the table in a different order.  I can implement an apicid ->
>> numa node mapping function for this but this is something which is
>> inherently ugly, so maybe it's best to leave it ugly.
> 
> Oh, right, another problem.  It's possible that apicid <-> numa
> mapping exists when apicid <-> cpu doesn't.  Again, this is a corner
> case which might not matter but I have no idea what kind of brokeness
> is being worked around and it would also be difficult to test whether
> the change is okay.
> 
that could happen...when SRAT and MADT is in different order. and your are booting nr_cpus=<less....> in SRAT...
should be fixed in
https://lkml.org/lkml/2010/11/22/5
Thanks
Yinghai
^ permalink raw reply	[flat|nested] 37+ messages in thread
* [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3
@ 2010-12-28 11:48 Tejun Heo
  2010-12-28 11:48 ` [PATCH 01/16] x86: Kill unused static boot_cpu_logical_apicid in smpboot.c Tejun Heo
                   ` (15 more replies)
  0 siblings, 16 replies; 37+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
Hello,
This is a repost of 3rd take of unify-x86_32-and-64-NUMA-init-paths
patchset which was initially posted on 11st Dec but didn't get
archived anywhere because vger had its disk full at the time.  Nothing
has changed since the first posting.
Most changes from the last take[L] are to reflect comments from tglx.
* Rebased on top of tip/x86/apic-cleanup.
* "x86: apic: Cleanup and simplify setup_local_APIC()" is already
  merged into x86/apic-cleanup and dropped from this series.
* Instead of repurposing apic->cpu_to_logical_apicid(), replace it
  with a new 32bit specific callback
  apic->x86_32_early_logical_apicid().
* For consistency, in 0012, apic->apicid_to_node() is renamed to
  x86_32_numa_cpu_node() and put inside CONFIG_X86_32.
* NULL assignments for unimplemented apic ops dropped.
* 0013 now explicitly describes why amd srat_detect_node() accesses
  __apicid_to_node[] directly.  Also, add a comment in the function
  noting the ugliness.
* Patch description updated in 0014.
* Other misc updates.
This patchset started as an attempt to fix percpu setup problem on
x86_32 NUMA configurations reported by Eric Dumazet.  On x86_32, NUMA
configuration is initialized during SMP bringup which happens way
later than percpu setup.  As percpu setup code doesn't have access to
NUMA configuration, it gets set up as if the system is UMA.
The NUMA init paths differ subtly yet significantly between x86_32 and
64 making it quite difficult to follow.  Custom apic configurations on
x86_32 worsens the problem.
There is no fundamental reason why x86_32 can't be initialized in the
same steps as x86_64.  They just were written differently and just
weren't unified during x86_32/64 code merge.  This patchset unifies
them and in the process fixes percpu setup problem on 32bit NUMA.
This patchset contains the following sixteen patches.
 0001-x86-Kill-unused-static-boot_cpu_logical_apicid-in-sm.patch
 0002-x86-Rename-x86_32-MAX_APICID-to-MAX_LOCAL_APIC.patch
 0003-x86-Make-default_send_IPI_mask_sequence-allbutself_l.patch
 0004-x86-Replace-cpu_2_logical_apicid-with-early-percpu-v.patch
 0005-x86-Always-use-x86_cpu_to_logical_apicid-for-cpu-log.patch
 0006-x86-Kill-apic-cpu_to_logical_apicid.patch
 0007-x86-Add-apic-x86_32_early_logical_apicid.patch
 0008-x86-Implement-the-default-x86_32_early_logical_apici.patch
 0009-x86-Implement-x86_32_early_logical_apicid-for-bigsmp.patch
 0010-x86-Implement-x86_32_early_logical_apicid-for-summit.patch
 0011-x86-Implement-x86_32_early_logical_apicid-for-numaq_.patch
 0012-x86-Replace-apic-apicid_to_node-with-x86_32_numa_cpu.patch
 0013-x86-Unify-cpu-apicid-NUMA-node-mapping-between-32-an.patch
 0014-x86-Unify-CPU-NUMA-node-mapping-between-32-and-64bit.patch
 0015-x86-Unify-node_to_cpumask_map-handling-between-32-an.patch
 0016-x86-Unify-NUMA-initialization-between-32-and-64bit.patch
0001-0003 are small preparation patches.
0004-0011 kills apic->cpu_to_logical_apicid() and adds
->x86_32_early_logical_apicid() which is called once for each cpu
early during boot and the output is recorded.  The default apic works
fine but I couldn't test other apic configurations.
I couldn't figure out how to determine logical_apicid early for numaq
and it still has the stub cpu_to_logical_apicid() implementation which
would result in suboptimal percpu configuration.  NUMA configuration
is still updated during SMP bringup, so the situation is the same as
before the series, but if someone knows how to convert this one,
please go ahead.
0012-0016 unify different parts of NUMA initialization/handling step
by step.  Afterwards, x86_32 and 64 behave mostly the same regarding
NUMA initialization making the whole thing much easier to understand
and percpu setup works correctly on x86_32 too.
This patchset is on top of tip/x86/apic-cleanup
(0aa002fe602939370e9476e5ec32b562000a0425) and also available in the
following git branch,
 git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git x86_32-numa
and contains the following changes.
 arch/x86/Kconfig                      |    2 
 arch/x86/include/asm/apic.h           |   36 +++--
 arch/x86/include/asm/apicdef.h        |    1 
 arch/x86/include/asm/ipi.h            |    8 -
 arch/x86/include/asm/mpspec.h         |    3 
 arch/x86/include/asm/numa.h           |   58 +++++++++
 arch/x86/include/asm/numa_32.h        |    7 -
 arch/x86/include/asm/numa_64.h        |   16 --
 arch/x86/include/asm/smp.h            |    3 
 arch/x86/include/asm/topology.h       |   17 --
 arch/x86/kernel/acpi/boot.c           |    8 -
 arch/x86/kernel/apic/apic.c           |   39 +++++-
 arch/x86/kernel/apic/apic_flat_64.c   |    4 
 arch/x86/kernel/apic/apic_noop.c      |   26 ++--
 arch/x86/kernel/apic/bigsmp_32.c      |   34 ++---
 arch/x86/kernel/apic/es7000_32.c      |   35 ++---
 arch/x86/kernel/apic/ipi.c            |   12 -
 arch/x86/kernel/apic/numaq_32.c       |   21 +--
 arch/x86/kernel/apic/probe_32.c       |   10 +
 arch/x86/kernel/apic/summit_32.c      |   47 ++-----
 arch/x86/kernel/apic/x2apic_cluster.c |    2 
 arch/x86/kernel/apic/x2apic_phys.c    |    2 
 arch/x86/kernel/apic/x2apic_uv_x.c    |    2 
 arch/x86/kernel/cpu/amd.c             |   51 +++++---
 arch/x86/kernel/cpu/common.c          |    2 
 arch/x86/kernel/cpu/intel.c           |    5 
 arch/x86/kernel/setup.c               |    2 
 arch/x86/kernel/setup_percpu.c        |   11 +
 arch/x86/kernel/smpboot.c             |   68 -----------
 arch/x86/mm/amdtopology_64.c          |    2 
 arch/x86/mm/numa.c                    |  179 +++++++++++++++++++++++++++++
 arch/x86/mm/numa_32.c                 |    7 +
 arch/x86/mm/numa_64.c                 |  205 ----------------------------------
 arch/x86/mm/srat_32.c                 |    6 
 arch/x86/mm/srat_64.c                 |   10 -
 35 files changed, 464 insertions(+), 477 deletions(-)
Thanks.
--
tejun
[L] http://thread.gmane.org/gmane.linux.kernel/1068909
^ permalink raw reply	[flat|nested] 37+ messages in thread
* [PATCH 01/16] x86: Kill unused static boot_cpu_logical_apicid in smpboot.c
  2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
@ 2010-12-28 11:48 ` Tejun Heo
  2010-12-28 11:48 ` [PATCH 02/16] x86: Rename x86_32 MAX_APICID to MAX_LOCAL_APIC Tejun Heo
                   ` (14 subsequent siblings)
  15 siblings, 0 replies; 37+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
  Cc: Tejun Heo
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
---
 arch/x86/kernel/smpboot.c |    6 +-----
 1 files changed, 1 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 083e99d..4de6a00 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -165,8 +165,6 @@ static void unmap_cpu_to_node(int cpu)
 #endif
 
 #ifdef CONFIG_X86_32
-static int boot_cpu_logical_apicid;
-
 u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
 					{ [0 ... NR_CPUS-1] = BAD_APICID };
 
@@ -1101,9 +1099,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	 * Setup boot CPU information
 	 */
 	smp_store_cpu_info(0); /* Final full version of the data */
-#ifdef CONFIG_X86_32
-	boot_cpu_logical_apicid = logical_smp_processor_id();
-#endif
+
 	current_thread_info()->cpu = 0;  /* needed? */
 	for_each_possible_cpu(i) {
 		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* [PATCH 02/16] x86: Rename x86_32 MAX_APICID to MAX_LOCAL_APIC
  2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
  2010-12-28 11:48 ` [PATCH 01/16] x86: Kill unused static boot_cpu_logical_apicid in smpboot.c Tejun Heo
@ 2010-12-28 11:48 ` Tejun Heo
  2010-12-28 11:48 ` [PATCH 03/16] x86: Make default_send_IPI_mask_sequence/allbutself_logical() 32bit only Tejun Heo
                   ` (13 subsequent siblings)
  15 siblings, 0 replies; 37+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
  Cc: Tejun Heo
Replace x86_32 MAX_APICID in include/asm/mpspec.h with MAX_LOCAL_APIC
in include/asm/apicdef.h to make it consistent with x86_64.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
---
 arch/x86/include/asm/apicdef.h |    1 +
 arch/x86/include/asm/mpspec.h  |    2 --
 arch/x86/kernel/smpboot.c      |    2 +-
 arch/x86/mm/srat_32.c          |    4 ++--
 4 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index a859ca4..47a30ff 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -145,6 +145,7 @@
 
 #ifdef CONFIG_X86_32
 # define MAX_IO_APICS 64
+# define MAX_LOCAL_APIC 256
 #else
 # define MAX_IO_APICS 128
 # define MAX_LOCAL_APIC 32768
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index c82868e..018ffc1 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -32,8 +32,6 @@ extern int mp_bus_id_to_local[MAX_MP_BUSSES];
 extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
 #endif
 
-#define MAX_APICID		256
-
 #else /* CONFIG_X86_64: */
 
 #define MAX_MP_BUSSES		256
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 4de6a00..0b32f17 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -72,7 +72,7 @@
 #include <asm/i8259.h>
 
 #ifdef CONFIG_X86_32
-u8 apicid_2_node[MAX_APICID];
+u8 apicid_2_node[MAX_LOCAL_APIC];
 #endif
 
 /* State of each CPU */
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index a17dffd..e55e748 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -57,7 +57,7 @@ struct node_memory_chunk_s {
 static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
 
 static int __initdata num_memory_chunks; /* total number of memory chunks */
-static u8 __initdata apicid_to_pxm[MAX_APICID];
+static u8 __initdata apicid_to_pxm[MAX_LOCAL_APIC];
 
 int numa_off __initdata;
 int acpi_numa __initdata;
@@ -254,7 +254,7 @@ int __init get_memcfg_from_srat(void)
 	printk(KERN_DEBUG "Number of memory chunks in system = %d\n",
 			 num_memory_chunks);
 
-	for (i = 0; i < MAX_APICID; i++)
+	for (i = 0; i < MAX_LOCAL_APIC; i++)
 		apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
 
 	for (j = 0; j < num_memory_chunks; j++){
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* [PATCH 03/16] x86: Make default_send_IPI_mask_sequence/allbutself_logical() 32bit only
  2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
  2010-12-28 11:48 ` [PATCH 01/16] x86: Kill unused static boot_cpu_logical_apicid in smpboot.c Tejun Heo
  2010-12-28 11:48 ` [PATCH 02/16] x86: Rename x86_32 MAX_APICID to MAX_LOCAL_APIC Tejun Heo
@ 2010-12-28 11:48 ` Tejun Heo
  2010-12-28 11:48 ` [PATCH 04/16] x86: Replace cpu_2_logical_apicid[] with early percpu variable Tejun Heo
                   ` (12 subsequent siblings)
  15 siblings, 0 replies; 37+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
  Cc: Tejun Heo
Both functions are used only in 32bit.  Put them inside CONFIG_X86_32.
This is to prepare for logical apicid handling update.
- Cyrill Gorcunov spotted that I forgot to move declarations in ipi.h
  under CONFIG_X86_32.  Fixed.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
Reviewed-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 arch/x86/include/asm/ipi.h |    8 ++++----
 arch/x86/kernel/apic/ipi.c |    4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index 0b72282..615fa90 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -123,10 +123,6 @@ extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask,
 						 int vector);
 extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
 							 int vector);
-extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
-							 int vector);
-extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
-							 int vector);
 
 /* Avoid include hell */
 #define NMI_VECTOR 0x02
@@ -150,6 +146,10 @@ static inline void __default_local_send_IPI_all(int vector)
 }
 
 #ifdef CONFIG_X86_32
+extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
+							 int vector);
+extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
+							 int vector);
 extern void default_send_IPI_mask_logical(const struct cpumask *mask,
 						 int vector);
 extern void default_send_IPI_allbutself(int vector);
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 08385e0..5037736 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -56,6 +56,8 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
 	local_irq_restore(flags);
 }
 
+#ifdef CONFIG_X86_32
+
 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
 						 int vector)
 {
@@ -96,8 +98,6 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
 	local_irq_restore(flags);
 }
 
-#ifdef CONFIG_X86_32
-
 /*
  * This is only used on smaller machines.
  */
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* [PATCH 04/16] x86: Replace cpu_2_logical_apicid[] with early percpu variable
  2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
                   ` (2 preceding siblings ...)
  2010-12-28 11:48 ` [PATCH 03/16] x86: Make default_send_IPI_mask_sequence/allbutself_logical() 32bit only Tejun Heo
@ 2010-12-28 11:48 ` Tejun Heo
  2010-12-28 11:48 ` [PATCH 05/16] x86: Always use x86_cpu_to_logical_apicid for cpu -> logical apic id Tejun Heo
                   ` (11 subsequent siblings)
  15 siblings, 0 replies; 37+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
  Cc: Tejun Heo
Unlike x86_64, on x86_32, the mapping from cpu to logical apicid may
vary depending on apic in use.  cpu_2_logical_apicid[] array is used
for this mapping.  Replace it with early percpu variable
x86_cpu_to_logical_apicid to make it better aligned with other
mappings.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/apic.h      |    4 ----
 arch/x86/include/asm/smp.h       |    3 +++
 arch/x86/kernel/apic/apic.c      |   11 +++++++++++
 arch/x86/kernel/apic/es7000_32.c |    2 +-
 arch/x86/kernel/apic/numaq_32.c  |    2 +-
 arch/x86/kernel/apic/summit_32.c |    4 ++--
 arch/x86/kernel/setup_percpu.c   |    7 +++++++
 arch/x86/kernel/smpboot.c        |    7 ++-----
 8 files changed, 27 insertions(+), 13 deletions(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 5e3969c..eb139ec 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -595,8 +595,4 @@ extern int default_check_phys_apicid_present(int phys_apicid);
 
 #endif /* CONFIG_X86_LOCAL_APIC */
 
-#ifdef CONFIG_X86_32
-extern u8 cpu_2_logical_apicid[NR_CPUS];
-#endif
-
 #endif /* _ASM_X86_APIC_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4c2f63c..dc7c46a 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -38,6 +38,9 @@ static inline struct cpumask *cpu_core_mask(int cpu)
 
 DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
 DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
+#if defined(CONFIG_SMP) && defined(CONFIG_X86_32)
+DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid);
+#endif
 
 /* Static state in head.S used to set up a CPU */
 extern struct {
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index c0f6426..ba78b1e 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -79,6 +79,17 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 
 #ifdef CONFIG_X86_32
+
+#ifdef CONFIG_SMP
+/*
+ * On x86_32, the mapping between cpu and logical apicid may vary
+ * depending on apic in use.  The following early percpu variable is
+ * used for the mapping.  This is where the behaviors of x86_64 and 32
+ * actually diverge.  Let's keep it ugly for now.
+ */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID);
+#endif
+
 /*
  * Knob to control our willingness to enable the local APIC.
  *
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 8593582..7cb73e1 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -534,7 +534,7 @@ static int es7000_cpu_to_logical_apicid(int cpu)
 #ifdef CONFIG_SMP
 	if (cpu >= nr_cpu_ids)
 		return BAD_APICID;
-	return cpu_2_logical_apicid[cpu];
+	return early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 #else
 	return logical_smp_processor_id();
 #endif
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 960f26a..4ed90c4 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -377,7 +377,7 @@ static inline int numaq_cpu_to_logical_apicid(int cpu)
 {
 	if (cpu >= nr_cpu_ids)
 		return BAD_APICID;
-	return cpu_2_logical_apicid[cpu];
+	return early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 }
 
 /*
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 9b41926..82cfc3e 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -206,7 +206,7 @@ static void summit_init_apic_ldr(void)
 
 	/* Create logical APIC IDs by counting CPUs already in cluster. */
 	for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
-		lid = cpu_2_logical_apicid[i];
+		lid = early_per_cpu(x86_cpu_to_logical_apicid, i);
 		if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
 			++count;
 	}
@@ -247,7 +247,7 @@ static inline int summit_cpu_to_logical_apicid(int cpu)
 #ifdef CONFIG_SMP
 	if (cpu >= nr_cpu_ids)
 		return BAD_APICID;
-	return cpu_2_logical_apicid[cpu];
+	return early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 #else
 	return logical_smp_processor_id();
 #endif
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 002b796..b5147f0 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -225,6 +225,10 @@ void __init setup_per_cpu_areas(void)
 		per_cpu(x86_bios_cpu_apicid, cpu) =
 			early_per_cpu_map(x86_bios_cpu_apicid, cpu);
 #endif
+#ifdef CONFIG_X86_32
+		per_cpu(x86_cpu_to_logical_apicid, cpu) =
+			early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
+#endif
 #ifdef CONFIG_X86_64
 		per_cpu(irq_stack_ptr, cpu) =
 			per_cpu(irq_stack_union.irq_stack, cpu) +
@@ -256,6 +260,9 @@ void __init setup_per_cpu_areas(void)
 	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
 	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
 #endif
+#ifdef CONFIG_X86_32
+	early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
+#endif
 #if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
 	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
 #endif
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0b32f17..eb04f30 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -165,9 +165,6 @@ static void unmap_cpu_to_node(int cpu)
 #endif
 
 #ifdef CONFIG_X86_32
-u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
-					{ [0 ... NR_CPUS-1] = BAD_APICID };
-
 static void map_cpu_to_logical_apicid(void)
 {
 	int cpu = smp_processor_id();
@@ -177,13 +174,13 @@ static void map_cpu_to_logical_apicid(void)
 	if (!node_online(node))
 		node = first_online_node;
 
-	cpu_2_logical_apicid[cpu] = apicid;
+	early_per_cpu(x86_cpu_to_logical_apicid, cpu) = apicid;
 	map_cpu_to_node(cpu, node);
 }
 
 void numa_remove_cpu(int cpu)
 {
-	cpu_2_logical_apicid[cpu] = BAD_APICID;
+	early_per_cpu(x86_cpu_to_logical_apicid, cpu) = BAD_APICID;
 	unmap_cpu_to_node(cpu);
 }
 #else
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* [PATCH 05/16] x86: Always use x86_cpu_to_logical_apicid for cpu -> logical apic id
  2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
                   ` (3 preceding siblings ...)
  2010-12-28 11:48 ` [PATCH 04/16] x86: Replace cpu_2_logical_apicid[] with early percpu variable Tejun Heo
@ 2010-12-28 11:48 ` Tejun Heo
  2010-12-28 11:48 ` [PATCH 06/16] x86: Kill apic->cpu_to_logical_apicid() Tejun Heo
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 37+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
  Cc: Tejun Heo
Currently, cpu -> logical apic id translation is done by
apic->cpu_to_logical_apicid() callback which may or may not use
x86_cpu_to_logical_apicid.  This is unnecessary as it should always
equal logical_smp_processor_id() which is known early during CPU bring
up.
Initialize x86_cpu_to_logical_apicid after apic->init_apic_ldr() in
setup_local_APIC() and always use x86_cpu_to_logical_apicid for cpu ->
logical apic id mapping.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/kernel/apic/apic.c |    8 ++++++++
 arch/x86/kernel/apic/ipi.c  |    8 ++++----
 arch/x86/kernel/smpboot.c   |    7 +++----
 3 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ba78b1e..8ad231c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1252,6 +1252,14 @@ void __cpuinit setup_local_APIC(void)
 	 */
 	apic->init_apic_ldr();
 
+#ifdef CONFIG_X86_32
+	/*
+	 * APIC LDR is initialized.  Fetch and store logical_apic_id.
+	 */
+	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+		logical_smp_processor_id();
+#endif
+
 	/*
 	 * Set Task Priority to 'accept all'. We never change this
 	 * later on.
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 5037736..cce91bf 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -73,8 +73,8 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
 	local_irq_save(flags);
 	for_each_cpu(query_cpu, mask)
 		__default_send_IPI_dest_field(
-			apic->cpu_to_logical_apicid(query_cpu), vector,
-			apic->dest_logical);
+			early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+			vector, apic->dest_logical);
 	local_irq_restore(flags);
 }
 
@@ -92,8 +92,8 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
 		if (query_cpu == this_cpu)
 			continue;
 		__default_send_IPI_dest_field(
-			apic->cpu_to_logical_apicid(query_cpu), vector,
-			apic->dest_logical);
+			early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+			vector, apic->dest_logical);
 		}
 	local_irq_restore(flags);
 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index eb04f30..0768761 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -168,19 +168,18 @@ static void unmap_cpu_to_node(int cpu)
 static void map_cpu_to_logical_apicid(void)
 {
 	int cpu = smp_processor_id();
-	int apicid = logical_smp_processor_id();
-	int node = apic->apicid_to_node(apicid);
+	int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+	int node;
 
+	node = apic->apicid_to_node(logical_apicid);
 	if (!node_online(node))
 		node = first_online_node;
 
-	early_per_cpu(x86_cpu_to_logical_apicid, cpu) = apicid;
 	map_cpu_to_node(cpu, node);
 }
 
 void numa_remove_cpu(int cpu)
 {
-	early_per_cpu(x86_cpu_to_logical_apicid, cpu) = BAD_APICID;
 	unmap_cpu_to_node(cpu);
 }
 #else
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* [PATCH 06/16] x86: Kill apic->cpu_to_logical_apicid()
  2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
                   ` (4 preceding siblings ...)
  2010-12-28 11:48 ` [PATCH 05/16] x86: Always use x86_cpu_to_logical_apicid for cpu -> logical apic id Tejun Heo
@ 2010-12-28 11:48 ` Tejun Heo
  2010-12-28 11:48 ` [PATCH 07/16] x86: Add apic->x86_32_early_logical_apicid() Tejun Heo
                   ` (9 subsequent siblings)
  15 siblings, 0 replies; 37+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
  Cc: Tejun Heo
After the previous patch, apic->cpu_to_logical_apicid() is no longer
used.  Kill it.
For apic types with custom cpu_to_logical_apicid() which is also used
for other purposes, remove the function and modify its users to do the
mapping directly.
#ifdef's on CONFIG_SMP in es7000_32 and summit_32 are ignored during
conversion as they are not used for UP kernels.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/apic.h           |    7 -------
 arch/x86/kernel/apic/apic_flat_64.c   |    2 --
 arch/x86/kernel/apic/apic_noop.c      |    6 ------
 arch/x86/kernel/apic/bigsmp_32.c      |   19 +++++++------------
 arch/x86/kernel/apic/es7000_32.c      |   18 ++----------------
 arch/x86/kernel/apic/numaq_32.c       |    8 --------
 arch/x86/kernel/apic/probe_32.c       |    1 -
 arch/x86/kernel/apic/summit_32.c      |   17 ++---------------
 arch/x86/kernel/apic/x2apic_cluster.c |    1 -
 arch/x86/kernel/apic/x2apic_phys.c    |    1 -
 arch/x86/kernel/apic/x2apic_uv_x.c    |    1 -
 11 files changed, 11 insertions(+), 70 deletions(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index eb139ec..d1aa0c3 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -307,7 +307,6 @@ struct apic {
 	void (*setup_apic_routing)(void);
 	int (*multi_timer_check)(int apic, int irq);
 	int (*apicid_to_node)(int logical_apicid);
-	int (*cpu_to_logical_apicid)(int cpu);
 	int (*cpu_present_to_apicid)(int mps_cpu);
 	void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
 	void (*setup_portio_remap)(void);
@@ -557,12 +556,6 @@ static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_ma
 	*retmap = *phys_map;
 }
 
-/* Mapping from cpu number to logical apicid */
-static inline int default_cpu_to_logical_apicid(int cpu)
-{
-	return 1 << cpu;
-}
-
 static inline int __default_cpu_present_to_apicid(int mps_cpu)
 {
 	if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 09d3b17..5a9d11a 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -186,7 +186,6 @@ struct apic apic_flat =  {
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= NULL,
-	.cpu_to_logical_apicid		= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
@@ -338,7 +337,6 @@ struct apic apic_physflat =  {
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= NULL,
-	.cpu_to_logical_apicid		= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index e31b9ff..f3d19b2 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -54,11 +54,6 @@ static u64 noop_apic_icr_read(void)
 	return 0;
 }
 
-static int noop_cpu_to_logical_apicid(int cpu)
-{
-	return 0;
-}
-
 static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
 {
 	return 0;
@@ -155,7 +150,6 @@ struct apic apic_noop = {
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= noop_apicid_to_node,
 
-	.cpu_to_logical_apicid		= noop_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= physid_set_mask_of_physid,
 
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index cb804c5..4c62592 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -93,14 +93,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu)
 	return BAD_APICID;
 }
 
-/* Mapping from cpu number to logical apicid */
-static inline int bigsmp_cpu_to_logical_apicid(int cpu)
-{
-	if (cpu >= nr_cpu_ids)
-		return BAD_APICID;
-	return cpu_physical_id(cpu);
-}
-
 static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
 {
 	/* For clustered we don't have a good way to do this yet - hack */
@@ -115,7 +107,11 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid)
 /* As we are using single CPU as destination, pick only one CPU here */
 static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
-	return bigsmp_cpu_to_logical_apicid(cpumask_first(cpumask));
+	int cpu = cpumask_first(cpumask);
+
+	if (cpu < nr_cpu_ids)
+		return cpu_physical_id(cpu);
+	return BAD_APICID;
 }
 
 static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
@@ -129,9 +125,9 @@ static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	 */
 	for_each_cpu_and(cpu, cpumask, andmask) {
 		if (cpumask_test_cpu(cpu, cpu_online_mask))
-			break;
+			return cpu_physical_id(cpu);
 	}
-	return bigsmp_cpu_to_logical_apicid(cpu);
+	return BAD_APICID;
 }
 
 static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
@@ -220,7 +216,6 @@ struct apic apic_bigsmp = {
 	.setup_apic_routing		= bigsmp_setup_apic_routing,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= bigsmp_apicid_to_node,
-	.cpu_to_logical_apicid		= bigsmp_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= bigsmp_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= physid_set_mask_of_physid,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 7cb73e1..6840681 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -528,18 +528,6 @@ static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap)
 	++cpu_id;
 }
 
-/* Mapping from cpu number to logical apicid */
-static int es7000_cpu_to_logical_apicid(int cpu)
-{
-#ifdef CONFIG_SMP
-	if (cpu >= nr_cpu_ids)
-		return BAD_APICID;
-	return early_per_cpu(x86_cpu_to_logical_apicid, cpu);
-#else
-	return logical_smp_processor_id();
-#endif
-}
-
 static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
 {
 	/* For clustered we don't have a good way to do this yet - hack */
@@ -561,7 +549,7 @@ static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask)
 	 * The cpus in the mask must all be on the apic cluster.
 	 */
 	for_each_cpu(cpu, cpumask) {
-		int new_apicid = es7000_cpu_to_logical_apicid(cpu);
+		int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 
 		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
 			WARN(1, "Not a valid mask!");
@@ -578,7 +566,7 @@ static unsigned int
 es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
 			      const struct cpumask *andmask)
 {
-	int apicid = es7000_cpu_to_logical_apicid(0);
+	int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
 	cpumask_var_t cpumask;
 
 	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -656,7 +644,6 @@ struct apic __refdata apic_es7000_cluster = {
 	.setup_apic_routing		= es7000_setup_apic_routing,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= es7000_apicid_to_node,
-	.cpu_to_logical_apicid		= es7000_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= es7000_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= es7000_apicid_to_cpu_present,
 	.setup_portio_remap		= NULL,
@@ -721,7 +708,6 @@ struct apic __refdata apic_es7000 = {
 	.setup_apic_routing		= es7000_setup_apic_routing,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= es7000_apicid_to_node,
-	.cpu_to_logical_apicid		= es7000_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= es7000_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= es7000_apicid_to_cpu_present,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 4ed90c4..2b434d5 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -373,13 +373,6 @@ static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask
 	return physids_promote(0xFUL, retmap);
 }
 
-static inline int numaq_cpu_to_logical_apicid(int cpu)
-{
-	if (cpu >= nr_cpu_ids)
-		return BAD_APICID;
-	return early_per_cpu(x86_cpu_to_logical_apicid, cpu);
-}
-
 /*
  * Supporting over 60 cpus on NUMA-Q requires a locality-dependent
  * cpu to APIC ID relation to properly interact with the intelligent
@@ -509,7 +502,6 @@ struct apic __refdata apic_numaq = {
 	.setup_apic_routing		= numaq_setup_apic_routing,
 	.multi_timer_check		= numaq_multi_timer_check,
 	.apicid_to_node			= numaq_apicid_to_node,
-	.cpu_to_logical_apicid		= numaq_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= numaq_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= numaq_apicid_to_cpu_present,
 	.setup_portio_remap		= numaq_setup_portio_remap,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 99d2fe0..24a6828 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -131,7 +131,6 @@ struct apic apic_default = {
 	.setup_apic_routing		= setup_apic_flat_routing,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= default_apicid_to_node,
-	.cpu_to_logical_apicid		= default_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= physid_set_mask_of_physid,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 82cfc3e..1ef4c14 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -241,18 +241,6 @@ static int summit_apicid_to_node(int logical_apicid)
 #endif
 }
 
-/* Mapping from cpu number to logical apicid */
-static inline int summit_cpu_to_logical_apicid(int cpu)
-{
-#ifdef CONFIG_SMP
-	if (cpu >= nr_cpu_ids)
-		return BAD_APICID;
-	return early_per_cpu(x86_cpu_to_logical_apicid, cpu);
-#else
-	return logical_smp_processor_id();
-#endif
-}
-
 static int summit_cpu_present_to_apicid(int mps_cpu)
 {
 	if (mps_cpu < nr_cpu_ids)
@@ -286,7 +274,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
 	 * The cpus in the mask must all be on the apic cluster.
 	 */
 	for_each_cpu(cpu, cpumask) {
-		int new_apicid = summit_cpu_to_logical_apicid(cpu);
+		int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 
 		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
 			printk("%s: Not a valid mask!\n", __func__);
@@ -301,7 +289,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
 static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
 			      const struct cpumask *andmask)
 {
-	int apicid = summit_cpu_to_logical_apicid(0);
+	int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
 	cpumask_var_t cpumask;
 
 	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -529,7 +517,6 @@ struct apic apic_summit = {
 	.setup_apic_routing		= summit_setup_apic_routing,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= summit_apicid_to_node,
-	.cpu_to_logical_apicid		= summit_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= summit_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= summit_apicid_to_cpu_present,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index cf69c59..badc1fd 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -207,7 +207,6 @@ struct apic apic_x2apic_cluster = {
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= NULL,
-	.cpu_to_logical_apicid		= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 8972f38..f28bf4c 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -196,7 +196,6 @@ struct apic apic_x2apic_phys = {
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= NULL,
-	.cpu_to_logical_apicid		= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 194539a..365f53d 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -316,7 +316,6 @@ struct apic __refdata apic_x2apic_uv_x = {
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= NULL,
-	.cpu_to_logical_apicid		= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* [PATCH 07/16] x86: Add apic->x86_32_early_logical_apicid()
  2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
                   ` (5 preceding siblings ...)
  2010-12-28 11:48 ` [PATCH 06/16] x86: Kill apic->cpu_to_logical_apicid() Tejun Heo
@ 2010-12-28 11:48 ` Tejun Heo
  2010-12-28 11:48 ` [PATCH 08/16] x86: Implement the default x86_32_early_logical_apicid() Tejun Heo
                   ` (8 subsequent siblings)
  15 siblings, 0 replies; 37+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
  Cc: Tejun Heo
On x86_32, the mapping between cpu and logical apic ID differs
depending on the specific apic implementation in use.  The mapping is
initialized while bringing up CPUs; however, this makes early inits
ignore memory topology.
Add a x86_32 specific apic->x86_32_early_logical_apicid() which is
called early during boot to query the mapping.  The mapping is later
verified against the result of init_apic_ldr().  The method is allowed
to return BAD_APICID if it can't be determined early.
noop variant which always returns BAD_APICID is implemented and added
to all x86_32 apic implementations.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/apic.h      |   19 +++++++++++++++++++
 arch/x86/kernel/apic/apic.c      |   12 ++++++++++--
 arch/x86/kernel/apic/apic_noop.c |    4 ++++
 arch/x86/kernel/apic/bigsmp_32.c |    2 ++
 arch/x86/kernel/apic/es7000_32.c |    4 ++++
 arch/x86/kernel/apic/numaq_32.c  |    2 ++
 arch/x86/kernel/apic/probe_32.c  |    2 ++
 arch/x86/kernel/apic/summit_32.c |    2 ++
 8 files changed, 45 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index d1aa0c3..efb073b 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -354,6 +354,20 @@ struct apic {
 	void (*icr_write)(u32 low, u32 high);
 	void (*wait_icr_idle)(void);
 	u32 (*safe_wait_icr_idle)(void);
+
+#ifdef CONFIG_X86_32
+	/*
+	 * Called very early during boot from get_smp_config().  It should
+	 * return the logical apicid.  x86_[bios]_cpu_to_apicid is
+	 * initialized before this function is called.
+	 *
+	 * If logical apicid can't be determined that early, the function
+	 * may return BAD_APICID.  Logical apicid will be configured after
+	 * init_apic_ldr() while bringing up CPUs.  Note that NUMA affinity
+	 * won't be applied properly during early boot in this case.
+	 */
+	int (*x86_32_early_logical_apicid)(int cpu);
+#endif
 };
 
 /*
@@ -501,6 +515,11 @@ extern struct apic apic_noop;
 
 extern struct apic apic_default;
 
+static inline int noop_x86_32_early_logical_apicid(int cpu)
+{
+	return BAD_APICID;
+}
+
 /*
  * Set up the logical destination ID.
  *
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 8ad231c..6a82dc6 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1254,8 +1254,13 @@ void __cpuinit setup_local_APIC(void)
 
 #ifdef CONFIG_X86_32
 	/*
-	 * APIC LDR is initialized.  Fetch and store logical_apic_id.
+	 * APIC LDR is initialized.  If logical_apicid mapping was
+	 * initialized during get_smp_config(), make sure it matches the
+	 * actual value.
 	 */
+	i = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+	WARN_ON(i != BAD_APICID && i != logical_smp_processor_id());
+	/* always use the value from LDR */
 	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
 		logical_smp_processor_id();
 #endif
@@ -1994,7 +1999,10 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
 	early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
 #endif
-
+#ifdef CONFIG_X86_32
+	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+		apic->x86_32_early_logical_apicid(cpu);
+#endif
 	set_cpu_possible(cpu, true);
 	set_cpu_present(cpu, true);
 }
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index f3d19b2..0309c58 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -191,4 +191,8 @@ struct apic apic_noop = {
 	.icr_write			= noop_apic_icr_write,
 	.wait_icr_idle			= noop_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= noop_safe_apic_wait_icr_idle,
+
+#ifdef CONFIG_X86_32
+	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+#endif
 };
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 4c62592..dd32a9b 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -251,4 +251,6 @@ struct apic apic_bigsmp = {
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+
+	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
 };
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 6840681..0ffc1ec 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -682,6 +682,8 @@ struct apic __refdata apic_es7000_cluster = {
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+
+	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
 };
 
 struct apic __refdata apic_es7000 = {
@@ -744,4 +746,6 @@ struct apic __refdata apic_es7000 = {
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+
+	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
 };
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 2b434d5..f1a8b12 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -539,4 +539,6 @@ struct apic __refdata apic_numaq = {
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+
+	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
 };
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 24a6828..40be7c3 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -166,6 +166,8 @@ struct apic apic_default = {
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+
+	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
 };
 
 extern struct apic apic_numaq;
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 1ef4c14..172c498 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -552,4 +552,6 @@ struct apic apic_summit = {
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+
+	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
 };
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* [PATCH 08/16] x86: Implement the default x86_32_early_logical_apicid()
  2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
                   ` (6 preceding siblings ...)
  2010-12-28 11:48 ` [PATCH 07/16] x86: Add apic->x86_32_early_logical_apicid() Tejun Heo
@ 2010-12-28 11:48 ` Tejun Heo
  2010-12-28 11:48 ` [PATCH 09/16] x86: Implement x86_32_early_logical_apicid() for bigsmp_32 Tejun Heo
                   ` (7 subsequent siblings)
  15 siblings, 0 replies; 37+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
  Cc: Tejun Heo
Implement x86_32_early_logical_apicid() for the default apic flat
routing.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/kernel/apic/probe_32.c |    7 ++++++-
 1 files changed, 6 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 40be7c3..0f9a9ab 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -77,6 +77,11 @@ void __init default_setup_apic_routing(void)
 		apic->setup_apic_routing();
 }
 
+static int default_x86_32_early_logical_apicid(int cpu)
+{
+	return 1 << cpu;
+}
+
 static void setup_apic_flat_routing(void)
 {
 #ifdef CONFIG_X86_IO_APIC
@@ -167,7 +172,7 @@ struct apic apic_default = {
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
-	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+	.x86_32_early_logical_apicid	= default_x86_32_early_logical_apicid,
 };
 
 extern struct apic apic_numaq;
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* [PATCH 09/16] x86: Implement x86_32_early_logical_apicid() for bigsmp_32
  2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
                   ` (7 preceding siblings ...)
  2010-12-28 11:48 ` [PATCH 08/16] x86: Implement the default x86_32_early_logical_apicid() Tejun Heo
@ 2010-12-28 11:48 ` Tejun Heo
  2010-12-28 11:48 ` [PATCH 10/16] x86: Implement x86_32_early_logical_apicid() for summit_32 Tejun Heo
                   ` (6 subsequent siblings)
  15 siblings, 0 replies; 37+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
  Cc: Tejun Heo
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/kernel/apic/bigsmp_32.c |    8 +++++++-
 1 files changed, 7 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index dd32a9b..bc7ed04 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -45,6 +45,12 @@ static unsigned long bigsmp_check_apicid_present(int bit)
 	return 1;
 }
 
+static int bigsmp_early_logical_apicid(int cpu)
+{
+	/* on bigsmp, logical apicid is the same as physical */
+	return early_per_cpu(x86_cpu_to_apicid, cpu);
+}
+
 static inline unsigned long calculate_ldr(int cpu)
 {
 	unsigned long val, id;
@@ -252,5 +258,5 @@ struct apic apic_bigsmp = {
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
-	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+	.x86_32_early_logical_apicid	= bigsmp_early_logical_apicid,
 };
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* [PATCH 10/16] x86: Implement x86_32_early_logical_apicid() for summit_32
  2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
                   ` (8 preceding siblings ...)
  2010-12-28 11:48 ` [PATCH 09/16] x86: Implement x86_32_early_logical_apicid() for bigsmp_32 Tejun Heo
@ 2010-12-28 11:48 ` Tejun Heo
  2010-12-28 11:48 ` [PATCH 11/16] x86: Implement x86_32_early_logical_apicid() for numaq_32 Tejun Heo
                   ` (5 subsequent siblings)
  15 siblings, 0 replies; 37+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
  Cc: Tejun Heo
Factor out logical apic id calculation from summit_init_apic_ldr() and
use it for the x86_32_early_logical_apicid() callback.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/kernel/apic/summit_32.c |   17 ++++++++++++-----
 1 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 172c498..8c91473 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -194,11 +194,10 @@ static unsigned long summit_check_apicid_present(int bit)
 	return 1;
 }
 
-static void summit_init_apic_ldr(void)
+static int summit_early_logical_apicid(int cpu)
 {
-	unsigned long val, id;
 	int count = 0;
-	u8 my_id = (u8)hard_smp_processor_id();
+	u8 my_id = early_per_cpu(x86_cpu_to_apicid, cpu);
 	u8 my_cluster = APIC_CLUSTER(my_id);
 #ifdef CONFIG_SMP
 	u8 lid;
@@ -214,7 +213,15 @@ static void summit_init_apic_ldr(void)
 	/* We only have a 4 wide bitmap in cluster mode.  If a deranged
 	 * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
 	BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
-	id = my_cluster | (1UL << count);
+	return my_cluster | (1UL << count);
+}
+
+static void summit_init_apic_ldr(void)
+{
+	int cpu = smp_processor_id();
+	unsigned long id = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+	unsigned long val;
+
 	apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE);
 	val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
 	val |= SET_APIC_LOGICAL_ID(id);
@@ -553,5 +560,5 @@ struct apic apic_summit = {
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
-	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+	.x86_32_early_logical_apicid	= summit_early_logical_apicid,
 };
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* [PATCH 11/16] x86: Implement x86_32_early_logical_apicid() for numaq_32
  2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
                   ` (9 preceding siblings ...)
  2010-12-28 11:48 ` [PATCH 10/16] x86: Implement x86_32_early_logical_apicid() for summit_32 Tejun Heo
@ 2010-12-28 11:48 ` Tejun Heo
  2010-12-28 11:48 ` [PATCH 12/16] x86: Replace apic->apicid_to_node() with ->x86_32_numa_cpu_node() Tejun Heo
                   ` (4 subsequent siblings)
  15 siblings, 0 replies; 37+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
  Cc: Tejun Heo
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/kernel/apic/es7000_32.c |   10 ++++++++--
 1 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 0ffc1ec..5c53d05 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -460,6 +460,12 @@ static unsigned long es7000_check_apicid_present(int bit)
 	return physid_isset(bit, phys_cpu_present_map);
 }
 
+static int es7000_early_logical_apicid(int cpu)
+{
+	/* on es7000, logical apicid is the same as physical */
+	return early_per_cpu(x86_bios_cpu_apicid, cpu);
+}
+
 static unsigned long calculate_ldr(int cpu)
 {
 	unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu);
@@ -683,7 +689,7 @@ struct apic __refdata apic_es7000_cluster = {
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
-	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+	.x86_32_early_logical_apicid	= es7000_early_logical_apicid,
 };
 
 struct apic __refdata apic_es7000 = {
@@ -747,5 +753,5 @@ struct apic __refdata apic_es7000 = {
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
-	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+	.x86_32_early_logical_apicid	= es7000_early_logical_apicid,
 };
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* [PATCH 12/16] x86: Replace apic->apicid_to_node() with ->x86_32_numa_cpu_node()
  2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
                   ` (10 preceding siblings ...)
  2010-12-28 11:48 ` [PATCH 11/16] x86: Implement x86_32_early_logical_apicid() for numaq_32 Tejun Heo
@ 2010-12-28 11:48 ` Tejun Heo
  2010-12-28 11:48 ` [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit Tejun Heo
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 37+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
  Cc: Tejun Heo
apic->apicid_to_node() is 32bit specific apic operation which
determines NUMA node for a CPU.  Depending on the APIC implementation,
it can be easier to determine NUMA node from either physical or
logical apicid.  Currently, ->apicid_to_node() takes @logical_apicid
and calls hard_smp_processor_id() if the physical apicid is needed.
This prevents NUMA mapping from being queried from a different CPU,
which in turn makes it impossible to initialize NUMA mapping before
SMP bringup.
This patch replaces apic->apicid_to_node() with
->x86_32_numa_cpu_node() which takes @cpu, from which both logical and
physical apicids can easily be determined.  While at it, drop
duplicate implementations from bigsmp_32 and summit_32, and use the
default one.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
---
 arch/x86/include/asm/apic.h           |    6 ++++--
 arch/x86/kernel/apic/apic.c           |   10 +++++++---
 arch/x86/kernel/apic/apic_flat_64.c   |    2 --
 arch/x86/kernel/apic/apic_noop.c      |   16 +++++++++-------
 arch/x86/kernel/apic/bigsmp_32.c      |    7 +------
 arch/x86/kernel/apic/es7000_32.c      |    7 +++----
 arch/x86/kernel/apic/numaq_32.c       |   11 ++++++++++-
 arch/x86/kernel/apic/probe_32.c       |    2 +-
 arch/x86/kernel/apic/summit_32.c      |   11 +----------
 arch/x86/kernel/apic/x2apic_cluster.c |    1 -
 arch/x86/kernel/apic/x2apic_phys.c    |    1 -
 arch/x86/kernel/apic/x2apic_uv_x.c    |    1 -
 arch/x86/kernel/smpboot.c             |    3 +--
 13 files changed, 37 insertions(+), 41 deletions(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index efb073b..ad30ca4 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -306,7 +306,6 @@ struct apic {
 
 	void (*setup_apic_routing)(void);
 	int (*multi_timer_check)(int apic, int irq);
-	int (*apicid_to_node)(int logical_apicid);
 	int (*cpu_present_to_apicid)(int mps_cpu);
 	void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
 	void (*setup_portio_remap)(void);
@@ -367,6 +366,9 @@ struct apic {
 	 * won't be applied properly during early boot in this case.
 	 */
 	int (*x86_32_early_logical_apicid)(int cpu);
+
+	/* determine CPU -> NUMA node mapping */
+	int (*x86_32_numa_cpu_node)(int cpu);
 #endif
 };
 
@@ -539,7 +541,7 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
 	return cpuid_apic >> index_msb;
 }
 
-extern int default_apicid_to_node(int logical_apicid);
+extern int default_x86_32_numa_cpu_node(int cpu);
 
 #endif
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 6a82dc6..5e097e5 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2023,10 +2023,14 @@ void default_init_apic_ldr(void)
 }
 
 #ifdef CONFIG_X86_32
-int default_apicid_to_node(int logical_apicid)
+int default_x86_32_numa_cpu_node(int cpu)
 {
-#ifdef CONFIG_SMP
-	return apicid_2_node[hard_smp_processor_id()];
+#ifdef CONFIG_NUMA
+	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+	if (apicid != BAD_APICID)
+		return apicid_2_node[apicid];
+	return NUMA_NO_NODE;
 #else
 	return 0;
 #endif
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 5a9d11a..5652d31 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -185,7 +185,6 @@ struct apic apic_flat =  {
 	.ioapic_phys_id_map		= NULL,
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
@@ -336,7 +335,6 @@ struct apic apic_physflat =  {
 	.ioapic_phys_id_map		= NULL,
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 0309c58..f1baa2d 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -108,12 +108,6 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
 	cpumask_set_cpu(cpu, retmask);
 }
 
-int noop_apicid_to_node(int logical_apicid)
-{
-	/* we're always on node 0 */
-	return 0;
-}
-
 static u32 noop_apic_read(u32 reg)
 {
 	WARN_ON_ONCE((cpu_has_apic && !disable_apic));
@@ -125,6 +119,14 @@ static void noop_apic_write(u32 reg, u32 v)
 	WARN_ON_ONCE(cpu_has_apic && !disable_apic);
 }
 
+#ifdef CONFIG_X86_32
+static int noop_x86_32_numa_cpu_node(int cpu)
+{
+	/* we're always on node 0 */
+	return 0;
+}
+#endif
+
 struct apic apic_noop = {
 	.name				= "noop",
 	.probe				= noop_probe,
@@ -148,7 +150,6 @@ struct apic apic_noop = {
 	.ioapic_phys_id_map		= default_ioapic_phys_id_map,
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= noop_apicid_to_node,
 
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= physid_set_mask_of_physid,
@@ -194,5 +195,6 @@ struct apic apic_noop = {
 
 #ifdef CONFIG_X86_32
 	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+	.x86_32_numa_cpu_node		= noop_x86_32_numa_cpu_node,
 #endif
 };
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index bc7ed04..541a2e4 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -86,11 +86,6 @@ static void bigsmp_setup_apic_routing(void)
 		nr_ioapics);
 }
 
-static int bigsmp_apicid_to_node(int logical_apicid)
-{
-	return apicid_2_node[hard_smp_processor_id()];
-}
-
 static int bigsmp_cpu_present_to_apicid(int mps_cpu)
 {
 	if (mps_cpu < nr_cpu_ids)
@@ -221,7 +216,6 @@ struct apic apic_bigsmp = {
 	.ioapic_phys_id_map		= bigsmp_ioapic_phys_id_map,
 	.setup_apic_routing		= bigsmp_setup_apic_routing,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= bigsmp_apicid_to_node,
 	.cpu_present_to_apicid		= bigsmp_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= physid_set_mask_of_physid,
 	.setup_portio_remap		= NULL,
@@ -259,4 +253,5 @@ struct apic apic_bigsmp = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= bigsmp_early_logical_apicid,
+	.x86_32_numa_cpu_node		= default_x86_32_numa_cpu_node,
 };
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 5c53d05..3e9de48 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -510,12 +510,11 @@ static void es7000_setup_apic_routing(void)
 		nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
 }
 
-static int es7000_apicid_to_node(int logical_apicid)
+static int es7000_numa_cpu_node(int cpu)
 {
 	return 0;
 }
 
-
 static int es7000_cpu_present_to_apicid(int mps_cpu)
 {
 	if (!mps_cpu)
@@ -649,7 +648,6 @@ struct apic __refdata apic_es7000_cluster = {
 	.ioapic_phys_id_map		= es7000_ioapic_phys_id_map,
 	.setup_apic_routing		= es7000_setup_apic_routing,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= es7000_apicid_to_node,
 	.cpu_present_to_apicid		= es7000_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= es7000_apicid_to_cpu_present,
 	.setup_portio_remap		= NULL,
@@ -690,6 +688,7 @@ struct apic __refdata apic_es7000_cluster = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= es7000_early_logical_apicid,
+	.x86_32_numa_cpu_node		= es7000_numa_cpu_node,
 };
 
 struct apic __refdata apic_es7000 = {
@@ -715,7 +714,6 @@ struct apic __refdata apic_es7000 = {
 	.ioapic_phys_id_map		= es7000_ioapic_phys_id_map,
 	.setup_apic_routing		= es7000_setup_apic_routing,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= es7000_apicid_to_node,
 	.cpu_present_to_apicid		= es7000_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= es7000_apicid_to_cpu_present,
 	.setup_portio_remap		= NULL,
@@ -754,4 +752,5 @@ struct apic __refdata apic_es7000 = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= es7000_early_logical_apicid,
+	.x86_32_numa_cpu_node		= es7000_numa_cpu_node,
 };
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index f1a8b12..6273eee 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -391,6 +391,15 @@ static inline int numaq_apicid_to_node(int logical_apicid)
 	return logical_apicid >> 4;
 }
 
+static int numaq_numa_cpu_node(int cpu)
+{
+	int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+
+	if (logical_apicid != BAD_APICID)
+		return numaq_apicid_to_node(logical_apicid);
+	return NUMA_NO_NODE;
+}
+
 static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
 {
 	int node = numaq_apicid_to_node(logical_apicid);
@@ -501,7 +510,6 @@ struct apic __refdata apic_numaq = {
 	.ioapic_phys_id_map		= numaq_ioapic_phys_id_map,
 	.setup_apic_routing		= numaq_setup_apic_routing,
 	.multi_timer_check		= numaq_multi_timer_check,
-	.apicid_to_node			= numaq_apicid_to_node,
 	.cpu_present_to_apicid		= numaq_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= numaq_apicid_to_cpu_present,
 	.setup_portio_remap		= numaq_setup_portio_remap,
@@ -541,4 +549,5 @@ struct apic __refdata apic_numaq = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+	.x86_32_numa_cpu_node		= numaq_numa_cpu_node,
 };
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 0f9a9ab..fc84c7b 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -135,7 +135,6 @@ struct apic apic_default = {
 	.ioapic_phys_id_map		= default_ioapic_phys_id_map,
 	.setup_apic_routing		= setup_apic_flat_routing,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= default_apicid_to_node,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= physid_set_mask_of_physid,
 	.setup_portio_remap		= NULL,
@@ -173,6 +172,7 @@ struct apic apic_default = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= default_x86_32_early_logical_apicid,
+	.x86_32_numa_cpu_node		= default_x86_32_numa_cpu_node,
 };
 
 extern struct apic apic_numaq;
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 8c91473..e4b8059 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -239,15 +239,6 @@ static void summit_setup_apic_routing(void)
 						nr_ioapics);
 }
 
-static int summit_apicid_to_node(int logical_apicid)
-{
-#ifdef CONFIG_SMP
-	return apicid_2_node[hard_smp_processor_id()];
-#else
-	return 0;
-#endif
-}
-
 static int summit_cpu_present_to_apicid(int mps_cpu)
 {
 	if (mps_cpu < nr_cpu_ids)
@@ -523,7 +514,6 @@ struct apic apic_summit = {
 	.ioapic_phys_id_map		= summit_ioapic_phys_id_map,
 	.setup_apic_routing		= summit_setup_apic_routing,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= summit_apicid_to_node,
 	.cpu_present_to_apicid		= summit_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= summit_apicid_to_cpu_present,
 	.setup_portio_remap		= NULL,
@@ -561,4 +551,5 @@ struct apic apic_summit = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= summit_early_logical_apicid,
+	.x86_32_numa_cpu_node		= default_x86_32_numa_cpu_node,
 };
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index badc1fd..90949bb 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -206,7 +206,6 @@ struct apic apic_x2apic_cluster = {
 	.ioapic_phys_id_map		= NULL,
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index f28bf4c..c7e6d66 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -195,7 +195,6 @@ struct apic apic_x2apic_phys = {
 	.ioapic_phys_id_map		= NULL,
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 365f53d..e654ff9 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -315,7 +315,6 @@ struct apic __refdata apic_x2apic_uv_x = {
 	.ioapic_phys_id_map		= NULL,
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0768761..031d2e1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -168,10 +168,9 @@ static void unmap_cpu_to_node(int cpu)
 static void map_cpu_to_logical_apicid(void)
 {
 	int cpu = smp_processor_id();
-	int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 	int node;
 
-	node = apic->apicid_to_node(logical_apicid);
+	node = apic->x86_32_numa_cpu_node(cpu);
 	if (!node_online(node))
 		node = first_online_node;
 
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
                   ` (11 preceding siblings ...)
  2010-12-28 11:48 ` [PATCH 12/16] x86: Replace apic->apicid_to_node() with ->x86_32_numa_cpu_node() Tejun Heo
@ 2010-12-28 11:48 ` Tejun Heo
  2010-12-28 20:35   ` David Rientjes
  2010-12-28 11:48 ` [PATCH 14/16] x86: Unify CPU -> " Tejun Heo
                   ` (2 subsequent siblings)
  15 siblings, 1 reply; 37+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
  Cc: Tejun Heo
The mapping between cpu/apicid and node is done via apicid_to_node[]
on 64bit and apicid_2_node[] + apic->x86_32_numa_cpu_node() on 32bit.
This difference makes it difficult to further unify 32 and 64bit NUMA
handling.
This patch unifies it by replacing both apicid_to_node[] and
apicid_2_node[] with __apicid_to_node[] array, which is accessed by
two accessors - set_apicid_to_node() and numa_cpu_node().  On 64bit,
numa_cpu_node() always consults __apicid_to_node[] directly while
32bit goes through apic->numa_cpu_node() method to allow apic
implementations to override it.
srat_detect_node() for amd cpus contains workaround for broken NUMA
configuration which assumes relationship between APIC ID, HT node ID
and NUMA topology.  Leave it to access __apicid_to_node[] directly as
mapping through CPU might result in undesirable behavior change.  The
comment is reformatted and updated to note the ugliness.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
---
 arch/x86/include/asm/mpspec.h  |    1 -
 arch/x86/include/asm/numa.h    |   28 +++++++++++++++++++++++
 arch/x86/include/asm/numa_32.h |    6 +++++
 arch/x86/include/asm/numa_64.h |    5 +--
 arch/x86/kernel/acpi/boot.c    |    3 +-
 arch/x86/kernel/apic/apic.c    |    2 +-
 arch/x86/kernel/cpu/amd.c      |   47 +++++++++++++++++++++++++--------------
 arch/x86/kernel/cpu/intel.c    |    3 +-
 arch/x86/kernel/smpboot.c      |    6 +----
 arch/x86/mm/amdtopology_64.c   |    2 +-
 arch/x86/mm/numa.c             |    6 ++++-
 arch/x86/mm/numa_32.c          |    6 +++++
 arch/x86/mm/numa_64.c          |   18 +++++++-------
 arch/x86/mm/srat_32.c          |    2 +-
 arch/x86/mm/srat_64.c          |   10 ++++----
 15 files changed, 97 insertions(+), 48 deletions(-)
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 018ffc1..ae78732 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -24,7 +24,6 @@ extern int pic_mode;
 #define MAX_IRQ_SOURCES		256
 
 extern unsigned int def_to_bigsmp;
-extern u8 apicid_2_node[];
 
 #ifdef CONFIG_X86_NUMAQ
 extern int mp_bus_id_to_node[MAX_MP_BUSSES];
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 27da400..5e01c76 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,5 +1,33 @@
+#ifndef _ASM_X86_NUMA_H
+#define _ASM_X86_NUMA_H
+
+#include <asm/apicdef.h>
+
+#ifdef CONFIG_NUMA
+/*
+ * __apicid_to_node[] stores the raw mapping between physical apicid and
+ * node and is used to initialize cpu_to_node mapping.
+ *
+ * The mapping may be overridden by apic->numa_cpu_node() on 32bit and thus
+ * should be accessed by the accessors - set_apicid_to_node() and
+ * numa_cpu_node().
+ */
+extern s16 __apicid_to_node[MAX_LOCAL_APIC];
+
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+	__apicid_to_node[apicid] = node;
+}
+#else	/* CONFIG_NUMA */
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+}
+#endif	/* CONFIG_NUMA */
+
 #ifdef CONFIG_X86_32
 # include "numa_32.h"
 #else
 # include "numa_64.h"
 #endif
+
+#endif	/* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index a372290..d30eb6c 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -4,6 +4,12 @@
 extern int pxm_to_nid(int pxm);
 extern void numa_remove_cpu(int cpu);
 
+#ifdef CONFIG_NUMA
+extern int __cpuinit numa_cpu_node(int apicid);
+#else	/* CONFIG_NUMA */
+static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
+#endif	/* CONFIG_NUMA */
+
 #ifdef CONFIG_HIGHMEM
 extern void set_highmem_pages_init(void);
 #else
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 823e070..17171ee 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_NUMA_64_H
 
 #include <linux/nodemask.h>
-#include <asm/apicdef.h>
 
 struct bootnode {
 	u64 start;
@@ -17,8 +16,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern s16 apicid_to_node[MAX_LOCAL_APIC];
-
 extern unsigned long numa_free_all_bootmem(void);
 extern void setup_node_bootmem(int nodeid, unsigned long start,
 			       unsigned long end);
@@ -32,6 +29,7 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 #define NODE_MIN_SIZE (4*1024*1024)
 
 extern void __init init_cpu_to_node(void);
+extern int __cpuinit numa_cpu_node(int cpu);
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
 extern void __cpuinit numa_add_cpu(int cpu);
@@ -43,6 +41,7 @@ extern void __cpuinit numa_remove_cpu(int cpu);
 #endif /* CONFIG_NUMA_EMU */
 #else
 static inline void init_cpu_to_node(void)		{ }
+static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
 static inline void numa_add_cpu(int cpu, int node)	{ }
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 1a5b9a8..de3308b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -583,11 +583,10 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 	nid = acpi_get_node(handle);
 	if (nid == -1 || !node_online(nid))
 		return;
+	set_apicid_to_node(physid, nid);
 #ifdef CONFIG_X86_64
-	apicid_to_node[physid] = nid;
 	numa_set_node(cpu, nid);
 #else /* CONFIG_X86_32 */
-	apicid_2_node[physid] = nid;
 	cpu_to_node_map[cpu] = nid;
 #endif
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 5e097e5..4f2f210 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2029,7 +2029,7 @@ int default_x86_32_numa_cpu_node(int cpu)
 	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
 
 	if (apicid != BAD_APICID)
-		return apicid_2_node[apicid];
+		return __apicid_to_node[apicid];
 	return NUMA_NO_NODE;
 #else
 	return 0;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9e093f8..d8e81e5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -234,17 +234,21 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
 #endif
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+/*
+ * To workaround broken NUMA config.  Read the comment in
+ * srat_detect_node().
+ */
 static int __cpuinit nearby_node(int apicid)
 {
 	int i, node;
 
 	for (i = apicid - 1; i >= 0; i--) {
-		node = apicid_to_node[i];
+		node = __apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
 	for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
-		node = apicid_to_node[i];
+		node = __apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
@@ -339,26 +343,35 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 	int node;
 	unsigned apicid = c->apicid;
 
-	node = per_cpu(cpu_llc_id, cpu);
+	node = numa_cpu_node(cpu);
+	if (node == NUMA_NO_NODE)
+		node = per_cpu(cpu_llc_id, cpu);
 
-	if (apicid_to_node[apicid] != NUMA_NO_NODE)
-		node = apicid_to_node[apicid];
 	if (!node_online(node)) {
-		/* Two possibilities here:
-		   - The CPU is missing memory and no node was created.
-		   In that case try picking one from a nearby CPU
-		   - The APIC IDs differ from the HyperTransport node IDs
-		   which the K8 northbridge parsing fills in.
-		   Assume they are all increased by a constant offset,
-		   but in the same order as the HT nodeids.
-		   If that doesn't result in a usable node fall back to the
-		   path for the previous case.  */
-
+		/*
+		 * Two possibilities here:
+		 *
+		 * - The CPU is missing memory and no node was created.  In
+		 *   that case try picking one from a nearby CPU.
+		 *
+		 * - The APIC IDs differ from the HyperTransport node IDs
+		 *   which the K8 northbridge parsing fills in.  Assume
+		 *   they are all increased by a constant offset, but in
+		 *   the same order as the HT nodeids.  If that doesn't
+		 *   result in a usable node fall back to the path for the
+		 *   previous case.
+		 *
+		 * This workaround operates directly on the mapping between
+		 * APIC ID and NUMA node, assuming certain relationship
+		 * between APIC ID, HT node ID and NUMA topology.  As going
+		 * through CPU mapping may alter the outcome, directly
+		 * access __apicid_to_node[].
+		 */
 		int ht_nodeid = c->initial_apicid;
 
 		if (ht_nodeid >= 0 &&
-		    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
-			node = apicid_to_node[ht_nodeid];
+		    __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+			node = __apicid_to_node[ht_nodeid];
 		/* Pick a nearby node */
 		if (!node_online(node))
 			node = nearby_node(apicid);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d16c2c5..6052004 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -279,11 +279,10 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 	unsigned node;
 	int cpu = smp_processor_id();
-	int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
 
 	/* Don't do the funky fallback heuristics the AMD version employs
 	   for now. */
-	node = apicid_to_node[apicid];
+	node = numa_cpu_node(cpu);
 	if (node == NUMA_NO_NODE || !node_online(node)) {
 		/* reuse the value from init_cpu_to_node() */
 		node = cpu_to_node(cpu);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 031d2e1..4b8b72d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -71,10 +71,6 @@
 #include <asm/smpboot_hooks.h>
 #include <asm/i8259.h>
 
-#ifdef CONFIG_X86_32
-u8 apicid_2_node[MAX_LOCAL_APIC];
-#endif
-
 /* State of each CPU */
 DEFINE_PER_CPU(int, cpu_state) = { 0 };
 
@@ -170,7 +166,7 @@ static void map_cpu_to_logical_apicid(void)
 	int cpu = smp_processor_id();
 	int node;
 
-	node = apic->x86_32_numa_cpu_node(cpu);
+	node = numa_cpu_node(cpu);
 	if (!node_online(node))
 		node = first_online_node;
 
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index 08a0069..3555d93 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -227,7 +227,7 @@ int __init amd_scan_nodes(void)
 				nodes[i].start >> PAGE_SHIFT,
 				nodes[i].end >> PAGE_SHIFT);
 		for (j = apicid_base; j < cores + apicid_base; j++)
-			apicid_to_node[(i << bits) + j] = i;
+			set_apicid_to_node((i << bits) + j, i);
 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 	}
 
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 787c52c..63db99c 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -4,8 +4,12 @@
 #include <linux/bootmem.h>
 
 /*
- * Which logical CPUs are on which nodes
+ * apicid, cpu, node mappings
  */
+s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
 cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 EXPORT_SYMBOL(node_to_cpumask_map);
 
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 84a3e4c..8d91d22 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -110,6 +110,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
 static unsigned long kva_start_pfn;
 static unsigned long kva_pages;
+
+int __cpuinit numa_cpu_node(int cpu)
+{
+	return apic->x86_32_numa_cpu_node(cpu);
+}
+
 /*
  * FLAT - support for basic PC memory model with discontig enabled, essentially
  *        a single node with all available processors in it with a flat
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 7762a51..989f23b 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -26,10 +26,6 @@ EXPORT_SYMBOL(node_data);
 
 struct memnode memnode;
 
-s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
-	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-
 int numa_off __initdata;
 static unsigned long __initdata nodemap_addr;
 static unsigned long __initdata nodemap_size;
@@ -721,12 +717,8 @@ void __init init_cpu_to_node(void)
 	BUG_ON(cpu_to_apicid == NULL);
 
 	for_each_possible_cpu(cpu) {
-		int node;
-		u16 apicid = cpu_to_apicid[cpu];
+		int node = numa_cpu_node(cpu);
 
-		if (apicid == BAD_APICID)
-			continue;
-		node = apicid_to_node[apicid];
 		if (node == NUMA_NO_NODE)
 			continue;
 		if (!node_online(node))
@@ -736,6 +728,14 @@ void __init init_cpu_to_node(void)
 }
 #endif
 
+int __cpuinit numa_cpu_node(int cpu)
+{
+	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+	if (apicid != BAD_APICID)
+		return __apicid_to_node[apicid];
+	return NUMA_NO_NODE;
+}
 
 void __cpuinit numa_set_node(int cpu, int node)
 {
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index e55e748..7fcae55 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -255,7 +255,7 @@ int __init get_memcfg_from_srat(void)
 			 num_memory_chunks);
 
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
+		set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
 
 	for (j = 0; j < num_memory_chunks; j++){
 		struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index a35cb9d..1af9c6e 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -79,7 +79,7 @@ static __init void bad_srat(void)
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		apicid_to_node[i] = NUMA_NO_NODE;
+		set_apicid_to_node(i, NUMA_NO_NODE);
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		nodes[i].start = nodes[i].end = 0;
 		nodes_add[i].start = nodes_add[i].end = 0;
@@ -134,7 +134,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 	}
 
 	apic_id = pa->apic_id;
-	apicid_to_node[apic_id] = node;
+	set_apicid_to_node(apic_id, node);
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
@@ -168,7 +168,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 		apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
 	else
 		apic_id = pa->apic_id;
-	apicid_to_node[apic_id] = node;
+	set_apicid_to_node(apic_id, node);
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
@@ -512,13 +512,13 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 		 * node, it must now point to the fake node ID.
 		 */
 		for (j = 0; j < MAX_LOCAL_APIC; j++)
-			if (apicid_to_node[j] == nid &&
+			if (__apicid_to_node[j] == nid &&
 			    fake_apicid_to_node[j] == NUMA_NO_NODE)
 				fake_apicid_to_node[j] = i;
 	}
 	for (i = 0; i < num_nodes; i++)
 		__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
-	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+	memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
 
 	nodes_clear(nodes_parsed);
 	for (i = 0; i < num_nodes; i++)
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* [PATCH 14/16] x86: Unify CPU -> NUMA node mapping between 32 and 64bit
  2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
                   ` (12 preceding siblings ...)
  2010-12-28 11:48 ` [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit Tejun Heo
@ 2010-12-28 11:48 ` Tejun Heo
  2010-12-28 20:39   ` David Rientjes
  2010-12-28 11:48 ` [PATCH 15/16] x86: Unify node_to_cpumask_map handling " Tejun Heo
  2010-12-28 11:48 ` [PATCH 16/16] x86: Unify NUMA initialization " Tejun Heo
  15 siblings, 1 reply; 37+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
  Cc: Tejun Heo
Unlike 64bit, 32bit has been using its own cpu_to_node_map[] for CPU
-> NUMA node mapping.  Replace it with early_percpu variable
x86_cpu_to_node_map and share the mapping code with 64bit.
* USE_PERCPU_NUMA_NODE_ID is now enabled for 32bit too.
* x86_cpu_to_node_map and numa_set/clear_node() are moved from numa_64
  to numa.  For now, on 32bit, x86_cpu_to_node_map is initialized with
  0 instead of NUMA_NO_NODE.  This is to avoid introducing unexpected
  behavior change and will be updated once init path is unified.
* srat_detect_node() is now enabled for x86_32 too.  It calls
  numa_set_node() and initializes the mapping making explicit
  cpu_to_node_map[] updates from map/unmap_cpu_to_node() unnecessary.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/Kconfig                |    2 +-
 arch/x86/include/asm/numa.h     |    8 ++++
 arch/x86/include/asm/numa_64.h  |    4 --
 arch/x86/include/asm/topology.h |   17 ---------
 arch/x86/kernel/acpi/boot.c     |    5 ---
 arch/x86/kernel/cpu/amd.c       |    4 +-
 arch/x86/kernel/cpu/intel.c     |    2 +-
 arch/x86/kernel/setup_percpu.c  |    4 +-
 arch/x86/kernel/smpboot.c       |    6 ---
 arch/x86/mm/numa.c              |   72 ++++++++++++++++++++++++++++++++++++++-
 arch/x86/mm/numa_64.c           |   65 -----------------------------------
 11 files changed, 85 insertions(+), 104 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 58ecad5..8d9dd6c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1707,7 +1707,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
 	depends on NUMA
 
 config USE_PERCPU_NUMA_NODE_ID
-	def_bool X86_64
+	def_bool y
 	depends on NUMA
 
 menu "Power management and ACPI options"
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 5e01c76..2b21fff 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -30,4 +30,12 @@ static inline void set_apicid_to_node(int apicid, s16 node)
 # include "numa_64.h"
 #endif
 
+#ifdef CONFIG_NUMA
+extern void __cpuinit numa_set_node(int cpu, int node);
+extern void __cpuinit numa_clear_node(int cpu);
+#else	/* CONFIG_NUMA */
+static inline void numa_set_node(int cpu, int node)	{ }
+static inline void numa_clear_node(int cpu)		{ }
+#endif	/* CONFIG_NUMA */
+
 #endif	/* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 17171ee..96691a0 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -30,8 +30,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 
 extern void __init init_cpu_to_node(void);
 extern int __cpuinit numa_cpu_node(int cpu);
-extern void __cpuinit numa_set_node(int cpu, int node);
-extern void __cpuinit numa_clear_node(int cpu);
 extern void __cpuinit numa_add_cpu(int cpu);
 extern void __cpuinit numa_remove_cpu(int cpu);
 
@@ -42,8 +40,6 @@ extern void __cpuinit numa_remove_cpu(int cpu);
 #else
 static inline void init_cpu_to_node(void)		{ }
 static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
-static inline void numa_set_node(int cpu, int node)	{ }
-static inline void numa_clear_node(int cpu)		{ }
 static inline void numa_add_cpu(int cpu, int node)	{ }
 static inline void numa_remove_cpu(int cpu)		{ }
 #endif
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 21899cc..b101c17 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -47,21 +47,6 @@
 
 #include <asm/mpspec.h>
 
-#ifdef CONFIG_X86_32
-
-/* Mappings between logical cpu number and node number */
-extern int cpu_to_node_map[];
-
-/* Returns the number of the node containing CPU 'cpu' */
-static inline int __cpu_to_node(int cpu)
-{
-	return cpu_to_node_map[cpu];
-}
-#define early_cpu_to_node __cpu_to_node
-#define cpu_to_node __cpu_to_node
-
-#else /* CONFIG_X86_64 */
-
 /* Mappings between logical cpu number and node number */
 DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
 
@@ -84,8 +69,6 @@ static inline int early_cpu_to_node(int cpu)
 
 #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
 
-#endif /* CONFIG_X86_64 */
-
 /* Mappings between node number and cpus on that node. */
 extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index de3308b..bf5d299 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -584,12 +584,7 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 	if (nid == -1 || !node_online(nid))
 		return;
 	set_apicid_to_node(physid, nid);
-#ifdef CONFIG_X86_64
 	numa_set_node(cpu, nid);
-#else /* CONFIG_X86_32 */
-	cpu_to_node_map[cpu] = nid;
-#endif
-
 #endif
 }
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index d8e81e5..9a1be2c 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -233,7 +233,7 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
 }
 #endif
 
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
 /*
  * To workaround broken NUMA config.  Read the comment in
  * srat_detect_node().
@@ -338,7 +338,7 @@ EXPORT_SYMBOL_GPL(amd_get_nb_id);
 
 static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 {
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
 	int cpu = smp_processor_id();
 	int node;
 	unsigned apicid = c->apicid;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 6052004..df86bc8 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -276,7 +276,7 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
 
 static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 {
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
 	unsigned node;
 	int cpu = smp_processor_id();
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index b5147f0..71f4727 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -233,6 +233,7 @@ void __init setup_per_cpu_areas(void)
 		per_cpu(irq_stack_ptr, cpu) =
 			per_cpu(irq_stack_union.irq_stack, cpu) +
 			IRQ_STACK_SIZE - 64;
+#endif
 #ifdef CONFIG_NUMA
 		per_cpu(x86_cpu_to_node_map, cpu) =
 			early_per_cpu_map(x86_cpu_to_node_map, cpu);
@@ -246,7 +247,6 @@ void __init setup_per_cpu_areas(void)
 		 */
 		set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
 #endif
-#endif
 		/*
 		 * Up to this point, the boot CPU has been using .init.data
 		 * area.  Reload any changed state for the boot CPU.
@@ -263,7 +263,7 @@ void __init setup_per_cpu_areas(void)
 #ifdef CONFIG_X86_32
 	early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
 #endif
-#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
+#ifdef CONFIG_NUMA
 	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
 #endif
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 4b8b72d..b78ce8c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -133,16 +133,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 atomic_t init_deasserted;
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
-/* which node each logical CPU is on */
-int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
-EXPORT_SYMBOL(cpu_to_node_map);
-
 /* set up a mapping between cpu and node. */
 static void map_cpu_to_node(int cpu, int node)
 {
 	printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
 	cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
-	cpu_to_node_map[cpu] = node;
 }
 
 /* undo a mapping between cpu and node. */
@@ -153,7 +148,6 @@ static void unmap_cpu_to_node(int cpu)
 	printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
 	for (node = 0; node < MAX_NUMNODES; node++)
 		cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
-	cpu_to_node_map[cpu] = 0;
 }
 #else /* !(CONFIG_NUMA && CONFIG_X86_32) */
 #define map_cpu_to_node(cpu, node)	({})
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 63db99c..b23b5fe 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -14,6 +14,44 @@ cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 EXPORT_SYMBOL(node_to_cpumask_map);
 
 /*
+ * Map cpu index to node index
+ */
+#ifdef CONFIG_X86_32
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, 0);
+#else
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
+#endif
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+
+void __cpuinit numa_set_node(int cpu, int node)
+{
+	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
+
+	/* early setting, no percpu area yet */
+	if (cpu_to_node_map) {
+		cpu_to_node_map[cpu] = node;
+		return;
+	}
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+	if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
+		printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
+		dump_stack();
+		return;
+	}
+#endif
+	per_cpu(x86_cpu_to_node_map, cpu) = node;
+
+	if (node != NUMA_NO_NODE)
+		set_cpu_numa_node(cpu, node);
+}
+
+void __cpuinit numa_clear_node(int cpu)
+{
+	numa_set_node(cpu, NUMA_NO_NODE);
+}
+
+/*
  * Allocate node_to_cpumask_map based on number of available nodes
  * Requires node_possible_map to be valid.
  *
@@ -40,6 +78,37 @@ void __init setup_node_to_cpumask_map(void)
 }
 
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
+
+int __cpu_to_node(int cpu)
+{
+	if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
+		printk(KERN_WARNING
+			"cpu_to_node(%d): usage too early!\n", cpu);
+		dump_stack();
+		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+	}
+	return per_cpu(x86_cpu_to_node_map, cpu);
+}
+EXPORT_SYMBOL(__cpu_to_node);
+
+/*
+ * Same function as cpu_to_node() but used if called before the
+ * per_cpu areas are setup.
+ */
+int early_cpu_to_node(int cpu)
+{
+	if (early_per_cpu_ptr(x86_cpu_to_node_map))
+		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+
+	if (!cpu_possible(cpu)) {
+		printk(KERN_WARNING
+			"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
+		dump_stack();
+		return NUMA_NO_NODE;
+	}
+	return per_cpu(x86_cpu_to_node_map, cpu);
+}
+
 /*
  * Returns a pointer to the bitmask of CPUs on Node 'node'.
  */
@@ -62,4 +131,5 @@ const struct cpumask *cpumask_of_node(int node)
 	return node_to_cpumask_map[node];
 }
 EXPORT_SYMBOL(cpumask_of_node);
-#endif
+
+#endif	/* CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 989f23b..ce33f69 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -31,12 +31,6 @@ static unsigned long __initdata nodemap_addr;
 static unsigned long __initdata nodemap_size;
 
 /*
- * Map cpu index to node index
- */
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-
-/*
  * Given a shift value, try to populate memnodemap[]
  * Returns :
  * 1 if OK
@@ -737,34 +731,6 @@ int __cpuinit numa_cpu_node(int cpu)
 	return NUMA_NO_NODE;
 }
 
-void __cpuinit numa_set_node(int cpu, int node)
-{
-	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
-
-	/* early setting, no percpu area yet */
-	if (cpu_to_node_map) {
-		cpu_to_node_map[cpu] = node;
-		return;
-	}
-
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-	if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
-		printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
-		dump_stack();
-		return;
-	}
-#endif
-	per_cpu(x86_cpu_to_node_map, cpu) = node;
-
-	if (node != NUMA_NO_NODE)
-		set_cpu_numa_node(cpu, node);
-}
-
-void __cpuinit numa_clear_node(int cpu)
-{
-	numa_set_node(cpu, NUMA_NO_NODE);
-}
-
 #ifndef CONFIG_DEBUG_PER_CPU_MAPS
 
 void __cpuinit numa_add_cpu(int cpu)
@@ -814,37 +780,6 @@ void __cpuinit numa_remove_cpu(int cpu)
 {
 	numa_set_cpumask(cpu, 0);
 }
-
-int __cpu_to_node(int cpu)
-{
-	if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
-		printk(KERN_WARNING
-			"cpu_to_node(%d): usage too early!\n", cpu);
-		dump_stack();
-		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
-	}
-	return per_cpu(x86_cpu_to_node_map, cpu);
-}
-EXPORT_SYMBOL(__cpu_to_node);
-
-/*
- * Same function as cpu_to_node() but used if called before the
- * per_cpu areas are setup.
- */
-int early_cpu_to_node(int cpu)
-{
-	if (early_per_cpu_ptr(x86_cpu_to_node_map))
-		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
-
-	if (!cpu_possible(cpu)) {
-		printk(KERN_WARNING
-			"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
-		dump_stack();
-		return NUMA_NO_NODE;
-	}
-	return per_cpu(x86_cpu_to_node_map, cpu);
-}
-
 /*
  * --------- end of debug versions of the numa functions ---------
  */
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* [PATCH 15/16] x86: Unify node_to_cpumask_map handling between 32 and 64bit
  2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
                   ` (13 preceding siblings ...)
  2010-12-28 11:48 ` [PATCH 14/16] x86: Unify CPU -> " Tejun Heo
@ 2010-12-28 11:48 ` Tejun Heo
  2010-12-28 20:43   ` David Rientjes
  2010-12-28 11:48 ` [PATCH 16/16] x86: Unify NUMA initialization " Tejun Heo
  15 siblings, 1 reply; 37+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
  Cc: Tejun Heo
x86_32 has been managing node_to_cpumask_map explicitly from
map_cpu_to_node() and friends in a rather ugly way.  With previous
changes, it's now possible to simply share the code with 64bit.
* numa_add/remove_cpu() are moved from numa_64 to numa.
* identify_cpu() now calls numa_add_cpu() for 32bit too.  This makes
  the explicit mask management from map_cpu_to_node() unnecessary.
* The whole x86_32 specific map_cpu_to_node() chunk is no longer
  necessary.  Dropped.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
---
 arch/x86/include/asm/numa.h    |   18 +++++++++++++
 arch/x86/include/asm/numa_32.h |    1 -
 arch/x86/include/asm/numa_64.h |    4 ---
 arch/x86/kernel/cpu/common.c   |    2 +-
 arch/x86/kernel/smpboot.c      |   47 ----------------------------------
 arch/x86/mm/numa.c             |   33 ++++++++++++++++++++++++
 arch/x86/mm/numa_64.c          |   55 ----------------------------------------
 7 files changed, 52 insertions(+), 108 deletions(-)
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 2b21fff..46f3e0d 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_X86_NUMA_H
 #define _ASM_X86_NUMA_H
 
+#include <asm/topology.h>
 #include <asm/apicdef.h>
 
 #ifdef CONFIG_NUMA
@@ -33,9 +34,26 @@ static inline void set_apicid_to_node(int apicid, s16 node)
 #ifdef CONFIG_NUMA
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
+
+# ifdef CONFIG_DEBUG_PER_CPU_MAPS
+extern void __cpuinit numa_add_cpu(int cpu);
+extern void __cpuinit numa_remove_cpu(int cpu);
+# else	/* CONFIG_DEBUG_PER_CPU_MAPS */
+static inline void __cpuinit numa_add_cpu(int cpu)
+{
+	cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+static inline void __cpuinit numa_remove_cpu(int cpu)
+{
+	cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+# endif	/* CONFIG_DEBUG_PER_CPU_MAPS */
+
 #else	/* CONFIG_NUMA */
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
+static inline void numa_add_cpu(int cpu)		{ }
+static inline void numa_remove_cpu(int cpu)		{ }
 #endif	/* CONFIG_NUMA */
 
 #endif	/* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index d30eb6c..ba0ea62 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_NUMA_32_H
 
 extern int pxm_to_nid(int pxm);
-extern void numa_remove_cpu(int cpu);
 
 #ifdef CONFIG_NUMA
 extern int __cpuinit numa_cpu_node(int apicid);
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 96691a0..6331190 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -30,8 +30,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 
 extern void __init init_cpu_to_node(void);
 extern int __cpuinit numa_cpu_node(int cpu);
-extern void __cpuinit numa_add_cpu(int cpu);
-extern void __cpuinit numa_remove_cpu(int cpu);
 
 #ifdef CONFIG_NUMA_EMU
 #define FAKE_NODE_MIN_SIZE	((u64)64 << 20)
@@ -40,8 +38,6 @@ extern void __cpuinit numa_remove_cpu(int cpu);
 #else
 static inline void init_cpu_to_node(void)		{ }
 static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
-static inline void numa_add_cpu(int cpu, int node)	{ }
-static inline void numa_remove_cpu(int cpu)		{ }
 #endif
 
 #endif /* _ASM_X86_NUMA_64_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b68bda..0e9a1d7 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -869,7 +869,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 
 	select_idle_routine(c);
 
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
 	numa_add_cpu(smp_processor_id());
 #endif
 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index b78ce8c..b152077 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -132,49 +132,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 
 atomic_t init_deasserted;
 
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
-/* set up a mapping between cpu and node. */
-static void map_cpu_to_node(int cpu, int node)
-{
-	printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
-	cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
-}
-
-/* undo a mapping between cpu and node. */
-static void unmap_cpu_to_node(int cpu)
-{
-	int node;
-
-	printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
-	for (node = 0; node < MAX_NUMNODES; node++)
-		cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
-}
-#else /* !(CONFIG_NUMA && CONFIG_X86_32) */
-#define map_cpu_to_node(cpu, node)	({})
-#define unmap_cpu_to_node(cpu)	({})
-#endif
-
-#ifdef CONFIG_X86_32
-static void map_cpu_to_logical_apicid(void)
-{
-	int cpu = smp_processor_id();
-	int node;
-
-	node = numa_cpu_node(cpu);
-	if (!node_online(node))
-		node = first_online_node;
-
-	map_cpu_to_node(cpu, node);
-}
-
-void numa_remove_cpu(int cpu)
-{
-	unmap_cpu_to_node(cpu);
-}
-#else
-#define map_cpu_to_logical_apicid()  do {} while (0)
-#endif
-
 /*
  * Report back to the Boot Processor.
  * Running on AP.
@@ -242,7 +199,6 @@ static void __cpuinit smp_callin(void)
 		apic->smp_callin_clear_local_apic();
 	setup_local_APIC();
 	end_local_APIC_setup();
-	map_cpu_to_logical_apicid();
 
 	/*
 	 * Need to setup vector mappings before we enable interrupts.
@@ -946,7 +902,6 @@ static __init void disable_smp(void)
 		physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
 	else
 		physid_set_mask_of_physid(0, &phys_cpu_present_map);
-	map_cpu_to_logical_apicid();
 	cpumask_set_cpu(0, cpu_sibling_mask(0));
 	cpumask_set_cpu(0, cpu_core_mask(0));
 }
@@ -1125,8 +1080,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 
 	end_local_APIC_setup();
 
-	map_cpu_to_logical_apicid();
-
 	if (apic->setup_portio_remap)
 		apic->setup_portio_remap();
 
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index b23b5fe..c52c267 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -109,6 +109,39 @@ int early_cpu_to_node(int cpu)
 	return per_cpu(x86_cpu_to_node_map, cpu);
 }
 
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+	int node = early_cpu_to_node(cpu);
+	struct cpumask *mask;
+	char buf[64];
+
+	mask = node_to_cpumask_map[node];
+	if (mask == NULL) {
+		printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node);
+		dump_stack();
+		return;
+	}
+
+	if (enable)
+		cpumask_set_cpu(cpu, mask);
+	else
+		cpumask_clear_cpu(cpu, mask);
+
+	cpulist_scnprintf(buf, sizeof(buf), mask);
+	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+		enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
+}
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+	numa_set_cpumask(cpu, 1);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+	numa_set_cpumask(cpu, 0);
+}
+
 /*
  * Returns a pointer to the bitmask of CPUs on Node 'node'.
  */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index ce33f69..6013aca 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -730,58 +730,3 @@ int __cpuinit numa_cpu_node(int cpu)
 		return __apicid_to_node[apicid];
 	return NUMA_NO_NODE;
 }
-
-#ifndef CONFIG_DEBUG_PER_CPU_MAPS
-
-void __cpuinit numa_add_cpu(int cpu)
-{
-	cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-
-void __cpuinit numa_remove_cpu(int cpu)
-{
-	cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-
-#else /* CONFIG_DEBUG_PER_CPU_MAPS */
-
-/*
- * --------- debug versions of the numa functions ---------
- */
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
-{
-	int node = early_cpu_to_node(cpu);
-	struct cpumask *mask;
-	char buf[64];
-
-	mask = node_to_cpumask_map[node];
-	if (mask == NULL) {
-		printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node);
-		dump_stack();
-		return;
-	}
-
-	if (enable)
-		cpumask_set_cpu(cpu, mask);
-	else
-		cpumask_clear_cpu(cpu, mask);
-
-	cpulist_scnprintf(buf, sizeof(buf), mask);
-	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
-		enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
-}
-
-void __cpuinit numa_add_cpu(int cpu)
-{
-	numa_set_cpumask(cpu, 1);
-}
-
-void __cpuinit numa_remove_cpu(int cpu)
-{
-	numa_set_cpumask(cpu, 0);
-}
-/*
- * --------- end of debug versions of the numa functions ---------
- */
-
-#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* [PATCH 16/16] x86: Unify NUMA initialization between 32 and 64bit
  2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
                   ` (14 preceding siblings ...)
  2010-12-28 11:48 ` [PATCH 15/16] x86: Unify node_to_cpumask_map handling " Tejun Heo
@ 2010-12-28 11:48 ` Tejun Heo
  15 siblings, 0 replies; 37+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
  Cc: Tejun Heo
Now that everything else is unified, NUMA initialization can be
unified too.
* numa_init_array() and init_cpu_to_node() are moved from numa_64 to
  numa.
* numa_32::initmem_init() is updated to call numa_init_array() and
  setup_arch() to call init_cpu_to_node() on 32bit too.
* x86_cpu_to_node_map is now initialized to NUMA_NO_NODE on 32bit too.
  This is safe now as numa_init_array() will initialize it early
  during boot.
This makes NUMA mapping fully initialized before setup_per_cpu_areas()
on 32bit too and thus makes the first percpu chunk which contains all
the static variables and some of dynamic area allocated with NUMA
affinity correctly considered.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
---
 arch/x86/include/asm/numa.h    |    4 ++
 arch/x86/include/asm/numa_64.h |    3 --
 arch/x86/kernel/setup.c        |    2 -
 arch/x86/mm/numa.c             |   76 +++++++++++++++++++++++++++++++++++++--
 arch/x86/mm/numa_32.c          |    1 +
 arch/x86/mm/numa_64.c          |   75 ---------------------------------------
 6 files changed, 77 insertions(+), 84 deletions(-)
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 46f3e0d..927e12f 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -34,6 +34,8 @@ static inline void set_apicid_to_node(int apicid, s16 node)
 #ifdef CONFIG_NUMA
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
+extern void __init numa_init_array(void);
+extern void __init init_cpu_to_node(void);
 
 # ifdef CONFIG_DEBUG_PER_CPU_MAPS
 extern void __cpuinit numa_add_cpu(int cpu);
@@ -52,6 +54,8 @@ static inline void __cpuinit numa_remove_cpu(int cpu)
 #else	/* CONFIG_NUMA */
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
+static inline void numa_init_array(void)		{ }
+static inline void init_cpu_to_node(void)		{ }
 static inline void numa_add_cpu(int cpu)		{ }
 static inline void numa_remove_cpu(int cpu)		{ }
 #endif	/* CONFIG_NUMA */
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 6331190..db3baa2 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -13,7 +13,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 
 #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
 
-extern void numa_init_array(void);
 extern int numa_off;
 
 extern unsigned long numa_free_all_bootmem(void);
@@ -28,7 +27,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
  */
 #define NODE_MIN_SIZE (4*1024*1024)
 
-extern void __init init_cpu_to_node(void);
 extern int __cpuinit numa_cpu_node(int cpu);
 
 #ifdef CONFIG_NUMA_EMU
@@ -36,7 +34,6 @@ extern int __cpuinit numa_cpu_node(int cpu);
 #define FAKE_NODE_MIN_HASH_MASK	(~(FAKE_NODE_MIN_SIZE - 1UL))
 #endif /* CONFIG_NUMA_EMU */
 #else
-static inline void init_cpu_to_node(void)		{ }
 static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
 #endif
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0afb8c7..9d7cd90 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1030,9 +1030,7 @@ void __init setup_arch(char **cmdline_p)
 
 	prefill_possible_map();
 
-#ifdef CONFIG_X86_64
 	init_cpu_to_node();
-#endif
 
 	init_apic_mappings();
 	ioapic_and_gsi_init();
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index c52c267..dd93a41 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -16,11 +16,7 @@ EXPORT_SYMBOL(node_to_cpumask_map);
 /*
  * Map cpu index to node index
  */
-#ifdef CONFIG_X86_32
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, 0);
-#else
 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
-#endif
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
 
 void __cpuinit numa_set_node(int cpu, int node)
@@ -77,6 +73,78 @@ void __init setup_node_to_cpumask_map(void)
 	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
 }
 
+/*
+ * There are unfortunately some poorly designed mainboards around that
+ * only connect memory to a single CPU. This breaks the 1:1 cpu->node
+ * mapping. To avoid this fill in the mapping for all possible CPUs,
+ * as the number of CPUs is not known yet. We round robin the existing
+ * nodes.
+ */
+void __init numa_init_array(void)
+{
+	int rr, i;
+
+	rr = first_node(node_online_map);
+	for (i = 0; i < nr_cpu_ids; i++) {
+		if (early_cpu_to_node(i) != NUMA_NO_NODE)
+			continue;
+		numa_set_node(i, rr);
+		rr = next_node(rr, node_online_map);
+		if (rr == MAX_NUMNODES)
+			rr = first_node(node_online_map);
+	}
+}
+
+static __init int find_near_online_node(int node)
+{
+	int n, val;
+	int min_val = INT_MAX;
+	int best_node = -1;
+
+	for_each_online_node(n) {
+		val = node_distance(node, n);
+
+		if (val < min_val) {
+			min_val = val;
+			best_node = n;
+		}
+	}
+
+	return best_node;
+}
+
+/*
+ * Setup early cpu_to_node.
+ *
+ * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
+ * and apicid_to_node[] tables have valid entries for a CPU.
+ * This means we skip cpu_to_node[] initialisation for NUMA
+ * emulation and faking node case (when running a kernel compiled
+ * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
+ * is already initialized in a round robin manner at numa_init_array,
+ * prior to this call, and this initialization is good enough
+ * for the fake NUMA cases.
+ *
+ * Called before the per_cpu areas are setup.
+ */
+void __init init_cpu_to_node(void)
+{
+	int cpu;
+	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+
+	BUG_ON(cpu_to_apicid == NULL);
+
+	for_each_possible_cpu(cpu) {
+		int node = numa_cpu_node(cpu);
+
+		if (node == NUMA_NO_NODE)
+			continue;
+		if (!node_online(node))
+			node = find_near_online_node(node);
+		numa_set_node(cpu, node);
+	}
+}
+
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 
 int __cpu_to_node(int cpu)
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 8d91d22..505bb04 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -367,6 +367,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
 	 */
 
 	get_memcfg_numa();
+	numa_init_array();
 
 	kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
 
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 6013aca..b38700a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -225,28 +225,6 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 	node_set_online(nodeid);
 }
 
-/*
- * There are unfortunately some poorly designed mainboards around that
- * only connect memory to a single CPU. This breaks the 1:1 cpu->node
- * mapping. To avoid this fill in the mapping for all possible CPUs,
- * as the number of CPUs is not known yet. We round robin the existing
- * nodes.
- */
-void __init numa_init_array(void)
-{
-	int rr, i;
-
-	rr = first_node(node_online_map);
-	for (i = 0; i < nr_cpu_ids; i++) {
-		if (early_cpu_to_node(i) != NUMA_NO_NODE)
-			continue;
-		numa_set_node(i, rr);
-		rr = next_node(rr, node_online_map);
-		if (rr == MAX_NUMNODES)
-			rr = first_node(node_online_map);
-	}
-}
-
 #ifdef CONFIG_NUMA_EMU
 /* Numa emulation */
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
@@ -669,59 +647,6 @@ static __init int numa_setup(char *opt)
 }
 early_param("numa", numa_setup);
 
-#ifdef CONFIG_NUMA
-
-static __init int find_near_online_node(int node)
-{
-	int n, val;
-	int min_val = INT_MAX;
-	int best_node = -1;
-
-	for_each_online_node(n) {
-		val = node_distance(node, n);
-
-		if (val < min_val) {
-			min_val = val;
-			best_node = n;
-		}
-	}
-
-	return best_node;
-}
-
-/*
- * Setup early cpu_to_node.
- *
- * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
- * and apicid_to_node[] tables have valid entries for a CPU.
- * This means we skip cpu_to_node[] initialisation for NUMA
- * emulation and faking node case (when running a kernel compiled
- * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
- * is already initialized in a round robin manner at numa_init_array,
- * prior to this call, and this initialization is good enough
- * for the fake NUMA cases.
- *
- * Called before the per_cpu areas are setup.
- */
-void __init init_cpu_to_node(void)
-{
-	int cpu;
-	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-
-	BUG_ON(cpu_to_apicid == NULL);
-
-	for_each_possible_cpu(cpu) {
-		int node = numa_cpu_node(cpu);
-
-		if (node == NUMA_NO_NODE)
-			continue;
-		if (!node_online(node))
-			node = find_near_online_node(node);
-		numa_set_node(cpu, node);
-	}
-}
-#endif
-
 int __cpuinit numa_cpu_node(int cpu)
 {
 	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-28 11:48 ` [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit Tejun Heo
@ 2010-12-28 20:35   ` David Rientjes
  2010-12-29 10:52     ` Tejun Heo
  0 siblings, 1 reply; 37+ messages in thread
From: David Rientjes @ 2010-12-28 20:35 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, Ingo Molnar, tglx, H. Peter Anvin, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On Tue, 28 Dec 2010, Tejun Heo wrote:
> diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
> index a35cb9d..1af9c6e 100644
> --- a/arch/x86/mm/srat_64.c
> +++ b/arch/x86/mm/srat_64.c
> @@ -79,7 +79,7 @@ static __init void bad_srat(void)
>  	printk(KERN_ERR "SRAT: SRAT not used.\n");
>  	acpi_numa = -1;
>  	for (i = 0; i < MAX_LOCAL_APIC; i++)
> -		apicid_to_node[i] = NUMA_NO_NODE;
> +		set_apicid_to_node(i, NUMA_NO_NODE);
>  	for (i = 0; i < MAX_NUMNODES; i++) {
>  		nodes[i].start = nodes[i].end = 0;
>  		nodes_add[i].start = nodes_add[i].end = 0;
> @@ -134,7 +134,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
>  	}
>  
>  	apic_id = pa->apic_id;
> -	apicid_to_node[apic_id] = node;
> +	set_apicid_to_node(apic_id, node);
>  	node_set(node, cpu_nodes_parsed);
>  	acpi_numa = 1;
>  	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
> @@ -168,7 +168,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
>  		apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
>  	else
>  		apic_id = pa->apic_id;
> -	apicid_to_node[apic_id] = node;
> +	set_apicid_to_node(apic_id, node);
>  	node_set(node, cpu_nodes_parsed);
>  	acpi_numa = 1;
>  	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
> @@ -512,13 +512,13 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
>  		 * node, it must now point to the fake node ID.
>  		 */
>  		for (j = 0; j < MAX_LOCAL_APIC; j++)
> -			if (apicid_to_node[j] == nid &&
> +			if (__apicid_to_node[j] == nid &&
>  			    fake_apicid_to_node[j] == NUMA_NO_NODE)
>  				fake_apicid_to_node[j] = i;
>  	}
>  	for (i = 0; i < num_nodes; i++)
>  		__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
> -	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
> +	memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
>  
>  	nodes_clear(nodes_parsed);
>  	for (i = 0; i < num_nodes; i++)
This is going to conflict with a387e95a ("") in x86/numa, so you'll need 
the following hunk for acpi_fake_nodes().  I'm not sure why this patchset 
is being based on x86/apic-cleanup rather than x86/numa?
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -511,7 +511,7 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes
                 * node, it must now point to the fake node ID.
                 */
                for (j = 0; j < MAX_LOCAL_APIC; j++)
-                       if (apicid_to_node[j] == nid &&
+                       if (__apicid_to_node[j] == nid &&
                            fake_apicid_to_node[j] == NUMA_NO_NODE)
                                fake_apicid_to_node[j] = i;
        }
@@ -522,13 +522,13 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nod
         * value.
         */
        for (i = 0; i < MAX_LOCAL_APIC; i++)
-               if (apicid_to_node[i] != NUMA_NO_NODE &&
+               if (__apicid_to_node[i] != NUMA_NO_NODE &&
                    fake_apicid_to_node[i] == NUMA_NO_NODE)
                        fake_apicid_to_node[i] = 0;
 
        for (i = 0; i < num_nodes; i++)
                __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
-       memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+       memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
 
        nodes_clear(nodes_parsed);
        for (i = 0; i < num_nodes; i++)
^ permalink raw reply	[flat|nested] 37+ messages in thread
* Re: [PATCH 14/16] x86: Unify CPU -> NUMA node mapping between 32 and 64bit
  2010-12-28 11:48 ` [PATCH 14/16] x86: Unify CPU -> " Tejun Heo
@ 2010-12-28 20:39   ` David Rientjes
  0 siblings, 0 replies; 37+ messages in thread
From: David Rientjes @ 2010-12-28 20:39 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, Ingo Molnar, tglx, H. Peter Anvin, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On Tue, 28 Dec 2010, Tejun Heo wrote:
> diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
> index 989f23b..ce33f69 100644
> --- a/arch/x86/mm/numa_64.c
> +++ b/arch/x86/mm/numa_64.c
> @@ -31,12 +31,6 @@ static unsigned long __initdata nodemap_addr;
>  static unsigned long __initdata nodemap_size;
>  
>  /*
> - * Map cpu index to node index
> - */
> -DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
> -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
> -
> -/*
>   * Given a shift value, try to populate memnodemap[]
>   * Returns :
>   * 1 if OK
> @@ -737,34 +731,6 @@ int __cpuinit numa_cpu_node(int cpu)
>  	return NUMA_NO_NODE;
>  }
>  
> -void __cpuinit numa_set_node(int cpu, int node)
> -{
> -	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
> -
> -	/* early setting, no percpu area yet */
> -	if (cpu_to_node_map) {
> -		cpu_to_node_map[cpu] = node;
> -		return;
> -	}
> -
> -#ifdef CONFIG_DEBUG_PER_CPU_MAPS
> -	if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
> -		printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
> -		dump_stack();
> -		return;
> -	}
> -#endif
> -	per_cpu(x86_cpu_to_node_map, cpu) = node;
> -
> -	if (node != NUMA_NO_NODE)
> -		set_cpu_numa_node(cpu, node);
> -}
> -
> -void __cpuinit numa_clear_node(int cpu)
> -{
> -	numa_set_node(cpu, NUMA_NO_NODE);
> -}
> -
>  #ifndef CONFIG_DEBUG_PER_CPU_MAPS
>  
>  void __cpuinit numa_add_cpu(int cpu)
This is also going to conflict with c1c3443c ("x86, numa: Fake 
node-to-cpumask for NUMA emulation") in x86/numa, but it's trival to merge 
because there's no actual implementation change.
^ permalink raw reply	[flat|nested] 37+ messages in thread
* Re: [PATCH 15/16] x86: Unify node_to_cpumask_map handling between 32 and 64bit
  2010-12-28 11:48 ` [PATCH 15/16] x86: Unify node_to_cpumask_map handling " Tejun Heo
@ 2010-12-28 20:43   ` David Rientjes
  2010-12-30 12:48     ` Tejun Heo
  0 siblings, 1 reply; 37+ messages in thread
From: David Rientjes @ 2010-12-28 20:43 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, Ingo Molnar, tglx, H. Peter Anvin, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On Tue, 28 Dec 2010, Tejun Heo wrote:
> diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
> index ce33f69..6013aca 100644
> --- a/arch/x86/mm/numa_64.c
> +++ b/arch/x86/mm/numa_64.c
> @@ -730,58 +730,3 @@ int __cpuinit numa_cpu_node(int cpu)
>  		return __apicid_to_node[apicid];
>  	return NUMA_NO_NODE;
>  }
> -
> -#ifndef CONFIG_DEBUG_PER_CPU_MAPS
> -
> -void __cpuinit numa_add_cpu(int cpu)
> -{
> -	cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
> -}
> -
> -void __cpuinit numa_remove_cpu(int cpu)
> -{
> -	cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
> -}
> -
> -#else /* CONFIG_DEBUG_PER_CPU_MAPS */
> -
> -/*
> - * --------- debug versions of the numa functions ---------
> - */
> -static void __cpuinit numa_set_cpumask(int cpu, int enable)
> -{
> -	int node = early_cpu_to_node(cpu);
> -	struct cpumask *mask;
> -	char buf[64];
> -
> -	mask = node_to_cpumask_map[node];
> -	if (mask == NULL) {
> -		printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node);
> -		dump_stack();
> -		return;
> -	}
> -
> -	if (enable)
> -		cpumask_set_cpu(cpu, mask);
> -	else
> -		cpumask_clear_cpu(cpu, mask);
> -
> -	cpulist_scnprintf(buf, sizeof(buf), mask);
> -	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
> -		enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
> -}
> -
> -void __cpuinit numa_add_cpu(int cpu)
> -{
> -	numa_set_cpumask(cpu, 1);
> -}
> -
> -void __cpuinit numa_remove_cpu(int cpu)
> -{
> -	numa_set_cpumask(cpu, 0);
> -}
> -/*
> - * --------- end of debug versions of the numa functions ---------
> - */
> -
> -#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
This has been almost entirely rewritten in x86/numa by c1c3443c ("x86, 
numa: Fake node-to-cpumask for NUMA emulation") and the pending fix 
http://marc.info/?l=linux-kernel&m=129340072128297 ("x86, numa: Fix 
CONFIG_DEBUG_PER_CPU_MAPS without NUMA").  Since this is only moving those 
functions to numa.c, it should be trivial to fix once based on x86/numa.
^ permalink raw reply	[flat|nested] 37+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-28 20:35   ` David Rientjes
@ 2010-12-29 10:52     ` Tejun Heo
  2010-12-29 19:36       ` H. Peter Anvin
  0 siblings, 1 reply; 37+ messages in thread
From: Tejun Heo @ 2010-12-29 10:52 UTC (permalink / raw)
  To: David Rientjes
  Cc: linux-kernel, Ingo Molnar, tglx, H. Peter Anvin, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On Tue, Dec 28, 2010 at 12:35:45PM -0800, David Rientjes wrote:
> This is going to conflict with a387e95a ("") in x86/numa, so you'll need 
> the following hunk for acpi_fake_nodes().  I'm not sure why this patchset 
> is being based on x86/apic-cleanup rather than x86/numa?
Because several patches from the patchset have already been committed
into x86/apic-cleanup.  Thomas, Peter, if this needs to be rebased
somewhere, let me know.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 37+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-29 10:52     ` Tejun Heo
@ 2010-12-29 19:36       ` H. Peter Anvin
  2010-12-29 22:05         ` H. Peter Anvin
  0 siblings, 1 reply; 37+ messages in thread
From: H. Peter Anvin @ 2010-12-29 19:36 UTC (permalink / raw)
  To: Tejun Heo
  Cc: David Rientjes, linux-kernel, Ingo Molnar, tglx, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On 12/29/2010 02:52 AM, Tejun Heo wrote:
> On Tue, Dec 28, 2010 at 12:35:45PM -0800, David Rientjes wrote:
>> This is going to conflict with a387e95a ("") in x86/numa, so you'll need 
>> the following hunk for acpi_fake_nodes().  I'm not sure why this patchset 
>> is being based on x86/apic-cleanup rather than x86/numa?
> 
> Because several patches from the patchset have already been committed
> into x86/apic-cleanup.  Thomas, Peter, if this needs to be rebased
> somewhere, let me know.
> 
x86/numa is already a dependent branch (on x86/amd-nb), so I'm fine
merging x86/apic-cleanup into that branch and then if you could rebase
it on top of the new x86/numa then I'll push it out tomorrow.
OK?
	-hpa
^ permalink raw reply	[flat|nested] 37+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-29 19:36       ` H. Peter Anvin
@ 2010-12-29 22:05         ` H. Peter Anvin
  2010-12-30 11:33           ` Tejun Heo
  0 siblings, 1 reply; 37+ messages in thread
From: H. Peter Anvin @ 2010-12-29 22:05 UTC (permalink / raw)
  To: Tejun Heo
  Cc: David Rientjes, linux-kernel, Ingo Molnar, tglx, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On 12/29/2010 11:36 AM, H. Peter Anvin wrote:
> On 12/29/2010 02:52 AM, Tejun Heo wrote:
>> On Tue, Dec 28, 2010 at 12:35:45PM -0800, David Rientjes wrote:
>>> This is going to conflict with a387e95a ("") in x86/numa, so you'll need 
>>> the following hunk for acpi_fake_nodes().  I'm not sure why this patchset 
>>> is being based on x86/apic-cleanup rather than x86/numa?
>>
>> Because several patches from the patchset have already been committed
>> into x86/apic-cleanup.  Thomas, Peter, if this needs to be rebased
>> somewhere, let me know.
>>
> 
> x86/numa is already a dependent branch (on x86/amd-nb), so I'm fine
> merging x86/apic-cleanup into that branch and then if you could rebase
> it on top of the new x86/numa then I'll push it out tomorrow.
> 
> OK?
> 
I have pushed out this merge onto x86/numa, so waiting for your rebase.
 If you don't have time, then I'll rebase.
	-hpa
^ permalink raw reply	[flat|nested] 37+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-29 22:05         ` H. Peter Anvin
@ 2010-12-30 11:33           ` Tejun Heo
  0 siblings, 0 replies; 37+ messages in thread
From: Tejun Heo @ 2010-12-30 11:33 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: David Rientjes, linux-kernel, Ingo Molnar, tglx, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On Wed, Dec 29, 2010 at 02:05:06PM -0800, H. Peter Anvin wrote:
> On 12/29/2010 11:36 AM, H. Peter Anvin wrote:
> > On 12/29/2010 02:52 AM, Tejun Heo wrote:
> >> On Tue, Dec 28, 2010 at 12:35:45PM -0800, David Rientjes wrote:
> >>> This is going to conflict with a387e95a ("") in x86/numa, so you'll need 
> >>> the following hunk for acpi_fake_nodes().  I'm not sure why this patchset 
> >>> is being based on x86/apic-cleanup rather than x86/numa?
> >>
> >> Because several patches from the patchset have already been committed
> >> into x86/apic-cleanup.  Thomas, Peter, if this needs to be rebased
> >> somewhere, let me know.
> >>
> > 
> > x86/numa is already a dependent branch (on x86/amd-nb), so I'm fine
> > merging x86/apic-cleanup into that branch and then if you could rebase
> > it on top of the new x86/numa then I'll push it out tomorrow.
> > 
> > OK?
> > 
> 
> I have pushed out this merge onto x86/numa, so waiting for your rebase.
>  If you don't have time, then I'll rebase.
Will rebase & repost soon.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 37+ messages in thread
* Re: [PATCH 15/16] x86: Unify node_to_cpumask_map handling between 32 and 64bit
  2010-12-28 20:43   ` David Rientjes
@ 2010-12-30 12:48     ` Tejun Heo
  2010-12-30 14:58       ` Tejun Heo
  2010-12-30 18:43       ` David Rientjes
  0 siblings, 2 replies; 37+ messages in thread
From: Tejun Heo @ 2010-12-30 12:48 UTC (permalink / raw)
  To: David Rientjes
  Cc: linux-kernel, Ingo Molnar, tglx, H. Peter Anvin, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On Tue, Dec 28, 2010 at 12:43:24PM -0800, David Rientjes wrote:
> This has been almost entirely rewritten in x86/numa by c1c3443c ("x86, 
> numa: Fake node-to-cpumask for NUMA emulation") and the pending fix 
> http://marc.info/?l=linux-kernel&m=129340072128297 ("x86, numa: Fix 
> CONFIG_DEBUG_PER_CPU_MAPS without NUMA").  Since this is only moving those 
> functions to numa.c, it should be trivial to fix once based on x86/numa.
Sigh, the new NUMA emulation thing is 64bit only. :-( I'll see if it
can be applied to 32bit too.  BTW, how does this interact with the
Shaohui's patchset.  Isn't that about NUMA emulation too?  Are these
the same patches?
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 37+ messages in thread
* Re: [PATCH 15/16] x86: Unify node_to_cpumask_map handling between 32 and 64bit
  2010-12-30 12:48     ` Tejun Heo
@ 2010-12-30 14:58       ` Tejun Heo
  2010-12-30 18:40         ` David Rientjes
  2010-12-30 18:43       ` David Rientjes
  1 sibling, 1 reply; 37+ messages in thread
From: Tejun Heo @ 2010-12-30 14:58 UTC (permalink / raw)
  To: David Rientjes
  Cc: linux-kernel, Ingo Molnar, tglx, H. Peter Anvin, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
Hello, again.
On Thu, Dec 30, 2010 at 01:48:00PM +0100, Tejun Heo wrote:
> On Tue, Dec 28, 2010 at 12:43:24PM -0800, David Rientjes wrote:
> > This has been almost entirely rewritten in x86/numa by c1c3443c ("x86, 
> > numa: Fake node-to-cpumask for NUMA emulation") and the pending fix 
> > http://marc.info/?l=linux-kernel&m=129340072128297 ("x86, numa: Fix 
> > CONFIG_DEBUG_PER_CPU_MAPS without NUMA").  Since this is only moving those 
> > functions to numa.c, it should be trivial to fix once based on x86/numa.
> 
> Sigh, the new NUMA emulation thing is 64bit only. :-( I'll see if it
> can be applied to 32bit too.  BTW, how does this interact with the
> Shaohui's patchset.  Isn't that about NUMA emulation too?  Are these
> the same patches?
Okay, after going through the changes, here are some thoughts FWIW.
I'm doubtful the direction was the correct one.  NUMA emulation,
almost by definition, doesn't vary too much depending on the
underlying hardware and how it's configured.  All that's necessary is
the physical node map an apicid to nid mapping, which can and probably
should be represented in common form regardless of emulation and the
emulation code should simply manipulate the parsed information.
The original implementation was 64bit apic specific.  If it needed to
be extended to cover amd topology, the right way would be updating the
apic and amd numa code so that they generate the information in a
uniform way and NUMA emulation would follow naturally.  Instead, it
looks like what happened was simply adding amd specific code with more
gluing.  The unnecessary differences between acpi and amd paths sure
aren't your fault but I really don't think we should be moving toward
that direction when things can (and definitely should) be unified with
a bit more effort.  NUMA code already is unnecessary hairy with
32/64bit and different subarchs somewhat unified but subtly differing
in many places without clear indications.
That said, Shaohui is already working on unifying the emulation code,
so I'll just do another quick dirty #ifdef'ing for the affected part.
Thank you.
-- 
tejun
^ permalink raw reply	[flat|nested] 37+ messages in thread
* [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
@ 2010-12-30 17:49 ` Tejun Heo
  0 siblings, 0 replies; 37+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
  Cc: Tejun Heo
The mapping between cpu/apicid and node is done via apicid_to_node[]
on 64bit and apicid_2_node[] + apic->x86_32_numa_cpu_node() on 32bit.
This difference makes it difficult to further unify 32 and 64bit NUMA
handling.
This patch unifies it by replacing both apicid_to_node[] and
apicid_2_node[] with __apicid_to_node[] array, which is accessed by
two accessors - set_apicid_to_node() and numa_cpu_node().  On 64bit,
numa_cpu_node() always consults __apicid_to_node[] directly while
32bit goes through apic->numa_cpu_node() method to allow apic
implementations to override it.
srat_detect_node() for amd cpus contains workaround for broken NUMA
configuration which assumes relationship between APIC ID, HT node ID
and NUMA topology.  Leave it to access __apicid_to_node[] directly as
mapping through CPU might result in undesirable behavior change.  The
comment is reformatted and updated to note the ugliness.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
---
 arch/x86/include/asm/mpspec.h  |    1 -
 arch/x86/include/asm/numa.h    |   28 +++++++++++++++++++++++
 arch/x86/include/asm/numa_32.h |    6 +++++
 arch/x86/include/asm/numa_64.h |    5 +--
 arch/x86/kernel/acpi/boot.c    |    3 +-
 arch/x86/kernel/apic/apic.c    |    2 +-
 arch/x86/kernel/cpu/amd.c      |   47 +++++++++++++++++++++++++--------------
 arch/x86/kernel/cpu/intel.c    |    3 +-
 arch/x86/kernel/smpboot.c      |    6 +----
 arch/x86/mm/amdtopology_64.c   |    4 +-
 arch/x86/mm/numa.c             |    6 ++++-
 arch/x86/mm/numa_32.c          |    6 +++++
 arch/x86/mm/numa_64.c          |   26 +++++++++------------
 arch/x86/mm/srat_32.c          |    2 +-
 arch/x86/mm/srat_64.c          |   12 +++++-----
 15 files changed, 101 insertions(+), 56 deletions(-)
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 018ffc1..ae78732 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -24,7 +24,6 @@ extern int pic_mode;
 #define MAX_IRQ_SOURCES		256
 
 extern unsigned int def_to_bigsmp;
-extern u8 apicid_2_node[];
 
 #ifdef CONFIG_X86_NUMAQ
 extern int mp_bus_id_to_node[MAX_MP_BUSSES];
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 27da400..5e01c76 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,5 +1,33 @@
+#ifndef _ASM_X86_NUMA_H
+#define _ASM_X86_NUMA_H
+
+#include <asm/apicdef.h>
+
+#ifdef CONFIG_NUMA
+/*
+ * __apicid_to_node[] stores the raw mapping between physical apicid and
+ * node and is used to initialize cpu_to_node mapping.
+ *
+ * The mapping may be overridden by apic->numa_cpu_node() on 32bit and thus
+ * should be accessed by the accessors - set_apicid_to_node() and
+ * numa_cpu_node().
+ */
+extern s16 __apicid_to_node[MAX_LOCAL_APIC];
+
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+	__apicid_to_node[apicid] = node;
+}
+#else	/* CONFIG_NUMA */
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+}
+#endif	/* CONFIG_NUMA */
+
 #ifdef CONFIG_X86_32
 # include "numa_32.h"
 #else
 # include "numa_64.h"
 #endif
+
+#endif	/* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index a372290..d30eb6c 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -4,6 +4,12 @@
 extern int pxm_to_nid(int pxm);
 extern void numa_remove_cpu(int cpu);
 
+#ifdef CONFIG_NUMA
+extern int __cpuinit numa_cpu_node(int apicid);
+#else	/* CONFIG_NUMA */
+static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
+#endif	/* CONFIG_NUMA */
+
 #ifdef CONFIG_HIGHMEM
 extern void set_highmem_pages_init(void);
 #else
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 5ae8728..17abf80 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_NUMA_64_H
 
 #include <linux/nodemask.h>
-#include <asm/apicdef.h>
 
 struct bootnode {
 	u64 start;
@@ -17,8 +16,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern s16 apicid_to_node[MAX_LOCAL_APIC];
-
 extern unsigned long numa_free_all_bootmem(void);
 extern void setup_node_bootmem(int nodeid, unsigned long start,
 			       unsigned long end);
@@ -32,6 +29,7 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 #define NODE_MIN_SIZE (4*1024*1024)
 
 extern void __init init_cpu_to_node(void);
+extern int __cpuinit numa_cpu_node(int cpu);
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
 extern void __cpuinit numa_add_cpu(int cpu);
@@ -43,6 +41,7 @@ extern void __cpuinit numa_remove_cpu(int cpu);
 #endif /* CONFIG_NUMA_EMU */
 #else
 static inline void init_cpu_to_node(void)		{ }
+static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
 static inline void numa_add_cpu(int cpu, int node)	{ }
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 1a5b9a8..de3308b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -583,11 +583,10 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 	nid = acpi_get_node(handle);
 	if (nid == -1 || !node_online(nid))
 		return;
+	set_apicid_to_node(physid, nid);
 #ifdef CONFIG_X86_64
-	apicid_to_node[physid] = nid;
 	numa_set_node(cpu, nid);
 #else /* CONFIG_X86_32 */
-	apicid_2_node[physid] = nid;
 	cpu_to_node_map[cpu] = nid;
 #endif
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 5e097e5..4f2f210 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2029,7 +2029,7 @@ int default_x86_32_numa_cpu_node(int cpu)
 	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
 
 	if (apicid != BAD_APICID)
-		return apicid_2_node[apicid];
+		return __apicid_to_node[apicid];
 	return NUMA_NO_NODE;
 #else
 	return 0;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9e093f8..d8e81e5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -234,17 +234,21 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
 #endif
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+/*
+ * To workaround broken NUMA config.  Read the comment in
+ * srat_detect_node().
+ */
 static int __cpuinit nearby_node(int apicid)
 {
 	int i, node;
 
 	for (i = apicid - 1; i >= 0; i--) {
-		node = apicid_to_node[i];
+		node = __apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
 	for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
-		node = apicid_to_node[i];
+		node = __apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
@@ -339,26 +343,35 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 	int node;
 	unsigned apicid = c->apicid;
 
-	node = per_cpu(cpu_llc_id, cpu);
+	node = numa_cpu_node(cpu);
+	if (node == NUMA_NO_NODE)
+		node = per_cpu(cpu_llc_id, cpu);
 
-	if (apicid_to_node[apicid] != NUMA_NO_NODE)
-		node = apicid_to_node[apicid];
 	if (!node_online(node)) {
-		/* Two possibilities here:
-		   - The CPU is missing memory and no node was created.
-		   In that case try picking one from a nearby CPU
-		   - The APIC IDs differ from the HyperTransport node IDs
-		   which the K8 northbridge parsing fills in.
-		   Assume they are all increased by a constant offset,
-		   but in the same order as the HT nodeids.
-		   If that doesn't result in a usable node fall back to the
-		   path for the previous case.  */
-
+		/*
+		 * Two possibilities here:
+		 *
+		 * - The CPU is missing memory and no node was created.  In
+		 *   that case try picking one from a nearby CPU.
+		 *
+		 * - The APIC IDs differ from the HyperTransport node IDs
+		 *   which the K8 northbridge parsing fills in.  Assume
+		 *   they are all increased by a constant offset, but in
+		 *   the same order as the HT nodeids.  If that doesn't
+		 *   result in a usable node fall back to the path for the
+		 *   previous case.
+		 *
+		 * This workaround operates directly on the mapping between
+		 * APIC ID and NUMA node, assuming certain relationship
+		 * between APIC ID, HT node ID and NUMA topology.  As going
+		 * through CPU mapping may alter the outcome, directly
+		 * access __apicid_to_node[].
+		 */
 		int ht_nodeid = c->initial_apicid;
 
 		if (ht_nodeid >= 0 &&
-		    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
-			node = apicid_to_node[ht_nodeid];
+		    __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+			node = __apicid_to_node[ht_nodeid];
 		/* Pick a nearby node */
 		if (!node_online(node))
 			node = nearby_node(apicid);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d16c2c5..6052004 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -279,11 +279,10 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 	unsigned node;
 	int cpu = smp_processor_id();
-	int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
 
 	/* Don't do the funky fallback heuristics the AMD version employs
 	   for now. */
-	node = apicid_to_node[apicid];
+	node = numa_cpu_node(cpu);
 	if (node == NUMA_NO_NODE || !node_online(node)) {
 		/* reuse the value from init_cpu_to_node() */
 		node = cpu_to_node(cpu);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 031d2e1..4b8b72d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -71,10 +71,6 @@
 #include <asm/smpboot_hooks.h>
 #include <asm/i8259.h>
 
-#ifdef CONFIG_X86_32
-u8 apicid_2_node[MAX_LOCAL_APIC];
-#endif
-
 /* State of each CPU */
 DEFINE_PER_CPU(int, cpu_state) = { 0 };
 
@@ -170,7 +166,7 @@ static void map_cpu_to_logical_apicid(void)
 	int cpu = smp_processor_id();
 	int node;
 
-	node = apic->x86_32_numa_cpu_node(cpu);
+	node = numa_cpu_node(cpu);
 	if (!node_online(node))
 		node = first_online_node;
 
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index f21962c..c7fae38 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -247,7 +247,7 @@ void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
 		__acpi_map_pxm_to_node(nid, i);
 #endif
 	}
-	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+	memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
 }
 #endif /* CONFIG_NUMA_EMU */
 
@@ -285,7 +285,7 @@ int __init amd_scan_nodes(void)
 				nodes[i].start >> PAGE_SHIFT,
 				nodes[i].end >> PAGE_SHIFT);
 		for (j = apicid_base; j < cores + apicid_base; j++)
-			apicid_to_node[(i << bits) + j] = i;
+			set_apicid_to_node((i << bits) + j, i);
 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 	}
 
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 787c52c..63db99c 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -4,8 +4,12 @@
 #include <linux/bootmem.h>
 
 /*
- * Which logical CPUs are on which nodes
+ * apicid, cpu, node mappings
  */
+s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
 cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 EXPORT_SYMBOL(node_to_cpumask_map);
 
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 84a3e4c..8d91d22 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -110,6 +110,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
 static unsigned long kva_start_pfn;
 static unsigned long kva_pages;
+
+int __cpuinit numa_cpu_node(int cpu)
+{
+	return apic->x86_32_numa_cpu_node(cpu);
+}
+
 /*
  * FLAT - support for basic PC memory model with discontig enabled, essentially
  *        a single node with all available processors in it with a flat
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 3d73201..e32b405 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -26,10 +26,6 @@ EXPORT_SYMBOL(node_data);
 
 struct memnode memnode;
 
-s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
-	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-
 int numa_off __initdata;
 static unsigned long __initdata nodemap_addr;
 static unsigned long __initdata nodemap_size;
@@ -730,12 +726,8 @@ void __init init_cpu_to_node(void)
 	BUG_ON(cpu_to_apicid == NULL);
 
 	for_each_possible_cpu(cpu) {
-		int node;
-		u16 apicid = cpu_to_apicid[cpu];
+		int node = numa_cpu_node(cpu);
 
-		if (apicid == BAD_APICID)
-			continue;
-		node = apicid_to_node[apicid];
 		if (node == NUMA_NO_NODE)
 			continue;
 		if (!node_online(node))
@@ -745,6 +737,14 @@ void __init init_cpu_to_node(void)
 }
 #endif
 
+int __cpuinit numa_cpu_node(int cpu)
+{
+	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+	if (apicid != BAD_APICID)
+		return __apicid_to_node[apicid];
+	return NUMA_NO_NODE;
+}
 
 void __cpuinit numa_set_node(int cpu, int node)
 {
@@ -790,13 +790,9 @@ void __cpuinit numa_remove_cpu(int cpu)
 void __cpuinit numa_add_cpu(int cpu)
 {
 	unsigned long addr;
-	u16 apicid;
-	int physnid;
-	int nid = NUMA_NO_NODE;
+	int physnid, nid;
 
-	apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-	if (apicid != BAD_APICID)
-		nid = apicid_to_node[apicid];
+	nid = numa_cpu_node(cpu);
 	if (nid == NUMA_NO_NODE)
 		nid = early_cpu_to_node(cpu);
 	BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index e55e748..7fcae55 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -255,7 +255,7 @@ int __init get_memcfg_from_srat(void)
 			 num_memory_chunks);
 
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
+		set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
 
 	for (j = 0; j < num_memory_chunks; j++){
 		struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index a756bcf..8b3a81e 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -79,7 +79,7 @@ static __init void bad_srat(void)
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		apicid_to_node[i] = NUMA_NO_NODE;
+		set_apicid_to_node(i, NUMA_NO_NODE);
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		nodes[i].start = nodes[i].end = 0;
 		nodes_add[i].start = nodes_add[i].end = 0;
@@ -134,7 +134,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 	}
 
 	apic_id = pa->apic_id;
-	apicid_to_node[apic_id] = node;
+	set_apicid_to_node(apic_id, node);
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
@@ -168,7 +168,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 		apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
 	else
 		apic_id = pa->apic_id;
-	apicid_to_node[apic_id] = node;
+	set_apicid_to_node(apic_id, node);
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
@@ -511,7 +511,7 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 		 * node, it must now point to the fake node ID.
 		 */
 		for (j = 0; j < MAX_LOCAL_APIC; j++)
-			if (apicid_to_node[j] == nid &&
+			if (__apicid_to_node[j] == nid &&
 			    fake_apicid_to_node[j] == NUMA_NO_NODE)
 				fake_apicid_to_node[j] = i;
 	}
@@ -522,13 +522,13 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 	 * value.
 	 */
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		if (apicid_to_node[i] != NUMA_NO_NODE &&
+		if (__apicid_to_node[i] != NUMA_NO_NODE &&
 		    fake_apicid_to_node[i] == NUMA_NO_NODE)
 			fake_apicid_to_node[i] = 0;
 
 	for (i = 0; i < num_nodes; i++)
 		__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
-	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+	memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
 
 	nodes_clear(nodes_parsed);
 	for (i = 0; i < num_nodes; i++)
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
* Re: [PATCH 15/16] x86: Unify node_to_cpumask_map handling between 32 and 64bit
  2010-12-30 14:58       ` Tejun Heo
@ 2010-12-30 18:40         ` David Rientjes
  2010-12-31 13:20           ` Tejun Heo
  0 siblings, 1 reply; 37+ messages in thread
From: David Rientjes @ 2010-12-30 18:40 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, Ingo Molnar, tglx, H. Peter Anvin, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On Thu, 30 Dec 2010, Tejun Heo wrote:
> > Sigh, the new NUMA emulation thing is 64bit only. :-( I'll see if it
> > can be applied to 32bit too.  BTW, how does this interact with the
> > Shaohui's patchset.  Isn't that about NUMA emulation too?  Are these
> > the same patches?
> 
Shaohui's patchset is for node hotplug emulation, not for NUMA emulation 
which has existed for x86_64 for at least over four years.
> Okay, after going through the changes, here are some thoughts FWIW.
> 
> I'm doubtful the direction was the correct one.  NUMA emulation,
> almost by definition, doesn't vary too much depending on the
> underlying hardware and how it's configured.  All that's necessary is
> the physical node map an apicid to nid mapping, which can and probably
> should be represented in common form regardless of emulation and the
> emulation code should simply manipulate the parsed information.
> 
And pxm mappings since node_distance() must reflect the physical topology 
on the emulated environment.  We have specialized callbacks in the amd and 
ACPI code to ascertain the physical topology because its discovery is 
handled differently, either with an implied set of possible nodes in the 
amd case and the statically-allocated nodes_parsed in the ACPI case, prior 
to actually onlining the nodes which is suppressed for NUMA emulation.
> The original implementation was 64bit apic specific.  If it needed to
> be extended to cover amd topology, the right way would be updating the
> apic and amd numa code so that they generate the information in a
> uniform way and NUMA emulation would follow naturally.
You could export an apicid-to-node and pxm-to-node mapping from both amd 
and ACPI code to do the actual remap, but it would still require seperate 
functions, amd_fake_nodes() and acpi_fake_nodes(), to determine what the 
mapping should be.  Instead of adding an additional layer of indirection, 
it's currently done in a single function depending on whether amd or acpi 
is being used.
> Instead, it
> looks like what happened was simply adding amd specific code with more
> gluing.  The unnecessary differences between acpi and amd paths sure
> aren't your fault but I really don't think we should be moving toward
> that direction when things can (and definitely should) be unified with
> a bit more effort.  NUMA code already is unnecessary hairy with
> 32/64bit and different subarchs somewhat unified but subtly differing
> in many places without clear indications.
> 
I agree that unification is in our best interest but the patches as they 
sit in x86/numa right now actually fix real bugs when using numa=fake on 
the command line and adding i386 NUMA emulation is an additional feature 
(and the unification that can be done in the meantime would only be the 
actual apicid-to-node and pxm-to-node mappings after it is exported from 
amd and acpi specific functions), so I'd prefer to handle cleanups as 
incremental patches on top of those.
^ permalink raw reply	[flat|nested] 37+ messages in thread
* Re: [PATCH 15/16] x86: Unify node_to_cpumask_map handling between 32 and 64bit
  2010-12-30 12:48     ` Tejun Heo
  2010-12-30 14:58       ` Tejun Heo
@ 2010-12-30 18:43       ` David Rientjes
  1 sibling, 0 replies; 37+ messages in thread
From: David Rientjes @ 2010-12-30 18:43 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, Ingo Molnar, tglx, H. Peter Anvin, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On Thu, 30 Dec 2010, Tejun Heo wrote:
> Sigh, the new NUMA emulation thing is 64bit only. :-( I'll see if it
> can be applied to 32bit too.  BTW, how does this interact with the
> Shaohui's patchset.  Isn't that about NUMA emulation too?  Are these
> the same patches?
> 
There's nothing new about NUMA emulation on x86_64, it's existed for well 
over four years (we've been using it at Google since 2.6.18, see 
Documentation/x86/x86_64/fake-numa-for-cpusets).  Shaohui's patches are 
completely seperate and add hotplug emulation.  You _could_ boot with a 
very minimal amount of memory and then hotplug all the emulated nodes like 
numa=fake does, but you may find it difficult to parse the e820 and 
interleave the nodes appropriately over the set of physical nodes in 
userspace.
^ permalink raw reply	[flat|nested] 37+ messages in thread
* Re: [PATCH 15/16] x86: Unify node_to_cpumask_map handling between 32 and 64bit
  2010-12-30 18:40         ` David Rientjes
@ 2010-12-31 13:20           ` Tejun Heo
  2010-12-31 23:09             ` David Rientjes
  0 siblings, 1 reply; 37+ messages in thread
From: Tejun Heo @ 2010-12-31 13:20 UTC (permalink / raw)
  To: David Rientjes
  Cc: linux-kernel, Ingo Molnar, tglx, H. Peter Anvin, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
Hello,
On Thu, Dec 30, 2010 at 10:40:33AM -0800, David Rientjes wrote:
> On Thu, 30 Dec 2010, Tejun Heo wrote:
> 
> > > Sigh, the new NUMA emulation thing is 64bit only. :-( I'll see if it
> > > can be applied to 32bit too.  BTW, how does this interact with the
> > > Shaohui's patchset.  Isn't that about NUMA emulation too?  Are these
> > > the same patches?
> 
> Shaohui's patchset is for node hotplug emulation, not for NUMA emulation 
> which has existed for x86_64 for at least over four years.
I see.  Yeah, I should have actually read the patchset before talking
about it.
> I agree that unification is in our best interest but the patches as they 
> sit in x86/numa right now actually fix real bugs when using numa=fake on 
> the command line and adding i386 NUMA emulation is an additional feature 
I'm not really arguing against you and as said before the patches were
fine given the current state of the code, but I still want to point
out that it's through these mostly innocent series of changes which
build upon existing complications which eventually lead to
unsustainable pile of mess.  No one is particularly wrong but then
again none does what really needs to be done.
> (and the unification that can be done in the meantime would only be the 
> actual apicid-to-node and pxm-to-node mappings after it is exported from 
> amd and acpi specific functions), so I'd prefer to handle cleanups as 
> incremental patches on top of those.
The problem is that those "incremental" patches often don't happen
once the itch is gone while the immediate fixes/improvements aggravate
existing complications.  If you were/are planning to clean it up, I
have nothing to whine about.  :-)
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 37+ messages in thread
* Re: [PATCH 15/16] x86: Unify node_to_cpumask_map handling between 32 and 64bit
  2010-12-31 13:20           ` Tejun Heo
@ 2010-12-31 23:09             ` David Rientjes
  0 siblings, 0 replies; 37+ messages in thread
From: David Rientjes @ 2010-12-31 23:09 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, Ingo Molnar, tglx, H. Peter Anvin, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On Fri, 31 Dec 2010, Tejun Heo wrote:
> > (and the unification that can be done in the meantime would only be the 
> > actual apicid-to-node and pxm-to-node mappings after it is exported from 
> > amd and acpi specific functions), so I'd prefer to handle cleanups as 
> > incremental patches on top of those.
> 
> The problem is that those "incremental" patches often don't happen
> once the itch is gone while the immediate fixes/improvements aggravate
> existing complications.  If you were/are planning to clean it up, I
> have nothing to whine about.  :-)
> 
We can certainly implement i386 NUMA emulation after the unification is 
done (only to avoid additional conflicts on your set :), but I'm not sure 
if there's an audience of testers and debuggers who want to use it for 
i386 NUMA; there hasn't been one in the past when people like Magnus Damm 
proposed it about four years ago.  Agreed that it's probably going to be 
simpler after your unification patchset is merged, but it seems like we're 
in the unique position where we're extending a feature for an architecture 
because it's simple and makes things cleaner in the code rather than being 
actually useful.
^ permalink raw reply	[flat|nested] 37+ messages in thread
* [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2011-01-23 13:37 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#5 Tejun Heo
@ 2011-01-23 13:37 ` Tejun Heo
  0 siblings, 0 replies; 37+ messages in thread
From: Tejun Heo @ 2011-01-23 13:37 UTC (permalink / raw)
  To: linux-kernel, hpa
  Cc: mingo, tglx, x86, eric.dumazet, yinghai, brgerst, gorcunov,
	penberg, shaohui.zheng, rientjes, Tejun Heo
The mapping between cpu/apicid and node is done via apicid_to_node[]
on 64bit and apicid_2_node[] + apic->x86_32_numa_cpu_node() on 32bit.
This difference makes it difficult to further unify 32 and 64bit NUMA
handling.
This patch unifies it by replacing both apicid_to_node[] and
apicid_2_node[] with __apicid_to_node[] array, which is accessed by
two accessors - set_apicid_to_node() and numa_cpu_node().  On 64bit,
numa_cpu_node() always consults __apicid_to_node[] directly while
32bit goes through apic->numa_cpu_node() method to allow apic
implementations to override it.
srat_detect_node() for amd cpus contains workaround for broken NUMA
configuration which assumes relationship between APIC ID, HT node ID
and NUMA topology.  Leave it to access __apicid_to_node[] directly as
mapping through CPU might result in undesirable behavior change.  The
comment is reformatted and updated to note the ugliness.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
---
 arch/x86/include/asm/mpspec.h  |    1 -
 arch/x86/include/asm/numa.h    |   28 +++++++++++++++++++++++
 arch/x86/include/asm/numa_32.h |    6 +++++
 arch/x86/include/asm/numa_64.h |    5 +--
 arch/x86/kernel/acpi/boot.c    |    3 +-
 arch/x86/kernel/apic/apic.c    |    2 +-
 arch/x86/kernel/cpu/amd.c      |   47 +++++++++++++++++++++++++--------------
 arch/x86/kernel/cpu/intel.c    |    3 +-
 arch/x86/kernel/smpboot.c      |    6 +----
 arch/x86/mm/amdtopology_64.c   |    4 +-
 arch/x86/mm/numa.c             |    6 ++++-
 arch/x86/mm/numa_32.c          |    6 +++++
 arch/x86/mm/numa_64.c          |   26 +++++++++------------
 arch/x86/mm/srat_32.c          |    2 +-
 arch/x86/mm/srat_64.c          |   12 +++++-----
 15 files changed, 101 insertions(+), 56 deletions(-)
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index edc2a45..9c7d95f 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -25,7 +25,6 @@ extern int pic_mode;
 #define MAX_IRQ_SOURCES		256
 
 extern unsigned int def_to_bigsmp;
-extern u8 apicid_2_node[];
 
 #ifdef CONFIG_X86_NUMAQ
 extern int mp_bus_id_to_node[MAX_MP_BUSSES];
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 27da400..5e01c76 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,5 +1,33 @@
+#ifndef _ASM_X86_NUMA_H
+#define _ASM_X86_NUMA_H
+
+#include <asm/apicdef.h>
+
+#ifdef CONFIG_NUMA
+/*
+ * __apicid_to_node[] stores the raw mapping between physical apicid and
+ * node and is used to initialize cpu_to_node mapping.
+ *
+ * The mapping may be overridden by apic->numa_cpu_node() on 32bit and thus
+ * should be accessed by the accessors - set_apicid_to_node() and
+ * numa_cpu_node().
+ */
+extern s16 __apicid_to_node[MAX_LOCAL_APIC];
+
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+	__apicid_to_node[apicid] = node;
+}
+#else	/* CONFIG_NUMA */
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+}
+#endif	/* CONFIG_NUMA */
+
 #ifdef CONFIG_X86_32
 # include "numa_32.h"
 #else
 # include "numa_64.h"
 #endif
+
+#endif	/* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index b0ef2b4..cdf8043 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -6,6 +6,12 @@ extern int numa_off;
 extern int pxm_to_nid(int pxm);
 extern void numa_remove_cpu(int cpu);
 
+#ifdef CONFIG_NUMA
+extern int __cpuinit numa_cpu_node(int apicid);
+#else	/* CONFIG_NUMA */
+static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
+#endif	/* CONFIG_NUMA */
+
 #ifdef CONFIG_HIGHMEM
 extern void set_highmem_pages_init(void);
 #else
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 0493be3..4982a9c 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_NUMA_64_H
 
 #include <linux/nodemask.h>
-#include <asm/apicdef.h>
 
 struct bootnode {
 	u64 start;
@@ -17,8 +16,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern s16 apicid_to_node[MAX_LOCAL_APIC];
-
 extern unsigned long numa_free_all_bootmem(void);
 extern void setup_node_bootmem(int nodeid, unsigned long start,
 			       unsigned long end);
@@ -32,6 +29,7 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 #define NODE_MIN_SIZE (4*1024*1024)
 
 extern void __init init_cpu_to_node(void);
+extern int __cpuinit numa_cpu_node(int cpu);
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
 extern void __cpuinit numa_add_cpu(int cpu);
@@ -44,6 +42,7 @@ void numa_emu_cmdline(char *);
 #endif /* CONFIG_NUMA_EMU */
 #else
 static inline void init_cpu_to_node(void)		{ }
+static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
 static inline void numa_add_cpu(int cpu, int node)	{ }
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b3a7113..a7bca59 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -589,11 +589,10 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 	nid = acpi_get_node(handle);
 	if (nid == -1 || !node_online(nid))
 		return;
+	set_apicid_to_node(physid, nid);
 #ifdef CONFIG_X86_64
-	apicid_to_node[physid] = nid;
 	numa_set_node(cpu, nid);
 #else /* CONFIG_X86_32 */
-	apicid_2_node[physid] = nid;
 	cpu_to_node_map[cpu] = nid;
 #endif
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 0f4f3c1..4686ea5 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2026,7 +2026,7 @@ int default_x86_32_numa_cpu_node(int cpu)
 	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
 
 	if (apicid != BAD_APICID)
-		return apicid_2_node[apicid];
+		return __apicid_to_node[apicid];
 	return NUMA_NO_NODE;
 #else
 	return 0;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7c7bedb..3cce8f2 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -234,17 +234,21 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
 #endif
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+/*
+ * To workaround broken NUMA config.  Read the comment in
+ * srat_detect_node().
+ */
 static int __cpuinit nearby_node(int apicid)
 {
 	int i, node;
 
 	for (i = apicid - 1; i >= 0; i--) {
-		node = apicid_to_node[i];
+		node = __apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
 	for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
-		node = apicid_to_node[i];
+		node = __apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
@@ -339,26 +343,35 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 	int node;
 	unsigned apicid = c->apicid;
 
-	node = per_cpu(cpu_llc_id, cpu);
+	node = numa_cpu_node(cpu);
+	if (node == NUMA_NO_NODE)
+		node = per_cpu(cpu_llc_id, cpu);
 
-	if (apicid_to_node[apicid] != NUMA_NO_NODE)
-		node = apicid_to_node[apicid];
 	if (!node_online(node)) {
-		/* Two possibilities here:
-		   - The CPU is missing memory and no node was created.
-		   In that case try picking one from a nearby CPU
-		   - The APIC IDs differ from the HyperTransport node IDs
-		   which the K8 northbridge parsing fills in.
-		   Assume they are all increased by a constant offset,
-		   but in the same order as the HT nodeids.
-		   If that doesn't result in a usable node fall back to the
-		   path for the previous case.  */
-
+		/*
+		 * Two possibilities here:
+		 *
+		 * - The CPU is missing memory and no node was created.  In
+		 *   that case try picking one from a nearby CPU.
+		 *
+		 * - The APIC IDs differ from the HyperTransport node IDs
+		 *   which the K8 northbridge parsing fills in.  Assume
+		 *   they are all increased by a constant offset, but in
+		 *   the same order as the HT nodeids.  If that doesn't
+		 *   result in a usable node fall back to the path for the
+		 *   previous case.
+		 *
+		 * This workaround operates directly on the mapping between
+		 * APIC ID and NUMA node, assuming certain relationship
+		 * between APIC ID, HT node ID and NUMA topology.  As going
+		 * through CPU mapping may alter the outcome, directly
+		 * access __apicid_to_node[].
+		 */
 		int ht_nodeid = c->initial_apicid;
 
 		if (ht_nodeid >= 0 &&
-		    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
-			node = apicid_to_node[ht_nodeid];
+		    __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+			node = __apicid_to_node[ht_nodeid];
 		/* Pick a nearby node */
 		if (!node_online(node))
 			node = nearby_node(apicid);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d16c2c5..6052004 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -279,11 +279,10 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 	unsigned node;
 	int cpu = smp_processor_id();
-	int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
 
 	/* Don't do the funky fallback heuristics the AMD version employs
 	   for now. */
-	node = apicid_to_node[apicid];
+	node = numa_cpu_node(cpu);
 	if (node == NUMA_NO_NODE || !node_online(node)) {
 		/* reuse the value from init_cpu_to_node() */
 		node = cpu_to_node(cpu);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e7e570f..d6afd57 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -71,10 +71,6 @@
 #include <asm/smpboot_hooks.h>
 #include <asm/i8259.h>
 
-#ifdef CONFIG_X86_32
-u8 apicid_2_node[MAX_LOCAL_APIC];
-#endif
-
 /* State of each CPU */
 DEFINE_PER_CPU(int, cpu_state) = { 0 };
 
@@ -170,7 +166,7 @@ static void map_cpu_to_logical_apicid(void)
 	int cpu = smp_processor_id();
 	int node;
 
-	node = apic->x86_32_numa_cpu_node(cpu);
+	node = numa_cpu_node(cpu);
 	if (!node_online(node))
 		node = first_online_node;
 
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index f21962c..c7fae38 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -247,7 +247,7 @@ void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
 		__acpi_map_pxm_to_node(nid, i);
 #endif
 	}
-	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+	memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
 }
 #endif /* CONFIG_NUMA_EMU */
 
@@ -285,7 +285,7 @@ int __init amd_scan_nodes(void)
 				nodes[i].start >> PAGE_SHIFT,
 				nodes[i].end >> PAGE_SHIFT);
 		for (j = apicid_base; j < cores + apicid_base; j++)
-			apicid_to_node[(i << bits) + j] = i;
+			set_apicid_to_node((i << bits) + j, i);
 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 	}
 
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index ebf6d78..480b357 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -26,8 +26,12 @@ static __init int numa_setup(char *opt)
 early_param("numa", numa_setup);
 
 /*
- * Which logical CPUs are on which nodes
+ * apicid, cpu, node mappings
  */
+s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
 cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 EXPORT_SYMBOL(node_to_cpumask_map);
 
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 84a3e4c..8d91d22 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -110,6 +110,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
 static unsigned long kva_start_pfn;
 static unsigned long kva_pages;
+
+int __cpuinit numa_cpu_node(int cpu)
+{
+	return apic->x86_32_numa_cpu_node(cpu);
+}
+
 /*
  * FLAT - support for basic PC memory model with discontig enabled, essentially
  *        a single node with all available processors in it with a flat
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 95ea155..1e1026f 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -26,10 +26,6 @@ EXPORT_SYMBOL(node_data);
 
 struct memnode memnode;
 
-s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
-	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-
 static unsigned long __initdata nodemap_addr;
 static unsigned long __initdata nodemap_size;
 
@@ -716,12 +712,8 @@ void __init init_cpu_to_node(void)
 	BUG_ON(cpu_to_apicid == NULL);
 
 	for_each_possible_cpu(cpu) {
-		int node;
-		u16 apicid = cpu_to_apicid[cpu];
+		int node = numa_cpu_node(cpu);
 
-		if (apicid == BAD_APICID)
-			continue;
-		node = apicid_to_node[apicid];
 		if (node == NUMA_NO_NODE)
 			continue;
 		if (!node_online(node))
@@ -731,6 +723,14 @@ void __init init_cpu_to_node(void)
 }
 #endif
 
+int __cpuinit numa_cpu_node(int cpu)
+{
+	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+	if (apicid != BAD_APICID)
+		return __apicid_to_node[apicid];
+	return NUMA_NO_NODE;
+}
 
 void __cpuinit numa_set_node(int cpu, int node)
 {
@@ -776,13 +776,9 @@ void __cpuinit numa_remove_cpu(int cpu)
 void __cpuinit numa_add_cpu(int cpu)
 {
 	unsigned long addr;
-	u16 apicid;
-	int physnid;
-	int nid = NUMA_NO_NODE;
+	int physnid, nid;
 
-	apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-	if (apicid != BAD_APICID)
-		nid = apicid_to_node[apicid];
+	nid = numa_cpu_node(cpu);
 	if (nid == NUMA_NO_NODE)
 		nid = early_cpu_to_node(cpu);
 	BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 6027a48..48651c6 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -255,7 +255,7 @@ int __init get_memcfg_from_srat(void)
 			 num_memory_chunks);
 
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
+		set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
 
 	for (j = 0; j < num_memory_chunks; j++){
 		struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 603d285..9a97261 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -79,7 +79,7 @@ static __init void bad_srat(void)
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		apicid_to_node[i] = NUMA_NO_NODE;
+		set_apicid_to_node(i, NUMA_NO_NODE);
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		nodes[i].start = nodes[i].end = 0;
 		nodes_add[i].start = nodes_add[i].end = 0;
@@ -138,7 +138,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
 		return;
 	}
-	apicid_to_node[apic_id] = node;
+	set_apicid_to_node(apic_id, node);
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
@@ -178,7 +178,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 		return;
 	}
 
-	apicid_to_node[apic_id] = node;
+	set_apicid_to_node(apic_id, node);
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
@@ -521,7 +521,7 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 		 * node, it must now point to the fake node ID.
 		 */
 		for (j = 0; j < MAX_LOCAL_APIC; j++)
-			if (apicid_to_node[j] == nid &&
+			if (__apicid_to_node[j] == nid &&
 			    fake_apicid_to_node[j] == NUMA_NO_NODE)
 				fake_apicid_to_node[j] = i;
 	}
@@ -532,13 +532,13 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 	 * value.
 	 */
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		if (apicid_to_node[i] != NUMA_NO_NODE &&
+		if (__apicid_to_node[i] != NUMA_NO_NODE &&
 		    fake_apicid_to_node[i] == NUMA_NO_NODE)
 			fake_apicid_to_node[i] = 0;
 
 	for (i = 0; i < num_nodes; i++)
 		__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
-	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+	memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
 
 	nodes_clear(nodes_parsed);
 	for (i = 0; i < num_nodes; i++)
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 37+ messages in thread
end of thread, other threads:[~2011-01-23 13:38 UTC | newest]
Thread overview: 37+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
2010-12-28 11:48 ` [PATCH 01/16] x86: Kill unused static boot_cpu_logical_apicid in smpboot.c Tejun Heo
2010-12-28 11:48 ` [PATCH 02/16] x86: Rename x86_32 MAX_APICID to MAX_LOCAL_APIC Tejun Heo
2010-12-28 11:48 ` [PATCH 03/16] x86: Make default_send_IPI_mask_sequence/allbutself_logical() 32bit only Tejun Heo
2010-12-28 11:48 ` [PATCH 04/16] x86: Replace cpu_2_logical_apicid[] with early percpu variable Tejun Heo
2010-12-28 11:48 ` [PATCH 05/16] x86: Always use x86_cpu_to_logical_apicid for cpu -> logical apic id Tejun Heo
2010-12-28 11:48 ` [PATCH 06/16] x86: Kill apic->cpu_to_logical_apicid() Tejun Heo
2010-12-28 11:48 ` [PATCH 07/16] x86: Add apic->x86_32_early_logical_apicid() Tejun Heo
2010-12-28 11:48 ` [PATCH 08/16] x86: Implement the default x86_32_early_logical_apicid() Tejun Heo
2010-12-28 11:48 ` [PATCH 09/16] x86: Implement x86_32_early_logical_apicid() for bigsmp_32 Tejun Heo
2010-12-28 11:48 ` [PATCH 10/16] x86: Implement x86_32_early_logical_apicid() for summit_32 Tejun Heo
2010-12-28 11:48 ` [PATCH 11/16] x86: Implement x86_32_early_logical_apicid() for numaq_32 Tejun Heo
2010-12-28 11:48 ` [PATCH 12/16] x86: Replace apic->apicid_to_node() with ->x86_32_numa_cpu_node() Tejun Heo
2010-12-28 11:48 ` [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit Tejun Heo
2010-12-28 20:35   ` David Rientjes
2010-12-29 10:52     ` Tejun Heo
2010-12-29 19:36       ` H. Peter Anvin
2010-12-29 22:05         ` H. Peter Anvin
2010-12-30 11:33           ` Tejun Heo
2010-12-28 11:48 ` [PATCH 14/16] x86: Unify CPU -> " Tejun Heo
2010-12-28 20:39   ` David Rientjes
2010-12-28 11:48 ` [PATCH 15/16] x86: Unify node_to_cpumask_map handling " Tejun Heo
2010-12-28 20:43   ` David Rientjes
2010-12-30 12:48     ` Tejun Heo
2010-12-30 14:58       ` Tejun Heo
2010-12-30 18:40         ` David Rientjes
2010-12-31 13:20           ` Tejun Heo
2010-12-31 23:09             ` David Rientjes
2010-12-30 18:43       ` David Rientjes
2010-12-28 11:48 ` [PATCH 16/16] x86: Unify NUMA initialization " Tejun Heo
  -- strict thread matches above, loose matches on Subject: below --
2011-01-23 13:37 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#5 Tejun Heo
2011-01-23 13:37 ` [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit Tejun Heo
2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
2010-12-30 17:49 ` [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit Tejun Heo
2010-11-27 15:21 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#2 Tejun Heo
2010-11-27 15:22 ` [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit Tejun Heo
2010-12-09 21:43   ` Thomas Gleixner
2010-12-10 20:45     ` Tejun Heo
2010-12-10 20:54       ` Tejun Heo
2010-12-10 21:17         ` Yinghai Lu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).