* [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-11-27 15:21 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#2 Tejun Heo
@ 2010-11-27 15:22 ` Tejun Heo
  2010-12-09 21:43   ` Thomas Gleixner
  0 siblings, 1 reply; 40+ messages in thread
From: Tejun Heo @ 2010-11-27 15:22 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg
  Cc: Tejun Heo
The mapping between cpu/apicid and node is done via apicid_to_node[]
on 64bit and apicid_2_node[] + apic->numa_cpu_node() on 32bit.  This
difference makes it difficult to further unify 32 and 64bit NUMA
handling.
This patch unifies it by replacing both apicid_to_node[] and
apicid_2_node[] with __apicid_to_node[] array, which is accessed by
two accessors - set_apicid_to_node() and numa_cpu_node().  On 64bit,
numa_cpu_node() always consults __apicid_to_node[] directly while
32bit goes through apic->numa_cpu_node() method to allow apic
implementation to override it.
There are several places where using numa_cpu_node() is awkward and
the override doesn't matter.  In those places, __apicid_to_node[] is
used directly.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
---
 arch/x86/include/asm/mpspec.h  |    1 -
 arch/x86/include/asm/numa.h    |   31 +++++++++++++++++++++++++++++++
 arch/x86/include/asm/numa_32.h |    6 ++++++
 arch/x86/include/asm/numa_64.h |    5 ++---
 arch/x86/kernel/acpi/boot.c    |    3 +--
 arch/x86/kernel/apic/apic.c    |    6 +++++-
 arch/x86/kernel/cpu/amd.c      |   14 +++++++-------
 arch/x86/kernel/cpu/intel.c    |    3 +--
 arch/x86/kernel/smpboot.c      |    6 +-----
 arch/x86/mm/k8topology_64.c    |    2 +-
 arch/x86/mm/numa.c             |    6 +++++-
 arch/x86/mm/numa_32.c          |    6 ++++++
 arch/x86/mm/numa_64.c          |   18 +++++++++---------
 arch/x86/mm/srat_32.c          |    2 +-
 arch/x86/mm/srat_64.c          |   10 +++++-----
 15 files changed, 81 insertions(+), 38 deletions(-)
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 018ffc1..ae78732 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -24,7 +24,6 @@ extern int pic_mode;
 #define MAX_IRQ_SOURCES		256
 
 extern unsigned int def_to_bigsmp;
-extern u8 apicid_2_node[];
 
 #ifdef CONFIG_X86_NUMAQ
 extern int mp_bus_id_to_node[MAX_MP_BUSSES];
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 27da400..e40bf6f 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,5 +1,36 @@
+#ifndef _ASM_X86_NUMA_H
+#define _ASM_X86_NUMA_H
+
+#include <asm/apicdef.h>
+
+#ifdef CONFIG_NUMA
+/*
+ * __apicid_to_node[] stores the raw mapping between physical apicid
+ * and node and is used to initialize cpu_to_node mapping.
+ *
+ * The mapping may be overridden by apic->numa_cpu_node() on 32bit and
+ * thus should be accessed by the accessors - set_apicid_to_node() and
+ * numa_cpu_node().
+ *
+ * If the user knows that it doesn't care about 32bit APIC-specific
+ * overrides, __apicid_to_node[] may be used directly.
+ */
+extern s16 __apicid_to_node[MAX_LOCAL_APIC];
+
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+	__apicid_to_node[apicid] = node;
+}
+#else	/* CONFIG_NUMA */
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+}
+#endif	/* CONFIG_NUMA */
+
 #ifdef CONFIG_X86_32
 # include "numa_32.h"
 #else
 # include "numa_64.h"
 #endif
+
+#endif	/* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index a372290..d30eb6c 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -4,6 +4,12 @@
 extern int pxm_to_nid(int pxm);
 extern void numa_remove_cpu(int cpu);
 
+#ifdef CONFIG_NUMA
+extern int __cpuinit numa_cpu_node(int apicid);
+#else	/* CONFIG_NUMA */
+static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
+#endif	/* CONFIG_NUMA */
+
 #ifdef CONFIG_HIGHMEM
 extern void set_highmem_pages_init(void);
 #else
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 823e070..17171ee 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_NUMA_64_H
 
 #include <linux/nodemask.h>
-#include <asm/apicdef.h>
 
 struct bootnode {
 	u64 start;
@@ -17,8 +16,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern s16 apicid_to_node[MAX_LOCAL_APIC];
-
 extern unsigned long numa_free_all_bootmem(void);
 extern void setup_node_bootmem(int nodeid, unsigned long start,
 			       unsigned long end);
@@ -32,6 +29,7 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 #define NODE_MIN_SIZE (4*1024*1024)
 
 extern void __init init_cpu_to_node(void);
+extern int __cpuinit numa_cpu_node(int cpu);
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
 extern void __cpuinit numa_add_cpu(int cpu);
@@ -43,6 +41,7 @@ extern void __cpuinit numa_remove_cpu(int cpu);
 #endif /* CONFIG_NUMA_EMU */
 #else
 static inline void init_cpu_to_node(void)		{ }
+static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
 static inline void numa_add_cpu(int cpu, int node)	{ }
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 71232b9..edff4f5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -583,11 +583,10 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 	nid = acpi_get_node(handle);
 	if (nid == -1 || !node_online(nid))
 		return;
+	set_apicid_to_node(physid, nid);
 #ifdef CONFIG_X86_64
-	apicid_to_node[physid] = nid;
 	numa_set_node(cpu, nid);
 #else /* CONFIG_X86_32 */
-	apicid_2_node[physid] = nid;
 	cpu_to_node_map[cpu] = nid;
 #endif
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 597ef66..ee20fe7 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2016,7 +2016,11 @@ void default_init_apic_ldr(void)
 int default_numa_cpu_node(int cpu)
 {
 #ifdef CONFIG_NUMA
-	return apicid_2_node[early_per_cpu(x86_cpu_to_apicid, cpu)];
+	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+	if (apicid != BAD_APICID)
+		return __apicid_to_node[apicid];
+	return NUMA_NO_NODE;
 #else
 	return 0;
 #endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9e093f8..aa3c613 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -239,12 +239,12 @@ static int __cpuinit nearby_node(int apicid)
 	int i, node;
 
 	for (i = apicid - 1; i >= 0; i--) {
-		node = apicid_to_node[i];
+		node = __apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
 	for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
-		node = apicid_to_node[i];
+		node = __apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
@@ -339,10 +339,10 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 	int node;
 	unsigned apicid = c->apicid;
 
-	node = per_cpu(cpu_llc_id, cpu);
+	node = numa_cpu_node(cpu);
+	if (node == NUMA_NO_NODE)
+		node = per_cpu(cpu_llc_id, cpu);
 
-	if (apicid_to_node[apicid] != NUMA_NO_NODE)
-		node = apicid_to_node[apicid];
 	if (!node_online(node)) {
 		/* Two possibilities here:
 		   - The CPU is missing memory and no node was created.
@@ -357,8 +357,8 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 		int ht_nodeid = c->initial_apicid;
 
 		if (ht_nodeid >= 0 &&
-		    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
-			node = apicid_to_node[ht_nodeid];
+		    __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+			node = __apicid_to_node[ht_nodeid];
 		/* Pick a nearby node */
 		if (!node_online(node))
 			node = nearby_node(apicid);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d16c2c5..6052004 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -279,11 +279,10 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 	unsigned node;
 	int cpu = smp_processor_id();
-	int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
 
 	/* Don't do the funky fallback heuristics the AMD version employs
 	   for now. */
-	node = apicid_to_node[apicid];
+	node = numa_cpu_node(cpu);
 	if (node == NUMA_NO_NODE || !node_online(node)) {
 		/* reuse the value from init_cpu_to_node() */
 		node = cpu_to_node(cpu);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 963c44b..4b8b72d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -71,10 +71,6 @@
 #include <asm/smpboot_hooks.h>
 #include <asm/i8259.h>
 
-#ifdef CONFIG_X86_32
-u8 apicid_2_node[MAX_LOCAL_APIC];
-#endif
-
 /* State of each CPU */
 DEFINE_PER_CPU(int, cpu_state) = { 0 };
 
@@ -170,7 +166,7 @@ static void map_cpu_to_logical_apicid(void)
 	int cpu = smp_processor_id();
 	int node;
 
-	node = apic->numa_cpu_node(cpu);
+	node = numa_cpu_node(cpu);
 	if (!node_online(node))
 		node = first_online_node;
 
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 804a3b6..484d80c 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -228,7 +228,7 @@ int __init k8_scan_nodes(void)
 				nodes[i].start >> PAGE_SHIFT,
 				nodes[i].end >> PAGE_SHIFT);
 		for (j = apicid_base; j < cores + apicid_base; j++)
-			apicid_to_node[(i << bits) + j] = i;
+			set_apicid_to_node((i << bits) + j, i);
 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 	}
 
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 787c52c..63db99c 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -4,8 +4,12 @@
 #include <linux/bootmem.h>
 
 /*
- * Which logical CPUs are on which nodes
+ * apicid, cpu, node mappings
  */
+s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
 cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 EXPORT_SYMBOL(node_to_cpumask_map);
 
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 84a3e4c..9f27ae2 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -110,6 +110,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
 static unsigned long kva_start_pfn;
 static unsigned long kva_pages;
+
+int __cpuinit numa_cpu_node(int cpu)
+{
+	return apic->numa_cpu_node(cpu);
+}
+
 /*
  * FLAT - support for basic PC memory model with discontig enabled, essentially
  *        a single node with all available processors in it with a flat
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 7ffc9b7..47ca1b0 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -26,10 +26,6 @@ EXPORT_SYMBOL(node_data);
 
 struct memnode memnode;
 
-s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
-	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-
 int numa_off __initdata;
 static unsigned long __initdata nodemap_addr;
 static unsigned long __initdata nodemap_size;
@@ -721,12 +717,8 @@ void __init init_cpu_to_node(void)
 	BUG_ON(cpu_to_apicid == NULL);
 
 	for_each_possible_cpu(cpu) {
-		int node;
-		u16 apicid = cpu_to_apicid[cpu];
+		int node = numa_cpu_node(cpu);
 
-		if (apicid == BAD_APICID)
-			continue;
-		node = apicid_to_node[apicid];
 		if (node == NUMA_NO_NODE)
 			continue;
 		if (!node_online(node))
@@ -736,6 +728,14 @@ void __init init_cpu_to_node(void)
 }
 #endif
 
+int __cpuinit numa_cpu_node(int cpu)
+{
+	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+	if (apicid != BAD_APICID)
+		return __apicid_to_node[apicid];
+	return NUMA_NO_NODE;
+}
 
 void __cpuinit numa_set_node(int cpu, int node)
 {
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index e55e748..7fcae55 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -255,7 +255,7 @@ int __init get_memcfg_from_srat(void)
 			 num_memory_chunks);
 
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
+		set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
 
 	for (j = 0; j < num_memory_chunks; j++){
 		struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index a35cb9d..1af9c6e 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -79,7 +79,7 @@ static __init void bad_srat(void)
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		apicid_to_node[i] = NUMA_NO_NODE;
+		set_apicid_to_node(i, NUMA_NO_NODE);
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		nodes[i].start = nodes[i].end = 0;
 		nodes_add[i].start = nodes_add[i].end = 0;
@@ -134,7 +134,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 	}
 
 	apic_id = pa->apic_id;
-	apicid_to_node[apic_id] = node;
+	set_apicid_to_node(apic_id, node);
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
@@ -168,7 +168,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 		apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
 	else
 		apic_id = pa->apic_id;
-	apicid_to_node[apic_id] = node;
+	set_apicid_to_node(apic_id, node);
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
@@ -512,13 +512,13 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 		 * node, it must now point to the fake node ID.
 		 */
 		for (j = 0; j < MAX_LOCAL_APIC; j++)
-			if (apicid_to_node[j] == nid &&
+			if (__apicid_to_node[j] == nid &&
 			    fake_apicid_to_node[j] == NUMA_NO_NODE)
 				fake_apicid_to_node[j] = i;
 	}
 	for (i = 0; i < num_nodes; i++)
 		__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
-	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+	memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
 
 	nodes_clear(nodes_parsed);
 	for (i = 0; i < num_nodes; i++)
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-11-27 15:22 ` [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit Tejun Heo
@ 2010-12-09 21:43   ` Thomas Gleixner
  2010-12-10 20:45     ` Tejun Heo
  0 siblings, 1 reply; 40+ messages in thread
From: Thomas Gleixner @ 2010-12-09 21:43 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, mingo, hpa, x86, eric.dumazet, yinghai, brgerst,
	gorcunov, penberg
On Sat, 27 Nov 2010, Tejun Heo wrote:
> The mapping between cpu/apicid and node is done via apicid_to_node[]
> on 64bit and apicid_2_node[] + apic->numa_cpu_node() on 32bit.  This
> difference makes it difficult to further unify 32 and 64bit NUMA
> hanlding.
> 
> This patch unifies it by replacing both apicid_to_node[] and
> apicid_2_node[] with __apicid_to_node[] array, which is accessed by
> two accessors - set_apicid_to_node() and numa_cpu_node().  On 64bit,
> numa_cpu_node() always consults __apicid_to_node[] directly while
> 32bit goes through apic->numa_cpu_node() method to allow apic
> implementation to override it.
> 
> There are several places where using numa_cpu_node() is awkward and
> the override doesn't matter.  In those places, __apicid_to_node[] are
> used directly.
Why is it awkward? Anything outside the accessor functions or
specialized setup code using it is awkward, inconsistent and sloppy.
Thanks,
	tglx
^ permalink raw reply	[flat|nested] 40+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-09 21:43   ` Thomas Gleixner
@ 2010-12-10 20:45     ` Tejun Heo
  2010-12-10 20:54       ` Tejun Heo
  0 siblings, 1 reply; 40+ messages in thread
From: Tejun Heo @ 2010-12-10 20:45 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: linux-kernel, mingo, hpa, x86, eric.dumazet, yinghai, brgerst,
	gorcunov, penberg
Hello, Thomas.
On 12/09/2010 10:43 PM, Thomas Gleixner wrote:
>> There are several places where using numa_cpu_node() is awkward and
>> the override doesn't matter.  In those places, __apicid_to_node[] are
>> used directly.
> 
> Why is it awkward? Anything outside the accessor functions or
> specialized setup code using it is awkward, inconsistent and sloppy.
There are two places which index the mapping by apicid instead of cpu.
One is acpi_fake_nodes() - this sets up the table, so it doesn't
constitute a violation of the accessor rule.
The problematic one is the nearby detection workaround in
kernel/cpu/amd.c.  This is rather hacky and looks like it was added to
deal with early AMD NUMA machines which had broken BIOSes.  The Intel
counterpart omits this workaround and simply ignores the NUMA
configuration in those cases.
I can change srat_detect_node() and nearby_node() to index by cpu but
as I have no idea what kind of broken configurations this is supposed
to deal with, I'm concerned that this may lead to a different outcome by
walking the table in a different order.  I can implement an apicid ->
numa node mapping function for this but this is something which is
inherently ugly, so maybe it's best to leave it ugly.
For now, how about adding a big fat comment explaining the ugliness in
amd.c?
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 40+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-10 20:45     ` Tejun Heo
@ 2010-12-10 20:54       ` Tejun Heo
  2010-12-10 21:17         ` Yinghai Lu
  0 siblings, 1 reply; 40+ messages in thread
From: Tejun Heo @ 2010-12-10 20:54 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: linux-kernel, mingo, hpa, x86, eric.dumazet, yinghai, brgerst,
	gorcunov, penberg
On 12/10/2010 09:45 PM, Tejun Heo wrote:
> I can change srat_detect_node() and nearby_node() to index by cpu but
> as I have no idea what kind of broken configurations this is supposed
> to deal with, I'm concerned that this may lead to different outcome by
> walking the table in a different order.  I can implement an apicid ->
> numa node mapping function for this but this is something which is
> inherently ugly, so maybe it's best to leave it ugly.
Oh, right, another problem.  It's possible that apicid <-> numa
mapping exists when apicid <-> cpu doesn't.  Again, this is a corner
case which might not matter but I have no idea what kind of brokenness
is being worked around and it would also be difficult to test whether
the change is okay.
If someone can tell me that this workaround can go away, it would be
awesome.
-- 
tejun
^ permalink raw reply	[flat|nested] 40+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-10 20:54       ` Tejun Heo
@ 2010-12-10 21:17         ` Yinghai Lu
  0 siblings, 0 replies; 40+ messages in thread
From: Yinghai Lu @ 2010-12-10 21:17 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Thomas Gleixner, linux-kernel, mingo, hpa, x86, eric.dumazet,
	brgerst, gorcunov, penberg
On 12/10/2010 12:54 PM, Tejun Heo wrote:
> On 12/10/2010 09:45 PM, Tejun Heo wrote:
>> I can change srat_detect_node() and nearby_node() to index by cpu but
>> as I have no idea what kind of broken configurations this is supposed
>> to deal with, I'm concerned that this may lead to different outcome by
>> walking the table in a different order.  I can implement an apicid ->
>> numa node mapping function for this but this is something which is
>> inherently ugly, so maybe it's best to leave it ugly.
> 
> Oh, right, another problem.  It's possible that apicid <-> numa
> mapping exists when apicid <-> cpu doesn't.  Again, this is a corner
> case which might not matter but I have no idea what kind of brokeness
> is being worked around and it would also be difficult to test whether
> the change is okay.
> 
That could happen... when SRAT and MADT are in a different order and you are booting with nr_cpus=<less than the number of CPUs in SRAT>...
should be fixed in
https://lkml.org/lkml/2010/11/22/5
Thanks
Yinghai
^ permalink raw reply	[flat|nested] 40+ messages in thread
* [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
@ 2010-12-28 11:48 ` Tejun Heo
  2010-12-28 20:35   ` David Rientjes
  0 siblings, 1 reply; 40+ messages in thread
From: Tejun Heo @ 2010-12-28 11:48 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
  Cc: Tejun Heo
The mapping between cpu/apicid and node is done via apicid_to_node[]
on 64bit and apicid_2_node[] + apic->x86_32_numa_cpu_node() on 32bit.
This difference makes it difficult to further unify 32 and 64bit NUMA
handling.
This patch unifies it by replacing both apicid_to_node[] and
apicid_2_node[] with __apicid_to_node[] array, which is accessed by
two accessors - set_apicid_to_node() and numa_cpu_node().  On 64bit,
numa_cpu_node() always consults __apicid_to_node[] directly while
32bit goes through apic->x86_32_numa_cpu_node() method to allow apic
implementations to override it.
srat_detect_node() for AMD CPUs contains a workaround for broken NUMA
configurations which assumes a relationship between APIC ID, HT node
ID and NUMA topology.  Leave it to access __apicid_to_node[] directly,
as mapping through CPU might result in an undesirable behavior change.
The comment is reformatted and updated to note the ugliness.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
---
 arch/x86/include/asm/mpspec.h  |    1 -
 arch/x86/include/asm/numa.h    |   28 +++++++++++++++++++++++
 arch/x86/include/asm/numa_32.h |    6 +++++
 arch/x86/include/asm/numa_64.h |    5 +--
 arch/x86/kernel/acpi/boot.c    |    3 +-
 arch/x86/kernel/apic/apic.c    |    2 +-
 arch/x86/kernel/cpu/amd.c      |   47 +++++++++++++++++++++++++--------------
 arch/x86/kernel/cpu/intel.c    |    3 +-
 arch/x86/kernel/smpboot.c      |    6 +----
 arch/x86/mm/amdtopology_64.c   |    2 +-
 arch/x86/mm/numa.c             |    6 ++++-
 arch/x86/mm/numa_32.c          |    6 +++++
 arch/x86/mm/numa_64.c          |   18 +++++++-------
 arch/x86/mm/srat_32.c          |    2 +-
 arch/x86/mm/srat_64.c          |   10 ++++----
 15 files changed, 97 insertions(+), 48 deletions(-)
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 018ffc1..ae78732 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -24,7 +24,6 @@ extern int pic_mode;
 #define MAX_IRQ_SOURCES		256
 
 extern unsigned int def_to_bigsmp;
-extern u8 apicid_2_node[];
 
 #ifdef CONFIG_X86_NUMAQ
 extern int mp_bus_id_to_node[MAX_MP_BUSSES];
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 27da400..5e01c76 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,5 +1,33 @@
+#ifndef _ASM_X86_NUMA_H
+#define _ASM_X86_NUMA_H
+
+#include <asm/apicdef.h>
+
+#ifdef CONFIG_NUMA
+/*
+ * __apicid_to_node[] stores the raw mapping between physical apicid and
+ * node and is used to initialize cpu_to_node mapping.
+ *
+ * The mapping may be overridden by apic->numa_cpu_node() on 32bit and thus
+ * should be accessed by the accessors - set_apicid_to_node() and
+ * numa_cpu_node().
+ */
+extern s16 __apicid_to_node[MAX_LOCAL_APIC];
+
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+	__apicid_to_node[apicid] = node;
+}
+#else	/* CONFIG_NUMA */
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+}
+#endif	/* CONFIG_NUMA */
+
 #ifdef CONFIG_X86_32
 # include "numa_32.h"
 #else
 # include "numa_64.h"
 #endif
+
+#endif	/* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index a372290..d30eb6c 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -4,6 +4,12 @@
 extern int pxm_to_nid(int pxm);
 extern void numa_remove_cpu(int cpu);
 
+#ifdef CONFIG_NUMA
+extern int __cpuinit numa_cpu_node(int apicid);
+#else	/* CONFIG_NUMA */
+static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
+#endif	/* CONFIG_NUMA */
+
 #ifdef CONFIG_HIGHMEM
 extern void set_highmem_pages_init(void);
 #else
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 823e070..17171ee 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_NUMA_64_H
 
 #include <linux/nodemask.h>
-#include <asm/apicdef.h>
 
 struct bootnode {
 	u64 start;
@@ -17,8 +16,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern s16 apicid_to_node[MAX_LOCAL_APIC];
-
 extern unsigned long numa_free_all_bootmem(void);
 extern void setup_node_bootmem(int nodeid, unsigned long start,
 			       unsigned long end);
@@ -32,6 +29,7 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 #define NODE_MIN_SIZE (4*1024*1024)
 
 extern void __init init_cpu_to_node(void);
+extern int __cpuinit numa_cpu_node(int cpu);
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
 extern void __cpuinit numa_add_cpu(int cpu);
@@ -43,6 +41,7 @@ extern void __cpuinit numa_remove_cpu(int cpu);
 #endif /* CONFIG_NUMA_EMU */
 #else
 static inline void init_cpu_to_node(void)		{ }
+static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
 static inline void numa_add_cpu(int cpu, int node)	{ }
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 1a5b9a8..de3308b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -583,11 +583,10 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 	nid = acpi_get_node(handle);
 	if (nid == -1 || !node_online(nid))
 		return;
+	set_apicid_to_node(physid, nid);
 #ifdef CONFIG_X86_64
-	apicid_to_node[physid] = nid;
 	numa_set_node(cpu, nid);
 #else /* CONFIG_X86_32 */
-	apicid_2_node[physid] = nid;
 	cpu_to_node_map[cpu] = nid;
 #endif
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 5e097e5..4f2f210 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2029,7 +2029,7 @@ int default_x86_32_numa_cpu_node(int cpu)
 	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
 
 	if (apicid != BAD_APICID)
-		return apicid_2_node[apicid];
+		return __apicid_to_node[apicid];
 	return NUMA_NO_NODE;
 #else
 	return 0;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9e093f8..d8e81e5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -234,17 +234,21 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
 #endif
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+/*
+ * To workaround broken NUMA config.  Read the comment in
+ * srat_detect_node().
+ */
 static int __cpuinit nearby_node(int apicid)
 {
 	int i, node;
 
 	for (i = apicid - 1; i >= 0; i--) {
-		node = apicid_to_node[i];
+		node = __apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
 	for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
-		node = apicid_to_node[i];
+		node = __apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
@@ -339,26 +343,35 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 	int node;
 	unsigned apicid = c->apicid;
 
-	node = per_cpu(cpu_llc_id, cpu);
+	node = numa_cpu_node(cpu);
+	if (node == NUMA_NO_NODE)
+		node = per_cpu(cpu_llc_id, cpu);
 
-	if (apicid_to_node[apicid] != NUMA_NO_NODE)
-		node = apicid_to_node[apicid];
 	if (!node_online(node)) {
-		/* Two possibilities here:
-		   - The CPU is missing memory and no node was created.
-		   In that case try picking one from a nearby CPU
-		   - The APIC IDs differ from the HyperTransport node IDs
-		   which the K8 northbridge parsing fills in.
-		   Assume they are all increased by a constant offset,
-		   but in the same order as the HT nodeids.
-		   If that doesn't result in a usable node fall back to the
-		   path for the previous case.  */
-
+		/*
+		 * Two possibilities here:
+		 *
+		 * - The CPU is missing memory and no node was created.  In
+		 *   that case try picking one from a nearby CPU.
+		 *
+		 * - The APIC IDs differ from the HyperTransport node IDs
+		 *   which the K8 northbridge parsing fills in.  Assume
+		 *   they are all increased by a constant offset, but in
+		 *   the same order as the HT nodeids.  If that doesn't
+		 *   result in a usable node fall back to the path for the
+		 *   previous case.
+		 *
+		 * This workaround operates directly on the mapping between
+		 * APIC ID and NUMA node, assuming certain relationship
+		 * between APIC ID, HT node ID and NUMA topology.  As going
+		 * through CPU mapping may alter the outcome, directly
+		 * access __apicid_to_node[].
+		 */
 		int ht_nodeid = c->initial_apicid;
 
 		if (ht_nodeid >= 0 &&
-		    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
-			node = apicid_to_node[ht_nodeid];
+		    __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+			node = __apicid_to_node[ht_nodeid];
 		/* Pick a nearby node */
 		if (!node_online(node))
 			node = nearby_node(apicid);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d16c2c5..6052004 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -279,11 +279,10 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 	unsigned node;
 	int cpu = smp_processor_id();
-	int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
 
 	/* Don't do the funky fallback heuristics the AMD version employs
 	   for now. */
-	node = apicid_to_node[apicid];
+	node = numa_cpu_node(cpu);
 	if (node == NUMA_NO_NODE || !node_online(node)) {
 		/* reuse the value from init_cpu_to_node() */
 		node = cpu_to_node(cpu);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 031d2e1..4b8b72d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -71,10 +71,6 @@
 #include <asm/smpboot_hooks.h>
 #include <asm/i8259.h>
 
-#ifdef CONFIG_X86_32
-u8 apicid_2_node[MAX_LOCAL_APIC];
-#endif
-
 /* State of each CPU */
 DEFINE_PER_CPU(int, cpu_state) = { 0 };
 
@@ -170,7 +166,7 @@ static void map_cpu_to_logical_apicid(void)
 	int cpu = smp_processor_id();
 	int node;
 
-	node = apic->x86_32_numa_cpu_node(cpu);
+	node = numa_cpu_node(cpu);
 	if (!node_online(node))
 		node = first_online_node;
 
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index 08a0069..3555d93 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -227,7 +227,7 @@ int __init amd_scan_nodes(void)
 				nodes[i].start >> PAGE_SHIFT,
 				nodes[i].end >> PAGE_SHIFT);
 		for (j = apicid_base; j < cores + apicid_base; j++)
-			apicid_to_node[(i << bits) + j] = i;
+			set_apicid_to_node((i << bits) + j, i);
 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 	}
 
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 787c52c..63db99c 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -4,8 +4,12 @@
 #include <linux/bootmem.h>
 
 /*
- * Which logical CPUs are on which nodes
+ * apicid, cpu, node mappings
  */
+s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
 cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 EXPORT_SYMBOL(node_to_cpumask_map);
 
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 84a3e4c..8d91d22 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -110,6 +110,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
 static unsigned long kva_start_pfn;
 static unsigned long kva_pages;
+
+int __cpuinit numa_cpu_node(int cpu)
+{
+	return apic->x86_32_numa_cpu_node(cpu);
+}
+
 /*
  * FLAT - support for basic PC memory model with discontig enabled, essentially
  *        a single node with all available processors in it with a flat
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 7762a51..989f23b 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -26,10 +26,6 @@ EXPORT_SYMBOL(node_data);
 
 struct memnode memnode;
 
-s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
-	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-
 int numa_off __initdata;
 static unsigned long __initdata nodemap_addr;
 static unsigned long __initdata nodemap_size;
@@ -721,12 +717,8 @@ void __init init_cpu_to_node(void)
 	BUG_ON(cpu_to_apicid == NULL);
 
 	for_each_possible_cpu(cpu) {
-		int node;
-		u16 apicid = cpu_to_apicid[cpu];
+		int node = numa_cpu_node(cpu);
 
-		if (apicid == BAD_APICID)
-			continue;
-		node = apicid_to_node[apicid];
 		if (node == NUMA_NO_NODE)
 			continue;
 		if (!node_online(node))
@@ -736,6 +728,14 @@ void __init init_cpu_to_node(void)
 }
 #endif
 
+int __cpuinit numa_cpu_node(int cpu)
+{
+	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+	if (apicid != BAD_APICID)
+		return __apicid_to_node[apicid];
+	return NUMA_NO_NODE;
+}
 
 void __cpuinit numa_set_node(int cpu, int node)
 {
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index e55e748..7fcae55 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -255,7 +255,7 @@ int __init get_memcfg_from_srat(void)
 			 num_memory_chunks);
 
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
+		set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
 
 	for (j = 0; j < num_memory_chunks; j++){
 		struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index a35cb9d..1af9c6e 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -79,7 +79,7 @@ static __init void bad_srat(void)
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		apicid_to_node[i] = NUMA_NO_NODE;
+		set_apicid_to_node(i, NUMA_NO_NODE);
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		nodes[i].start = nodes[i].end = 0;
 		nodes_add[i].start = nodes_add[i].end = 0;
@@ -134,7 +134,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 	}
 
 	apic_id = pa->apic_id;
-	apicid_to_node[apic_id] = node;
+	set_apicid_to_node(apic_id, node);
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
@@ -168,7 +168,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 		apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
 	else
 		apic_id = pa->apic_id;
-	apicid_to_node[apic_id] = node;
+	set_apicid_to_node(apic_id, node);
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
@@ -512,13 +512,13 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 		 * node, it must now point to the fake node ID.
 		 */
 		for (j = 0; j < MAX_LOCAL_APIC; j++)
-			if (apicid_to_node[j] == nid &&
+			if (__apicid_to_node[j] == nid &&
 			    fake_apicid_to_node[j] == NUMA_NO_NODE)
 				fake_apicid_to_node[j] = i;
 	}
 	for (i = 0; i < num_nodes; i++)
 		__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
-	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+	memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
 
 	nodes_clear(nodes_parsed);
 	for (i = 0; i < num_nodes; i++)
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-28 11:48 ` [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit Tejun Heo
@ 2010-12-28 20:35   ` David Rientjes
  2010-12-29 10:52     ` Tejun Heo
  0 siblings, 1 reply; 40+ messages in thread
From: David Rientjes @ 2010-12-28 20:35 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, Ingo Molnar, tglx, H. Peter Anvin, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On Tue, 28 Dec 2010, Tejun Heo wrote:
> diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
> index a35cb9d..1af9c6e 100644
> --- a/arch/x86/mm/srat_64.c
> +++ b/arch/x86/mm/srat_64.c
> @@ -79,7 +79,7 @@ static __init void bad_srat(void)
>  	printk(KERN_ERR "SRAT: SRAT not used.\n");
>  	acpi_numa = -1;
>  	for (i = 0; i < MAX_LOCAL_APIC; i++)
> -		apicid_to_node[i] = NUMA_NO_NODE;
> +		set_apicid_to_node(i, NUMA_NO_NODE);
>  	for (i = 0; i < MAX_NUMNODES; i++) {
>  		nodes[i].start = nodes[i].end = 0;
>  		nodes_add[i].start = nodes_add[i].end = 0;
> @@ -134,7 +134,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
>  	}
>  
>  	apic_id = pa->apic_id;
> -	apicid_to_node[apic_id] = node;
> +	set_apicid_to_node(apic_id, node);
>  	node_set(node, cpu_nodes_parsed);
>  	acpi_numa = 1;
>  	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
> @@ -168,7 +168,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
>  		apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
>  	else
>  		apic_id = pa->apic_id;
> -	apicid_to_node[apic_id] = node;
> +	set_apicid_to_node(apic_id, node);
>  	node_set(node, cpu_nodes_parsed);
>  	acpi_numa = 1;
>  	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
> @@ -512,13 +512,13 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
>  		 * node, it must now point to the fake node ID.
>  		 */
>  		for (j = 0; j < MAX_LOCAL_APIC; j++)
> -			if (apicid_to_node[j] == nid &&
> +			if (__apicid_to_node[j] == nid &&
>  			    fake_apicid_to_node[j] == NUMA_NO_NODE)
>  				fake_apicid_to_node[j] = i;
>  	}
>  	for (i = 0; i < num_nodes; i++)
>  		__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
> -	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
> +	memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
>  
>  	nodes_clear(nodes_parsed);
>  	for (i = 0; i < num_nodes; i++)
This is going to conflict with a387e95a ("") in x86/numa, so you'll need 
the following hunk for acpi_fake_nodes().  I'm not sure why this patchset 
is being based on x86/apic-cleanup rather than x86/numa?
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -511,7 +511,7 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes
                 * node, it must now point to the fake node ID.
                 */
                for (j = 0; j < MAX_LOCAL_APIC; j++)
-                       if (apicid_to_node[j] == nid &&
+                       if (__apicid_to_node[j] == nid &&
                            fake_apicid_to_node[j] == NUMA_NO_NODE)
                                fake_apicid_to_node[j] = i;
        }
@@ -522,13 +522,13 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nod
         * value.
         */
        for (i = 0; i < MAX_LOCAL_APIC; i++)
-               if (apicid_to_node[i] != NUMA_NO_NODE &&
+               if (__apicid_to_node[i] != NUMA_NO_NODE &&
                    fake_apicid_to_node[i] == NUMA_NO_NODE)
                        fake_apicid_to_node[i] = 0;
 
        for (i = 0; i < num_nodes; i++)
                __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
-       memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+       memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
 
        nodes_clear(nodes_parsed);
        for (i = 0; i < num_nodes; i++)
^ permalink raw reply	[flat|nested] 40+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-28 20:35   ` David Rientjes
@ 2010-12-29 10:52     ` Tejun Heo
  2010-12-29 19:36       ` H. Peter Anvin
  0 siblings, 1 reply; 40+ messages in thread
From: Tejun Heo @ 2010-12-29 10:52 UTC (permalink / raw)
  To: David Rientjes
  Cc: linux-kernel, Ingo Molnar, tglx, H. Peter Anvin, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On Tue, Dec 28, 2010 at 12:35:45PM -0800, David Rientjes wrote:
> This is going to conflict with a387e95a ("") in x86/numa, so you'll need 
> the following hunk for acpi_fake_nodes().  I'm not sure why this patchset 
> is being based on x86/apic-cleanup rather than x86/numa?
Because several patches from the patchset have already been committed
into x86/apic-cleanup.  Thomas, Peter, if this needs to be rebased
somewhere, let me know.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 40+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-29 10:52     ` Tejun Heo
@ 2010-12-29 19:36       ` H. Peter Anvin
  2010-12-29 22:05         ` H. Peter Anvin
  0 siblings, 1 reply; 40+ messages in thread
From: H. Peter Anvin @ 2010-12-29 19:36 UTC (permalink / raw)
  To: Tejun Heo
  Cc: David Rientjes, linux-kernel, Ingo Molnar, tglx, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On 12/29/2010 02:52 AM, Tejun Heo wrote:
> On Tue, Dec 28, 2010 at 12:35:45PM -0800, David Rientjes wrote:
>> This is going to conflict with a387e95a ("") in x86/numa, so you'll need 
>> the following hunk for acpi_fake_nodes().  I'm not sure why this patchset 
>> is being based on x86/apic-cleanup rather than x86/numa?
> 
> Because several patches from the patchset have already been committed
> into x86/apic-cleanup.  Thomas, Peter, if this needs to be rebased
> somewhere, let me know.
> 
x86/numa is already a dependent branch (on x86/amd-nb), so I'm fine
merging x86/apic-cleanup into that branch and then if you could rebase
it on top of the new x86/numa then I'll push it out tomorrow.
OK?
	-hpa
^ permalink raw reply	[flat|nested] 40+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-29 19:36       ` H. Peter Anvin
@ 2010-12-29 22:05         ` H. Peter Anvin
  2010-12-30 11:33           ` Tejun Heo
  0 siblings, 1 reply; 40+ messages in thread
From: H. Peter Anvin @ 2010-12-29 22:05 UTC (permalink / raw)
  To: Tejun Heo
  Cc: David Rientjes, linux-kernel, Ingo Molnar, tglx, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On 12/29/2010 11:36 AM, H. Peter Anvin wrote:
> On 12/29/2010 02:52 AM, Tejun Heo wrote:
>> On Tue, Dec 28, 2010 at 12:35:45PM -0800, David Rientjes wrote:
>>> This is going to conflict with a387e95a ("") in x86/numa, so you'll need 
>>> the following hunk for acpi_fake_nodes().  I'm not sure why this patchset 
>>> is being based on x86/apic-cleanup rather than x86/numa?
>>
>> Because several patches from the patchset have already been committed
>> into x86/apic-cleanup.  Thomas, Peter, if this needs to be rebased
>> somewhere, let me know.
>>
> 
> x86/numa is already a dependent branch (on x86/amd-nb), so I'm fine
> merging x86/apic-cleanup into that branch and then if you could rebase
> it on top of the new x86/numa then I'll push it out tomorrow.
> 
> OK?
> 
I have pushed out this merge onto x86/numa, so waiting for your rebase.
 If you don't have time, then I'll rebase.
	-hpa
^ permalink raw reply	[flat|nested] 40+ messages in thread
* Re: [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-29 22:05         ` H. Peter Anvin
@ 2010-12-30 11:33           ` Tejun Heo
  0 siblings, 0 replies; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 11:33 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: David Rientjes, linux-kernel, Ingo Molnar, tglx, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On Wed, Dec 29, 2010 at 02:05:06PM -0800, H. Peter Anvin wrote:
> On 12/29/2010 11:36 AM, H. Peter Anvin wrote:
> > On 12/29/2010 02:52 AM, Tejun Heo wrote:
> >> On Tue, Dec 28, 2010 at 12:35:45PM -0800, David Rientjes wrote:
> >>> This is going to conflict with a387e95a ("") in x86/numa, so you'll need 
> >>> the following hunk for acpi_fake_nodes().  I'm not sure why this patchset 
> >>> is being based on x86/apic-cleanup rather than x86/numa?
> >>
> >> Because several patches from the patchset have already been committed
> >> into x86/apic-cleanup.  Thomas, Peter, if this needs to be rebased
> >> somewhere, let me know.
> >>
> > 
> > x86/numa is already a dependent branch (on x86/amd-nb), so I'm fine
> > merging x86/apic-cleanup into that branch and then if you could rebase
> > it on top of the new x86/numa then I'll push it out tomorrow.
> > 
> > OK?
> > 
> 
> I have pushed out this merge onto x86/numa, so waiting for your rebase.
>  If you don't have time, then I'll rebase.
Will rebase & repost soon.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 40+ messages in thread
* [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4
@ 2010-12-30 17:49 Tejun Heo
  2010-12-30 17:49 ` [PATCH 01/16] x86: Kill unused static boot_cpu_logical_apicid in smpboot.c Tejun Heo
                   ` (17 more replies)
  0 siblings, 18 replies; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
Hello,
This is the fourth take of unify-x86_32-and-64-NUMA-init-paths
patchset.
The only change from the last take[L] is that it's now based on
tip/x86/numa.  Unfortunately, some of the collisions weren't trivial
and led to some ugliness.
Commit c1c3443c ("x86, numa: Fake node-to-cpumask for NUMA emulation")
introduced hard dependency on x86_64 into numa_add/remove_cpu() when
CONFIG_NUMA_EMU is enabled.  0015 has been updated so that the 32/64
bit common versions used when !CONFIG_NUMA_EMU are in numa.c while
CONFIG_NUMA_EMU variants are in numa_64.c.
This is ugly but still better than before.  IIUC, Shaohui's patchset
is going to unify NUMA emulation across 32 and 64bit, which should
remove the above ugliness.  I haven't looked through the patchset yet
but after skimming through the current NUMA_EMU code, here are some of
my thoughts, FWIW.
* There's no reason for different NUMA config methods to construct
  different data structures.  They all, including 32bit, can build a
  single set of data structures.
* Then, unification of NUMA_EMU would naturally follow.  There's no
  reason to think about whether the underlying NUMA and proximity
  information is provided by ACPI, AMD or whatever.  It just needs to
  manipulate the processed data.
Let's _please_ head that way instead of adding more glue code and
hacks everywhere.  It would be a bit more churn but I don't think
there's any other sustainable way.
This patchset contains the following sixteen patches.
 0001-x86-Kill-unused-static-boot_cpu_logical_apicid-in-sm.patch
 0002-x86-Rename-x86_32-MAX_APICID-to-MAX_LOCAL_APIC.patch
 0003-x86-Make-default_send_IPI_mask_sequence-allbutself_l.patch
 0004-x86-Replace-cpu_2_logical_apicid-with-early-percpu-v.patch
 0005-x86-Always-use-x86_cpu_to_logical_apicid-for-cpu-log.patch
 0006-x86-Kill-apic-cpu_to_logical_apicid.patch
 0007-x86-Add-apic-x86_32_early_logical_apicid.patch
 0008-x86-Implement-the-default-x86_32_early_logical_apici.patch
 0009-x86-Implement-x86_32_early_logical_apicid-for-bigsmp.patch
 0010-x86-Implement-x86_32_early_logical_apicid-for-summit.patch
 0011-x86-Implement-x86_32_early_logical_apicid-for-numaq_.patch
 0012-x86-Replace-apic-apicid_to_node-with-x86_32_numa_cpu.patch
 0013-x86-Unify-cpu-apicid-NUMA-node-mapping-between-32-an.patch
 0014-x86-Unify-CPU-NUMA-node-mapping-between-32-and-64bit.patch
 0015-x86-Unify-node_to_cpumask_map-handling-between-32-an.patch
 0016-x86-Unify-NUMA-initialization-between-32-and-64bit.patch
It's based on top of the current tip/x86/numa (d50e8fc7) and available
in the following git branch.
 git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git unify-numa
Diffstat follows.
 arch/x86/Kconfig                      |    2 
 arch/x86/include/asm/apic.h           |   36 +++---
 arch/x86/include/asm/apicdef.h        |    1 
 arch/x86/include/asm/ipi.h            |    8 -
 arch/x86/include/asm/mpspec.h         |    3 
 arch/x86/include/asm/numa.h           |   45 +++++++
 arch/x86/include/asm/numa_32.h        |    7 +
 arch/x86/include/asm/numa_64.h        |   16 --
 arch/x86/include/asm/smp.h            |    3 
 arch/x86/include/asm/topology.h       |   17 --
 arch/x86/kernel/acpi/boot.c           |    8 -
 arch/x86/kernel/apic/apic.c           |   39 ++++++
 arch/x86/kernel/apic/apic_flat_64.c   |    4 
 arch/x86/kernel/apic/apic_noop.c      |   26 ++--
 arch/x86/kernel/apic/bigsmp_32.c      |   34 ++---
 arch/x86/kernel/apic/es7000_32.c      |   35 ++----
 arch/x86/kernel/apic/ipi.c            |   12 +-
 arch/x86/kernel/apic/numaq_32.c       |   21 ++-
 arch/x86/kernel/apic/probe_32.c       |   10 +
 arch/x86/kernel/apic/summit_32.c      |   47 ++------
 arch/x86/kernel/apic/x2apic_cluster.c |    2 
 arch/x86/kernel/apic/x2apic_phys.c    |    2 
 arch/x86/kernel/apic/x2apic_uv_x.c    |    2 
 arch/x86/kernel/cpu/amd.c             |   51 +++++---
 arch/x86/kernel/cpu/common.c          |    2 
 arch/x86/kernel/cpu/intel.c           |    5 
 arch/x86/kernel/setup.c               |    2 
 arch/x86/kernel/setup_percpu.c        |   11 +
 arch/x86/kernel/smpboot.c             |   68 -----------
 arch/x86/mm/amdtopology_64.c          |    4 
 arch/x86/mm/numa.c                    |  197 +++++++++++++++++++++++++++++++++-
 arch/x86/mm/numa_32.c                 |    7 +
 arch/x86/mm/numa_64.c                 |  192 +++------------------------------
 arch/x86/mm/srat_32.c                 |    6 -
 arch/x86/mm/srat_64.c                 |   12 +-
 35 files changed, 484 insertions(+), 453 deletions(-)
--
tejun
[L] http://thread.gmane.org/gmane.linux.kernel/1081199
^ permalink raw reply	[flat|nested] 40+ messages in thread
* [PATCH 01/16] x86: Kill unused static boot_cpu_logical_apicid in smpboot.c
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
@ 2010-12-30 17:49 ` Tejun Heo
  2011-01-11  3:48   ` David Rientjes
  2010-12-30 17:49 ` [PATCH 02/16] x86: Rename x86_32 MAX_APICID to MAX_LOCAL_APIC Tejun Heo
                   ` (16 subsequent siblings)
  17 siblings, 1 reply; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
  Cc: Tejun Heo
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
---
 arch/x86/kernel/smpboot.c |    6 +-----
 1 files changed, 1 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 083e99d..4de6a00 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -165,8 +165,6 @@ static void unmap_cpu_to_node(int cpu)
 #endif
 
 #ifdef CONFIG_X86_32
-static int boot_cpu_logical_apicid;
-
 u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
 					{ [0 ... NR_CPUS-1] = BAD_APICID };
 
@@ -1101,9 +1099,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	 * Setup boot CPU information
 	 */
 	smp_store_cpu_info(0); /* Final full version of the data */
-#ifdef CONFIG_X86_32
-	boot_cpu_logical_apicid = logical_smp_processor_id();
-#endif
+
 	current_thread_info()->cpu = 0;  /* needed? */
 	for_each_possible_cpu(i) {
 		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* [PATCH 02/16] x86: Rename x86_32 MAX_APICID to MAX_LOCAL_APIC
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
  2010-12-30 17:49 ` [PATCH 01/16] x86: Kill unused static boot_cpu_logical_apicid in smpboot.c Tejun Heo
@ 2010-12-30 17:49 ` Tejun Heo
  2011-01-11  3:48   ` David Rientjes
  2010-12-30 17:49 ` [PATCH 03/16] x86: Make default_send_IPI_mask_sequence/allbutself_logical() 32bit only Tejun Heo
                   ` (15 subsequent siblings)
  17 siblings, 1 reply; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
  Cc: Tejun Heo
Replace x86_32 MAX_APICID in include/asm/mpspec.h with MAX_LOCAL_APIC
in include/asm/apicdef.h to make it consistent with x86_64.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
---
 arch/x86/include/asm/apicdef.h |    1 +
 arch/x86/include/asm/mpspec.h  |    2 --
 arch/x86/kernel/smpboot.c      |    2 +-
 arch/x86/mm/srat_32.c          |    4 ++--
 4 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index a859ca4..47a30ff 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -145,6 +145,7 @@
 
 #ifdef CONFIG_X86_32
 # define MAX_IO_APICS 64
+# define MAX_LOCAL_APIC 256
 #else
 # define MAX_IO_APICS 128
 # define MAX_LOCAL_APIC 32768
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index c82868e..018ffc1 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -32,8 +32,6 @@ extern int mp_bus_id_to_local[MAX_MP_BUSSES];
 extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
 #endif
 
-#define MAX_APICID		256
-
 #else /* CONFIG_X86_64: */
 
 #define MAX_MP_BUSSES		256
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 4de6a00..0b32f17 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -72,7 +72,7 @@
 #include <asm/i8259.h>
 
 #ifdef CONFIG_X86_32
-u8 apicid_2_node[MAX_APICID];
+u8 apicid_2_node[MAX_LOCAL_APIC];
 #endif
 
 /* State of each CPU */
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index a17dffd..e55e748 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -57,7 +57,7 @@ struct node_memory_chunk_s {
 static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
 
 static int __initdata num_memory_chunks; /* total number of memory chunks */
-static u8 __initdata apicid_to_pxm[MAX_APICID];
+static u8 __initdata apicid_to_pxm[MAX_LOCAL_APIC];
 
 int numa_off __initdata;
 int acpi_numa __initdata;
@@ -254,7 +254,7 @@ int __init get_memcfg_from_srat(void)
 	printk(KERN_DEBUG "Number of memory chunks in system = %d\n",
 			 num_memory_chunks);
 
-	for (i = 0; i < MAX_APICID; i++)
+	for (i = 0; i < MAX_LOCAL_APIC; i++)
 		apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
 
 	for (j = 0; j < num_memory_chunks; j++){
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* [PATCH 03/16] x86: Make default_send_IPI_mask_sequence/allbutself_logical() 32bit only
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
  2010-12-30 17:49 ` [PATCH 01/16] x86: Kill unused static boot_cpu_logical_apicid in smpboot.c Tejun Heo
  2010-12-30 17:49 ` [PATCH 02/16] x86: Rename x86_32 MAX_APICID to MAX_LOCAL_APIC Tejun Heo
@ 2010-12-30 17:49 ` Tejun Heo
  2011-01-11  3:48   ` David Rientjes
  2010-12-30 17:49 ` [PATCH 04/16] x86: Replace cpu_2_logical_apicid[] with early percpu variable Tejun Heo
                   ` (14 subsequent siblings)
  17 siblings, 1 reply; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
  Cc: Tejun Heo
Both functions are used only in 32bit.  Put them inside CONFIG_X86_32.
This is to prepare for logical apicid handling update.
- Cyrill Gorcunov spotted that I forgot to move declarations in ipi.h
  under CONFIG_X86_32.  Fixed.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
Reviewed-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 arch/x86/include/asm/ipi.h |    8 ++++----
 arch/x86/kernel/apic/ipi.c |    4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index 0b72282..615fa90 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -123,10 +123,6 @@ extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask,
 						 int vector);
 extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
 							 int vector);
-extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
-							 int vector);
-extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
-							 int vector);
 
 /* Avoid include hell */
 #define NMI_VECTOR 0x02
@@ -150,6 +146,10 @@ static inline void __default_local_send_IPI_all(int vector)
 }
 
 #ifdef CONFIG_X86_32
+extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
+							 int vector);
+extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
+							 int vector);
 extern void default_send_IPI_mask_logical(const struct cpumask *mask,
 						 int vector);
 extern void default_send_IPI_allbutself(int vector);
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 08385e0..5037736 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -56,6 +56,8 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
 	local_irq_restore(flags);
 }
 
+#ifdef CONFIG_X86_32
+
 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
 						 int vector)
 {
@@ -96,8 +98,6 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
 	local_irq_restore(flags);
 }
 
-#ifdef CONFIG_X86_32
-
 /*
  * This is only used on smaller machines.
  */
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* [PATCH 04/16] x86: Replace cpu_2_logical_apicid[] with early percpu variable
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
                   ` (2 preceding siblings ...)
  2010-12-30 17:49 ` [PATCH 03/16] x86: Make default_send_IPI_mask_sequence/allbutself_logical() 32bit only Tejun Heo
@ 2010-12-30 17:49 ` Tejun Heo
  2011-01-11  3:48   ` David Rientjes
  2010-12-30 17:49 ` [PATCH 05/16] x86: Always use x86_cpu_to_logical_apicid for cpu -> logical apic id Tejun Heo
                   ` (13 subsequent siblings)
  17 siblings, 1 reply; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
  Cc: Tejun Heo
Unlike x86_64, on x86_32, the mapping from cpu to logical apicid may
vary depending on apic in use.  cpu_2_logical_apicid[] array is used
for this mapping.  Replace it with early percpu variable
x86_cpu_to_logical_apicid to make it better aligned with other
mappings.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/apic.h      |    4 ----
 arch/x86/include/asm/smp.h       |    3 +++
 arch/x86/kernel/apic/apic.c      |   11 +++++++++++
 arch/x86/kernel/apic/es7000_32.c |    2 +-
 arch/x86/kernel/apic/numaq_32.c  |    2 +-
 arch/x86/kernel/apic/summit_32.c |    4 ++--
 arch/x86/kernel/setup_percpu.c   |    7 +++++++
 arch/x86/kernel/smpboot.c        |    7 ++-----
 8 files changed, 27 insertions(+), 13 deletions(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 5e3969c..eb139ec 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -595,8 +595,4 @@ extern int default_check_phys_apicid_present(int phys_apicid);
 
 #endif /* CONFIG_X86_LOCAL_APIC */
 
-#ifdef CONFIG_X86_32
-extern u8 cpu_2_logical_apicid[NR_CPUS];
-#endif
-
 #endif /* _ASM_X86_APIC_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4c2f63c..dc7c46a 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -38,6 +38,9 @@ static inline struct cpumask *cpu_core_mask(int cpu)
 
 DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
 DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
+#if defined(CONFIG_SMP) && defined(CONFIG_X86_32)
+DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid);
+#endif
 
 /* Static state in head.S used to set up a CPU */
 extern struct {
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index c0f6426..ba78b1e 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -79,6 +79,17 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 
 #ifdef CONFIG_X86_32
+
+#ifdef CONFIG_SMP
+/*
+ * On x86_32, the mapping between cpu and logical apicid may vary
+ * depending on apic in use.  The following early percpu variable is
+ * used for the mapping.  This is where the behaviors of x86_64 and 32
+ * actually diverge.  Let's keep it ugly for now.
+ */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID);
+#endif
+
 /*
  * Knob to control our willingness to enable the local APIC.
  *
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 8593582..7cb73e1 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -534,7 +534,7 @@ static int es7000_cpu_to_logical_apicid(int cpu)
 #ifdef CONFIG_SMP
 	if (cpu >= nr_cpu_ids)
 		return BAD_APICID;
-	return cpu_2_logical_apicid[cpu];
+	return early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 #else
 	return logical_smp_processor_id();
 #endif
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 960f26a..4ed90c4 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -377,7 +377,7 @@ static inline int numaq_cpu_to_logical_apicid(int cpu)
 {
 	if (cpu >= nr_cpu_ids)
 		return BAD_APICID;
-	return cpu_2_logical_apicid[cpu];
+	return early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 }
 
 /*
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 9b41926..82cfc3e 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -206,7 +206,7 @@ static void summit_init_apic_ldr(void)
 
 	/* Create logical APIC IDs by counting CPUs already in cluster. */
 	for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
-		lid = cpu_2_logical_apicid[i];
+		lid = early_per_cpu(x86_cpu_to_logical_apicid, i);
 		if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
 			++count;
 	}
@@ -247,7 +247,7 @@ static inline int summit_cpu_to_logical_apicid(int cpu)
 #ifdef CONFIG_SMP
 	if (cpu >= nr_cpu_ids)
 		return BAD_APICID;
-	return cpu_2_logical_apicid[cpu];
+	return early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 #else
 	return logical_smp_processor_id();
 #endif
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 002b796..b5147f0 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -225,6 +225,10 @@ void __init setup_per_cpu_areas(void)
 		per_cpu(x86_bios_cpu_apicid, cpu) =
 			early_per_cpu_map(x86_bios_cpu_apicid, cpu);
 #endif
+#ifdef CONFIG_X86_32
+		per_cpu(x86_cpu_to_logical_apicid, cpu) =
+			early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
+#endif
 #ifdef CONFIG_X86_64
 		per_cpu(irq_stack_ptr, cpu) =
 			per_cpu(irq_stack_union.irq_stack, cpu) +
@@ -256,6 +260,9 @@ void __init setup_per_cpu_areas(void)
 	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
 	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
 #endif
+#ifdef CONFIG_X86_32
+	early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
+#endif
 #if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
 	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
 #endif
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0b32f17..eb04f30 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -165,9 +165,6 @@ static void unmap_cpu_to_node(int cpu)
 #endif
 
 #ifdef CONFIG_X86_32
-u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
-					{ [0 ... NR_CPUS-1] = BAD_APICID };
-
 static void map_cpu_to_logical_apicid(void)
 {
 	int cpu = smp_processor_id();
@@ -177,13 +174,13 @@ static void map_cpu_to_logical_apicid(void)
 	if (!node_online(node))
 		node = first_online_node;
 
-	cpu_2_logical_apicid[cpu] = apicid;
+	early_per_cpu(x86_cpu_to_logical_apicid, cpu) = apicid;
 	map_cpu_to_node(cpu, node);
 }
 
 void numa_remove_cpu(int cpu)
 {
-	cpu_2_logical_apicid[cpu] = BAD_APICID;
+	early_per_cpu(x86_cpu_to_logical_apicid, cpu) = BAD_APICID;
 	unmap_cpu_to_node(cpu);
 }
 #else
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* [PATCH 05/16] x86: Always use x86_cpu_to_logical_apicid for cpu -> logical apic id
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
                   ` (3 preceding siblings ...)
  2010-12-30 17:49 ` [PATCH 04/16] x86: Replace cpu_2_logical_apicid[] with early percpu variable Tejun Heo
@ 2010-12-30 17:49 ` Tejun Heo
  2011-01-13  2:40   ` David Rientjes
  2010-12-30 17:49 ` [PATCH 06/16] x86: Kill apic->cpu_to_logical_apicid() Tejun Heo
                   ` (12 subsequent siblings)
  17 siblings, 1 reply; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
  Cc: Tejun Heo
Currently, cpu -> logical apic id translation is done by the
apic->cpu_to_logical_apicid() callback, which may or may not use
x86_cpu_to_logical_apicid.  This is unnecessary as the result should
always equal logical_smp_processor_id(), which is known early during
CPU bringup.
Initialize x86_cpu_to_logical_apicid after apic->init_apic_ldr() in
setup_local_APIC() and always use x86_cpu_to_logical_apicid for cpu ->
logical apic id mapping.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/kernel/apic/apic.c |    8 ++++++++
 arch/x86/kernel/apic/ipi.c  |    8 ++++----
 arch/x86/kernel/smpboot.c   |    7 +++----
 3 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ba78b1e..8ad231c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1252,6 +1252,14 @@ void __cpuinit setup_local_APIC(void)
 	 */
 	apic->init_apic_ldr();
 
+#ifdef CONFIG_X86_32
+	/*
+	 * APIC LDR is initialized.  Fetch and store logical_apic_id.
+	 */
+	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+		logical_smp_processor_id();
+#endif
+
 	/*
 	 * Set Task Priority to 'accept all'. We never change this
 	 * later on.
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 5037736..cce91bf 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -73,8 +73,8 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
 	local_irq_save(flags);
 	for_each_cpu(query_cpu, mask)
 		__default_send_IPI_dest_field(
-			apic->cpu_to_logical_apicid(query_cpu), vector,
-			apic->dest_logical);
+			early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+			vector, apic->dest_logical);
 	local_irq_restore(flags);
 }
 
@@ -92,8 +92,8 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
 		if (query_cpu == this_cpu)
 			continue;
 		__default_send_IPI_dest_field(
-			apic->cpu_to_logical_apicid(query_cpu), vector,
-			apic->dest_logical);
+			early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+			vector, apic->dest_logical);
 		}
 	local_irq_restore(flags);
 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index eb04f30..0768761 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -168,19 +168,18 @@ static void unmap_cpu_to_node(int cpu)
 static void map_cpu_to_logical_apicid(void)
 {
 	int cpu = smp_processor_id();
-	int apicid = logical_smp_processor_id();
-	int node = apic->apicid_to_node(apicid);
+	int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+	int node;
 
+	node = apic->apicid_to_node(logical_apicid);
 	if (!node_online(node))
 		node = first_online_node;
 
-	early_per_cpu(x86_cpu_to_logical_apicid, cpu) = apicid;
 	map_cpu_to_node(cpu, node);
 }
 
 void numa_remove_cpu(int cpu)
 {
-	early_per_cpu(x86_cpu_to_logical_apicid, cpu) = BAD_APICID;
 	unmap_cpu_to_node(cpu);
 }
 #else
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* [PATCH 06/16] x86: Kill apic->cpu_to_logical_apicid()
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
                   ` (4 preceding siblings ...)
  2010-12-30 17:49 ` [PATCH 05/16] x86: Always use x86_cpu_to_logical_apicid for cpu -> logical apic id Tejun Heo
@ 2010-12-30 17:49 ` Tejun Heo
  2011-01-13  2:40   ` David Rientjes
  2010-12-30 17:49 ` [PATCH 07/16] x86: Add apic->x86_32_early_logical_apicid() Tejun Heo
                   ` (11 subsequent siblings)
  17 siblings, 1 reply; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
  Cc: Tejun Heo
After the previous patch, apic->cpu_to_logical_apicid() is no longer
used.  Kill it.
For apic types with a custom cpu_to_logical_apicid() which is also used
for other purposes, remove the function and modify its users to do the
mapping directly.
The CONFIG_SMP #ifdefs in es7000_32 and summit_32 are ignored during the
conversion, as these apic implementations are not used on UP kernels.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/apic.h           |    7 -------
 arch/x86/kernel/apic/apic_flat_64.c   |    2 --
 arch/x86/kernel/apic/apic_noop.c      |    6 ------
 arch/x86/kernel/apic/bigsmp_32.c      |   19 +++++++------------
 arch/x86/kernel/apic/es7000_32.c      |   18 ++----------------
 arch/x86/kernel/apic/numaq_32.c       |    8 --------
 arch/x86/kernel/apic/probe_32.c       |    1 -
 arch/x86/kernel/apic/summit_32.c      |   17 ++---------------
 arch/x86/kernel/apic/x2apic_cluster.c |    1 -
 arch/x86/kernel/apic/x2apic_phys.c    |    1 -
 arch/x86/kernel/apic/x2apic_uv_x.c    |    1 -
 11 files changed, 11 insertions(+), 70 deletions(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index eb139ec..d1aa0c3 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -307,7 +307,6 @@ struct apic {
 	void (*setup_apic_routing)(void);
 	int (*multi_timer_check)(int apic, int irq);
 	int (*apicid_to_node)(int logical_apicid);
-	int (*cpu_to_logical_apicid)(int cpu);
 	int (*cpu_present_to_apicid)(int mps_cpu);
 	void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
 	void (*setup_portio_remap)(void);
@@ -557,12 +556,6 @@ static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_ma
 	*retmap = *phys_map;
 }
 
-/* Mapping from cpu number to logical apicid */
-static inline int default_cpu_to_logical_apicid(int cpu)
-{
-	return 1 << cpu;
-}
-
 static inline int __default_cpu_present_to_apicid(int mps_cpu)
 {
 	if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 09d3b17..5a9d11a 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -186,7 +186,6 @@ struct apic apic_flat =  {
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= NULL,
-	.cpu_to_logical_apicid		= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
@@ -338,7 +337,6 @@ struct apic apic_physflat =  {
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= NULL,
-	.cpu_to_logical_apicid		= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index e31b9ff..f3d19b2 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -54,11 +54,6 @@ static u64 noop_apic_icr_read(void)
 	return 0;
 }
 
-static int noop_cpu_to_logical_apicid(int cpu)
-{
-	return 0;
-}
-
 static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
 {
 	return 0;
@@ -155,7 +150,6 @@ struct apic apic_noop = {
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= noop_apicid_to_node,
 
-	.cpu_to_logical_apicid		= noop_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= physid_set_mask_of_physid,
 
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index cb804c5..4c62592 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -93,14 +93,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu)
 	return BAD_APICID;
 }
 
-/* Mapping from cpu number to logical apicid */
-static inline int bigsmp_cpu_to_logical_apicid(int cpu)
-{
-	if (cpu >= nr_cpu_ids)
-		return BAD_APICID;
-	return cpu_physical_id(cpu);
-}
-
 static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
 {
 	/* For clustered we don't have a good way to do this yet - hack */
@@ -115,7 +107,11 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid)
 /* As we are using single CPU as destination, pick only one CPU here */
 static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
-	return bigsmp_cpu_to_logical_apicid(cpumask_first(cpumask));
+	int cpu = cpumask_first(cpumask);
+
+	if (cpu < nr_cpu_ids)
+		return cpu_physical_id(cpu);
+	return BAD_APICID;
 }
 
 static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
@@ -129,9 +125,9 @@ static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	 */
 	for_each_cpu_and(cpu, cpumask, andmask) {
 		if (cpumask_test_cpu(cpu, cpu_online_mask))
-			break;
+			return cpu_physical_id(cpu);
 	}
-	return bigsmp_cpu_to_logical_apicid(cpu);
+	return BAD_APICID;
 }
 
 static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
@@ -220,7 +216,6 @@ struct apic apic_bigsmp = {
 	.setup_apic_routing		= bigsmp_setup_apic_routing,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= bigsmp_apicid_to_node,
-	.cpu_to_logical_apicid		= bigsmp_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= bigsmp_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= physid_set_mask_of_physid,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 7cb73e1..6840681 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -528,18 +528,6 @@ static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap)
 	++cpu_id;
 }
 
-/* Mapping from cpu number to logical apicid */
-static int es7000_cpu_to_logical_apicid(int cpu)
-{
-#ifdef CONFIG_SMP
-	if (cpu >= nr_cpu_ids)
-		return BAD_APICID;
-	return early_per_cpu(x86_cpu_to_logical_apicid, cpu);
-#else
-	return logical_smp_processor_id();
-#endif
-}
-
 static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
 {
 	/* For clustered we don't have a good way to do this yet - hack */
@@ -561,7 +549,7 @@ static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask)
 	 * The cpus in the mask must all be on the apic cluster.
 	 */
 	for_each_cpu(cpu, cpumask) {
-		int new_apicid = es7000_cpu_to_logical_apicid(cpu);
+		int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 
 		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
 			WARN(1, "Not a valid mask!");
@@ -578,7 +566,7 @@ static unsigned int
 es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
 			      const struct cpumask *andmask)
 {
-	int apicid = es7000_cpu_to_logical_apicid(0);
+	int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
 	cpumask_var_t cpumask;
 
 	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -656,7 +644,6 @@ struct apic __refdata apic_es7000_cluster = {
 	.setup_apic_routing		= es7000_setup_apic_routing,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= es7000_apicid_to_node,
-	.cpu_to_logical_apicid		= es7000_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= es7000_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= es7000_apicid_to_cpu_present,
 	.setup_portio_remap		= NULL,
@@ -721,7 +708,6 @@ struct apic __refdata apic_es7000 = {
 	.setup_apic_routing		= es7000_setup_apic_routing,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= es7000_apicid_to_node,
-	.cpu_to_logical_apicid		= es7000_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= es7000_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= es7000_apicid_to_cpu_present,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 4ed90c4..2b434d5 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -373,13 +373,6 @@ static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask
 	return physids_promote(0xFUL, retmap);
 }
 
-static inline int numaq_cpu_to_logical_apicid(int cpu)
-{
-	if (cpu >= nr_cpu_ids)
-		return BAD_APICID;
-	return early_per_cpu(x86_cpu_to_logical_apicid, cpu);
-}
-
 /*
  * Supporting over 60 cpus on NUMA-Q requires a locality-dependent
  * cpu to APIC ID relation to properly interact with the intelligent
@@ -509,7 +502,6 @@ struct apic __refdata apic_numaq = {
 	.setup_apic_routing		= numaq_setup_apic_routing,
 	.multi_timer_check		= numaq_multi_timer_check,
 	.apicid_to_node			= numaq_apicid_to_node,
-	.cpu_to_logical_apicid		= numaq_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= numaq_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= numaq_apicid_to_cpu_present,
 	.setup_portio_remap		= numaq_setup_portio_remap,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 99d2fe0..24a6828 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -131,7 +131,6 @@ struct apic apic_default = {
 	.setup_apic_routing		= setup_apic_flat_routing,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= default_apicid_to_node,
-	.cpu_to_logical_apicid		= default_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= physid_set_mask_of_physid,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 82cfc3e..1ef4c14 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -241,18 +241,6 @@ static int summit_apicid_to_node(int logical_apicid)
 #endif
 }
 
-/* Mapping from cpu number to logical apicid */
-static inline int summit_cpu_to_logical_apicid(int cpu)
-{
-#ifdef CONFIG_SMP
-	if (cpu >= nr_cpu_ids)
-		return BAD_APICID;
-	return early_per_cpu(x86_cpu_to_logical_apicid, cpu);
-#else
-	return logical_smp_processor_id();
-#endif
-}
-
 static int summit_cpu_present_to_apicid(int mps_cpu)
 {
 	if (mps_cpu < nr_cpu_ids)
@@ -286,7 +274,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
 	 * The cpus in the mask must all be on the apic cluster.
 	 */
 	for_each_cpu(cpu, cpumask) {
-		int new_apicid = summit_cpu_to_logical_apicid(cpu);
+		int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 
 		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
 			printk("%s: Not a valid mask!\n", __func__);
@@ -301,7 +289,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
 static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
 			      const struct cpumask *andmask)
 {
-	int apicid = summit_cpu_to_logical_apicid(0);
+	int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
 	cpumask_var_t cpumask;
 
 	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -529,7 +517,6 @@ struct apic apic_summit = {
 	.setup_apic_routing		= summit_setup_apic_routing,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= summit_apicid_to_node,
-	.cpu_to_logical_apicid		= summit_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= summit_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= summit_apicid_to_cpu_present,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index cf69c59..badc1fd 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -207,7 +207,6 @@ struct apic apic_x2apic_cluster = {
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= NULL,
-	.cpu_to_logical_apicid		= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 8972f38..f28bf4c 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -196,7 +196,6 @@ struct apic apic_x2apic_phys = {
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= NULL,
-	.cpu_to_logical_apicid		= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 194539a..365f53d 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -316,7 +316,6 @@ struct apic __refdata apic_x2apic_uv_x = {
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= NULL,
-	.cpu_to_logical_apicid		= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* [PATCH 07/16] x86: Add apic->x86_32_early_logical_apicid()
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
                   ` (5 preceding siblings ...)
  2010-12-30 17:49 ` [PATCH 06/16] x86: Kill apic->cpu_to_logical_apicid() Tejun Heo
@ 2010-12-30 17:49 ` Tejun Heo
  2010-12-30 17:49 ` [PATCH 08/16] x86: Implement the default x86_32_early_logical_apicid() Tejun Heo
                   ` (10 subsequent siblings)
  17 siblings, 0 replies; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
  Cc: Tejun Heo
On x86_32, the mapping between cpu and logical apic ID differs
depending on the specific apic implementation in use.  The mapping is
initialized while bringing up CPUs; however, this makes early inits
ignore memory topology.
Add an x86_32-specific apic->x86_32_early_logical_apicid() method, which
is called early during boot to query the mapping.  The mapping is later
verified against the result of init_apic_ldr().  The method is allowed
to return BAD_APICID if the mapping can't be determined early.
A noop variant, which always returns BAD_APICID, is implemented and
added to all x86_32 apic implementations.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/apic.h      |   19 +++++++++++++++++++
 arch/x86/kernel/apic/apic.c      |   12 ++++++++++--
 arch/x86/kernel/apic/apic_noop.c |    4 ++++
 arch/x86/kernel/apic/bigsmp_32.c |    2 ++
 arch/x86/kernel/apic/es7000_32.c |    4 ++++
 arch/x86/kernel/apic/numaq_32.c  |    2 ++
 arch/x86/kernel/apic/probe_32.c  |    2 ++
 arch/x86/kernel/apic/summit_32.c |    2 ++
 8 files changed, 45 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index d1aa0c3..efb073b 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -354,6 +354,20 @@ struct apic {
 	void (*icr_write)(u32 low, u32 high);
 	void (*wait_icr_idle)(void);
 	u32 (*safe_wait_icr_idle)(void);
+
+#ifdef CONFIG_X86_32
+	/*
+	 * Called very early during boot from get_smp_config().  It should
+	 * return the logical apicid.  x86_[bios]_cpu_to_apicid is
+	 * initialized before this function is called.
+	 *
+	 * If logical apicid can't be determined that early, the function
+	 * may return BAD_APICID.  Logical apicid will be configured after
+	 * init_apic_ldr() while bringing up CPUs.  Note that NUMA affinity
+	 * won't be applied properly during early boot in this case.
+	 */
+	int (*x86_32_early_logical_apicid)(int cpu);
+#endif
 };
 
 /*
@@ -501,6 +515,11 @@ extern struct apic apic_noop;
 
 extern struct apic apic_default;
 
+static inline int noop_x86_32_early_logical_apicid(int cpu)
+{
+	return BAD_APICID;
+}
+
 /*
  * Set up the logical destination ID.
  *
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 8ad231c..6a82dc6 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1254,8 +1254,13 @@ void __cpuinit setup_local_APIC(void)
 
 #ifdef CONFIG_X86_32
 	/*
-	 * APIC LDR is initialized.  Fetch and store logical_apic_id.
+	 * APIC LDR is initialized.  If logical_apicid mapping was
+	 * initialized during get_smp_config(), make sure it matches the
+	 * actual value.
 	 */
+	i = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+	WARN_ON(i != BAD_APICID && i != logical_smp_processor_id());
+	/* always use the value from LDR */
 	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
 		logical_smp_processor_id();
 #endif
@@ -1994,7 +1999,10 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
 	early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
 #endif
-
+#ifdef CONFIG_X86_32
+	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+		apic->x86_32_early_logical_apicid(cpu);
+#endif
 	set_cpu_possible(cpu, true);
 	set_cpu_present(cpu, true);
 }
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index f3d19b2..0309c58 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -191,4 +191,8 @@ struct apic apic_noop = {
 	.icr_write			= noop_apic_icr_write,
 	.wait_icr_idle			= noop_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= noop_safe_apic_wait_icr_idle,
+
+#ifdef CONFIG_X86_32
+	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+#endif
 };
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 4c62592..dd32a9b 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -251,4 +251,6 @@ struct apic apic_bigsmp = {
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+
+	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
 };
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 6840681..0ffc1ec 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -682,6 +682,8 @@ struct apic __refdata apic_es7000_cluster = {
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+
+	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
 };
 
 struct apic __refdata apic_es7000 = {
@@ -744,4 +746,6 @@ struct apic __refdata apic_es7000 = {
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+
+	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
 };
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 2b434d5..f1a8b12 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -539,4 +539,6 @@ struct apic __refdata apic_numaq = {
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+
+	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
 };
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 24a6828..40be7c3 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -166,6 +166,8 @@ struct apic apic_default = {
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+
+	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
 };
 
 extern struct apic apic_numaq;
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 1ef4c14..172c498 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -552,4 +552,6 @@ struct apic apic_summit = {
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+
+	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
 };
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* [PATCH 08/16] x86: Implement the default x86_32_early_logical_apicid()
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
                   ` (6 preceding siblings ...)
  2010-12-30 17:49 ` [PATCH 07/16] x86: Add apic->x86_32_early_logical_apicid() Tejun Heo
@ 2010-12-30 17:49 ` Tejun Heo
  2010-12-30 17:49 ` [PATCH 09/16] x86: Implement x86_32_early_logical_apicid() for bigsmp_32 Tejun Heo
                   ` (9 subsequent siblings)
  17 siblings, 0 replies; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
  Cc: Tejun Heo
Implement x86_32_early_logical_apicid() for the default apic flat
routing.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/kernel/apic/probe_32.c |    7 ++++++-
 1 files changed, 6 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 40be7c3..0f9a9ab 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -77,6 +77,11 @@ void __init default_setup_apic_routing(void)
 		apic->setup_apic_routing();
 }
 
+static int default_x86_32_early_logical_apicid(int cpu)
+{
+	return 1 << cpu;
+}
+
 static void setup_apic_flat_routing(void)
 {
 #ifdef CONFIG_X86_IO_APIC
@@ -167,7 +172,7 @@ struct apic apic_default = {
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
-	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+	.x86_32_early_logical_apicid	= default_x86_32_early_logical_apicid,
 };
 
 extern struct apic apic_numaq;
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* [PATCH 09/16] x86: Implement x86_32_early_logical_apicid() for bigsmp_32
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
                   ` (7 preceding siblings ...)
  2010-12-30 17:49 ` [PATCH 08/16] x86: Implement the default x86_32_early_logical_apicid() Tejun Heo
@ 2010-12-30 17:49 ` Tejun Heo
  2010-12-30 17:49 ` [PATCH 10/16] x86: Implement x86_32_early_logical_apicid() for summit_32 Tejun Heo
                   ` (8 subsequent siblings)
  17 siblings, 0 replies; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
  Cc: Tejun Heo
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/kernel/apic/bigsmp_32.c |    8 +++++++-
 1 files changed, 7 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index dd32a9b..bc7ed04 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -45,6 +45,12 @@ static unsigned long bigsmp_check_apicid_present(int bit)
 	return 1;
 }
 
+static int bigsmp_early_logical_apicid(int cpu)
+{
+	/* on bigsmp, logical apicid is the same as physical */
+	return early_per_cpu(x86_cpu_to_apicid, cpu);
+}
+
 static inline unsigned long calculate_ldr(int cpu)
 {
 	unsigned long val, id;
@@ -252,5 +258,5 @@ struct apic apic_bigsmp = {
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
-	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+	.x86_32_early_logical_apicid	= bigsmp_early_logical_apicid,
 };
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* [PATCH 10/16] x86: Implement x86_32_early_logical_apicid() for summit_32
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
                   ` (8 preceding siblings ...)
  2010-12-30 17:49 ` [PATCH 09/16] x86: Implement x86_32_early_logical_apicid() for bigsmp_32 Tejun Heo
@ 2010-12-30 17:49 ` Tejun Heo
  2010-12-30 17:49 ` [PATCH 11/16] x86: Implement x86_32_early_logical_apicid() for numaq_32 Tejun Heo
                   ` (7 subsequent siblings)
  17 siblings, 0 replies; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
  Cc: Tejun Heo
Factor out logical apic id calculation from summit_init_apic_ldr() and
use it for the x86_32_early_logical_apicid() callback.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/kernel/apic/summit_32.c |   17 ++++++++++++-----
 1 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 172c498..8c91473 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -194,11 +194,10 @@ static unsigned long summit_check_apicid_present(int bit)
 	return 1;
 }
 
-static void summit_init_apic_ldr(void)
+static int summit_early_logical_apicid(int cpu)
 {
-	unsigned long val, id;
 	int count = 0;
-	u8 my_id = (u8)hard_smp_processor_id();
+	u8 my_id = early_per_cpu(x86_cpu_to_apicid, cpu);
 	u8 my_cluster = APIC_CLUSTER(my_id);
 #ifdef CONFIG_SMP
 	u8 lid;
@@ -214,7 +213,15 @@ static void summit_init_apic_ldr(void)
 	/* We only have a 4 wide bitmap in cluster mode.  If a deranged
 	 * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
 	BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
-	id = my_cluster | (1UL << count);
+	return my_cluster | (1UL << count);
+}
+
+static void summit_init_apic_ldr(void)
+{
+	int cpu = smp_processor_id();
+	unsigned long id = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+	unsigned long val;
+
 	apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE);
 	val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
 	val |= SET_APIC_LOGICAL_ID(id);
@@ -553,5 +560,5 @@ struct apic apic_summit = {
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
-	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+	.x86_32_early_logical_apicid	= summit_early_logical_apicid,
 };
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* [PATCH 11/16] x86: Implement x86_32_early_logical_apicid() for numaq_32
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
                   ` (9 preceding siblings ...)
  2010-12-30 17:49 ` [PATCH 10/16] x86: Implement x86_32_early_logical_apicid() for summit_32 Tejun Heo
@ 2010-12-30 17:49 ` Tejun Heo
  2010-12-30 17:49 ` [PATCH 12/16] x86: Replace apic->apicid_to_node() with ->x86_32_numa_cpu_node() Tejun Heo
                   ` (6 subsequent siblings)
  17 siblings, 0 replies; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
  Cc: Tejun Heo
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/kernel/apic/es7000_32.c |   10 ++++++++--
 1 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 0ffc1ec..5c53d05 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -460,6 +460,12 @@ static unsigned long es7000_check_apicid_present(int bit)
 	return physid_isset(bit, phys_cpu_present_map);
 }
 
+static int es7000_early_logical_apicid(int cpu)
+{
+	/* on es7000, logical apicid is the same as physical */
+	return early_per_cpu(x86_bios_cpu_apicid, cpu);
+}
+
 static unsigned long calculate_ldr(int cpu)
 {
 	unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu);
@@ -683,7 +689,7 @@ struct apic __refdata apic_es7000_cluster = {
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
-	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+	.x86_32_early_logical_apicid	= es7000_early_logical_apicid,
 };
 
 struct apic __refdata apic_es7000 = {
@@ -747,5 +753,5 @@ struct apic __refdata apic_es7000 = {
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
-	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+	.x86_32_early_logical_apicid	= es7000_early_logical_apicid,
 };
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* [PATCH 12/16] x86: Replace apic->apicid_to_node() with ->x86_32_numa_cpu_node()
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
                   ` (10 preceding siblings ...)
  2010-12-30 17:49 ` [PATCH 11/16] x86: Implement x86_32_early_logical_apicid() for numaq_32 Tejun Heo
@ 2010-12-30 17:49 ` Tejun Heo
  2010-12-30 17:49 ` [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit Tejun Heo
                   ` (5 subsequent siblings)
  17 siblings, 0 replies; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
  Cc: Tejun Heo
apic->apicid_to_node() is a 32bit specific apic operation which
determines the NUMA node for a CPU.  Depending on the APIC implementation,
it can be easier to determine NUMA node from either physical or
logical apicid.  Currently, ->apicid_to_node() takes @logical_apicid
and calls hard_smp_processor_id() if the physical apicid is needed.
This prevents NUMA mapping from being queried from a different CPU,
which in turn makes it impossible to initialize NUMA mapping before
SMP bringup.
This patch replaces apic->apicid_to_node() with
->x86_32_numa_cpu_node() which takes @cpu, from which both logical and
physical apicids can easily be determined.  While at it, drop
duplicate implementations from bigsmp_32 and summit_32, and use the
default one.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
---
 arch/x86/include/asm/apic.h           |    6 ++++--
 arch/x86/kernel/apic/apic.c           |   10 +++++++---
 arch/x86/kernel/apic/apic_flat_64.c   |    2 --
 arch/x86/kernel/apic/apic_noop.c      |   16 +++++++++-------
 arch/x86/kernel/apic/bigsmp_32.c      |    7 +------
 arch/x86/kernel/apic/es7000_32.c      |    7 +++----
 arch/x86/kernel/apic/numaq_32.c       |   11 ++++++++++-
 arch/x86/kernel/apic/probe_32.c       |    2 +-
 arch/x86/kernel/apic/summit_32.c      |   11 +----------
 arch/x86/kernel/apic/x2apic_cluster.c |    1 -
 arch/x86/kernel/apic/x2apic_phys.c    |    1 -
 arch/x86/kernel/apic/x2apic_uv_x.c    |    1 -
 arch/x86/kernel/smpboot.c             |    3 +--
 13 files changed, 37 insertions(+), 41 deletions(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index efb073b..ad30ca4 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -306,7 +306,6 @@ struct apic {
 
 	void (*setup_apic_routing)(void);
 	int (*multi_timer_check)(int apic, int irq);
-	int (*apicid_to_node)(int logical_apicid);
 	int (*cpu_present_to_apicid)(int mps_cpu);
 	void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
 	void (*setup_portio_remap)(void);
@@ -367,6 +366,9 @@ struct apic {
 	 * won't be applied properly during early boot in this case.
 	 */
 	int (*x86_32_early_logical_apicid)(int cpu);
+
+	/* determine CPU -> NUMA node mapping */
+	int (*x86_32_numa_cpu_node)(int cpu);
 #endif
 };
 
@@ -539,7 +541,7 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
 	return cpuid_apic >> index_msb;
 }
 
-extern int default_apicid_to_node(int logical_apicid);
+extern int default_x86_32_numa_cpu_node(int cpu);
 
 #endif
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 6a82dc6..5e097e5 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2023,10 +2023,14 @@ void default_init_apic_ldr(void)
 }
 
 #ifdef CONFIG_X86_32
-int default_apicid_to_node(int logical_apicid)
+int default_x86_32_numa_cpu_node(int cpu)
 {
-#ifdef CONFIG_SMP
-	return apicid_2_node[hard_smp_processor_id()];
+#ifdef CONFIG_NUMA
+	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+	if (apicid != BAD_APICID)
+		return apicid_2_node[apicid];
+	return NUMA_NO_NODE;
 #else
 	return 0;
 #endif
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 5a9d11a..5652d31 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -185,7 +185,6 @@ struct apic apic_flat =  {
 	.ioapic_phys_id_map		= NULL,
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
@@ -336,7 +335,6 @@ struct apic apic_physflat =  {
 	.ioapic_phys_id_map		= NULL,
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 0309c58..f1baa2d 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -108,12 +108,6 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
 	cpumask_set_cpu(cpu, retmask);
 }
 
-int noop_apicid_to_node(int logical_apicid)
-{
-	/* we're always on node 0 */
-	return 0;
-}
-
 static u32 noop_apic_read(u32 reg)
 {
 	WARN_ON_ONCE((cpu_has_apic && !disable_apic));
@@ -125,6 +119,14 @@ static void noop_apic_write(u32 reg, u32 v)
 	WARN_ON_ONCE(cpu_has_apic && !disable_apic);
 }
 
+#ifdef CONFIG_X86_32
+static int noop_x86_32_numa_cpu_node(int cpu)
+{
+	/* we're always on node 0 */
+	return 0;
+}
+#endif
+
 struct apic apic_noop = {
 	.name				= "noop",
 	.probe				= noop_probe,
@@ -148,7 +150,6 @@ struct apic apic_noop = {
 	.ioapic_phys_id_map		= default_ioapic_phys_id_map,
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= noop_apicid_to_node,
 
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= physid_set_mask_of_physid,
@@ -194,5 +195,6 @@ struct apic apic_noop = {
 
 #ifdef CONFIG_X86_32
 	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+	.x86_32_numa_cpu_node		= noop_x86_32_numa_cpu_node,
 #endif
 };
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index bc7ed04..541a2e4 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -86,11 +86,6 @@ static void bigsmp_setup_apic_routing(void)
 		nr_ioapics);
 }
 
-static int bigsmp_apicid_to_node(int logical_apicid)
-{
-	return apicid_2_node[hard_smp_processor_id()];
-}
-
 static int bigsmp_cpu_present_to_apicid(int mps_cpu)
 {
 	if (mps_cpu < nr_cpu_ids)
@@ -221,7 +216,6 @@ struct apic apic_bigsmp = {
 	.ioapic_phys_id_map		= bigsmp_ioapic_phys_id_map,
 	.setup_apic_routing		= bigsmp_setup_apic_routing,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= bigsmp_apicid_to_node,
 	.cpu_present_to_apicid		= bigsmp_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= physid_set_mask_of_physid,
 	.setup_portio_remap		= NULL,
@@ -259,4 +253,5 @@ struct apic apic_bigsmp = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= bigsmp_early_logical_apicid,
+	.x86_32_numa_cpu_node		= default_x86_32_numa_cpu_node,
 };
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 5c53d05..3e9de48 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -510,12 +510,11 @@ static void es7000_setup_apic_routing(void)
 		nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
 }
 
-static int es7000_apicid_to_node(int logical_apicid)
+static int es7000_numa_cpu_node(int cpu)
 {
 	return 0;
 }
 
-
 static int es7000_cpu_present_to_apicid(int mps_cpu)
 {
 	if (!mps_cpu)
@@ -649,7 +648,6 @@ struct apic __refdata apic_es7000_cluster = {
 	.ioapic_phys_id_map		= es7000_ioapic_phys_id_map,
 	.setup_apic_routing		= es7000_setup_apic_routing,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= es7000_apicid_to_node,
 	.cpu_present_to_apicid		= es7000_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= es7000_apicid_to_cpu_present,
 	.setup_portio_remap		= NULL,
@@ -690,6 +688,7 @@ struct apic __refdata apic_es7000_cluster = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= es7000_early_logical_apicid,
+	.x86_32_numa_cpu_node		= es7000_numa_cpu_node,
 };
 
 struct apic __refdata apic_es7000 = {
@@ -715,7 +714,6 @@ struct apic __refdata apic_es7000 = {
 	.ioapic_phys_id_map		= es7000_ioapic_phys_id_map,
 	.setup_apic_routing		= es7000_setup_apic_routing,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= es7000_apicid_to_node,
 	.cpu_present_to_apicid		= es7000_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= es7000_apicid_to_cpu_present,
 	.setup_portio_remap		= NULL,
@@ -754,4 +752,5 @@ struct apic __refdata apic_es7000 = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= es7000_early_logical_apicid,
+	.x86_32_numa_cpu_node		= es7000_numa_cpu_node,
 };
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index f1a8b12..6273eee 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -391,6 +391,15 @@ static inline int numaq_apicid_to_node(int logical_apicid)
 	return logical_apicid >> 4;
 }
 
+static int numaq_numa_cpu_node(int cpu)
+{
+	int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+
+	if (logical_apicid != BAD_APICID)
+		return numaq_apicid_to_node(logical_apicid);
+	return NUMA_NO_NODE;
+}
+
 static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
 {
 	int node = numaq_apicid_to_node(logical_apicid);
@@ -501,7 +510,6 @@ struct apic __refdata apic_numaq = {
 	.ioapic_phys_id_map		= numaq_ioapic_phys_id_map,
 	.setup_apic_routing		= numaq_setup_apic_routing,
 	.multi_timer_check		= numaq_multi_timer_check,
-	.apicid_to_node			= numaq_apicid_to_node,
 	.cpu_present_to_apicid		= numaq_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= numaq_apicid_to_cpu_present,
 	.setup_portio_remap		= numaq_setup_portio_remap,
@@ -541,4 +549,5 @@ struct apic __refdata apic_numaq = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+	.x86_32_numa_cpu_node		= numaq_numa_cpu_node,
 };
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 0f9a9ab..fc84c7b 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -135,7 +135,6 @@ struct apic apic_default = {
 	.ioapic_phys_id_map		= default_ioapic_phys_id_map,
 	.setup_apic_routing		= setup_apic_flat_routing,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= default_apicid_to_node,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= physid_set_mask_of_physid,
 	.setup_portio_remap		= NULL,
@@ -173,6 +172,7 @@ struct apic apic_default = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= default_x86_32_early_logical_apicid,
+	.x86_32_numa_cpu_node		= default_x86_32_numa_cpu_node,
 };
 
 extern struct apic apic_numaq;
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 8c91473..e4b8059 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -239,15 +239,6 @@ static void summit_setup_apic_routing(void)
 						nr_ioapics);
 }
 
-static int summit_apicid_to_node(int logical_apicid)
-{
-#ifdef CONFIG_SMP
-	return apicid_2_node[hard_smp_processor_id()];
-#else
-	return 0;
-#endif
-}
-
 static int summit_cpu_present_to_apicid(int mps_cpu)
 {
 	if (mps_cpu < nr_cpu_ids)
@@ -523,7 +514,6 @@ struct apic apic_summit = {
 	.ioapic_phys_id_map		= summit_ioapic_phys_id_map,
 	.setup_apic_routing		= summit_setup_apic_routing,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= summit_apicid_to_node,
 	.cpu_present_to_apicid		= summit_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= summit_apicid_to_cpu_present,
 	.setup_portio_remap		= NULL,
@@ -561,4 +551,5 @@ struct apic apic_summit = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= summit_early_logical_apicid,
+	.x86_32_numa_cpu_node		= default_x86_32_numa_cpu_node,
 };
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index badc1fd..90949bb 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -206,7 +206,6 @@ struct apic apic_x2apic_cluster = {
 	.ioapic_phys_id_map		= NULL,
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index f28bf4c..c7e6d66 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -195,7 +195,6 @@ struct apic apic_x2apic_phys = {
 	.ioapic_phys_id_map		= NULL,
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 365f53d..e654ff9 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -315,7 +315,6 @@ struct apic __refdata apic_x2apic_uv_x = {
 	.ioapic_phys_id_map		= NULL,
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0768761..031d2e1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -168,10 +168,9 @@ static void unmap_cpu_to_node(int cpu)
 static void map_cpu_to_logical_apicid(void)
 {
 	int cpu = smp_processor_id();
-	int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 	int node;
 
-	node = apic->apicid_to_node(logical_apicid);
+	node = apic->x86_32_numa_cpu_node(cpu);
 	if (!node_online(node))
 		node = first_online_node;
 
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
                   ` (11 preceding siblings ...)
  2010-12-30 17:49 ` [PATCH 12/16] x86: Replace apic->apicid_to_node() with ->x86_32_numa_cpu_node() Tejun Heo
@ 2010-12-30 17:49 ` Tejun Heo
  2010-12-30 17:49 ` [PATCH 14/16] x86: Unify CPU -> " Tejun Heo
                   ` (4 subsequent siblings)
  17 siblings, 0 replies; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
  Cc: Tejun Heo
The mapping between cpu/apicid and node is done via apicid_to_node[]
on 64bit and apicid_2_node[] + apic->x86_32_numa_cpu_node() on 32bit.
This difference makes it difficult to further unify 32 and 64bit NUMA
handling.
This patch unifies it by replacing both apicid_to_node[] and
apicid_2_node[] with __apicid_to_node[] array, which is accessed by
two accessors - set_apicid_to_node() and numa_cpu_node().  On 64bit,
numa_cpu_node() always consults __apicid_to_node[] directly while
32bit goes through the apic->x86_32_numa_cpu_node() method to allow apic
implementations to override it.
srat_detect_node() for amd cpus contains a workaround for broken NUMA
configurations which assumes a relationship between APIC ID, HT node ID
and NUMA topology.  Leave it to access __apicid_to_node[] directly as
mapping through CPU might result in undesirable behavior change.  The
comment is reformatted and updated to note the ugliness.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
---
 arch/x86/include/asm/mpspec.h  |    1 -
 arch/x86/include/asm/numa.h    |   28 +++++++++++++++++++++++
 arch/x86/include/asm/numa_32.h |    6 +++++
 arch/x86/include/asm/numa_64.h |    5 +--
 arch/x86/kernel/acpi/boot.c    |    3 +-
 arch/x86/kernel/apic/apic.c    |    2 +-
 arch/x86/kernel/cpu/amd.c      |   47 +++++++++++++++++++++++++--------------
 arch/x86/kernel/cpu/intel.c    |    3 +-
 arch/x86/kernel/smpboot.c      |    6 +----
 arch/x86/mm/amdtopology_64.c   |    4 +-
 arch/x86/mm/numa.c             |    6 ++++-
 arch/x86/mm/numa_32.c          |    6 +++++
 arch/x86/mm/numa_64.c          |   26 +++++++++------------
 arch/x86/mm/srat_32.c          |    2 +-
 arch/x86/mm/srat_64.c          |   12 +++++-----
 15 files changed, 101 insertions(+), 56 deletions(-)
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 018ffc1..ae78732 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -24,7 +24,6 @@ extern int pic_mode;
 #define MAX_IRQ_SOURCES		256
 
 extern unsigned int def_to_bigsmp;
-extern u8 apicid_2_node[];
 
 #ifdef CONFIG_X86_NUMAQ
 extern int mp_bus_id_to_node[MAX_MP_BUSSES];
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 27da400..5e01c76 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,5 +1,33 @@
+#ifndef _ASM_X86_NUMA_H
+#define _ASM_X86_NUMA_H
+
+#include <asm/apicdef.h>
+
+#ifdef CONFIG_NUMA
+/*
+ * __apicid_to_node[] stores the raw mapping between physical apicid and
+ * node and is used to initialize cpu_to_node mapping.
+ *
+ * The mapping may be overridden by apic->numa_cpu_node() on 32bit and thus
+ * should be accessed by the accessors - set_apicid_to_node() and
+ * numa_cpu_node().
+ */
+extern s16 __apicid_to_node[MAX_LOCAL_APIC];
+
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+	__apicid_to_node[apicid] = node;
+}
+#else	/* CONFIG_NUMA */
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+}
+#endif	/* CONFIG_NUMA */
+
 #ifdef CONFIG_X86_32
 # include "numa_32.h"
 #else
 # include "numa_64.h"
 #endif
+
+#endif	/* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index a372290..d30eb6c 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -4,6 +4,12 @@
 extern int pxm_to_nid(int pxm);
 extern void numa_remove_cpu(int cpu);
 
+#ifdef CONFIG_NUMA
+extern int __cpuinit numa_cpu_node(int apicid);
+#else	/* CONFIG_NUMA */
+static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
+#endif	/* CONFIG_NUMA */
+
 #ifdef CONFIG_HIGHMEM
 extern void set_highmem_pages_init(void);
 #else
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 5ae8728..17abf80 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_NUMA_64_H
 
 #include <linux/nodemask.h>
-#include <asm/apicdef.h>
 
 struct bootnode {
 	u64 start;
@@ -17,8 +16,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern s16 apicid_to_node[MAX_LOCAL_APIC];
-
 extern unsigned long numa_free_all_bootmem(void);
 extern void setup_node_bootmem(int nodeid, unsigned long start,
 			       unsigned long end);
@@ -32,6 +29,7 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 #define NODE_MIN_SIZE (4*1024*1024)
 
 extern void __init init_cpu_to_node(void);
+extern int __cpuinit numa_cpu_node(int cpu);
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
 extern void __cpuinit numa_add_cpu(int cpu);
@@ -43,6 +41,7 @@ extern void __cpuinit numa_remove_cpu(int cpu);
 #endif /* CONFIG_NUMA_EMU */
 #else
 static inline void init_cpu_to_node(void)		{ }
+static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
 static inline void numa_add_cpu(int cpu, int node)	{ }
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 1a5b9a8..de3308b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -583,11 +583,10 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 	nid = acpi_get_node(handle);
 	if (nid == -1 || !node_online(nid))
 		return;
+	set_apicid_to_node(physid, nid);
 #ifdef CONFIG_X86_64
-	apicid_to_node[physid] = nid;
 	numa_set_node(cpu, nid);
 #else /* CONFIG_X86_32 */
-	apicid_2_node[physid] = nid;
 	cpu_to_node_map[cpu] = nid;
 #endif
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 5e097e5..4f2f210 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2029,7 +2029,7 @@ int default_x86_32_numa_cpu_node(int cpu)
 	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
 
 	if (apicid != BAD_APICID)
-		return apicid_2_node[apicid];
+		return __apicid_to_node[apicid];
 	return NUMA_NO_NODE;
 #else
 	return 0;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9e093f8..d8e81e5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -234,17 +234,21 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
 #endif
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+/*
+ * To workaround broken NUMA config.  Read the comment in
+ * srat_detect_node().
+ */
 static int __cpuinit nearby_node(int apicid)
 {
 	int i, node;
 
 	for (i = apicid - 1; i >= 0; i--) {
-		node = apicid_to_node[i];
+		node = __apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
 	for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
-		node = apicid_to_node[i];
+		node = __apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
@@ -339,26 +343,35 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 	int node;
 	unsigned apicid = c->apicid;
 
-	node = per_cpu(cpu_llc_id, cpu);
+	node = numa_cpu_node(cpu);
+	if (node == NUMA_NO_NODE)
+		node = per_cpu(cpu_llc_id, cpu);
 
-	if (apicid_to_node[apicid] != NUMA_NO_NODE)
-		node = apicid_to_node[apicid];
 	if (!node_online(node)) {
-		/* Two possibilities here:
-		   - The CPU is missing memory and no node was created.
-		   In that case try picking one from a nearby CPU
-		   - The APIC IDs differ from the HyperTransport node IDs
-		   which the K8 northbridge parsing fills in.
-		   Assume they are all increased by a constant offset,
-		   but in the same order as the HT nodeids.
-		   If that doesn't result in a usable node fall back to the
-		   path for the previous case.  */
-
+		/*
+		 * Two possibilities here:
+		 *
+		 * - The CPU is missing memory and no node was created.  In
+		 *   that case try picking one from a nearby CPU.
+		 *
+		 * - The APIC IDs differ from the HyperTransport node IDs
+		 *   which the K8 northbridge parsing fills in.  Assume
+		 *   they are all increased by a constant offset, but in
+		 *   the same order as the HT nodeids.  If that doesn't
+		 *   result in a usable node fall back to the path for the
+		 *   previous case.
+		 *
+		 * This workaround operates directly on the mapping between
+		 * APIC ID and NUMA node, assuming certain relationship
+		 * between APIC ID, HT node ID and NUMA topology.  As going
+		 * through CPU mapping may alter the outcome, directly
+		 * access __apicid_to_node[].
+		 */
 		int ht_nodeid = c->initial_apicid;
 
 		if (ht_nodeid >= 0 &&
-		    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
-			node = apicid_to_node[ht_nodeid];
+		    __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+			node = __apicid_to_node[ht_nodeid];
 		/* Pick a nearby node */
 		if (!node_online(node))
 			node = nearby_node(apicid);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d16c2c5..6052004 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -279,11 +279,10 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 	unsigned node;
 	int cpu = smp_processor_id();
-	int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
 
 	/* Don't do the funky fallback heuristics the AMD version employs
 	   for now. */
-	node = apicid_to_node[apicid];
+	node = numa_cpu_node(cpu);
 	if (node == NUMA_NO_NODE || !node_online(node)) {
 		/* reuse the value from init_cpu_to_node() */
 		node = cpu_to_node(cpu);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 031d2e1..4b8b72d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -71,10 +71,6 @@
 #include <asm/smpboot_hooks.h>
 #include <asm/i8259.h>
 
-#ifdef CONFIG_X86_32
-u8 apicid_2_node[MAX_LOCAL_APIC];
-#endif
-
 /* State of each CPU */
 DEFINE_PER_CPU(int, cpu_state) = { 0 };
 
@@ -170,7 +166,7 @@ static void map_cpu_to_logical_apicid(void)
 	int cpu = smp_processor_id();
 	int node;
 
-	node = apic->x86_32_numa_cpu_node(cpu);
+	node = numa_cpu_node(cpu);
 	if (!node_online(node))
 		node = first_online_node;
 
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index f21962c..c7fae38 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -247,7 +247,7 @@ void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
 		__acpi_map_pxm_to_node(nid, i);
 #endif
 	}
-	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+	memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
 }
 #endif /* CONFIG_NUMA_EMU */
 
@@ -285,7 +285,7 @@ int __init amd_scan_nodes(void)
 				nodes[i].start >> PAGE_SHIFT,
 				nodes[i].end >> PAGE_SHIFT);
 		for (j = apicid_base; j < cores + apicid_base; j++)
-			apicid_to_node[(i << bits) + j] = i;
+			set_apicid_to_node((i << bits) + j, i);
 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 	}
 
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 787c52c..63db99c 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -4,8 +4,12 @@
 #include <linux/bootmem.h>
 
 /*
- * Which logical CPUs are on which nodes
+ * apicid, cpu, node mappings
  */
+s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
 cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 EXPORT_SYMBOL(node_to_cpumask_map);
 
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 84a3e4c..8d91d22 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -110,6 +110,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
 static unsigned long kva_start_pfn;
 static unsigned long kva_pages;
+
+int __cpuinit numa_cpu_node(int cpu)
+{
+	return apic->x86_32_numa_cpu_node(cpu);
+}
+
 /*
  * FLAT - support for basic PC memory model with discontig enabled, essentially
  *        a single node with all available processors in it with a flat
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 3d73201..e32b405 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -26,10 +26,6 @@ EXPORT_SYMBOL(node_data);
 
 struct memnode memnode;
 
-s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
-	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-
 int numa_off __initdata;
 static unsigned long __initdata nodemap_addr;
 static unsigned long __initdata nodemap_size;
@@ -730,12 +726,8 @@ void __init init_cpu_to_node(void)
 	BUG_ON(cpu_to_apicid == NULL);
 
 	for_each_possible_cpu(cpu) {
-		int node;
-		u16 apicid = cpu_to_apicid[cpu];
+		int node = numa_cpu_node(cpu);
 
-		if (apicid == BAD_APICID)
-			continue;
-		node = apicid_to_node[apicid];
 		if (node == NUMA_NO_NODE)
 			continue;
 		if (!node_online(node))
@@ -745,6 +737,14 @@ void __init init_cpu_to_node(void)
 }
 #endif
 
+int __cpuinit numa_cpu_node(int cpu)
+{
+	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+	if (apicid != BAD_APICID)
+		return __apicid_to_node[apicid];
+	return NUMA_NO_NODE;
+}
 
 void __cpuinit numa_set_node(int cpu, int node)
 {
@@ -790,13 +790,9 @@ void __cpuinit numa_remove_cpu(int cpu)
 void __cpuinit numa_add_cpu(int cpu)
 {
 	unsigned long addr;
-	u16 apicid;
-	int physnid;
-	int nid = NUMA_NO_NODE;
+	int physnid, nid;
 
-	apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-	if (apicid != BAD_APICID)
-		nid = apicid_to_node[apicid];
+	nid = numa_cpu_node(cpu);
 	if (nid == NUMA_NO_NODE)
 		nid = early_cpu_to_node(cpu);
 	BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index e55e748..7fcae55 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -255,7 +255,7 @@ int __init get_memcfg_from_srat(void)
 			 num_memory_chunks);
 
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
+		set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
 
 	for (j = 0; j < num_memory_chunks; j++){
 		struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index a756bcf..8b3a81e 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -79,7 +79,7 @@ static __init void bad_srat(void)
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		apicid_to_node[i] = NUMA_NO_NODE;
+		set_apicid_to_node(i, NUMA_NO_NODE);
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		nodes[i].start = nodes[i].end = 0;
 		nodes_add[i].start = nodes_add[i].end = 0;
@@ -134,7 +134,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 	}
 
 	apic_id = pa->apic_id;
-	apicid_to_node[apic_id] = node;
+	set_apicid_to_node(apic_id, node);
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
@@ -168,7 +168,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 		apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
 	else
 		apic_id = pa->apic_id;
-	apicid_to_node[apic_id] = node;
+	set_apicid_to_node(apic_id, node);
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
@@ -511,7 +511,7 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 		 * node, it must now point to the fake node ID.
 		 */
 		for (j = 0; j < MAX_LOCAL_APIC; j++)
-			if (apicid_to_node[j] == nid &&
+			if (__apicid_to_node[j] == nid &&
 			    fake_apicid_to_node[j] == NUMA_NO_NODE)
 				fake_apicid_to_node[j] = i;
 	}
@@ -522,13 +522,13 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 	 * value.
 	 */
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		if (apicid_to_node[i] != NUMA_NO_NODE &&
+		if (__apicid_to_node[i] != NUMA_NO_NODE &&
 		    fake_apicid_to_node[i] == NUMA_NO_NODE)
 			fake_apicid_to_node[i] = 0;
 
 	for (i = 0; i < num_nodes; i++)
 		__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
-	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+	memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
 
 	nodes_clear(nodes_parsed);
 	for (i = 0; i < num_nodes; i++)
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* [PATCH 14/16] x86: Unify CPU -> NUMA node mapping between 32 and 64bit
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
                   ` (12 preceding siblings ...)
  2010-12-30 17:49 ` [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit Tejun Heo
@ 2010-12-30 17:49 ` Tejun Heo
  2010-12-30 17:49 ` [PATCH 15/16] x86: Unify node_to_cpumask_map handling " Tejun Heo
                   ` (3 subsequent siblings)
  17 siblings, 0 replies; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
  Cc: Tejun Heo
Unlike 64bit, 32bit has been using its own cpu_to_node_map[] for CPU
-> NUMA node mapping.  Replace it with the early_percpu variable
x86_cpu_to_node_map and share the mapping code with 64bit.
* USE_PERCPU_NUMA_NODE_ID is now enabled for 32bit too.
* x86_cpu_to_node_map and numa_set/clear_node() are moved from numa_64
  to numa.  For now, on 32bit, x86_cpu_to_node_map is initialized with
  0 instead of NUMA_NO_NODE.  This is to avoid introducing unexpected
  behavior change and will be updated once init path is unified.
* srat_detect_node() is now enabled for x86_32 too.  It calls
  numa_set_node() and initializes the mapping, making explicit
  cpu_to_node_map[] updates from map/unmap_cpu_to_node() unnecessary.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: David Rientjes <rientjes@google.com>
---
 arch/x86/Kconfig                |    2 +-
 arch/x86/include/asm/numa.h     |    8 ++++
 arch/x86/include/asm/numa_64.h  |    4 --
 arch/x86/include/asm/topology.h |   17 ---------
 arch/x86/kernel/acpi/boot.c     |    5 ---
 arch/x86/kernel/cpu/amd.c       |    4 +-
 arch/x86/kernel/cpu/intel.c     |    2 +-
 arch/x86/kernel/setup_percpu.c  |    4 +-
 arch/x86/kernel/smpboot.c       |    6 ---
 arch/x86/mm/numa.c              |   72 ++++++++++++++++++++++++++++++++++++++-
 arch/x86/mm/numa_64.c           |   65 -----------------------------------
 11 files changed, 85 insertions(+), 104 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 58ecad5..8d9dd6c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1707,7 +1707,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
 	depends on NUMA
 
 config USE_PERCPU_NUMA_NODE_ID
-	def_bool X86_64
+	def_bool y
 	depends on NUMA
 
 menu "Power management and ACPI options"
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 5e01c76..2b21fff 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -30,4 +30,12 @@ static inline void set_apicid_to_node(int apicid, s16 node)
 # include "numa_64.h"
 #endif
 
+#ifdef CONFIG_NUMA
+extern void __cpuinit numa_set_node(int cpu, int node);
+extern void __cpuinit numa_clear_node(int cpu);
+#else	/* CONFIG_NUMA */
+static inline void numa_set_node(int cpu, int node)	{ }
+static inline void numa_clear_node(int cpu)		{ }
+#endif	/* CONFIG_NUMA */
+
 #endif	/* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 17abf80..f92b8e7 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -30,8 +30,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 
 extern void __init init_cpu_to_node(void);
 extern int __cpuinit numa_cpu_node(int cpu);
-extern void __cpuinit numa_set_node(int cpu, int node);
-extern void __cpuinit numa_clear_node(int cpu);
 extern void __cpuinit numa_add_cpu(int cpu);
 extern void __cpuinit numa_remove_cpu(int cpu);
 
@@ -42,8 +40,6 @@ extern void __cpuinit numa_remove_cpu(int cpu);
 #else
 static inline void init_cpu_to_node(void)		{ }
 static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
-static inline void numa_set_node(int cpu, int node)	{ }
-static inline void numa_clear_node(int cpu)		{ }
 static inline void numa_add_cpu(int cpu, int node)	{ }
 static inline void numa_remove_cpu(int cpu)		{ }
 #endif
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 21899cc..b101c17 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -47,21 +47,6 @@
 
 #include <asm/mpspec.h>
 
-#ifdef CONFIG_X86_32
-
-/* Mappings between logical cpu number and node number */
-extern int cpu_to_node_map[];
-
-/* Returns the number of the node containing CPU 'cpu' */
-static inline int __cpu_to_node(int cpu)
-{
-	return cpu_to_node_map[cpu];
-}
-#define early_cpu_to_node __cpu_to_node
-#define cpu_to_node __cpu_to_node
-
-#else /* CONFIG_X86_64 */
-
 /* Mappings between logical cpu number and node number */
 DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
 
@@ -84,8 +69,6 @@ static inline int early_cpu_to_node(int cpu)
 
 #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
 
-#endif /* CONFIG_X86_64 */
-
 /* Mappings between node number and cpus on that node. */
 extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index de3308b..bf5d299 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -584,12 +584,7 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 	if (nid == -1 || !node_online(nid))
 		return;
 	set_apicid_to_node(physid, nid);
-#ifdef CONFIG_X86_64
 	numa_set_node(cpu, nid);
-#else /* CONFIG_X86_32 */
-	cpu_to_node_map[cpu] = nid;
-#endif
-
 #endif
 }
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index d8e81e5..9a1be2c 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -233,7 +233,7 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
 }
 #endif
 
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
 /*
  * To workaround broken NUMA config.  Read the comment in
  * srat_detect_node().
@@ -338,7 +338,7 @@ EXPORT_SYMBOL_GPL(amd_get_nb_id);
 
 static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 {
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
 	int cpu = smp_processor_id();
 	int node;
 	unsigned apicid = c->apicid;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 6052004..df86bc8 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -276,7 +276,7 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
 
 static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 {
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
 	unsigned node;
 	int cpu = smp_processor_id();
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index b5147f0..71f4727 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -233,6 +233,7 @@ void __init setup_per_cpu_areas(void)
 		per_cpu(irq_stack_ptr, cpu) =
 			per_cpu(irq_stack_union.irq_stack, cpu) +
 			IRQ_STACK_SIZE - 64;
+#endif
 #ifdef CONFIG_NUMA
 		per_cpu(x86_cpu_to_node_map, cpu) =
 			early_per_cpu_map(x86_cpu_to_node_map, cpu);
@@ -246,7 +247,6 @@ void __init setup_per_cpu_areas(void)
 		 */
 		set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
 #endif
-#endif
 		/*
 		 * Up to this point, the boot CPU has been using .init.data
 		 * area.  Reload any changed state for the boot CPU.
@@ -263,7 +263,7 @@ void __init setup_per_cpu_areas(void)
 #ifdef CONFIG_X86_32
 	early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
 #endif
-#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
+#ifdef CONFIG_NUMA
 	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
 #endif
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 4b8b72d..b78ce8c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -133,16 +133,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 atomic_t init_deasserted;
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
-/* which node each logical CPU is on */
-int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
-EXPORT_SYMBOL(cpu_to_node_map);
-
 /* set up a mapping between cpu and node. */
 static void map_cpu_to_node(int cpu, int node)
 {
 	printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
 	cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
-	cpu_to_node_map[cpu] = node;
 }
 
 /* undo a mapping between cpu and node. */
@@ -153,7 +148,6 @@ static void unmap_cpu_to_node(int cpu)
 	printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
 	for (node = 0; node < MAX_NUMNODES; node++)
 		cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
-	cpu_to_node_map[cpu] = 0;
 }
 #else /* !(CONFIG_NUMA && CONFIG_X86_32) */
 #define map_cpu_to_node(cpu, node)	({})
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 63db99c..b23b5fe 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -14,6 +14,44 @@ cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 EXPORT_SYMBOL(node_to_cpumask_map);
 
 /*
+ * Map cpu index to node index
+ */
+#ifdef CONFIG_X86_32
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, 0);
+#else
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
+#endif
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+
+void __cpuinit numa_set_node(int cpu, int node)
+{
+	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
+
+	/* early setting, no percpu area yet */
+	if (cpu_to_node_map) {
+		cpu_to_node_map[cpu] = node;
+		return;
+	}
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+	if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
+		printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
+		dump_stack();
+		return;
+	}
+#endif
+	per_cpu(x86_cpu_to_node_map, cpu) = node;
+
+	if (node != NUMA_NO_NODE)
+		set_cpu_numa_node(cpu, node);
+}
+
+void __cpuinit numa_clear_node(int cpu)
+{
+	numa_set_node(cpu, NUMA_NO_NODE);
+}
+
+/*
  * Allocate node_to_cpumask_map based on number of available nodes
  * Requires node_possible_map to be valid.
  *
@@ -40,6 +78,37 @@ void __init setup_node_to_cpumask_map(void)
 }
 
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
+
+int __cpu_to_node(int cpu)
+{
+	if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
+		printk(KERN_WARNING
+			"cpu_to_node(%d): usage too early!\n", cpu);
+		dump_stack();
+		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+	}
+	return per_cpu(x86_cpu_to_node_map, cpu);
+}
+EXPORT_SYMBOL(__cpu_to_node);
+
+/*
+ * Same function as cpu_to_node() but used if called before the
+ * per_cpu areas are setup.
+ */
+int early_cpu_to_node(int cpu)
+{
+	if (early_per_cpu_ptr(x86_cpu_to_node_map))
+		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+
+	if (!cpu_possible(cpu)) {
+		printk(KERN_WARNING
+			"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
+		dump_stack();
+		return NUMA_NO_NODE;
+	}
+	return per_cpu(x86_cpu_to_node_map, cpu);
+}
+
 /*
  * Returns a pointer to the bitmask of CPUs on Node 'node'.
  */
@@ -62,4 +131,5 @@ const struct cpumask *cpumask_of_node(int node)
 	return node_to_cpumask_map[node];
 }
 EXPORT_SYMBOL(cpumask_of_node);
-#endif
+
+#endif	/* CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index e32b405..ede4608 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -31,12 +31,6 @@ static unsigned long __initdata nodemap_addr;
 static unsigned long __initdata nodemap_size;
 
 /*
- * Map cpu index to node index
- */
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-
-/*
  * Given a shift value, try to populate memnodemap[]
  * Returns :
  * 1 if OK
@@ -746,34 +740,6 @@ int __cpuinit numa_cpu_node(int cpu)
 	return NUMA_NO_NODE;
 }
 
-void __cpuinit numa_set_node(int cpu, int node)
-{
-	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
-
-	/* early setting, no percpu area yet */
-	if (cpu_to_node_map) {
-		cpu_to_node_map[cpu] = node;
-		return;
-	}
-
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-	if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
-		printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
-		dump_stack();
-		return;
-	}
-#endif
-	per_cpu(x86_cpu_to_node_map, cpu) = node;
-
-	if (node != NUMA_NO_NODE)
-		set_cpu_numa_node(cpu, node);
-}
-
-void __cpuinit numa_clear_node(int cpu)
-{
-	numa_set_node(cpu, NUMA_NO_NODE);
-}
-
 #ifndef CONFIG_DEBUG_PER_CPU_MAPS
 
 #ifndef CONFIG_NUMA_EMU
@@ -875,37 +841,6 @@ void __cpuinit numa_remove_cpu(int cpu)
 {
 	numa_set_cpumask(cpu, 0);
 }
-
-int __cpu_to_node(int cpu)
-{
-	if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
-		printk(KERN_WARNING
-			"cpu_to_node(%d): usage too early!\n", cpu);
-		dump_stack();
-		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
-	}
-	return per_cpu(x86_cpu_to_node_map, cpu);
-}
-EXPORT_SYMBOL(__cpu_to_node);
-
-/*
- * Same function as cpu_to_node() but used if called before the
- * per_cpu areas are setup.
- */
-int early_cpu_to_node(int cpu)
-{
-	if (early_per_cpu_ptr(x86_cpu_to_node_map))
-		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
-
-	if (!cpu_possible(cpu)) {
-		printk(KERN_WARNING
-			"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
-		dump_stack();
-		return NUMA_NO_NODE;
-	}
-	return per_cpu(x86_cpu_to_node_map, cpu);
-}
-
 /*
  * --------- end of debug versions of the numa functions ---------
  */
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* [PATCH 15/16] x86: Unify node_to_cpumask_map handling between 32 and 64bit
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
                   ` (13 preceding siblings ...)
  2010-12-30 17:49 ` [PATCH 14/16] x86: Unify CPU -> " Tejun Heo
@ 2010-12-30 17:49 ` Tejun Heo
  2010-12-30 17:49 ` [PATCH 16/16] x86: Unify NUMA initialization " Tejun Heo
                   ` (2 subsequent siblings)
  17 siblings, 0 replies; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
  Cc: Tejun Heo
x86_32 has been managing node_to_cpumask_map explicitly from
map_cpu_to_node() and friends in a rather ugly way.  With previous
changes, it's now possible to share the code with 64bit.
* When CONFIG_NUMA_EMU is disabled, numa_add/remove_cpu() are
  implemented in numa.c and shared by 32 and 64bit.  The !NUMA_EMU
  implementation is taken from numa_64.c right before c1c3443c ("x86,
  numa: Fake node-to-cpumask for NUMA emulation").  CONFIG_NUMA_EMU
  versions still live in numa_64.c.
  NUMA_EMU's dependency on 64bit is planned to be removed, and the
  above distinction should go away along with it.
* identify_cpu() now calls numa_add_cpu() for 32bit too.  This makes
  the explicit mask management from map_cpu_to_node() unnecessary.
* The whole x86_32 specific map_cpu_to_node() chunk is no longer
  necessary.  Dropped.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Shaohui Zheng <shaohui.zheng@intel.com>
---
 arch/x86/include/asm/numa.h    |    5 ++++
 arch/x86/include/asm/numa_32.h |    1 -
 arch/x86/include/asm/numa_64.h |    4 ---
 arch/x86/kernel/cpu/common.c   |    2 +-
 arch/x86/kernel/smpboot.c      |   47 -----------------------------------
 arch/x86/mm/numa.c             |   53 ++++++++++++++++++++++++++++++++++++++-
 arch/x86/mm/numa_64.c          |   40 +++++++++++-------------------
 7 files changed, 72 insertions(+), 80 deletions(-)
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 2b21fff..7012e64 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_X86_NUMA_H
 #define _ASM_X86_NUMA_H
 
+#include <asm/topology.h>
 #include <asm/apicdef.h>
 
 #ifdef CONFIG_NUMA
@@ -33,9 +34,13 @@ static inline void set_apicid_to_node(int apicid, s16 node)
 #ifdef CONFIG_NUMA
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
+extern void __cpuinit numa_add_cpu(int cpu);
+extern void __cpuinit numa_remove_cpu(int cpu);
 #else	/* CONFIG_NUMA */
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
+static inline void numa_add_cpu(int cpu)		{ }
+static inline void numa_remove_cpu(int cpu)		{ }
 #endif	/* CONFIG_NUMA */
 
 #endif	/* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index d30eb6c..ba0ea62 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_NUMA_32_H
 
 extern int pxm_to_nid(int pxm);
-extern void numa_remove_cpu(int cpu);
 
 #ifdef CONFIG_NUMA
 extern int __cpuinit numa_cpu_node(int apicid);
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index f92b8e7..f624229 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -30,8 +30,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 
 extern void __init init_cpu_to_node(void);
 extern int __cpuinit numa_cpu_node(int cpu);
-extern void __cpuinit numa_add_cpu(int cpu);
-extern void __cpuinit numa_remove_cpu(int cpu);
 
 #ifdef CONFIG_NUMA_EMU
 #define FAKE_NODE_MIN_SIZE	((u64)32 << 20)
@@ -40,8 +38,6 @@ extern void __cpuinit numa_remove_cpu(int cpu);
 #else
 static inline void init_cpu_to_node(void)		{ }
 static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
-static inline void numa_add_cpu(int cpu, int node)	{ }
-static inline void numa_remove_cpu(int cpu)		{ }
 #endif
 
 #endif /* _ASM_X86_NUMA_64_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b68bda..0e9a1d7 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -869,7 +869,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 
 	select_idle_routine(c);
 
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
 	numa_add_cpu(smp_processor_id());
 #endif
 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index b78ce8c..b152077 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -132,49 +132,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 
 atomic_t init_deasserted;
 
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
-/* set up a mapping between cpu and node. */
-static void map_cpu_to_node(int cpu, int node)
-{
-	printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
-	cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
-}
-
-/* undo a mapping between cpu and node. */
-static void unmap_cpu_to_node(int cpu)
-{
-	int node;
-
-	printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
-	for (node = 0; node < MAX_NUMNODES; node++)
-		cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
-}
-#else /* !(CONFIG_NUMA && CONFIG_X86_32) */
-#define map_cpu_to_node(cpu, node)	({})
-#define unmap_cpu_to_node(cpu)	({})
-#endif
-
-#ifdef CONFIG_X86_32
-static void map_cpu_to_logical_apicid(void)
-{
-	int cpu = smp_processor_id();
-	int node;
-
-	node = numa_cpu_node(cpu);
-	if (!node_online(node))
-		node = first_online_node;
-
-	map_cpu_to_node(cpu, node);
-}
-
-void numa_remove_cpu(int cpu)
-{
-	unmap_cpu_to_node(cpu);
-}
-#else
-#define map_cpu_to_logical_apicid()  do {} while (0)
-#endif
-
 /*
  * Report back to the Boot Processor.
  * Running on AP.
@@ -242,7 +199,6 @@ static void __cpuinit smp_callin(void)
 		apic->smp_callin_clear_local_apic();
 	setup_local_APIC();
 	end_local_APIC_setup();
-	map_cpu_to_logical_apicid();
 
 	/*
 	 * Need to setup vector mappings before we enable interrupts.
@@ -946,7 +902,6 @@ static __init void disable_smp(void)
 		physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
 	else
 		physid_set_mask_of_physid(0, &phys_cpu_present_map);
-	map_cpu_to_logical_apicid();
 	cpumask_set_cpu(0, cpu_sibling_mask(0));
 	cpumask_set_cpu(0, cpu_core_mask(0));
 }
@@ -1125,8 +1080,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 
 	end_local_APIC_setup();
 
-	map_cpu_to_logical_apicid();
-
 	if (apic->setup_portio_remap)
 		apic->setup_portio_remap();
 
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index b23b5fe..61dcdcd 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -77,7 +77,21 @@ void __init setup_node_to_cpumask_map(void)
 	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
 }
 
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+
+# ifndef CONFIG_NUMA_EMU
+void __cpuinit numa_add_cpu(int cpu)
+{
+	cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+	cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+# endif	/* !CONFIG_NUMA_EMU */
+
+#else	/* !CONFIG_DEBUG_PER_CPU_MAPS */
 
 int __cpu_to_node(int cpu)
 {
@@ -109,6 +123,41 @@ int early_cpu_to_node(int cpu)
 	return per_cpu(x86_cpu_to_node_map, cpu);
 }
 
+# ifndef CONFIG_NUMA_EMU
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+	int node = early_cpu_to_node(cpu);
+	struct cpumask *mask;
+	char buf[64];
+
+	mask = node_to_cpumask_map[node];
+	if (mask == NULL) {
+		printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node);
+		dump_stack();
+		return;
+	}
+
+	if (enable)
+		cpumask_set_cpu(cpu, mask);
+	else
+		cpumask_clear_cpu(cpu, mask);
+
+	cpulist_scnprintf(buf, sizeof(buf), mask);
+	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+		enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
+}
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+	numa_set_cpumask(cpu, 1);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+	numa_set_cpumask(cpu, 0);
+}
+# endif	/* !CONFIG_NUMA_EMU */
+
 /*
  * Returns a pointer to the bitmask of CPUs on Node 'node'.
  */
@@ -132,4 +181,4 @@ const struct cpumask *cpumask_of_node(int node)
 }
 EXPORT_SYMBOL(cpumask_of_node);
 
-#endif	/* CONFIG_DEBUG_PER_CPU_MAPS */
+#endif	/* !CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index ede4608..d58dc4d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -740,19 +740,18 @@ int __cpuinit numa_cpu_node(int cpu)
 	return NUMA_NO_NODE;
 }
 
-#ifndef CONFIG_DEBUG_PER_CPU_MAPS
-
-#ifndef CONFIG_NUMA_EMU
-void __cpuinit numa_add_cpu(int cpu)
-{
-	cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-
-void __cpuinit numa_remove_cpu(int cpu)
-{
-	cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-#else
+/*
+ * UGLINESS AHEAD: Currently, CONFIG_NUMA_EMU is 64bit only and makes use
+ * of 64bit specific data structures.  The distinction is artificial and
+ * should be removed.  numa_{add|remove}_cpu() are implemented in numa.c
+ * for both 32 and 64bit when CONFIG_NUMA_EMU is disabled but here when
+ * enabled.
+ *
+ * NUMA emulation is planned to be made generic and the following and other
+ * related code should be moved to numa.c.
+ */
+#ifdef CONFIG_NUMA_EMU
+# ifndef CONFIG_DEBUG_PER_CPU_MAPS
 void __cpuinit numa_add_cpu(int cpu)
 {
 	unsigned long addr;
@@ -792,13 +791,7 @@ void __cpuinit numa_remove_cpu(int cpu)
 	for_each_online_node(i)
 		cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
 }
-#endif /* !CONFIG_NUMA_EMU */
-
-#else /* CONFIG_DEBUG_PER_CPU_MAPS */
-
-/*
- * --------- debug versions of the numa functions ---------
- */
+# else	/* !CONFIG_DEBUG_PER_CPU_MAPS */
 static void __cpuinit numa_set_cpumask(int cpu, int enable)
 {
 	int node = early_cpu_to_node(cpu);
@@ -841,8 +834,5 @@ void __cpuinit numa_remove_cpu(int cpu)
 {
 	numa_set_cpumask(cpu, 0);
 }
-/*
- * --------- end of debug versions of the numa functions ---------
- */
-
-#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+# endif	/* !CONFIG_DEBUG_PER_CPU_MAPS */
+#endif	/* CONFIG_NUMA_EMU */
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* [PATCH 16/16] x86: Unify NUMA initialization between 32 and 64bit
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
                   ` (14 preceding siblings ...)
  2010-12-30 17:49 ` [PATCH 15/16] x86: Unify node_to_cpumask_map handling " Tejun Heo
@ 2010-12-30 17:49 ` Tejun Heo
  2010-12-30 20:14 ` [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 H. Peter Anvin
  2011-01-21 18:16 ` Tejun Heo
  17 siblings, 0 replies; 40+ messages in thread
From: Tejun Heo @ 2010-12-30 17:49 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
  Cc: Tejun Heo
Now that everything else is unified, NUMA initialization can be
unified too.
* numa_init_array() and init_cpu_to_node() are moved from numa_64 to
  numa.
* numa_32::initmem_init() is updated to call numa_init_array() and
  setup_arch() to call init_cpu_to_node() on 32bit too.
* x86_cpu_to_node_map is now initialized to NUMA_NO_NODE on 32bit too.
  This is safe now as numa_init_array() will initialize it early
  during boot.
This makes the NUMA mapping fully initialized before
setup_per_cpu_areas() on 32bit too.  As a result, the first percpu
chunk, which contains all the static variables and some of the dynamic
area, is now allocated with NUMA affinity correctly taken into account.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
---
 arch/x86/include/asm/numa.h    |    4 ++
 arch/x86/include/asm/numa_64.h |    3 --
 arch/x86/kernel/setup.c        |    2 -
 arch/x86/mm/numa.c             |   76 +++++++++++++++++++++++++++++++++++++--
 arch/x86/mm/numa_32.c          |    1 +
 arch/x86/mm/numa_64.c          |   75 ---------------------------------------
 6 files changed, 77 insertions(+), 84 deletions(-)
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 7012e64..ec0f378 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -34,11 +34,15 @@ static inline void set_apicid_to_node(int apicid, s16 node)
 #ifdef CONFIG_NUMA
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
+extern void __init numa_init_array(void);
+extern void __init init_cpu_to_node(void);
 extern void __cpuinit numa_add_cpu(int cpu);
 extern void __cpuinit numa_remove_cpu(int cpu);
 #else	/* CONFIG_NUMA */
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
+static inline void numa_init_array(void)		{ }
+static inline void init_cpu_to_node(void)		{ }
 static inline void numa_add_cpu(int cpu)		{ }
 static inline void numa_remove_cpu(int cpu)		{ }
 #endif	/* CONFIG_NUMA */
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index f624229..f6f38eb 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -13,7 +13,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 
 #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
 
-extern void numa_init_array(void);
 extern int numa_off;
 
 extern unsigned long numa_free_all_bootmem(void);
@@ -28,7 +27,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
  */
 #define NODE_MIN_SIZE (4*1024*1024)
 
-extern void __init init_cpu_to_node(void);
 extern int __cpuinit numa_cpu_node(int cpu);
 
 #ifdef CONFIG_NUMA_EMU
@@ -36,7 +34,6 @@ extern int __cpuinit numa_cpu_node(int cpu);
 #define FAKE_NODE_MIN_HASH_MASK	(~(FAKE_NODE_MIN_SIZE - 1UL))
 #endif /* CONFIG_NUMA_EMU */
 #else
-static inline void init_cpu_to_node(void)		{ }
 static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
 #endif
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0afb8c7..9d7cd90 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1030,9 +1030,7 @@ void __init setup_arch(char **cmdline_p)
 
 	prefill_possible_map();
 
-#ifdef CONFIG_X86_64
 	init_cpu_to_node();
-#endif
 
 	init_apic_mappings();
 	ioapic_and_gsi_init();
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 61dcdcd..2f07593 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -16,11 +16,7 @@ EXPORT_SYMBOL(node_to_cpumask_map);
 /*
  * Map cpu index to node index
  */
-#ifdef CONFIG_X86_32
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, 0);
-#else
 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
-#endif
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
 
 void __cpuinit numa_set_node(int cpu, int node)
@@ -77,6 +73,78 @@ void __init setup_node_to_cpumask_map(void)
 	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
 }
 
+/*
+ * There are unfortunately some poorly designed mainboards around that
+ * only connect memory to a single CPU. This breaks the 1:1 cpu->node
+ * mapping. To avoid this fill in the mapping for all possible CPUs,
+ * as the number of CPUs is not known yet. We round robin the existing
+ * nodes.
+ */
+void __init numa_init_array(void)
+{
+	int rr, i;
+
+	rr = first_node(node_online_map);
+	for (i = 0; i < nr_cpu_ids; i++) {
+		if (early_cpu_to_node(i) != NUMA_NO_NODE)
+			continue;
+		numa_set_node(i, rr);
+		rr = next_node(rr, node_online_map);
+		if (rr == MAX_NUMNODES)
+			rr = first_node(node_online_map);
+	}
+}
+
+static __init int find_near_online_node(int node)
+{
+	int n, val;
+	int min_val = INT_MAX;
+	int best_node = -1;
+
+	for_each_online_node(n) {
+		val = node_distance(node, n);
+
+		if (val < min_val) {
+			min_val = val;
+			best_node = n;
+		}
+	}
+
+	return best_node;
+}
+
+/*
+ * Setup early cpu_to_node.
+ *
+ * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
+ * and apicid_to_node[] tables have valid entries for a CPU.
+ * This means we skip cpu_to_node[] initialisation for NUMA
+ * emulation and faking node case (when running a kernel compiled
+ * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
+ * is already initialized in a round robin manner at numa_init_array,
+ * prior to this call, and this initialization is good enough
+ * for the fake NUMA cases.
+ *
+ * Called before the per_cpu areas are setup.
+ */
+void __init init_cpu_to_node(void)
+{
+	int cpu;
+	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+
+	BUG_ON(cpu_to_apicid == NULL);
+
+	for_each_possible_cpu(cpu) {
+		int node = numa_cpu_node(cpu);
+
+		if (node == NUMA_NO_NODE)
+			continue;
+		if (!node_online(node))
+			node = find_near_online_node(node);
+		numa_set_node(cpu, node);
+	}
+}
+
 #ifndef CONFIG_DEBUG_PER_CPU_MAPS
 
 # ifndef CONFIG_NUMA_EMU
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 8d91d22..505bb04 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -367,6 +367,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
 	 */
 
 	get_memcfg_numa();
+	numa_init_array();
 
 	kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
 
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index d58dc4d..c43aaf8 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -225,28 +225,6 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 	node_set_online(nodeid);
 }
 
-/*
- * There are unfortunately some poorly designed mainboards around that
- * only connect memory to a single CPU. This breaks the 1:1 cpu->node
- * mapping. To avoid this fill in the mapping for all possible CPUs,
- * as the number of CPUs is not known yet. We round robin the existing
- * nodes.
- */
-void __init numa_init_array(void)
-{
-	int rr, i;
-
-	rr = first_node(node_online_map);
-	for (i = 0; i < nr_cpu_ids; i++) {
-		if (early_cpu_to_node(i) != NUMA_NO_NODE)
-			continue;
-		numa_set_node(i, rr);
-		rr = next_node(rr, node_online_map);
-		if (rr == MAX_NUMNODES)
-			rr = first_node(node_online_map);
-	}
-}
-
 #ifdef CONFIG_NUMA_EMU
 /* Numa emulation */
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
@@ -678,59 +656,6 @@ static __init int numa_setup(char *opt)
 }
 early_param("numa", numa_setup);
 
-#ifdef CONFIG_NUMA
-
-static __init int find_near_online_node(int node)
-{
-	int n, val;
-	int min_val = INT_MAX;
-	int best_node = -1;
-
-	for_each_online_node(n) {
-		val = node_distance(node, n);
-
-		if (val < min_val) {
-			min_val = val;
-			best_node = n;
-		}
-	}
-
-	return best_node;
-}
-
-/*
- * Setup early cpu_to_node.
- *
- * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
- * and apicid_to_node[] tables have valid entries for a CPU.
- * This means we skip cpu_to_node[] initialisation for NUMA
- * emulation and faking node case (when running a kernel compiled
- * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
- * is already initialized in a round robin manner at numa_init_array,
- * prior to this call, and this initialization is good enough
- * for the fake NUMA cases.
- *
- * Called before the per_cpu areas are setup.
- */
-void __init init_cpu_to_node(void)
-{
-	int cpu;
-	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-
-	BUG_ON(cpu_to_apicid == NULL);
-
-	for_each_possible_cpu(cpu) {
-		int node = numa_cpu_node(cpu);
-
-		if (node == NUMA_NO_NODE)
-			continue;
-		if (!node_online(node))
-			node = find_near_online_node(node);
-		numa_set_node(cpu, node);
-	}
-}
-#endif
-
 int __cpuinit numa_cpu_node(int cpu)
 {
 	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
* Re: [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
                   ` (15 preceding siblings ...)
  2010-12-30 17:49 ` [PATCH 16/16] x86: Unify NUMA initialization " Tejun Heo
@ 2010-12-30 20:14 ` H. Peter Anvin
  2011-01-21 18:16 ` Tejun Heo
  17 siblings, 0 replies; 40+ messages in thread
From: H. Peter Anvin @ 2010-12-30 20:14 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, mingo, tglx, x86, eric.dumazet, yinghai, brgerst,
	gorcunov, penberg, shaohui.zheng, rientjes
On 12/30/2010 09:49 AM, Tejun Heo wrote:
> 
> The only change from the last take[L] is that it's now based on
> tip/x86/numa.  Unfortunately, some of the collisions weren't trivial
> and led to some ugliness.
> 
> Commit c1c3443c ("x86, numa: Fake node-to-cpumask for NUMA emulation")
> introduced hard dependency on x86_64 into numa_add/remove_cpu() when
> CONFIG_NUMA_EMU is enabled.  0015 has been updated so that the 32/64
> bit common versions used when !CONFIG_NUMA_EMU are in numa.c while
> CONFIG_NUMA_EMU variants are in numa_64.c.
> 
> This is ugly but still better than before.  IIUC, Shaohui's patchset
> is going to unify NUMA emulation across 32 and 64bit, which should
> remove the above ugliness.  I haven't looked through the patchset yet
> but after skimming through the current NUMA_EMU code, here are some of
> my thoughts, FWIW.
> 
> * There's no reason for different NUMA config methods to construct
>   different data structures.  They all, including 32bit, can build a
>   single set of data structures.
> 
> * Then, unification of NUMA_EMU would naturally follow.  There's no
>   reason to think about whether the underlying NUMA and proximity
>   information is provided by ACPI, AMD or whatever.  It just needs to
>   manipulate the processed data.
> 
> Let's _please_ head that way instead of adding more glue code and
> hacks everywhere.  It would be a bit more churn but I don't think
> there's any other sustainable way.
> 
Agreed 100%.
	-hpa
-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.
^ permalink raw reply	[flat|nested] 40+ messages in thread
* Re: [PATCH 01/16] x86: Kill unused static boot_cpu_logical_apicid in smpboot.c
  2010-12-30 17:49 ` [PATCH 01/16] x86: Kill unused static boot_cpu_logical_apicid in smpboot.c Tejun Heo
@ 2011-01-11  3:48   ` David Rientjes
  0 siblings, 0 replies; 40+ messages in thread
From: David Rientjes @ 2011-01-11  3:48 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
On Thu, 30 Dec 2010, Tejun Heo wrote:
> Signed-off-by: Tejun Heo <tj@kernel.org>
> Reviewed-by: Pekka Enberg <penberg@kernel.org>
Acked-by: David Rientjes <rientjes@google.com>
^ permalink raw reply	[flat|nested] 40+ messages in thread
* Re: [PATCH 02/16] x86: Rename x86_32 MAX_APICID to MAX_LOCAL_APIC
  2010-12-30 17:49 ` [PATCH 02/16] x86: Rename x86_32 MAX_APICID to MAX_LOCAL_APIC Tejun Heo
@ 2011-01-11  3:48   ` David Rientjes
  0 siblings, 0 replies; 40+ messages in thread
From: David Rientjes @ 2011-01-11  3:48 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
On Thu, 30 Dec 2010, Tejun Heo wrote:
> diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
> index a859ca4..47a30ff 100644
> --- a/arch/x86/include/asm/apicdef.h
> +++ b/arch/x86/include/asm/apicdef.h
> @@ -145,6 +145,7 @@
>  
>  #ifdef CONFIG_X86_32
>  # define MAX_IO_APICS 64
> +# define MAX_LOCAL_APIC 256
>  #else
>  # define MAX_IO_APICS 128
>  # define MAX_LOCAL_APIC 32768
Looks like Yinghai's 56d91f13 already added this, but didn't convert the 
rest of the cases that you did.  With this hunk removed and an updated 
changelog:
Acked-by: David Rientjes <rientjes@google.com>
^ permalink raw reply	[flat|nested] 40+ messages in thread
* Re: [PATCH 03/16] x86: Make default_send_IPI_mask_sequence/allbutself_logical() 32bit only
  2010-12-30 17:49 ` [PATCH 03/16] x86: Make default_send_IPI_mask_sequence/allbutself_logical() 32bit only Tejun Heo
@ 2011-01-11  3:48   ` David Rientjes
  0 siblings, 0 replies; 40+ messages in thread
From: David Rientjes @ 2011-01-11  3:48 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
On Thu, 30 Dec 2010, Tejun Heo wrote:
> Both functions are used only in 32bit.  Put them inside CONFIG_X86_32.
> This is to prepare for logical apicid handling update.
> 
> - Cyrill Gorcunov spotted that I forgot to move declarations in ipi.h
>   under CONFIG_X86_32.  Fixed.
> 
> Signed-off-by: Tejun Heo <tj@kernel.org>
> Reviewed-by: Pekka Enberg <penberg@kernel.org>
> Reviewed-by: Cyrill Gorcunov <gorcunov@gmail.com>
Acked-by: David Rientjes <rientjes@google.com>
^ permalink raw reply	[flat|nested] 40+ messages in thread
* Re: [PATCH 04/16] x86: Replace cpu_2_logical_apicid[] with early percpu variable
  2010-12-30 17:49 ` [PATCH 04/16] x86: Replace cpu_2_logical_apicid[] with early percpu variable Tejun Heo
@ 2011-01-11  3:48   ` David Rientjes
  2011-01-11 14:19     ` Tejun Heo
  0 siblings, 1 reply; 40+ messages in thread
From: David Rientjes @ 2011-01-11  3:48 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
On Thu, 30 Dec 2010, Tejun Heo wrote:
> Unlike x86_64, on x86_32, the mapping from cpu to logical apicid may
> vary depending on apic in use.  cpu_2_logical_apicid[] array is used
> for this mapping.  Replace it with early percpu variable
> x86_cpu_to_logical_apicid to make it better aligned with other
> mappings.
> 
> Signed-off-by: Tejun Heo <tj@kernel.org>
The apicid's are now stored as int's as opposed to u8, even though the max 
is 256.  Is that for extendability or for better alignment?  If not, I 
think it would be better to keep the old type.
^ permalink raw reply	[flat|nested] 40+ messages in thread
* Re: [PATCH 04/16] x86: Replace cpu_2_logical_apicid[] with early percpu variable
  2011-01-11  3:48   ` David Rientjes
@ 2011-01-11 14:19     ` Tejun Heo
  0 siblings, 0 replies; 40+ messages in thread
From: Tejun Heo @ 2011-01-11 14:19 UTC (permalink / raw)
  To: David Rientjes
  Cc: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
On Mon, Jan 10, 2011 at 07:48:41PM -0800, David Rientjes wrote:
> On Thu, 30 Dec 2010, Tejun Heo wrote:
> 
> > Unlike x86_64, on x86_32, the mapping from cpu to logical apicid may
> > vary depending on apic in use.  cpu_2_logical_apicid[] array is used
> > for this mapping.  Replace it with early percpu variable
> > x86_cpu_to_logical_apicid to make it better aligned with other
> > mappings.
> > 
> > Signed-off-by: Tejun Heo <tj@kernel.org>
> 
> The apicid's are now stored as int's as opposed to u8, even though the max 
> is 256.  Is that for extendability or for better alignment?  If not, I 
> think it would be better to keep the old type.
I don't know.  Yeah, it's x86_32 only so it can be u8 but other IDs
are shared with x86_64 and should be int.  I think it's less confusing
to simply use int.  It's not like the extra overhead is gonna be
noticeable or anything.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 40+ messages in thread
* Re: [PATCH 05/16] x86: Always use x86_cpu_to_logical_apicid for cpu -> logical apic id
  2010-12-30 17:49 ` [PATCH 05/16] x86: Always use x86_cpu_to_logical_apicid for cpu -> logical apic id Tejun Heo
@ 2011-01-13  2:40   ` David Rientjes
  2011-01-13 10:57     ` Tejun Heo
  0 siblings, 1 reply; 40+ messages in thread
From: David Rientjes @ 2011-01-13  2:40 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, Ingo Molnar, tglx, H. Peter Anvin, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On Thu, 30 Dec 2010, Tejun Heo wrote:
> diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
> index ba78b1e..8ad231c 100644
> --- a/arch/x86/kernel/apic/apic.c
> +++ b/arch/x86/kernel/apic/apic.c
> @@ -1252,6 +1252,14 @@ void __cpuinit setup_local_APIC(void)
>  	 */
>  	apic->init_apic_ldr();
>  
> +#ifdef CONFIG_X86_32
> +	/*
> +	 * APIC LDR is initialized.  Fetch and store logical_apic_id.
> +	 */
> +	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
> +		logical_smp_processor_id();
> +#endif
> +
>  	/*
>  	 * Set Task Priority to 'accept all'. We never change this
>  	 * later on.
You can remove the initialization of x86_cpu_to_logical_apicid in the 
->init_apic_ldr() callback in x2apic_cluster.c now too?
[snip]
> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
> index eb04f30..0768761 100644
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -168,19 +168,18 @@ static void unmap_cpu_to_node(int cpu)
>  static void map_cpu_to_logical_apicid(void)
>  {
>  	int cpu = smp_processor_id();
> -	int apicid = logical_smp_processor_id();
> -	int node = apic->apicid_to_node(apicid);
> +	int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
> +	int node;
>  
> +	node = apic->apicid_to_node(logical_apicid);
>  	if (!node_online(node))
>  		node = first_online_node;
>  
> -	early_per_cpu(x86_cpu_to_logical_apicid, cpu) = apicid;
>  	map_cpu_to_node(cpu, node);
>  }
>  
>  void numa_remove_cpu(int cpu)
>  {
> -	early_per_cpu(x86_cpu_to_logical_apicid, cpu) = BAD_APICID;
>  	unmap_cpu_to_node(cpu);
>  }
>  #else
I don't see where this is reset to BAD_APICID when numa_remove_cpu() is 
called when disabling a cpu after the patch.
^ permalink raw reply	[flat|nested] 40+ messages in thread
* Re: [PATCH 06/16] x86: Kill apic->cpu_to_logical_apicid()
  2010-12-30 17:49 ` [PATCH 06/16] x86: Kill apic->cpu_to_logical_apicid() Tejun Heo
@ 2011-01-13  2:40   ` David Rientjes
  0 siblings, 0 replies; 40+ messages in thread
From: David Rientjes @ 2011-01-13  2:40 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng
On Thu, 30 Dec 2010, Tejun Heo wrote:
> After the previous patch, apic->cpu_to_logical_apicid() is no longer
> used.  Kill it.
> 
> For apic types with custom cpu_to_logical_apicid() which is also used
> for other purposes, remove the function and modify its users to do the
> mapping directly.
> 
> #ifdef's on CONFIG_SMP in es7000_32 and summit_32 are ignored during
> conversion as they are not used for UP kernels.
> 
> Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: David Rientjes <rientjes@google.com>
^ permalink raw reply	[flat|nested] 40+ messages in thread
* Re: [PATCH 05/16] x86: Always use x86_cpu_to_logical_apicid for cpu -> logical apic id
  2011-01-13  2:40   ` David Rientjes
@ 2011-01-13 10:57     ` Tejun Heo
  0 siblings, 0 replies; 40+ messages in thread
From: Tejun Heo @ 2011-01-13 10:57 UTC (permalink / raw)
  To: David Rientjes
  Cc: linux-kernel, Ingo Molnar, tglx, H. Peter Anvin, x86,
	eric.dumazet, yinghai, brgerst, gorcunov, Pekka Enberg,
	shaohui.zheng
On Wed, Jan 12, 2011 at 06:40:09PM -0800, David Rientjes wrote:
> On Thu, 30 Dec 2010, Tejun Heo wrote:
> 
> > diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
> > index ba78b1e..8ad231c 100644
> > --- a/arch/x86/kernel/apic/apic.c
> > +++ b/arch/x86/kernel/apic/apic.c
> > @@ -1252,6 +1252,14 @@ void __cpuinit setup_local_APIC(void)
> >  	 */
> >  	apic->init_apic_ldr();
> >  
> > +#ifdef CONFIG_X86_32
> > +	/*
> > +	 * APIC LDR is initialized.  Fetch and store logical_apic_id.
> > +	 */
> > +	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
> > +		logical_smp_processor_id();
> > +#endif
> > +
> >  	/*
> >  	 * Set Task Priority to 'accept all'. We never change this
> >  	 * later on.
> 
> You can remove the initialization of x86_cpu_to_logical_apicid in the 
> ->init_apic_ldr() callback in x2apic_cluster.c now too?
Believe it or not, that's a different variable.  And yeah we probably
should clean that up too.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 40+ messages in thread
* Re: [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4
  2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
                   ` (16 preceding siblings ...)
  2010-12-30 20:14 ` [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 H. Peter Anvin
@ 2011-01-21 18:16 ` Tejun Heo
  2011-01-21 18:49   ` H. Peter Anvin
  17 siblings, 1 reply; 40+ messages in thread
From: Tejun Heo @ 2011-01-21 18:16 UTC (permalink / raw)
  To: linux-kernel, mingo, tglx, hpa, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
Hello,
On Thu, Dec 30, 2010 at 06:49:13PM +0100, Tejun Heo wrote:
> This is the fourth take of unify-x86_32-and-64-NUMA-init-paths
> patchset.
hpa, shall I refresh and resend?
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 40+ messages in thread
* Re: [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4
  2011-01-21 18:16 ` Tejun Heo
@ 2011-01-21 18:49   ` H. Peter Anvin
  0 siblings, 0 replies; 40+ messages in thread
From: H. Peter Anvin @ 2011-01-21 18:49 UTC (permalink / raw)
  To: Tejun Heo, linux-kernel, mingo, tglx, x86, eric.dumazet, yinghai,
	brgerst, gorcunov, penberg, shaohui.zheng, rientjes
Yes, please.
"Tejun Heo" <tj@kernel.org> wrote:
>Hello,
>
>On Thu, Dec 30, 2010 at 06:49:13PM +0100, Tejun Heo wrote:
>> This is the fourth take of unify-x86_32-and-64-NUMA-init-paths
>> patchset.
>
>hpa, shall I refresh and resend?
>
>Thanks.
>
>-- 
>tejun
-- 
Sent from my mobile phone.  Please pardon any lack of formatting.
^ permalink raw reply	[flat|nested] 40+ messages in thread
* [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
  2011-01-23 13:37 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#5 Tejun Heo
@ 2011-01-23 13:37 ` Tejun Heo
  0 siblings, 0 replies; 40+ messages in thread
From: Tejun Heo @ 2011-01-23 13:37 UTC (permalink / raw)
  To: linux-kernel, hpa
  Cc: mingo, tglx, x86, eric.dumazet, yinghai, brgerst, gorcunov,
	penberg, shaohui.zheng, rientjes, Tejun Heo
The mapping between cpu/apicid and node is done via apicid_to_node[]
on 64bit and apicid_2_node[] + apic->x86_32_numa_cpu_node() on 32bit.
This difference makes it difficult to further unify 32 and 64bit NUMA
handling.
This patch unifies it by replacing both apicid_to_node[] and
apicid_2_node[] with __apicid_to_node[] array, which is accessed by
two accessors - set_apicid_to_node() and numa_cpu_node().  On 64bit,
numa_cpu_node() always consults __apicid_to_node[] directly while
32bit goes through the apic->x86_32_numa_cpu_node() method to allow
apic implementations to override it.
srat_detect_node() for amd cpus contains a workaround for broken NUMA
configuration which assumes a relationship between APIC ID, HT node ID
and NUMA topology.  Leave it to access __apicid_to_node[] directly as
mapping through CPU might result in undesirable behavior change.  The
comment is reformatted and updated to note the ugliness.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
---
 arch/x86/include/asm/mpspec.h  |    1 -
 arch/x86/include/asm/numa.h    |   28 +++++++++++++++++++++++
 arch/x86/include/asm/numa_32.h |    6 +++++
 arch/x86/include/asm/numa_64.h |    5 +--
 arch/x86/kernel/acpi/boot.c    |    3 +-
 arch/x86/kernel/apic/apic.c    |    2 +-
 arch/x86/kernel/cpu/amd.c      |   47 +++++++++++++++++++++++++--------------
 arch/x86/kernel/cpu/intel.c    |    3 +-
 arch/x86/kernel/smpboot.c      |    6 +----
 arch/x86/mm/amdtopology_64.c   |    4 +-
 arch/x86/mm/numa.c             |    6 ++++-
 arch/x86/mm/numa_32.c          |    6 +++++
 arch/x86/mm/numa_64.c          |   26 +++++++++------------
 arch/x86/mm/srat_32.c          |    2 +-
 arch/x86/mm/srat_64.c          |   12 +++++-----
 15 files changed, 101 insertions(+), 56 deletions(-)
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index edc2a45..9c7d95f 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -25,7 +25,6 @@ extern int pic_mode;
 #define MAX_IRQ_SOURCES		256
 
 extern unsigned int def_to_bigsmp;
-extern u8 apicid_2_node[];
 
 #ifdef CONFIG_X86_NUMAQ
 extern int mp_bus_id_to_node[MAX_MP_BUSSES];
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 27da400..5e01c76 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,5 +1,33 @@
+#ifndef _ASM_X86_NUMA_H
+#define _ASM_X86_NUMA_H
+
+#include <asm/apicdef.h>
+
+#ifdef CONFIG_NUMA
+/*
+ * __apicid_to_node[] stores the raw mapping between physical apicid and
+ * node and is used to initialize cpu_to_node mapping.
+ *
+ * The mapping may be overridden by apic->x86_32_numa_cpu_node() on 32bit
+ * and thus should be accessed by the accessors - set_apicid_to_node() and
+ * numa_cpu_node().
+ */
+extern s16 __apicid_to_node[MAX_LOCAL_APIC];
+
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+	__apicid_to_node[apicid] = node;
+}
+#else	/* CONFIG_NUMA */
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+}
+#endif	/* CONFIG_NUMA */
+
 #ifdef CONFIG_X86_32
 # include "numa_32.h"
 #else
 # include "numa_64.h"
 #endif
+
+#endif	/* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index b0ef2b4..cdf8043 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -6,6 +6,12 @@ extern int numa_off;
 extern int pxm_to_nid(int pxm);
 extern void numa_remove_cpu(int cpu);
 
+#ifdef CONFIG_NUMA
+extern int __cpuinit numa_cpu_node(int apicid);
+#else	/* CONFIG_NUMA */
+static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
+#endif	/* CONFIG_NUMA */
+
 #ifdef CONFIG_HIGHMEM
 extern void set_highmem_pages_init(void);
 #else
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 0493be3..4982a9c 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_NUMA_64_H
 
 #include <linux/nodemask.h>
-#include <asm/apicdef.h>
 
 struct bootnode {
 	u64 start;
@@ -17,8 +16,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern s16 apicid_to_node[MAX_LOCAL_APIC];
-
 extern unsigned long numa_free_all_bootmem(void);
 extern void setup_node_bootmem(int nodeid, unsigned long start,
 			       unsigned long end);
@@ -32,6 +29,7 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 #define NODE_MIN_SIZE (4*1024*1024)
 
 extern void __init init_cpu_to_node(void);
+extern int __cpuinit numa_cpu_node(int cpu);
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
 extern void __cpuinit numa_add_cpu(int cpu);
@@ -44,6 +42,7 @@ void numa_emu_cmdline(char *);
 #endif /* CONFIG_NUMA_EMU */
 #else
 static inline void init_cpu_to_node(void)		{ }
+static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
 static inline void numa_add_cpu(int cpu, int node)	{ }
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b3a7113..a7bca59 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -589,11 +589,10 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 	nid = acpi_get_node(handle);
 	if (nid == -1 || !node_online(nid))
 		return;
+	set_apicid_to_node(physid, nid);
 #ifdef CONFIG_X86_64
-	apicid_to_node[physid] = nid;
 	numa_set_node(cpu, nid);
 #else /* CONFIG_X86_32 */
-	apicid_2_node[physid] = nid;
 	cpu_to_node_map[cpu] = nid;
 #endif
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 0f4f3c1..4686ea5 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2026,7 +2026,7 @@ int default_x86_32_numa_cpu_node(int cpu)
 	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
 
 	if (apicid != BAD_APICID)
-		return apicid_2_node[apicid];
+		return __apicid_to_node[apicid];
 	return NUMA_NO_NODE;
 #else
 	return 0;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7c7bedb..3cce8f2 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -234,17 +234,21 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
 #endif
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+/*
+ * To workaround broken NUMA config.  Read the comment in
+ * srat_detect_node().
+ */
 static int __cpuinit nearby_node(int apicid)
 {
 	int i, node;
 
 	for (i = apicid - 1; i >= 0; i--) {
-		node = apicid_to_node[i];
+		node = __apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
 	for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
-		node = apicid_to_node[i];
+		node = __apicid_to_node[i];
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
@@ -339,26 +343,35 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 	int node;
 	unsigned apicid = c->apicid;
 
-	node = per_cpu(cpu_llc_id, cpu);
+	node = numa_cpu_node(cpu);
+	if (node == NUMA_NO_NODE)
+		node = per_cpu(cpu_llc_id, cpu);
 
-	if (apicid_to_node[apicid] != NUMA_NO_NODE)
-		node = apicid_to_node[apicid];
 	if (!node_online(node)) {
-		/* Two possibilities here:
-		   - The CPU is missing memory and no node was created.
-		   In that case try picking one from a nearby CPU
-		   - The APIC IDs differ from the HyperTransport node IDs
-		   which the K8 northbridge parsing fills in.
-		   Assume they are all increased by a constant offset,
-		   but in the same order as the HT nodeids.
-		   If that doesn't result in a usable node fall back to the
-		   path for the previous case.  */
-
+		/*
+		 * Two possibilities here:
+		 *
+		 * - The CPU is missing memory and no node was created.  In
+		 *   that case try picking one from a nearby CPU.
+		 *
+		 * - The APIC IDs differ from the HyperTransport node IDs
+		 *   which the K8 northbridge parsing fills in.  Assume
+		 *   they are all increased by a constant offset, but in
+		 *   the same order as the HT nodeids.  If that doesn't
+		 *   result in a usable node fall back to the path for the
+		 *   previous case.
+		 *
+		 * This workaround operates directly on the mapping between
+		 * APIC ID and NUMA node, assuming a certain relationship
+		 * between APIC ID, HT node ID and NUMA topology.  As going
+		 * through CPU mapping may alter the outcome, directly
+		 * access __apicid_to_node[].
+		 */
 		int ht_nodeid = c->initial_apicid;
 
 		if (ht_nodeid >= 0 &&
-		    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
-			node = apicid_to_node[ht_nodeid];
+		    __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+			node = __apicid_to_node[ht_nodeid];
 		/* Pick a nearby node */
 		if (!node_online(node))
 			node = nearby_node(apicid);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d16c2c5..6052004 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -279,11 +279,10 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 	unsigned node;
 	int cpu = smp_processor_id();
-	int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
 
 	/* Don't do the funky fallback heuristics the AMD version employs
 	   for now. */
-	node = apicid_to_node[apicid];
+	node = numa_cpu_node(cpu);
 	if (node == NUMA_NO_NODE || !node_online(node)) {
 		/* reuse the value from init_cpu_to_node() */
 		node = cpu_to_node(cpu);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e7e570f..d6afd57 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -71,10 +71,6 @@
 #include <asm/smpboot_hooks.h>
 #include <asm/i8259.h>
 
-#ifdef CONFIG_X86_32
-u8 apicid_2_node[MAX_LOCAL_APIC];
-#endif
-
 /* State of each CPU */
 DEFINE_PER_CPU(int, cpu_state) = { 0 };
 
@@ -170,7 +166,7 @@ static void map_cpu_to_logical_apicid(void)
 	int cpu = smp_processor_id();
 	int node;
 
-	node = apic->x86_32_numa_cpu_node(cpu);
+	node = numa_cpu_node(cpu);
 	if (!node_online(node))
 		node = first_online_node;
 
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index f21962c..c7fae38 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -247,7 +247,7 @@ void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
 		__acpi_map_pxm_to_node(nid, i);
 #endif
 	}
-	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+	memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
 }
 #endif /* CONFIG_NUMA_EMU */
 
@@ -285,7 +285,7 @@ int __init amd_scan_nodes(void)
 				nodes[i].start >> PAGE_SHIFT,
 				nodes[i].end >> PAGE_SHIFT);
 		for (j = apicid_base; j < cores + apicid_base; j++)
-			apicid_to_node[(i << bits) + j] = i;
+			set_apicid_to_node((i << bits) + j, i);
 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 	}
 
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index ebf6d78..480b357 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -26,8 +26,12 @@ static __init int numa_setup(char *opt)
 early_param("numa", numa_setup);
 
 /*
- * Which logical CPUs are on which nodes
+ * apicid, cpu, node mappings
  */
+s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
 cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 EXPORT_SYMBOL(node_to_cpumask_map);
 
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 84a3e4c..8d91d22 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -110,6 +110,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
 static unsigned long kva_start_pfn;
 static unsigned long kva_pages;
+
+int __cpuinit numa_cpu_node(int cpu)
+{
+	return apic->x86_32_numa_cpu_node(cpu);
+}
+
 /*
  * FLAT - support for basic PC memory model with discontig enabled, essentially
  *        a single node with all available processors in it with a flat
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 95ea155..1e1026f 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -26,10 +26,6 @@ EXPORT_SYMBOL(node_data);
 
 struct memnode memnode;
 
-s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
-	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-
 static unsigned long __initdata nodemap_addr;
 static unsigned long __initdata nodemap_size;
 
@@ -716,12 +712,8 @@ void __init init_cpu_to_node(void)
 	BUG_ON(cpu_to_apicid == NULL);
 
 	for_each_possible_cpu(cpu) {
-		int node;
-		u16 apicid = cpu_to_apicid[cpu];
+		int node = numa_cpu_node(cpu);
 
-		if (apicid == BAD_APICID)
-			continue;
-		node = apicid_to_node[apicid];
 		if (node == NUMA_NO_NODE)
 			continue;
 		if (!node_online(node))
@@ -731,6 +723,14 @@ void __init init_cpu_to_node(void)
 }
 #endif
 
+int __cpuinit numa_cpu_node(int cpu)
+{
+	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+	if (apicid != BAD_APICID)
+		return __apicid_to_node[apicid];
+	return NUMA_NO_NODE;
+}
 
 void __cpuinit numa_set_node(int cpu, int node)
 {
@@ -776,13 +776,9 @@ void __cpuinit numa_remove_cpu(int cpu)
 void __cpuinit numa_add_cpu(int cpu)
 {
 	unsigned long addr;
-	u16 apicid;
-	int physnid;
-	int nid = NUMA_NO_NODE;
+	int physnid, nid;
 
-	apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-	if (apicid != BAD_APICID)
-		nid = apicid_to_node[apicid];
+	nid = numa_cpu_node(cpu);
 	if (nid == NUMA_NO_NODE)
 		nid = early_cpu_to_node(cpu);
 	BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 6027a48..48651c6 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -255,7 +255,7 @@ int __init get_memcfg_from_srat(void)
 			 num_memory_chunks);
 
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
+		set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
 
 	for (j = 0; j < num_memory_chunks; j++){
 		struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 603d285..9a97261 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -79,7 +79,7 @@ static __init void bad_srat(void)
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		apicid_to_node[i] = NUMA_NO_NODE;
+		set_apicid_to_node(i, NUMA_NO_NODE);
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		nodes[i].start = nodes[i].end = 0;
 		nodes_add[i].start = nodes_add[i].end = 0;
@@ -138,7 +138,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
 		return;
 	}
-	apicid_to_node[apic_id] = node;
+	set_apicid_to_node(apic_id, node);
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
@@ -178,7 +178,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 		return;
 	}
 
-	apicid_to_node[apic_id] = node;
+	set_apicid_to_node(apic_id, node);
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
@@ -521,7 +521,7 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 		 * node, it must now point to the fake node ID.
 		 */
 		for (j = 0; j < MAX_LOCAL_APIC; j++)
-			if (apicid_to_node[j] == nid &&
+			if (__apicid_to_node[j] == nid &&
 			    fake_apicid_to_node[j] == NUMA_NO_NODE)
 				fake_apicid_to_node[j] = i;
 	}
@@ -532,13 +532,13 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 	 * value.
 	 */
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		if (apicid_to_node[i] != NUMA_NO_NODE &&
+		if (__apicid_to_node[i] != NUMA_NO_NODE &&
 		    fake_apicid_to_node[i] == NUMA_NO_NODE)
 			fake_apicid_to_node[i] = 0;
 
 	for (i = 0; i < num_nodes; i++)
 		__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
-	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+	memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
 
 	nodes_clear(nodes_parsed);
 	for (i = 0; i < num_nodes; i++)
-- 
1.7.1
^ permalink raw reply related	[flat|nested] 40+ messages in thread
end of thread, other threads:[~2011-01-23 13:38 UTC | newest]
Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-12-30 17:49 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 Tejun Heo
2010-12-30 17:49 ` [PATCH 01/16] x86: Kill unused static boot_cpu_logical_apicid in smpboot.c Tejun Heo
2011-01-11  3:48   ` David Rientjes
2010-12-30 17:49 ` [PATCH 02/16] x86: Rename x86_32 MAX_APICID to MAX_LOCAL_APIC Tejun Heo
2011-01-11  3:48   ` David Rientjes
2010-12-30 17:49 ` [PATCH 03/16] x86: Make default_send_IPI_mask_sequence/allbutself_logical() 32bit only Tejun Heo
2011-01-11  3:48   ` David Rientjes
2010-12-30 17:49 ` [PATCH 04/16] x86: Replace cpu_2_logical_apicid[] with early percpu variable Tejun Heo
2011-01-11  3:48   ` David Rientjes
2011-01-11 14:19     ` Tejun Heo
2010-12-30 17:49 ` [PATCH 05/16] x86: Always use x86_cpu_to_logical_apicid for cpu -> logical apic id Tejun Heo
2011-01-13  2:40   ` David Rientjes
2011-01-13 10:57     ` Tejun Heo
2010-12-30 17:49 ` [PATCH 06/16] x86: Kill apic->cpu_to_logical_apicid() Tejun Heo
2011-01-13  2:40   ` David Rientjes
2010-12-30 17:49 ` [PATCH 07/16] x86: Add apic->x86_32_early_logical_apicid() Tejun Heo
2010-12-30 17:49 ` [PATCH 08/16] x86: Implement the default x86_32_early_logical_apicid() Tejun Heo
2010-12-30 17:49 ` [PATCH 09/16] x86: Implement x86_32_early_logical_apicid() for bigsmp_32 Tejun Heo
2010-12-30 17:49 ` [PATCH 10/16] x86: Implement x86_32_early_logical_apicid() for summit_32 Tejun Heo
2010-12-30 17:49 ` [PATCH 11/16] x86: Implement x86_32_early_logical_apicid() for numaq_32 Tejun Heo
2010-12-30 17:49 ` [PATCH 12/16] x86: Replace apic->apicid_to_node() with ->x86_32_numa_cpu_node() Tejun Heo
2010-12-30 17:49 ` [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit Tejun Heo
2010-12-30 17:49 ` [PATCH 14/16] x86: Unify CPU -> " Tejun Heo
2010-12-30 17:49 ` [PATCH 15/16] x86: Unify node_to_cpumask_map handling " Tejun Heo
2010-12-30 17:49 ` [PATCH 16/16] x86: Unify NUMA initialization " Tejun Heo
2010-12-30 20:14 ` [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#4 H. Peter Anvin
2011-01-21 18:16 ` Tejun Heo
2011-01-21 18:49   ` H. Peter Anvin
  -- strict thread matches above, loose matches on Subject: below --
2011-01-23 13:37 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#5 Tejun Heo
2011-01-23 13:37 ` [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit Tejun Heo
2010-12-28 11:48 [PATCHSET REPOST] x86: unify x86_32 and 64 NUMA init paths, take#3 Tejun Heo
2010-12-28 11:48 ` [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit Tejun Heo
2010-12-28 20:35   ` David Rientjes
2010-12-29 10:52     ` Tejun Heo
2010-12-29 19:36       ` H. Peter Anvin
2010-12-29 22:05         ` H. Peter Anvin
2010-12-30 11:33           ` Tejun Heo
2010-11-27 15:21 [PATCHSET] x86: unify x86_32 and 64 NUMA init paths, take#2 Tejun Heo
2010-11-27 15:22 ` [PATCH 13/16] x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit Tejun Heo
2010-12-09 21:43   ` Thomas Gleixner
2010-12-10 20:45     ` Tejun Heo
2010-12-10 20:54       ` Tejun Heo
2010-12-10 21:17         ` Yinghai Lu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).