All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel J Blueman <daniel@numascale.com>
To: Ingo Molnar <mingo@kernel.org>
Cc: Daniel J Blueman <daniel@numascale.com>,
	Denys Vlasenko <dvlasenk@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Jiang Liu <jiang.liu@linux.intel.com>,
	Len Brown <len.brown@intel.com>,
	Steffen Persvold <sp@numascale.com>,
	<linux-kernel@vger.kernel.org>, <x86@kernel.org>
Subject: [PATCH v2] x86/apic: Use smaller array for __apicid_to_node[] mapping
Date: Mon, 5 Oct 2015 12:32:30 +0800	[thread overview]
Message-ID: <1444019550-21045-1-git-send-email-daniel@numascale.com> (raw)
In-Reply-To: <20151003074428.GA25143@gmail.com>

The Intel x2APIC spec states that the upper 16 bits of the APIC ID are
the cluster ID [1, p2-12], intended for future distributed systems.
Beyond the legacy 8-bit APIC ID, Numascale NumaConnect uses 4 bits for
the position of a server on each axis of a multi-dimensional torus; SGI
NUMAlink also structures the APIC ID space.

Instead of sizing __apicid_to_node[] by MAX_LOCAL_APIC, define an array
based on NR_CPUS to achieve a 1:1 mapping and perform a linear search;
we see "ACPI: NR_CPUS/possible_cpus limit of X reached.  Processor
8/0x16 ignored." when config-limited. This addresses the binary bloat
and the present artificial APIC ID limits. With CONFIG_NR_CPUS=256, we
save ~64KB of vmlinux data:

$ size vmlinux vmlinux-patched
  text      data     bss      dec     hex filename
18232877 1849656 2281472 22364005 1553f65 vmlinux
18233034 1786168 2281472 22300674 1544802 vmlinux-patched

Tested on a 256-core system with a 20-bit APIC ID space, and on a
48-core legacy 8-bit APIC ID system with and without CONFIG_NUMA,
CONFIG_NUMA_EMU and CONFIG_AMD_NUMA.

v2: Improved readability by moving the static variable out; integrated
Denys's NUMA emulation fix

Signed-off-by: Daniel J Blueman <daniel@numascale.com>
CC: Denys Vlasenko <dvlasenk@redhat.com>
CC: Ingo Molnar <mingo@kernel.org>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Jiang Liu <jiang.liu@linux.intel.com>
CC: Len Brown <len.brown@intel.com>
CC: Steffen Persvold <sp@numascale.com>
CC: linux-kernel@vger.kernel.org
CC: x86@kernel.org

[1] http://www.intel.com/content/dam/doc/specification-update/64-architecture-x2apic-specification.pdf
---
 arch/x86/include/asm/numa.h  | 13 +++++++------
 arch/x86/kernel/cpu/amd.c    | 11 ++++++-----
 arch/x86/mm/numa.c           | 29 +++++++++++++++++++++--------
 arch/x86/mm/numa_emulation.c |  6 +++---
 4 files changed, 37 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 01b493e..33becb8 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -17,6 +17,11 @@
  */
 #define NODE_MIN_SIZE (4*1024*1024)
 
+struct apicid_to_node {
+	int apicid;
+	s16 node;
+};
+
 extern int numa_off;
 
 /*
@@ -27,17 +32,13 @@ extern int numa_off;
  * should be accessed by the accessors - set_apicid_to_node() and
  * numa_cpu_node().
  */
-extern s16 __apicid_to_node[MAX_LOCAL_APIC];
+extern struct apicid_to_node __apicid_to_node[NR_CPUS];
 extern nodemask_t numa_nodes_parsed __initdata;
 
 extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
 extern void __init numa_set_distance(int from, int to, int distance);
 
-static inline void set_apicid_to_node(int apicid, s16 node)
-{
-	__apicid_to_node[apicid] = node;
-}
-
+extern void set_apicid_to_node(int apicid, s16 node);
 extern int numa_cpu_node(int cpu);
 
 #else	/* CONFIG_NUMA */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 4a70fc6..9494f0e 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -277,12 +277,13 @@ static int nearby_node(int apicid)
 	int i, node;
 
 	for (i = apicid - 1; i >= 0; i--) {
-		node = __apicid_to_node[i];
+		node = __apicid_to_node[i].node;
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
-	for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
-		node = __apicid_to_node[i];
+	for (i = apicid + 1; i < NR_CPUS; i++) {
+		node = __apicid_to_node[i].node;
+
 		if (node != NUMA_NO_NODE && node_online(node))
 			return node;
 	}
@@ -422,8 +423,8 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
 		int ht_nodeid = c->initial_apicid;
 
 		if (ht_nodeid >= 0 &&
-		    __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
-			node = __apicid_to_node[ht_nodeid];
+		    __apicid_to_node[ht_nodeid].node != NUMA_NO_NODE)
+			node = __apicid_to_node[ht_nodeid].node;
 		/* Pick a nearby node */
 		if (!node_online(node))
 			node = nearby_node(apicid);
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index c3b3f65..849a113 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -26,6 +26,7 @@ nodemask_t numa_nodes_parsed __initdata;
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 
+static unsigned apicids;
 static struct numa_meminfo numa_meminfo
 #ifndef CONFIG_MEMORY_HOTPLUG
 __initdata
@@ -56,16 +57,31 @@ early_param("numa", numa_setup);
 /*
  * apicid, cpu, node mappings
  */
-s16 __apicid_to_node[MAX_LOCAL_APIC] = {
-	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+struct apicid_to_node __apicid_to_node[NR_CPUS] = {
+	[0 ... NR_CPUS-1] = {-1, NUMA_NO_NODE}
 };
 
+void set_apicid_to_node(int apicid, s16 node)
+{
+	/* Protect against small kernel on large system */
+	if (apicids >= NR_CPUS)
+		return;
+
+	__apicid_to_node[apicids].apicid = apicid;
+	__apicid_to_node[apicids].node = node;
+	apicids++;
+}
+
 int numa_cpu_node(int cpu)
 {
-	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+	int ent, apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+	if (apicid == BAD_APICID)
+		return NUMA_NO_NODE;
+
+	for (ent = 0; ent < NR_CPUS; ent++)
+		if (__apicid_to_node[ent].apicid == apicid)
+			return __apicid_to_node[ent].node;
 
-	if (apicid != BAD_APICID)
-		return __apicid_to_node[apicid];
 	return NUMA_NO_NODE;
 }
 
@@ -607,9 +623,6 @@ static int __init numa_init(int (*init_func)(void))
 	int i;
 	int ret;
 
-	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		set_apicid_to_node(i, NUMA_NO_NODE);
-
 	nodes_clear(numa_nodes_parsed);
 	nodes_clear(node_possible_map);
 	nodes_clear(node_online_map);
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index a8f90ce..1a0e112 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -399,12 +399,12 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 	 * back to zero just in case.
 	 */
 	for (i = 0; i < ARRAY_SIZE(__apicid_to_node); i++) {
-		if (__apicid_to_node[i] == NUMA_NO_NODE)
+		if (__apicid_to_node[i].node == NUMA_NO_NODE)
 			continue;
 		for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++)
-			if (__apicid_to_node[i] == emu_nid_to_phys[j])
+			if (__apicid_to_node[i].node == emu_nid_to_phys[j])
 				break;
-		__apicid_to_node[i] = j < ARRAY_SIZE(emu_nid_to_phys) ? j : 0;
+		__apicid_to_node[i].node = j < ARRAY_SIZE(emu_nid_to_phys) ? j : 0;
 	}
 
 	/* make sure all emulated nodes are mapped to a physical node */
-- 
2.5.0


  parent reply	other threads:[~2015-10-05  4:33 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-10-02 19:12 [PATCH 1/3] x86/apic: Rename MAX_LOCAL_APIC to MAX_LOCAL_APICID Denys Vlasenko
2015-10-02 19:12 ` [PATCH 2/3] x86/apic: Make apic_version[] smaller Denys Vlasenko
2015-10-02 19:12 ` [PATCH 3/3] x86/apic: Use smaller array for __apicid_to_node[] mapping Denys Vlasenko
2015-10-03  7:44   ` Ingo Molnar
2015-10-03 20:26     ` Denys Vlasenko
2015-10-05  4:32     ` Daniel J Blueman [this message]
2015-10-09 14:15       ` [PATCH v2] " Thomas Gleixner
2015-10-09 15:16         ` Jiang Liu
2015-10-09 20:40           ` Thomas Gleixner
2015-10-09 15:35   ` [PATCH 3/3] " Jiang Liu
2015-10-12 10:21     ` Daniel J Blueman
2015-10-12 10:25       ` Thomas Gleixner
2015-10-13  9:32         ` Jiang Liu
2015-10-13 12:55           ` Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1444019550-21045-1-git-send-email-daniel@numascale.com \
    --to=daniel@numascale.com \
    --cc=dvlasenk@redhat.com \
    --cc=jiang.liu@linux.intel.com \
    --cc=len.brown@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=sp@numascale.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.