[PATCH 23/33] x86-64, NUMA: Kill numa_nodes[]

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Tejun Heo <tj@kernel.org>
To: linux-kernel@vger.kernel.org, x86@kernel.org, yinghai@kernel.org,
	brgerst@gmail.com, gorcunov@gmail.com, shaohui.zheng@intel.com,
	rientjes@google.com, mingo@elte.hu, hpa@linux.intel.com,
	ankita@in.ibm.com
Cc: Tejun Heo <tj@kernel.org>
Subject: [PATCH 23/33] x86-64, NUMA: Kill numa_nodes[]
Date: Wed, 16 Feb 2011 13:20:57 +0100	[thread overview]
Message-ID: <1297858867-25981-24-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1297858867-25981-1-git-send-email-tj@kernel.org>

numa_nodes[] doesn't carry any information which isn't present in
numa_meminfo.  Each entry is simply min/max range of all the memblks
for the node.  This is not only redundant but also inaccurate when
memblks for different nodes interleave - for example,
find_node_by_addr() can return the wrong nodeid.

Kill numa_nodes[] and always use numa_meminfo instead.

* nodes_cover_memory() is renamed to numa_meminfo_cover_memory() and
  now operations on numa_meminfo and returns bool.

* setup_node_bootmem() needs min/max range.  Compute the range on the
  fly.  setup_node_bootmem() invocation is restructured to use outer
  loop instead of hardcoding the double invocations.

* find_node_by_addr() now operates on numa_meminfo.

* setup_physnodes() builds physnodes[] from memblks.  This will go
  away when emulation code is updated to use struct numa_meminfo.

This patch also makes the following misc changes.

* Clearing of nodes_add[] clearing is converted to memset().

* numa_add_memblk() in amd_numa_init() is moved down a bit for
  consistency.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Shaohui Zheng <shaohui.zheng@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/numa_64.h |    1 -
 arch/x86/mm/amdtopology_64.c   |    6 +--
 arch/x86/mm/numa_64.c          |   82 +++++++++++++++++++++++----------------
 arch/x86/mm/srat_64.c          |   22 ++---------
 4 files changed, 53 insertions(+), 58 deletions(-)

diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 867d41b..da5c501 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -27,7 +27,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 
 extern nodemask_t cpu_nodes_parsed __initdata;
 extern nodemask_t mem_nodes_parsed __initdata;
-extern struct bootnode numa_nodes[MAX_NUMNODES] __initdata;
 
 extern int __cpuinit numa_cpu_node(int cpu);
 extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index 8f7a5eb..0cb59e5 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -165,12 +165,8 @@ int __init amd_numa_init(void)
 		pr_info("Node %d MemBase %016lx Limit %016lx\n",
 			nodeid, base, limit);
 
-		numa_nodes[nodeid].start = base;
-		numa_nodes[nodeid].end = limit;
-		numa_add_memblk(nodeid, base, limit);
-
 		prevbase = base;
-
+		numa_add_memblk(nodeid, base, limit);
 		node_set(nodeid, mem_nodes_parsed);
 		node_set(nodeid, cpu_nodes_parsed);
 	}
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 681bc0d..c490448 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -46,8 +46,6 @@ static unsigned long __initdata nodemap_size;
 
 static struct numa_meminfo numa_meminfo __initdata;
 
-struct bootnode numa_nodes[MAX_NUMNODES] __initdata;
-
 /*
  * Given a shift value, try to populate memnodemap[]
  * Returns :
@@ -349,17 +347,17 @@ static int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
  * Sanity check to catch more bad NUMA configurations (they are amazingly
  * common).  Make sure the nodes cover all memory.
  */
-static int __init nodes_cover_memory(const struct bootnode *nodes)
+static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
 {
 	unsigned long numaram, e820ram;
 	int i;
 
 	numaram = 0;
-	for_each_node_mask(i, mem_nodes_parsed) {
-		unsigned long s = nodes[i].start >> PAGE_SHIFT;
-		unsigned long e = nodes[i].end >> PAGE_SHIFT;
+	for (i = 0; i < mi->nr_blks; i++) {
+		unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
+		unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
 		numaram += e - s;
-		numaram -= __absent_pages_in_range(i, s, e);
+		numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
 		if ((long)numaram < 0)
 			numaram = 0;
 	}
@@ -371,14 +369,14 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
 		printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
 		       (numaram << PAGE_SHIFT) >> 20,
 		       (e820ram << PAGE_SHIFT) >> 20);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static int __init numa_register_memblks(struct numa_meminfo *mi)
 {
-	int i;
+	int i, j, nid;
 
 	/* Account for nodes with cpus and no memory */
 	nodes_or(node_possible_map, mem_nodes_parsed, cpu_nodes_parsed);
@@ -398,23 +396,34 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 
 	/* for out of order entries */
 	sort_node_map();
-	if (!nodes_cover_memory(numa_nodes))
+	if (!numa_meminfo_cover_memory(mi))
 		return -EINVAL;
 
 	init_memory_mapping_high();
 
-	/* Finally register nodes. */
-	for_each_node_mask(i, node_possible_map)
-		setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end);
-
 	/*
-	 * Try again in case setup_node_bootmem missed one due to missing
-	 * bootmem.
+	 * Finally register nodes.  Do it twice in case setup_node_bootmem
+	 * missed one due to missing bootmem.
 	 */
-	for_each_node_mask(i, node_possible_map)
-		if (!node_online(i))
-			setup_node_bootmem(i, numa_nodes[i].start,
-					   numa_nodes[i].end);
+	for (i = 0; i < 2; i++) {
+		for_each_node_mask(nid, node_possible_map) {
+			u64 start = (u64)max_pfn << PAGE_SHIFT;
+			u64 end = 0;
+
+			if (node_online(nid))
+				continue;
+
+			for (j = 0; j < mi->nr_blks; j++) {
+				if (nid != mi->blk[j].nid)
+					continue;
+				start = min(mi->blk[j].start, start);
+				end = max(mi->blk[j].end, end);
+			}
+
+			if (start < end)
+				setup_node_bootmem(nid, start, end);
+		}
+	}
 
 	return 0;
 }
@@ -432,33 +441,41 @@ void __init numa_emu_cmdline(char *str)
 
 int __init find_node_by_addr(unsigned long addr)
 {
-	int ret = NUMA_NO_NODE;
+	const struct numa_meminfo *mi = &numa_meminfo;
 	int i;
 
-	for_each_node_mask(i, mem_nodes_parsed) {
+	for (i = 0; i < mi->nr_blks; i++) {
 		/*
 		 * Find the real node that this emulated node appears on.  For
 		 * the sake of simplicity, we only use a real node's starting
 		 * address to determine which emulated node it appears on.
 		 */
-		if (addr >= numa_nodes[i].start && addr < numa_nodes[i].end) {
-			ret = i;
-			break;
-		}
+		if (addr >= mi->blk[i].start && addr < mi->blk[i].end)
+			return mi->blk[i].nid;
 	}
-	return ret;
+	return NUMA_NO_NODE;
 }
 
 static int __init setup_physnodes(unsigned long start, unsigned long end)
 {
+	const struct numa_meminfo *mi = &numa_meminfo;
 	int ret = 0;
 	int i;
 
 	memset(physnodes, 0, sizeof(physnodes));
 
-	for_each_node_mask(i, mem_nodes_parsed) {
-		physnodes[i].start = numa_nodes[i].start;
-		physnodes[i].end = numa_nodes[i].end;
+	for (i = 0; i < mi->nr_blks; i++) {
+		int nid = mi->blk[i].nid;
+
+		if (physnodes[nid].start == physnodes[nid].end) {
+			physnodes[nid].start = mi->blk[i].start;
+			physnodes[nid].end = mi->blk[i].end;
+		} else {
+			physnodes[nid].start = min(physnodes[nid].start,
+						   mi->blk[i].start);
+			physnodes[nid].end = max(physnodes[nid].end,
+						 mi->blk[i].end);
+		}
 	}
 
 	/*
@@ -809,8 +826,6 @@ static int dummy_numa_init(void)
 	node_set(0, cpu_nodes_parsed);
 	node_set(0, mem_nodes_parsed);
 	numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
-	numa_nodes[0].start = 0;
-	numa_nodes[0].end = (u64)max_pfn << PAGE_SHIFT;
 
 	return 0;
 }
@@ -841,7 +856,6 @@ void __init initmem_init(void)
 		nodes_clear(node_possible_map);
 		nodes_clear(node_online_map);
 		memset(&numa_meminfo, 0, sizeof(numa_meminfo));
-		memset(numa_nodes, 0, sizeof(numa_nodes));
 		remove_all_active_ranges();
 
 		if (numa_init[i]() < 0)
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 51d0733..e8b3b3c 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -37,13 +37,9 @@ static __init int setup_node(int pxm)
 
 static __init void bad_srat(void)
 {
-	int i;
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		numa_nodes[i].start = numa_nodes[i].end = 0;
-		nodes_add[i].start = nodes_add[i].end = 0;
-	}
+	memset(nodes_add, 0, sizeof(nodes_add));
 }
 
 static __init inline int srat_disabled(void)
@@ -210,7 +206,6 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
 void __init
 acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 {
-	struct bootnode *nd;
 	unsigned long start, end;
 	int node, pxm;
 
@@ -243,18 +238,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 	printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
 	       start, end);
 
-	if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
-		nd = &numa_nodes[node];
-		if (!node_test_and_set(node, mem_nodes_parsed)) {
-			nd->start = start;
-			nd->end = end;
-		} else {
-			if (start < nd->start)
-				nd->start = start;
-			if (nd->end < end)
-				nd->end = end;
-		}
-	} else
+	if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE))
+		node_set(node, mem_nodes_parsed);
+	else
 		update_nodes_add(node, start, end);
 }
 
-- 
1.7.1

next prev parent reply	other threads:[~2011-02-16 12:24 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-02-16 12:20 [PATCHSET x86/numa] x86-64, NUMA: bring sanity to NUMA config/emulation Tejun Heo
2011-02-16 12:20 ` [PATCH 01/33] x86-64, NUMA: Make dummy node initialization path similar to non-dummy ones Tejun Heo
2011-02-16 12:20 ` [PATCH 02/33] x86-64, NUMA: Simplify hotplug node handling in acpi_numa_memory_affinity_init() Tejun Heo
2011-02-16 12:20 ` [PATCH 03/33] x86, NUMA: Drop @start/last_pfn from initmem_init() Tejun Heo
2011-02-16 12:20 ` [PATCH 04/33] x86-64, NUMA: Unify {acpi|amd}_{numa_init|scan_nodes}() arguments and return values Tejun Heo
2011-02-16 12:20 ` [PATCH 05/33] x86-64, NUMA: Wrap acpi_numa_init() so that failure can be indicated by return value Tejun Heo
2011-02-16 12:20 ` [PATCH 06/33] x86, NUMA: Move *_numa_init() invocations into initmem_init() Tejun Heo
2011-02-16 12:20 ` [PATCH 07/33] x86-64, NUMA: Restructure initmem_init() Tejun Heo
2011-02-16 12:20 ` [PATCH 08/33] x86-64, NUMA: Use common {cpu|mem}_nodes_parsed Tejun Heo
2011-02-16 12:20 ` [PATCH 09/33] x86-64, NUMA: Remove local variable found from amd_numa_init() Tejun Heo
2011-02-16 12:20 ` [PATCH 10/33] x86-64, NUMA: Move apicid to numa mapping initialization from amd_scan_nodes() to amd_numa_init() Tejun Heo
2011-02-16 12:20 ` [PATCH 11/33] x86-64, NUMA: Use common numa_nodes[] Tejun Heo
2011-02-16 12:20 ` [PATCH 12/33] x86-64, NUMA: Kill {acpi|amd}_get_nodes() Tejun Heo
2011-02-16 12:20 ` [PATCH 13/33] x86-64, NUMA: Factor out memblk handling into numa_{add|register}_memblk() Tejun Heo
2011-02-16 16:15   ` [PATCH UPDATED " Tejun Heo
2011-02-16 12:20 ` [PATCH 14/33] x86-64, NUMA: Unify use of memblk in all init methods Tejun Heo
2011-02-16 12:20 ` [PATCH 15/33] x86-64, NUMA: Unify the rest of memblk registration Tejun Heo
2011-02-16 12:20 ` [PATCH 16/33] x86-64, NUMA: Kill {acpi|amd|dummy}_scan_nodes() Tejun Heo
2011-02-16 12:20 ` [PATCH 17/33] x86-64, NUMA: Remove %NULL @nodeids handling from compute_hash_shift() Tejun Heo
2011-02-16 12:20 ` [PATCH 18/33] x86-64, NUMA: Introduce struct numa_meminfo Tejun Heo
2011-02-16 12:20 ` [PATCH 19/33] x86-64, NUMA: Separate out numa_cleanup_meminfo() Tejun Heo
2011-02-16 12:20 ` [PATCH 20/33] x86-64, NUMA: make numa_cleanup_meminfo() prettier Tejun Heo
2011-02-16 12:20 ` [PATCH 21/33] x86-64, NUMA: consolidate and improve memblk sanity checks Tejun Heo
2011-02-16 12:20 ` [PATCH 22/33] x86-64, NUMA: Add common find_node_by_addr() Tejun Heo
2011-02-16 12:20 ` Tejun Heo [this message]
2011-02-16 12:20 ` [PATCH 24/33] x86-64, NUMA: Rename cpu_nodes_parsed to numa_nodes_parsed Tejun Heo
2011-02-16 12:20 ` [PATCH 25/33] x86-64, NUMA: Kill mem_nodes_parsed Tejun Heo
2011-02-16 12:21 ` [PATCH 26/33] x86-64, NUMA: Implement generic node distance handling Tejun Heo
2011-02-16 12:21 ` [PATCH 27/33] x86-64, NUMA: Trivial changes to prepare for emulation updates Tejun Heo
2011-02-16 12:21 ` [PATCH 28/33] x86-64, NUMA: Build and use direct emulated nid -> phys nid mapping Tejun Heo
2011-02-16 14:14   ` [PATCH UPDATED " Tejun Heo
2011-02-16 12:21 ` [PATCH 29/33] x86-64, NUMA: Make emulation code build numa_meminfo and share the registration path Tejun Heo
2011-02-16 12:21 ` [PATCH 30/33] x86-64, NUMA: Wrap node ID during emulation Tejun Heo
2011-02-16 12:21 ` [PATCH 31/33] x86-64, NUMA: Emulate directly from numa_meminfo Tejun Heo
2011-02-16 12:21 ` [PATCH 32/33] x86-64, NUMA: Unify emulated apicid -> node mapping transformation Tejun Heo
2011-02-16 12:21 ` [PATCH 33/33] x86-64, NUMA: Unify emulated distance mapping Tejun Heo
2011-02-16 12:52 ` [PATCHSET x86/numa] x86-64, NUMA: bring sanity to NUMA config/emulation Ingo Molnar
2011-02-16 14:17   ` Tejun Heo
2011-02-16 15:53     ` Ingo Molnar
2011-02-16 16:23       ` Tejun Heo
2011-02-16 17:29         ` Ingo Molnar
2011-02-16 17:33           ` Tejun Heo
2011-02-17 12:35           ` [boot crash] " Ingo Molnar
2011-02-17 12:48             ` Tejun Heo
2011-02-17 16:10               ` Ingo Molnar

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:867d41b dfblob:da5c501 dfblob:8f7a5eb dfblob:0cb59e5
dfblob:681bc0d dfblob:c490448 dfblob:51d0733 dfblob:e8b3b3c )
 OR (
bs:"[PATCH 23/33] x86-64, NUMA: Kill numa_nodes[]" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1297858867-25981-24-git-send-email-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=ankita@in.ibm.com \
    --cc=brgerst@gmail.com \
    --cc=gorcunov@gmail.com \
    --cc=hpa@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=rientjes@google.com \
    --cc=shaohui.zheng@intel.com \
    --cc=x86@kernel.org \
    --cc=yinghai@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox