All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: linux-kernel@vger.kernel.org, x86@kernel.org, yinghai@kernel.org,
	brgerst@gmail.com, gorcunov@gmail.com, shaohui.zheng@intel.com,
	rientjes@google.com, mingo@elte.hu, hpa@linux.intel.com
Cc: Tejun Heo <tj@kernel.org>
Subject: [PATCH 23/26] x86-64, NUMA: kill numa_nodes[]
Date: Sat, 12 Feb 2011 18:11:00 +0100	[thread overview]
Message-ID: <1297530663-26234-24-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1297530663-26234-1-git-send-email-tj@kernel.org>

numa_nodes[] doesn't carry any information which isn't present in
numa_meminfo.  Each entry is simply min/max range of all the memblks
for the node.  This is not only redundant but also inaccurate when
memblks for different nodes interleave - for example,
find_node_by_addr() can return the wrong nodeid.

Kill numa_nodes[] and always use numa_meminfo instead.

* nodes_cover_memory() is renamed to numa_meminfo_cover_memory() and
  now operates on numa_meminfo and returns bool.

* setup_node_bootmem() needs min/max range.  Compute the range on the
  fly.  setup_node_bootmem() invocation is restructured to use outer
  loop instead of hardcoding the double invocations.

* find_node_by_addr() now operates on numa_meminfo.

* setup_physnodes() builds physnodes[] from memblks.  This will go
  away when emulation code is updated to use struct numa_meminfo.

This patch also makes the following misc changes.

* Clearing of nodes_add[] is converted to memset().

* numa_add_memblk() in amd_numa_init() is moved down a bit for
  consistency.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Shaohui Zheng <shaohui.zheng@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/numa_64.h |    1 -
 arch/x86/mm/amdtopology_64.c   |    6 +--
 arch/x86/mm/numa_64.c          |   82 +++++++++++++++++++++++----------------
 arch/x86/mm/srat_64.c          |   22 ++---------
 4 files changed, 53 insertions(+), 58 deletions(-)

diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 867d41b..da5c501 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -27,7 +27,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 
 extern nodemask_t cpu_nodes_parsed __initdata;
 extern nodemask_t mem_nodes_parsed __initdata;
-extern struct bootnode numa_nodes[MAX_NUMNODES] __initdata;
 
 extern int __cpuinit numa_cpu_node(int cpu);
 extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index 8f7a5eb..0cb59e5 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -165,12 +165,8 @@ int __init amd_numa_init(void)
 		pr_info("Node %d MemBase %016lx Limit %016lx\n",
 			nodeid, base, limit);
 
-		numa_nodes[nodeid].start = base;
-		numa_nodes[nodeid].end = limit;
-		numa_add_memblk(nodeid, base, limit);
-
 		prevbase = base;
-
+		numa_add_memblk(nodeid, base, limit);
 		node_set(nodeid, mem_nodes_parsed);
 		node_set(nodeid, cpu_nodes_parsed);
 	}
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index ea3fb52..c0e45c7 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -46,8 +46,6 @@ static unsigned long __initdata nodemap_size;
 
 static struct numa_meminfo numa_meminfo __initdata;
 
-struct bootnode numa_nodes[MAX_NUMNODES] __initdata;
-
 /*
  * Given a shift value, try to populate memnodemap[]
  * Returns :
@@ -349,17 +347,17 @@ static int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
  * Sanity check to catch more bad NUMA configurations (they are amazingly
  * common).  Make sure the nodes cover all memory.
  */
-static int __init nodes_cover_memory(const struct bootnode *nodes)
+static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
 {
 	unsigned long numaram, e820ram;
 	int i;
 
 	numaram = 0;
-	for_each_node_mask(i, mem_nodes_parsed) {
-		unsigned long s = nodes[i].start >> PAGE_SHIFT;
-		unsigned long e = nodes[i].end >> PAGE_SHIFT;
+	for (i = 0; i < mi->nr_blks; i++) {
+		unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
+		unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
 		numaram += e - s;
-		numaram -= __absent_pages_in_range(i, s, e);
+		numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
 		if ((long)numaram < 0)
 			numaram = 0;
 	}
@@ -371,14 +369,14 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
 		printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
 		       (numaram << PAGE_SHIFT) >> 20,
 		       (e820ram << PAGE_SHIFT) >> 20);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static int __init numa_register_memblks(struct numa_meminfo *mi)
 {
-	int i;
+	int i, j, nid;
 
 	/* Account for nodes with cpus and no memory */
 	nodes_or(node_possible_map, mem_nodes_parsed, cpu_nodes_parsed);
@@ -398,21 +396,32 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 
 	/* for out of order entries */
 	sort_node_map();
-	if (!nodes_cover_memory(numa_nodes))
+	if (!numa_meminfo_cover_memory(mi))
 		return -EINVAL;
 
-	/* Finally register nodes. */
-	for_each_node_mask(i, node_possible_map)
-		setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end);
-
 	/*
-	 * Try again in case setup_node_bootmem missed one due to missing
-	 * bootmem.
+	 * Finally register nodes.  Do it twice in case setup_node_bootmem
+	 * missed one due to missing bootmem.
 	 */
-	for_each_node_mask(i, node_possible_map)
-		if (!node_online(i))
-			setup_node_bootmem(i, numa_nodes[i].start,
-					   numa_nodes[i].end);
+	for (i = 0; i < 2; i++) {
+		for_each_node_mask(nid, node_possible_map) {
+			u64 start = (u64)max_pfn << PAGE_SHIFT;
+			u64 end = 0;
+
+			if (node_online(nid))
+				continue;
+
+			for (j = 0; j < mi->nr_blks; j++) {
+				if (nid != mi->blk[j].nid)
+					continue;
+				start = min(mi->blk[j].start, start);
+				end = max(mi->blk[j].end, end);
+			}
+
+			if (start < end)
+				setup_node_bootmem(nid, start, end);
+		}
+	}
 
 	return 0;
 }
@@ -430,33 +439,41 @@ void __init numa_emu_cmdline(char *str)
 
 int __init find_node_by_addr(unsigned long addr)
 {
-	int ret = NUMA_NO_NODE;
+	const struct numa_meminfo *mi = &numa_meminfo;
 	int i;
 
-	for_each_node_mask(i, mem_nodes_parsed) {
+	for (i = 0; i < mi->nr_blks; i++) {
 		/*
 		 * Find the real node that this emulated node appears on.  For
 		 * the sake of simplicity, we only use a real node's starting
 		 * address to determine which emulated node it appears on.
 		 */
-		if (addr >= numa_nodes[i].start && addr < numa_nodes[i].end) {
-			ret = i;
-			break;
-		}
+		if (addr >= mi->blk[i].start && addr < mi->blk[i].end)
+			return mi->blk[i].nid;
 	}
-	return ret;
+	return NUMA_NO_NODE;
 }
 
 static int __init setup_physnodes(unsigned long start, unsigned long end)
 {
+	const struct numa_meminfo *mi = &numa_meminfo;
 	int ret = 0;
 	int i;
 
 	memset(physnodes, 0, sizeof(physnodes));
 
-	for_each_node_mask(i, mem_nodes_parsed) {
-		physnodes[i].start = numa_nodes[i].start;
-		physnodes[i].end = numa_nodes[i].end;
+	for (i = 0; i < mi->nr_blks; i++) {
+		int nid = mi->blk[i].nid;
+
+		if (physnodes[nid].start == physnodes[nid].end) {
+			physnodes[nid].start = mi->blk[i].start;
+			physnodes[nid].end = mi->blk[i].end;
+		} else {
+			physnodes[nid].start = min(physnodes[nid].start,
+						   mi->blk[i].start);
+			physnodes[nid].end = max(physnodes[nid].end,
+						 mi->blk[i].end);
+		}
 	}
 
 	/*
@@ -806,8 +823,6 @@ static int dummy_numa_init(void)
 	node_set(0, cpu_nodes_parsed);
 	node_set(0, mem_nodes_parsed);
 	numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
-	numa_nodes[0].start = 0;
-	numa_nodes[0].end = (u64)max_pfn << PAGE_SHIFT;
 
 	return 0;
 }
@@ -838,7 +853,6 @@ void __init initmem_init(void)
 		nodes_clear(node_possible_map);
 		nodes_clear(node_online_map);
 		memset(&numa_meminfo, 0, sizeof(numa_meminfo));
-		memset(numa_nodes, 0, sizeof(numa_nodes));
 		remove_all_active_ranges();
 
 		if (numa_init[i]() < 0)
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 51d0733..e8b3b3c 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -37,13 +37,9 @@ static __init int setup_node(int pxm)
 
 static __init void bad_srat(void)
 {
-	int i;
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		numa_nodes[i].start = numa_nodes[i].end = 0;
-		nodes_add[i].start = nodes_add[i].end = 0;
-	}
+	memset(nodes_add, 0, sizeof(nodes_add));
 }
 
 static __init inline int srat_disabled(void)
@@ -210,7 +206,6 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
 void __init
 acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 {
-	struct bootnode *nd;
 	unsigned long start, end;
 	int node, pxm;
 
@@ -243,18 +238,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 	printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
 	       start, end);
 
-	if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
-		nd = &numa_nodes[node];
-		if (!node_test_and_set(node, mem_nodes_parsed)) {
-			nd->start = start;
-			nd->end = end;
-		} else {
-			if (start < nd->start)
-				nd->start = start;
-			if (nd->end < end)
-				nd->end = end;
-		}
-	} else
+	if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE))
+		node_set(node, mem_nodes_parsed);
+	else
 		update_nodes_add(node, start, end);
 }
 
-- 
1.7.1


  parent reply	other threads:[~2011-02-12 17:15 UTC|newest]

Thread overview: 77+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-02-12 17:10 [PATCHSET x86/numa] x86-64, NUMA: bring sanity to NUMA configuration Tejun Heo
2011-02-12 17:10 ` [PATCH 01/26] x86-64, NUMA: Make dummy node initialization path similar to non-dummy ones Tejun Heo
2011-02-12 17:52   ` Yinghai Lu
2011-02-12 17:10 ` [PATCH 02/26] x86-64, NUMA: Simplify hotplug node handling in acpi_numa_memory_affinity_init() Tejun Heo
2011-02-12 17:47   ` Yinghai Lu
2011-02-12 17:56     ` Tejun Heo
2011-02-12 18:04       ` Yinghai Lu
2011-02-12 18:06         ` Tejun Heo
2011-02-12 18:13           ` Yinghai Lu
2011-02-14 11:25             ` Tejun Heo
2011-02-14 16:12               ` Yinghai Lu
2011-02-12 17:10 ` [PATCH 03/26] x86-64, NUMA: Drop @start/last_pfn from initmem_init() Tejun Heo
2011-02-12 17:58   ` Yinghai Lu
2011-02-12 18:03     ` Tejun Heo
2011-02-14 13:50   ` [PATCH UPDATED 03/26] x86, NUMA: Drop @start/last_pfn from initmem_init() initmem_init() Tejun Heo
2011-02-14 14:20     ` Ingo Molnar
2011-02-14 14:58       ` Tejun Heo
2011-02-14 19:03         ` Yinghai Lu
2011-02-14 19:31           ` Tejun Heo
2011-02-15  2:29             ` Ingo Molnar
2011-02-12 17:10 ` [PATCH 04/26] x86-64, NUMA: Unify {acpi|amd}_{numa_init|scan_nodes}() arguments and return values Tejun Heo
2011-02-12 18:39   ` Yinghai Lu
2011-02-14 11:29     ` Tejun Heo
2011-02-14 16:14       ` Yinghai Lu
2011-02-14 16:18         ` Tejun Heo
2011-02-14 18:00           ` Yinghai Lu
2011-02-12 17:10 ` [PATCH 05/26] x86-64, NUMA: Wrap acpi_numa_init() so that failure can be indicated by return value Tejun Heo
2011-02-12 17:10 ` [PATCH 06/26] x86-64, NUMA: Move *_numa_init() invocations into initmem_init() Tejun Heo
2011-02-14  6:10   ` Ankita Garg
2011-02-14 11:09     ` Tejun Heo
2011-02-14 13:51   ` [PATCH UPDATED 06/26] x86, " Tejun Heo
2011-02-14 14:21     ` Ingo Molnar
2011-02-12 17:10 ` [PATCH 07/26] x86-64, NUMA: Restructure initmem_init() Tejun Heo
2011-02-12 17:10 ` [PATCH 08/26] x86-64, NUMA: Use common {cpu|mem}_nodes_parsed Tejun Heo
2011-02-12 17:10 ` [PATCH 09/26] x86-64, NUMA: Remove local variable found from amd_numa_init() Tejun Heo
2011-02-12 17:10 ` [PATCH 10/26] x86-64, NUMA: Move apicid to numa mapping initialization from amd_scan_nodes() to amd_numa_init() Tejun Heo
2011-02-14 22:59   ` Cyrill Gorcunov
2011-02-15  9:36     ` Tejun Heo
2011-02-15 17:31       ` Cyrill Gorcunov
2011-02-15 17:54         ` Yinghai Lu
2011-02-15 18:01           ` Cyrill Gorcunov
2011-02-15 18:27             ` Cyrill Gorcunov
2011-02-15 19:41             ` Yinghai Lu
2011-02-12 17:10 ` [PATCH 11/26] x86-64, NUMA: Use common numa_nodes[] Tejun Heo
2011-02-12 17:10 ` [PATCH 12/26] x86-64, NUMA: Kill {acpi|amd}_get_nodes() Tejun Heo
2011-02-12 17:10 ` [PATCH 13/26] x86-64, NUMA: Factor out memblk handling into numa_{add|register}_memblk() Tejun Heo
2011-02-12 17:10 ` [PATCH 14/26] x86-64, NUMA: Unify use of memblk in all init methods Tejun Heo
2011-02-12 17:10 ` [PATCH 15/26] x86-64, NUMA: Unify the rest of memblk registration Tejun Heo
2011-02-13  0:45   ` Yinghai Lu
2011-02-14 11:32     ` Tejun Heo
2011-02-14 16:08       ` Yinghai Lu
2011-02-14 16:12         ` Tejun Heo
2011-02-14 16:17           ` Yinghai Lu
2011-02-14 16:22             ` Tejun Heo
2011-02-14 18:14               ` Yinghai Lu
2011-02-14 18:27                 ` Tejun Heo
2011-02-14 19:07                   ` Yinghai Lu
2011-02-14 19:30                     ` Tejun Heo
2011-02-14 19:35                       ` Yinghai Lu
2011-02-15  9:11                         ` Tejun Heo
2011-02-15  9:43                           ` Ingo Molnar
2011-02-15 16:49                             ` Tejun Heo
2011-02-16  8:41                               ` Ingo Molnar
2011-02-16  8:48                                 ` Ingo Molnar
2011-02-16  9:01                                   ` Tejun Heo
2011-02-16  9:31                                     ` Ingo Molnar
2011-02-12 17:10 ` [PATCH 16/26] x86-64, NUMA: Kill {acpi|amd|dummy}_scan_nodes() Tejun Heo
2011-02-12 17:10 ` [PATCH 17/26] x86-64, NUMA: Remove %NULL @nodeids handling from compute_hash_shift() Tejun Heo
2011-02-12 17:10 ` [PATCH 18/26] x86-64, NUMA: Introduce struct numa_meminfo Tejun Heo
2011-02-12 17:10 ` [PATCH 19/26] x86-64, NUMA: Separate out numa_cleanup_meminfo() Tejun Heo
2011-02-12 17:10 ` [PATCH 20/26] x86-64, NUMA: make numa_cleanup_meminfo() prettier Tejun Heo
2011-02-12 17:10 ` [PATCH 21/26] x86-64, NUMA: consolidate and improve memblk sanity checks Tejun Heo
2011-02-12 17:10 ` [PATCH 22/26] x86-64, NUMA: Add common find_node_by_addr() Tejun Heo
2011-02-12 17:11 ` Tejun Heo [this message]
2011-02-12 17:11 ` [PATCH 24/26] x86-64, NUMA: Rename cpu_nodes_parsed to numa_nodes_parsed Tejun Heo
2011-02-12 17:11 ` [PATCH 25/26] x86-64, NUMA: Kill mem_nodes_parsed Tejun Heo
2011-02-12 17:11 ` [PATCH 26/26] x86-64, NUMA: Implement generic node distance handling Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1297530663-26234-24-git-send-email-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=brgerst@gmail.com \
    --cc=gorcunov@gmail.com \
    --cc=hpa@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=rientjes@google.com \
    --cc=shaohui.zheng@intel.com \
    --cc=x86@kernel.org \
    --cc=yinghai@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.