From: Yinghai Lu <yinghai@kernel.org>
To: Tejun Heo <tj@kernel.org>
Cc: linux-kernel@vger.kernel.org, x86@kernel.org, brgerst@gmail.com,
gorcunov@gmail.com, shaohui.zheng@intel.com, rientjes@google.com,
mingo@elte.hu, hpa@linux.intel.com
Subject: Re: [PATCH 15/26] x86-64, NUMA: Unify the rest of memblk registration
Date: Sat, 12 Feb 2011 16:45:27 -0800 [thread overview]
Message-ID: <4D5729A7.7070706@kernel.org> (raw)
In-Reply-To: <1297530663-26234-16-git-send-email-tj@kernel.org>
On 02/12/2011 09:10 AM, Tejun Heo wrote:
> Move the remaining memblk registration logic from acpi_scan_nodes() to
> numa_register_memblks() and initmem_init().
>
> This applies nodes_cover_memory() sanity check, memory node sorting
> and node_online() checking, which were only applied to acpi, to all
> init methods.
>
> As all memblk registration is moved to common code, active range
> clearing is moved to initmem_init() too and removed from bad_srat().
>
> Signed-off-by: Tejun Heo <tj@kernel.org>
> Cc: Yinghai Lu <yinghai@kernel.org>
> Cc: Brian Gerst <brgerst@gmail.com>
> Cc: Cyrill Gorcunov <gorcunov@gmail.com>
> Cc: Shaohui Zheng <shaohui.zheng@intel.com>
> Cc: David Rientjes <rientjes@google.com>
> Cc: Ingo Molnar <mingo@elte.hu>
> Cc: H. Peter Anvin <hpa@linux.intel.com>
> ---
> arch/x86/mm/amdtopology_64.c | 6 ---
> arch/x86/mm/numa_64.c | 71 +++++++++++++++++++++++++++++++++++++++---
> arch/x86/mm/srat_64.c | 59 ----------------------------------
> 3 files changed, 66 insertions(+), 70 deletions(-)
>
> diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
> index 48ec374..9c9f46a 100644
> --- a/arch/x86/mm/amdtopology_64.c
> +++ b/arch/x86/mm/amdtopology_64.c
> @@ -262,11 +262,5 @@ void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
>
> int __init amd_scan_nodes(void)
> {
> - int i;
> -
> - for_each_node_mask(i, node_possible_map)
> - setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end);
> -
> - numa_init_array();
> return 0;
> }
> diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
> index 2e2ca94..062649d 100644
> --- a/arch/x86/mm/numa_64.c
> +++ b/arch/x86/mm/numa_64.c
> @@ -287,6 +287,37 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
> node_set_online(nodeid);
> }
>
> +/*
> + * Sanity check to catch more bad NUMA configurations (they are amazingly
> + * common). Make sure the nodes cover all memory.
> + */
> +static int __init nodes_cover_memory(const struct bootnode *nodes)
> +{
> + unsigned long numaram, e820ram;
> + int i;
> +
> + numaram = 0;
> + for_each_node_mask(i, mem_nodes_parsed) {
> + unsigned long s = nodes[i].start >> PAGE_SHIFT;
> + unsigned long e = nodes[i].end >> PAGE_SHIFT;
> + numaram += e - s;
> + numaram -= __absent_pages_in_range(i, s, e);
> + if ((long)numaram < 0)
> + numaram = 0;
> + }
> +
> + e820ram = max_pfn -
> + (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT) >> PAGE_SHIFT);
> + /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
> + if ((long)(e820ram - numaram) >= (1<<(20 - PAGE_SHIFT))) {
> + printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
> + (numaram << PAGE_SHIFT) >> 20,
> + (e820ram << PAGE_SHIFT) >> 20);
> + return 0;
> + }
> + return 1;
> +}
> +
> static int __init numa_register_memblks(void)
> {
> int i;
> @@ -349,6 +380,25 @@ static int __init numa_register_memblks(void)
> memblock_x86_register_active_regions(memblk_nodeid[i],
> node_memblk_range[i].start >> PAGE_SHIFT,
> node_memblk_range[i].end >> PAGE_SHIFT);
> +
> + /* for out of order entries */
> + sort_node_map();
> + if (!nodes_cover_memory(numa_nodes))
> + return -EINVAL;
> +
> + /* Finally register nodes. */
> + for_each_node_mask(i, node_possible_map)
> + setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end);
> +
> + /*
> + * Try again in case setup_node_bootmem missed one due to missing
> + * bootmem.
> + */
> + for_each_node_mask(i, node_possible_map)
> + if (!node_online(i))
> + setup_node_bootmem(i, numa_nodes[i].start,
> + numa_nodes[i].end);
> +
> return 0;
> }
Please don't put the setup_node_bootmem() calls into numa_register_memblks();
they are not related to it.
Putting the calls directly in initmem_init() would be more reasonable.
>
> @@ -713,15 +763,14 @@ static int dummy_numa_init(void)
> node_set(0, cpu_nodes_parsed);
> node_set(0, mem_nodes_parsed);
> numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
> + numa_nodes[0].start = 0;
> + numa_nodes[0].end = (u64)max_pfn << PAGE_SHIFT;
>
> return 0;
> }
>
> static int dummy_scan_nodes(void)
> {
> - setup_node_bootmem(0, 0, max_pfn << PAGE_SHIFT);
> - numa_init_array();
> -
> return 0;
> }
>
> @@ -757,6 +806,7 @@ void __init initmem_init(void)
> memset(node_memblk_range, 0, sizeof(node_memblk_range));
> memset(memblk_nodeid, 0, sizeof(memblk_nodeid));
> memset(numa_nodes, 0, sizeof(numa_nodes));
> + remove_all_active_ranges();
>
> if (numa_init[i]() < 0)
> continue;
> @@ -781,8 +831,19 @@ void __init initmem_init(void)
> if (numa_register_memblks() < 0)
> continue;
>
> - if (!scan_nodes[i]())
> - return;
> + if (scan_nodes[i]() < 0)
> + continue;
> +
> + for (j = 0; j < nr_cpu_ids; j++) {
> + int nid = early_cpu_to_node(j);
> +
> + if (nid == NUMA_NO_NODE)
> + continue;
> + if (!node_online(nid))
> + numa_clear_node(j);
> + }
> + numa_init_array();
> + return;
> }
> BUG();
> }
> diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
> index 755d157..4a2c33b 100644
> --- a/arch/x86/mm/srat_64.c
> +++ b/arch/x86/mm/srat_64.c
> @@ -44,7 +44,6 @@ static __init void bad_srat(void)
> numa_nodes[i].start = numa_nodes[i].end = 0;
> nodes_add[i].start = nodes_add[i].end = 0;
> }
> - remove_all_active_ranges();
> }
>
> static __init inline int srat_disabled(void)
> @@ -259,35 +258,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
> update_nodes_add(node, start, end);
> }
>
> -/* Sanity check to catch more bad SRATs (they are amazingly common).
> - Make sure the PXMs cover all memory. */
> -static int __init nodes_cover_memory(const struct bootnode *nodes)
> -{
> - int i;
> - unsigned long pxmram, e820ram;
> -
> - pxmram = 0;
> - for_each_node_mask(i, mem_nodes_parsed) {
> - unsigned long s = nodes[i].start >> PAGE_SHIFT;
> - unsigned long e = nodes[i].end >> PAGE_SHIFT;
> - pxmram += e - s;
> - pxmram -= __absent_pages_in_range(i, s, e);
> - if ((long)pxmram < 0)
> - pxmram = 0;
> - }
> -
> - e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
> - /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
> - if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
> - printk(KERN_ERR
> - "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
> - (pxmram << PAGE_SHIFT) >> 20,
> - (e820ram << PAGE_SHIFT) >> 20);
> - return 0;
> - }
> - return 1;
> -}
> -
> void __init acpi_numa_arch_fixup(void) {}
>
> int __init x86_acpi_numa_init(void)
> @@ -303,37 +273,8 @@ int __init x86_acpi_numa_init(void)
> /* Use the information discovered above to actually set up the nodes. */
> int __init acpi_scan_nodes(void)
> {
> - int i;
> -
> if (acpi_numa <= 0)
> return -1;
> -
> - /* for out of order entries in SRAT */
> - sort_node_map();
> - if (!nodes_cover_memory(numa_nodes)) {
> - bad_srat();
> - return -1;
> - }
> -
> - /* Finally register nodes */
> - for_each_node_mask(i, node_possible_map)
> - setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end);
> - /* Try again in case setup_node_bootmem missed one due
> - to missing bootmem */
> - for_each_node_mask(i, node_possible_map)
> - if (!node_online(i))
> - setup_node_bootmem(i, numa_nodes[i].start,
> - numa_nodes[i].end);
> -
> - for (i = 0; i < nr_cpu_ids; i++) {
> - int node = early_cpu_to_node(i);
> -
> - if (node == NUMA_NO_NODE)
> - continue;
> - if (!node_online(node))
> - numa_clear_node(i);
> - }
> - numa_init_array();
> return 0;
> }
>
next prev parent reply other threads:[~2011-02-13 0:46 UTC|newest]
Thread overview: 77+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-02-12 17:10 [PATCHSET x86/numa] x86-64, NUMA: bring sanity to NUMA configuration Tejun Heo
2011-02-12 17:10 ` [PATCH 01/26] x86-64, NUMA: Make dummy node initialization path similar to non-dummy ones Tejun Heo
2011-02-12 17:52 ` Yinghai Lu
2011-02-12 17:10 ` [PATCH 02/26] x86-64, NUMA: Simplify hotplug node handling in acpi_numa_memory_affinity_init() Tejun Heo
2011-02-12 17:47 ` Yinghai Lu
2011-02-12 17:56 ` Tejun Heo
2011-02-12 18:04 ` Yinghai Lu
2011-02-12 18:06 ` Tejun Heo
2011-02-12 18:13 ` Yinghai Lu
2011-02-14 11:25 ` Tejun Heo
2011-02-14 16:12 ` Yinghai Lu
2011-02-12 17:10 ` [PATCH 03/26] x86-64, NUMA: Drop @start/last_pfn from initmem_init() Tejun Heo
2011-02-12 17:58 ` Yinghai Lu
2011-02-12 18:03 ` Tejun Heo
2011-02-14 13:50 ` [PATCH UPDATED 03/26] x86, NUMA: Drop @start/last_pfn from initmem_init() initmem_init() Tejun Heo
2011-02-14 14:20 ` Ingo Molnar
2011-02-14 14:58 ` Tejun Heo
2011-02-14 19:03 ` Yinghai Lu
2011-02-14 19:31 ` Tejun Heo
2011-02-15 2:29 ` Ingo Molnar
2011-02-12 17:10 ` [PATCH 04/26] x86-64, NUMA: Unify {acpi|amd}_{numa_init|scan_nodes}() arguments and return values Tejun Heo
2011-02-12 18:39 ` Yinghai Lu
2011-02-14 11:29 ` Tejun Heo
2011-02-14 16:14 ` Yinghai Lu
2011-02-14 16:18 ` Tejun Heo
2011-02-14 18:00 ` Yinghai Lu
2011-02-12 17:10 ` [PATCH 05/26] x86-64, NUMA: Wrap acpi_numa_init() so that failure can be indicated by return value Tejun Heo
2011-02-12 17:10 ` [PATCH 06/26] x86-64, NUMA: Move *_numa_init() invocations into initmem_init() Tejun Heo
2011-02-14 6:10 ` Ankita Garg
2011-02-14 11:09 ` Tejun Heo
2011-02-14 13:51 ` [PATCH UPDATED 06/26] x86, " Tejun Heo
2011-02-14 14:21 ` Ingo Molnar
2011-02-12 17:10 ` [PATCH 07/26] x86-64, NUMA: Restructure initmem_init() Tejun Heo
2011-02-12 17:10 ` [PATCH 08/26] x86-64, NUMA: Use common {cpu|mem}_nodes_parsed Tejun Heo
2011-02-12 17:10 ` [PATCH 09/26] x86-64, NUMA: Remove local variable found from amd_numa_init() Tejun Heo
2011-02-12 17:10 ` [PATCH 10/26] x86-64, NUMA: Move apicid to numa mapping initialization from amd_scan_nodes() to amd_numa_init() Tejun Heo
2011-02-14 22:59 ` Cyrill Gorcunov
2011-02-15 9:36 ` Tejun Heo
2011-02-15 17:31 ` Cyrill Gorcunov
2011-02-15 17:54 ` Yinghai Lu
2011-02-15 18:01 ` Cyrill Gorcunov
2011-02-15 18:27 ` Cyrill Gorcunov
2011-02-15 19:41 ` Yinghai Lu
2011-02-12 17:10 ` [PATCH 11/26] x86-64, NUMA: Use common numa_nodes[] Tejun Heo
2011-02-12 17:10 ` [PATCH 12/26] x86-64, NUMA: Kill {acpi|amd}_get_nodes() Tejun Heo
2011-02-12 17:10 ` [PATCH 13/26] x86-64, NUMA: Factor out memblk handling into numa_{add|register}_memblk() Tejun Heo
2011-02-12 17:10 ` [PATCH 14/26] x86-64, NUMA: Unify use of memblk in all init methods Tejun Heo
2011-02-12 17:10 ` [PATCH 15/26] x86-64, NUMA: Unify the rest of memblk registration Tejun Heo
2011-02-13 0:45 ` Yinghai Lu [this message]
2011-02-14 11:32 ` Tejun Heo
2011-02-14 16:08 ` Yinghai Lu
2011-02-14 16:12 ` Tejun Heo
2011-02-14 16:17 ` Yinghai Lu
2011-02-14 16:22 ` Tejun Heo
2011-02-14 18:14 ` Yinghai Lu
2011-02-14 18:27 ` Tejun Heo
2011-02-14 19:07 ` Yinghai Lu
2011-02-14 19:30 ` Tejun Heo
2011-02-14 19:35 ` Yinghai Lu
2011-02-15 9:11 ` Tejun Heo
2011-02-15 9:43 ` Ingo Molnar
2011-02-15 16:49 ` Tejun Heo
2011-02-16 8:41 ` Ingo Molnar
2011-02-16 8:48 ` Ingo Molnar
2011-02-16 9:01 ` Tejun Heo
2011-02-16 9:31 ` Ingo Molnar
2011-02-12 17:10 ` [PATCH 16/26] x86-64, NUMA: Kill {acpi|amd|dummy}_scan_nodes() Tejun Heo
2011-02-12 17:10 ` [PATCH 17/26] x86-64, NUMA: Remove %NULL @nodeids handling from compute_hash_shift() Tejun Heo
2011-02-12 17:10 ` [PATCH 18/26] x86-64, NUMA: Introduce struct numa_meminfo Tejun Heo
2011-02-12 17:10 ` [PATCH 19/26] x86-64, NUMA: Separate out numa_cleanup_meminfo() Tejun Heo
2011-02-12 17:10 ` [PATCH 20/26] x86-64, NUMA: make numa_cleanup_meminfo() prettier Tejun Heo
2011-02-12 17:10 ` [PATCH 21/26] x86-64, NUMA: consolidate and improve memblk sanity checks Tejun Heo
2011-02-12 17:10 ` [PATCH 22/26] x86-64, NUMA: Add common find_node_by_addr() Tejun Heo
2011-02-12 17:11 ` [PATCH 23/26] x86-64, NUMA: kill numa_nodes[] Tejun Heo
2011-02-12 17:11 ` [PATCH 24/26] x86-64, NUMA: Rename cpu_nodes_parsed to numa_nodes_parsed Tejun Heo
2011-02-12 17:11 ` [PATCH 25/26] x86-64, NUMA: Kill mem_nodes_parsed Tejun Heo
2011-02-12 17:11 ` [PATCH 26/26] x86-64, NUMA: Implement generic node distance handling Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4D5729A7.7070706@kernel.org \
--to=yinghai@kernel.org \
--cc=brgerst@gmail.com \
--cc=gorcunov@gmail.com \
--cc=hpa@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=rientjes@google.com \
--cc=shaohui.zheng@intel.com \
--cc=tj@kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.