linux-arch.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Yinghai Lu <yinghai@kernel.org>
To: Ingo Molnar <mingo@elte.hu>, Thomas Gleixner <tglx@linutronix.de>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Miller <davem@davemloft.net>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	Yinghai Lu <yinghai@kernel.org>,
	stable@kernel.org
Subject: [PATCH 02/50] x86,mm: fix 32bit numa sparsemem
Date: Tue, 13 Jul 2010 00:09:56 -0700	[thread overview]
Message-ID: <1279005044-24777-3-git-send-email-yinghai@kernel.org> (raw)
In-Reply-To: <1279005044-24777-1-git-send-email-yinghai@kernel.org>

Borislav Petkov <borislav.petkov@amd.com> reported that his 32-bit NUMA system has a problem:

[    0.000000] Reserving total of 4c00 pages for numa KVA remap
[    0.000000] kva_start_pfn ~ 32800 max_low_pfn ~ 375fe
[    0.000000] max_pfn = 238000
[    0.000000] 8202MB HIGHMEM available.
[    0.000000] 885MB LOWMEM available.
[    0.000000]   mapped low ram: 0 - 375fe000
[    0.000000]   low ram: 0 - 375fe000
[    0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 1000 1000 => 34e7000
[    0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 200 40 => 34c9d80
[    0.000000] alloc (nid=0 100000 - 7ee00000) (1000000 - ffffffffffffffff) 180 40 => 34e6140
[    0.000000] alloc (nid=1 80000000 - c7e60000) (1000000 - ffffffffffffffff) 240 40 => 80000000
[    0.000000] BUG: unable to handle kernel paging request at 40000000
[    0.000000] IP: [<c2c8cff1>] __alloc_memory_core_early+0x147/0x1d6
[    0.000000] *pdpt = 0000000000000000 *pde = f000ff53f000ff00
...
[    0.000000] Call Trace:
[    0.000000]  [<c2c8b4f8>] ? __alloc_bootmem_node+0x216/0x22f
[    0.000000]  [<c2c90c9b>] ? sparse_early_usemaps_alloc_node+0x5a/0x10b
[    0.000000]  [<c2c9149e>] ? sparse_init+0x1dc/0x499
[    0.000000]  [<c2c79118>] ? paging_init+0x168/0x1df
[    0.000000]  [<c2c780ff>] ? native_pagetable_setup_start+0xef/0x1bb

It looks like it allocates an address that is much too high for bootmem.

Try to cut the limit with get_max_mapped().

-v3: make alloc_bootmem_node able to fall back to other nodes,
     just like the old alloc_bootmem_node did.

This patch is needed for 2.6.34 and 2.6.35.

Reported-by: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: stable@kernel.org
---
 mm/bootmem.c    |   24 ++++++++++++++++++++----
 mm/page_alloc.c |    3 +++
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 58c66cc..142c84a 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -833,15 +833,24 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
 void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
 				   unsigned long align, unsigned long goal)
 {
+	void *ptr;
+
 	if (WARN_ON_ONCE(slab_is_available()))
 		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
 
 #ifdef CONFIG_NO_BOOTMEM
-	return __alloc_memory_core_early(pgdat->node_id, size, align,
+	ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+					 goal, -1ULL);
+	if (ptr)
+		return ptr;
+
+	ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
 					 goal, -1ULL);
 #else
-	return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
+	ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
 #endif
+
+	return ptr;
 }
 
 void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -977,14 +986,21 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
 				       unsigned long align, unsigned long goal)
 {
+	void *ptr;
+
 	if (WARN_ON_ONCE(slab_is_available()))
 		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
 
 #ifdef CONFIG_NO_BOOTMEM
-	return __alloc_memory_core_early(pgdat->node_id, size, align,
+	ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+				goal, ARCH_LOW_ADDRESS_LIMIT);
+	if (ptr)
+		return ptr;
+	ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
 				goal, ARCH_LOW_ADDRESS_LIMIT);
 #else
-	return ___alloc_bootmem_node(pgdat->bdata, size, align,
+	ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
 				goal, ARCH_LOW_ADDRESS_LIMIT);
 #endif
+	return ptr;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 431214b..cee2d79 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3634,6 +3634,9 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
 	int i;
 	void *ptr;
 
+	if (limit > get_max_mapped())
+		limit = get_max_mapped();
+
 	/* need to go over early_node_map to find out good range for node */
 	for_each_active_range_index_in_nid(i, nid) {
 		u64 addr;
-- 
1.6.4.2

WARNING: multiple messages have this Message-ID (diff)
From: Yinghai Lu <yinghai@kernel.org>
To: Ingo Molnar <mingo@elte.hu>, Thomas Gleixner <tglx@linutronix.de>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Miller <davem@davemloft.net>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	Yinghai Lu <yinghai@kernel.org>,
	stable@kernel.org
Subject: [PATCH 02/50] x86,mm: fix 32bit numa sparsemem
Date: Tue, 13 Jul 2010 00:09:56 -0700	[thread overview]
Message-ID: <1279005044-24777-3-git-send-email-yinghai@kernel.org> (raw)
Message-ID: <20100713070956.aDXk-cIWKueZA7bRUBpC8FmhS4ZzrxFZ_7OpRrwjTiE@z> (raw)
In-Reply-To: <1279005044-24777-1-git-send-email-yinghai@kernel.org>

Borislav Petkov <borislav.petkov@amd.com> reported that his 32-bit NUMA system has a problem:

[    0.000000] Reserving total of 4c00 pages for numa KVA remap
[    0.000000] kva_start_pfn ~ 32800 max_low_pfn ~ 375fe
[    0.000000] max_pfn = 238000
[    0.000000] 8202MB HIGHMEM available.
[    0.000000] 885MB LOWMEM available.
[    0.000000]   mapped low ram: 0 - 375fe000
[    0.000000]   low ram: 0 - 375fe000
[    0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 1000 1000 => 34e7000
[    0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 200 40 => 34c9d80
[    0.000000] alloc (nid=0 100000 - 7ee00000) (1000000 - ffffffffffffffff) 180 40 => 34e6140
[    0.000000] alloc (nid=1 80000000 - c7e60000) (1000000 - ffffffffffffffff) 240 40 => 80000000
[    0.000000] BUG: unable to handle kernel paging request at 40000000
[    0.000000] IP: [<c2c8cff1>] __alloc_memory_core_early+0x147/0x1d6
[    0.000000] *pdpt = 0000000000000000 *pde = f000ff53f000ff00
...
[    0.000000] Call Trace:
[    0.000000]  [<c2c8b4f8>] ? __alloc_bootmem_node+0x216/0x22f
[    0.000000]  [<c2c90c9b>] ? sparse_early_usemaps_alloc_node+0x5a/0x10b
[    0.000000]  [<c2c9149e>] ? sparse_init+0x1dc/0x499
[    0.000000]  [<c2c79118>] ? paging_init+0x168/0x1df
[    0.000000]  [<c2c780ff>] ? native_pagetable_setup_start+0xef/0x1bb

It looks like it allocates an address that is much too high for bootmem.

Try to cut the limit with get_max_mapped().

-v3: make alloc_bootmem_node able to fall back to other nodes,
     just like the old alloc_bootmem_node did.

This patch is needed for 2.6.34 and 2.6.35.

Reported-by: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: stable@kernel.org
---
 mm/bootmem.c    |   24 ++++++++++++++++++++----
 mm/page_alloc.c |    3 +++
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 58c66cc..142c84a 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -833,15 +833,24 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
 void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
 				   unsigned long align, unsigned long goal)
 {
+	void *ptr;
+
 	if (WARN_ON_ONCE(slab_is_available()))
 		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
 
 #ifdef CONFIG_NO_BOOTMEM
-	return __alloc_memory_core_early(pgdat->node_id, size, align,
+	ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+					 goal, -1ULL);
+	if (ptr)
+		return ptr;
+
+	ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
 					 goal, -1ULL);
 #else
-	return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
+	ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
 #endif
+
+	return ptr;
 }
 
 void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -977,14 +986,21 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
 				       unsigned long align, unsigned long goal)
 {
+	void *ptr;
+
 	if (WARN_ON_ONCE(slab_is_available()))
 		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
 
 #ifdef CONFIG_NO_BOOTMEM
-	return __alloc_memory_core_early(pgdat->node_id, size, align,
+	ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+				goal, ARCH_LOW_ADDRESS_LIMIT);
+	if (ptr)
+		return ptr;
+	ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
 				goal, ARCH_LOW_ADDRESS_LIMIT);
 #else
-	return ___alloc_bootmem_node(pgdat->bdata, size, align,
+	ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
 				goal, ARCH_LOW_ADDRESS_LIMIT);
 #endif
+	return ptr;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 431214b..cee2d79 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3634,6 +3634,9 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
 	int i;
 	void *ptr;
 
+	if (limit > get_max_mapped())
+		limit = get_max_mapped();
+
 	/* need to go over early_node_map to find out good range for node */
 	for_each_active_range_index_in_nid(i, nid) {
 		u64 addr;
-- 
1.6.4.2


  parent reply	other threads:[~2010-07-13  7:14 UTC|newest]

Thread overview: 103+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-07-13  7:09 [PATCH -v24 00/50] Use memblock with x86 Yinghai Lu
2010-07-13  7:09 ` Yinghai Lu
2010-07-13  7:09 ` [PATCH 01/50] x86, numa: fix boot without RAM on node0 again Yinghai Lu
2010-07-13  7:09   ` Yinghai Lu
2010-07-13  7:09 ` Yinghai Lu [this message]
2010-07-13  7:09   ` [PATCH 02/50] x86,mm: fix 32bit numa sparsemem Yinghai Lu
2010-07-13  7:09 ` [PATCH 03/50] lmb: rename to memblock Yinghai Lu
2010-07-13  7:09 ` [PATCH 04/50] memblock: Rename memblock_region to memblock_type and memblock_property to memblock_region Yinghai Lu
2010-07-13  7:09   ` Yinghai Lu
2010-07-13  7:09 ` [PATCH 05/50] memblock: No reason to include asm/memblock.h late Yinghai Lu
2010-07-13  7:09   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 06/50] memblock: Introduce for_each_memblock() and new accessors, and use it Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 07/50] memblock: Remove nid_range argument, arch provides memblock_nid_range() instead Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 08/50] memblock: Factor the lowest level alloc function Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 09/50] memblock: Expose MEMBLOCK_ALLOC_ANYWHERE Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 10/50] memblock: Introduce default allocation limit and use it to replace explicit ones Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 11/50] memblock: Remove rmo_size, burry it in arch/powerpc where it belongs Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 12/50] memblock: Change u64 to phys_addr_t Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 13/50] memblock: Remove unused memblock.debug struct member Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 14/50] memblock: Remove memblock_type.size and add memblock.memory_size instead Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 15/50] memblock: Move memblock arrays to static storage in memblock.c and make their size a variable Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 16/50] memblock: Add debug markers at the end of the array Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 17/50] memblock: Make memblock_find_region() out of memblock_alloc_region() Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 18/50] memblock: Define MEMBLOCK_ERROR internally instead of using ~(phys_addr_t)0 Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 19/50] memblock: Move memblock_init() to the bottom of the file Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 20/50] memblock: split memblock_find_base() out of __memblock_alloc_base() Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 21/50] memblock: Move functions around into a more sensible order Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 22/50] memblock: Add array resizing support Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 23/50] memblock: Add arch function to control coalescing of memblock memory regions Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 24/50] memblock: Add "start" argument to memblock_find_base() Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 25/50] memblock: NUMA allocate can now use early_pfn_map Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 26/50] memblock: Separate memblock_alloc_nid() and memblock_alloc_try_nid() Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 27/50] memblock: Make memblock_alloc_try_nid() fallback to MEMBLOCK_ALLOC_ANYWHERE Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 28/50] memblock: Add debugfs files to dump the arrays content Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 29/50] memblock: Prepare x86 to use memblock to replace early_res Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 30/50] memblock: Print new doubled array location info Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 31/50] memblock: Export MEMBLOCK_ERROR again Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 32/50] memblock: Prepare to include linux/memblock.h in core file Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 33/50] memblock: Add ARCH_DISCARD_MEMBLOCK to put memblock code to .init Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 34/50] memblock: Add memblock_find_in_range() Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 35/50] x86, memblock: Add memblock_x86_find_in_range_size() Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 36/50] bootmem, x86: Add weak version of reserve_bootmem_generic Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 37/50] x86, memblock: Add memblock_x86_to_bootmem() Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 38/50] x86,memblock: Add memblock_x86_reserve_range/memblock_x86_free_range Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 39/50] x86, memblock: Add get_free_all_memory_range() Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 40/50] x86, memblock: Add memblock_x86_register_active_regions() and memblock_x86_hole_size() Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 41/50] memblock: Add find_memory_core_early() Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 42/50] x86, memblock: Add memblock_x86_find_in_range_node() Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 43/50] x86, memblock: Add memblock_x86_free_memory_in_range() Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 44/50] x86, memblock: Add memblock_x86_memory_in_range() Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 45/50] x86, memblock: Use memblock_debug to control debug message print out Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13 20:37   ` Bjorn Helgaas
2010-07-13 20:40     ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 46/50] x86: Use memblock to replace early_res Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 47/50] x86: Replace e820_/_early string with memblock_ Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 48/50] x86: Remove not used early_res code Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 49/50] x86, memblock: Use memblock_memory_size()/memblock_free_memory_size() to get correct dma_reserve Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu
2010-07-13  7:10 ` [PATCH 50/50] x86: remove old bootmem code Yinghai Lu
2010-07-13  7:10   ` Yinghai Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1279005044-24777-3-git-send-email-yinghai@kernel.org \
    --to=yinghai@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=davem@davemloft.net \
    --cc=hannes@cmpxchg.org \
    --cc=hpa@zytor.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=stable@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).