From mboxrd@z Thu Jan 1 00:00:00 1970 From: Yinghai Lu Subject: [PATCH 02/50] x86,mm: fix 32bit numa sparsemem Date: Tue, 13 Jul 2010 00:09:56 -0700 Message-ID: <1279005044-24777-3-git-send-email-yinghai@kernel.org> References: <1279005044-24777-1-git-send-email-yinghai@kernel.org> Return-path: Received: from rcsinet10.oracle.com ([148.87.113.121]:24501 "EHLO rcsinet10.oracle.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752898Ab0GMHOd (ORCPT ); Tue, 13 Jul 2010 03:14:33 -0400 In-Reply-To: <1279005044-24777-1-git-send-email-yinghai@kernel.org> Sender: linux-arch-owner@vger.kernel.org List-ID: To: Ingo Molnar , Thomas Gleixner , "H. Peter Anvin" , Andrew Morton , David Miller , Be Cc: Linus Torvalds , Johannes Weiner , linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org, Yinghai Lu , stable@kernel.org Borislav Petkov reported his 32bit numa has problem: [ 0.000000] Reserving total of 4c00 pages for numa KVA remap [ 0.000000] kva_start_pfn ~ 32800 max_low_pfn ~ 375fe [ 0.000000] max_pfn = 238000 [ 0.000000] 8202MB HIGHMEM available. [ 0.000000] 885MB LOWMEM available. [ 0.000000] mapped low ram: 0 - 375fe000 [ 0.000000] low ram: 0 - 375fe000 [ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 1000 1000 => 34e7000 [ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 200 40 => 34c9d80 [ 0.000000] alloc (nid=0 100000 - 7ee00000) (1000000 - ffffffffffffffff) 180 40 => 34e6140 [ 0.000000] alloc (nid=1 80000000 - c7e60000) (1000000 - ffffffffffffffff) 240 40 => 80000000 [ 0.000000] BUG: unable to handle kernel paging request at 40000000 [ 0.000000] IP: [] __alloc_memory_core_early+0x147/0x1d6 [ 0.000000] *pdpt = 0000000000000000 *pde = f000ff53f000ff00 ... [ 0.000000] Call Trace: [ 0.000000] [] ? __alloc_bootmem_node+0x216/0x22f [ 0.000000] [] ? sparse_early_usemaps_alloc_node+0x5a/0x10b [ 0.000000] [] ? sparse_init+0x1dc/0x499 [ 0.000000] [] ? paging_init+0x168/0x1df [ 0.000000] [] ? native_pagetable_setup_start+0xef/0x1bb looks like it allocate much high address for bootmem. try to cut limit with get_max_mapped() -v3: make alloc_bootmem_node could fallback to other node. just like old alloc_bootmem_node did need this patch for 2.6.34 and 2.6.35 Reported-by: Borislav Petkov Signed-off-by: Yinghai Lu Cc: stable@kernel.org --- mm/bootmem.c | 24 ++++++++++++++++++++---- mm/page_alloc.c | 3 +++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/mm/bootmem.c b/mm/bootmem.c index 58c66cc..142c84a 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -833,15 +833,24 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { + void *ptr; + if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); #ifdef CONFIG_NO_BOOTMEM - return __alloc_memory_core_early(pgdat->node_id, size, align, + ptr = __alloc_memory_core_early(pgdat->node_id, size, align, + goal, -1ULL); + if (ptr) + return ptr; + + ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, -1ULL); #else - return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); + ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); #endif + + return ptr; } void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, @@ -977,14 +986,21 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { + void *ptr; + if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); #ifdef CONFIG_NO_BOOTMEM - return __alloc_memory_core_early(pgdat->node_id, size, align, + ptr = __alloc_memory_core_early(pgdat->node_id, size, align, + goal, ARCH_LOW_ADDRESS_LIMIT); + if (ptr) + return ptr; + ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, ARCH_LOW_ADDRESS_LIMIT); #else - return ___alloc_bootmem_node(pgdat->bdata, size, align, + ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, ARCH_LOW_ADDRESS_LIMIT); #endif + return ptr; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 431214b..cee2d79 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3634,6 +3634,9 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align, int i; void *ptr; + if (limit > get_max_mapped()) + limit = get_max_mapped(); + /* need to go over early_node_map to find out good range for node */ for_each_active_range_index_in_nid(i, nid) { u64 addr; -- 1.6.4.2 From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from rcsinet10.oracle.com ([148.87.113.121]:24501 "EHLO rcsinet10.oracle.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752898Ab0GMHOd (ORCPT ); Tue, 13 Jul 2010 03:14:33 -0400 From: Yinghai Lu Subject: [PATCH 02/50] x86,mm: fix 32bit numa sparsemem Date: Tue, 13 Jul 2010 00:09:56 -0700 Message-ID: <1279005044-24777-3-git-send-email-yinghai@kernel.org> In-Reply-To: <1279005044-24777-1-git-send-email-yinghai@kernel.org> References: <1279005044-24777-1-git-send-email-yinghai@kernel.org> Sender: linux-arch-owner@vger.kernel.org List-ID: To: Ingo Molnar , Thomas Gleixner , "H. Peter Anvin" , Andrew Morton , David Miller , Benjamin Herrenschmidt Cc: Linus Torvalds , Johannes Weiner , linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org, Yinghai Lu , stable@kernel.org Message-ID: <20100713070956.aDXk-cIWKueZA7bRUBpC8FmhS4ZzrxFZ_7OpRrwjTiE@z> Borislav Petkov reported his 32bit numa has problem: [ 0.000000] Reserving total of 4c00 pages for numa KVA remap [ 0.000000] kva_start_pfn ~ 32800 max_low_pfn ~ 375fe [ 0.000000] max_pfn = 238000 [ 0.000000] 8202MB HIGHMEM available. [ 0.000000] 885MB LOWMEM available. [ 0.000000] mapped low ram: 0 - 375fe000 [ 0.000000] low ram: 0 - 375fe000 [ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 1000 1000 => 34e7000 [ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 200 40 => 34c9d80 [ 0.000000] alloc (nid=0 100000 - 7ee00000) (1000000 - ffffffffffffffff) 180 40 => 34e6140 [ 0.000000] alloc (nid=1 80000000 - c7e60000) (1000000 - ffffffffffffffff) 240 40 => 80000000 [ 0.000000] BUG: unable to handle kernel paging request at 40000000 [ 0.000000] IP: [] __alloc_memory_core_early+0x147/0x1d6 [ 0.000000] *pdpt = 0000000000000000 *pde = f000ff53f000ff00 ... [ 0.000000] Call Trace: [ 0.000000] [] ? __alloc_bootmem_node+0x216/0x22f [ 0.000000] [] ? sparse_early_usemaps_alloc_node+0x5a/0x10b [ 0.000000] [] ? sparse_init+0x1dc/0x499 [ 0.000000] [] ? paging_init+0x168/0x1df [ 0.000000] [] ? native_pagetable_setup_start+0xef/0x1bb looks like it allocate much high address for bootmem. try to cut limit with get_max_mapped() -v3: make alloc_bootmem_node could fallback to other node. just like old alloc_bootmem_node did need this patch for 2.6.34 and 2.6.35 Reported-by: Borislav Petkov Signed-off-by: Yinghai Lu Cc: stable@kernel.org --- mm/bootmem.c | 24 ++++++++++++++++++++---- mm/page_alloc.c | 3 +++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/mm/bootmem.c b/mm/bootmem.c index 58c66cc..142c84a 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -833,15 +833,24 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { + void *ptr; + if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); #ifdef CONFIG_NO_BOOTMEM - return __alloc_memory_core_early(pgdat->node_id, size, align, + ptr = __alloc_memory_core_early(pgdat->node_id, size, align, + goal, -1ULL); + if (ptr) + return ptr; + + ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, -1ULL); #else - return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); + ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); #endif + + return ptr; } void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, @@ -977,14 +986,21 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { + void *ptr; + if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); #ifdef CONFIG_NO_BOOTMEM - return __alloc_memory_core_early(pgdat->node_id, size, align, + ptr = __alloc_memory_core_early(pgdat->node_id, size, align, + goal, ARCH_LOW_ADDRESS_LIMIT); + if (ptr) + return ptr; + ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, ARCH_LOW_ADDRESS_LIMIT); #else - return ___alloc_bootmem_node(pgdat->bdata, size, align, + ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, ARCH_LOW_ADDRESS_LIMIT); #endif + return ptr; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 431214b..cee2d79 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3634,6 +3634,9 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align, int i; void *ptr; + if (limit > get_max_mapped()) + limit = get_max_mapped(); + /* need to go over early_node_map to find out good range for node */ for_each_active_range_index_in_nid(i, nid) { u64 addr; -- 1.6.4.2