linux-arch.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Yinghai Lu <yinghai@kernel.org>
To: Ingo Molnar <mingo@elte.hu>, Thomas Gleixner <tglx@linutronix.de>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Miller <davem@davemloft.net>,
	Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	Yinghai Lu <yinghai@kernel.org>
Subject: [PATCH 01/20] x86: add find_e820_area_node
Date: Sun, 21 Mar 2010 00:13:02 -0700	[thread overview]
Message-ID: <1269155601-18247-2-git-send-email-yinghai@kernel.org> (raw)
In-Reply-To: <1269155601-18247-1-git-send-email-yinghai@kernel.org>

David Miller pointed out that early_res has a problem finding node data on the
correct node in the cross-node case, i.e. when we have
node0: [0, 2g), [4g, 6g), [10g, 14g)
node1: [6g, 10g), [14g, 18g)

The problem exists in the x86 bits even before early_res is used as the bootmem replacement.
Once early_res has replaced bootmem, alloc_bootmem_node can still get a range on the correct node.

This patch fixes the problem at the stage before bootmem (or the early_res replacement for bootmem) is available.

For now the only user is x86 64-bit NUMA, which uses it to find node data.

The point is to make find_e820_area_node() use early_node_map.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 arch/x86/include/asm/e820.h |    1 +
 arch/x86/kernel/e820.c      |   15 +++++++++++++++
 arch/x86/mm/numa_64.c       |    4 ++--
 include/linux/mm.h          |    2 ++
 mm/page_alloc.c             |   37 +++++++++++++++++++++++--------------
 5 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 0e22296..b48f371 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -111,6 +111,7 @@ extern unsigned long end_user_pfn;
 
 extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
 extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
+u64 find_e820_area_node(int nid, u64 start, u64 end, u64 size, u64 align);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
 #include <linux/early_res.h>
 
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 740b440..05ee724 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -787,6 +787,21 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
 	return -1ULL;
 }
 
+u64 __init find_e820_area_node(int nid, u64 start, u64 end, u64 size, u64 align)
+{
+	u64 addr;
+	/*
+	 * need to call this function after e820_register_active_regions
+	 * so early_node_map[] is set
+	 */
+	addr = find_memory_core_early(nid, size, align, start, end);
+	if (addr != -1ULL)
+		return addr;
+
+	/* fallback, should already have start end in the node range */
+	return find_e820_area(start, end, size, align);
+}
+
 /*
  * pre allocated 4k and reserved it in e820
  */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 8948f47..ffc5ad5 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -174,7 +174,7 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
 	if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) &&
 	    end > (MAX_DMA32_PFN<<PAGE_SHIFT))
 		start = MAX_DMA32_PFN<<PAGE_SHIFT;
-	mem = find_e820_area(start, end, size, align);
+	mem = find_e820_area_node(nodeid, start, end, size, align);
 	if (mem != -1L)
 		return __va(mem);
 
@@ -184,7 +184,7 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
 		start = MAX_DMA32_PFN<<PAGE_SHIFT;
 	else
 		start = MAX_DMA_PFN<<PAGE_SHIFT;
-	mem = find_e820_area(start, end, size, align);
+	mem = find_e820_area_node(nodeid, start, end, size, align);
 	if (mem != -1L)
 		return __va(mem);
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e70f21b..5c2d17e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1160,6 +1160,8 @@ extern void free_bootmem_with_active_regions(int nid,
 						unsigned long max_low_pfn);
 int add_from_early_node_map(struct range *range, int az,
 				   int nr_range, int nid);
+u64 __init find_memory_core_early(int nid, u64 size, u64 align,
+					u64 goal, u64 limit);
 void *__alloc_memory_core_early(int nodeid, u64 size, u64 align,
 				 u64 goal, u64 limit);
 typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d03c946..eef3757 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3408,12 +3408,11 @@ int __init add_from_early_node_map(struct range *range, int az,
 	return nr_range;
 }
 
-#ifdef CONFIG_NO_BOOTMEM
-void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
+#ifdef CONFIG_HAVE_EARLY_RES
+u64 __init find_memory_core_early(int nid, u64 size, u64 align,
 					u64 goal, u64 limit)
 {
 	int i;
-	void *ptr;
 
 	/* need to go over early_node_map to find out good range for node */
 	for_each_active_range_index_in_nid(i, nid) {
@@ -3430,20 +3429,30 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
 		if (addr == -1ULL)
 			continue;
 
-#if 0
-		printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n",
-				nid,
-				ei_start, ei_last, goal, limit, size,
-				align, addr);
+		return addr;
+	}
+
+	return -1ULL;
+}
 #endif
 
-		ptr = phys_to_virt(addr);
-		memset(ptr, 0, size);
-		reserve_early_without_check(addr, addr + size, "BOOTMEM");
-		return ptr;
-	}
+#ifdef CONFIG_NO_BOOTMEM
+void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
+					u64 goal, u64 limit)
+{
+	void *ptr;
 
-	return NULL;
+	u64 addr;
+
+	addr = find_memory_core_early(nid, size, align, goal, limit);
+
+	if (addr == -1ULL)
+		return NULL;
+
+	ptr = phys_to_virt(addr);
+	memset(ptr, 0, size);
+	reserve_early_without_check(addr, addr + size, "BOOTMEM");
+	return ptr;
 }
 #endif
 
-- 
1.6.4.2

WARNING: multiple messages have this Message-ID (diff)
From: Yinghai Lu <yinghai@kernel.org>
To: Ingo Molnar <mingo@elte.hu>, Thomas Gleixner <tglx@linutronix.de>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Miller <davem@davemloft.net>,
	Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	Yinghai Lu <yinghai@kernel.org>
Subject: [PATCH 01/20] x86: add find_e820_area_node
Date: Sun, 21 Mar 2010 00:13:02 -0700	[thread overview]
Message-ID: <1269155601-18247-2-git-send-email-yinghai@kernel.org> (raw)
Message-ID: <20100321071302.gvBT52OLYYzGZWTMc4qSGP9QFpQfI2ONQDUX1M9fo8c@z> (raw)
In-Reply-To: <1269155601-18247-1-git-send-email-yinghai@kernel.org>

David Miller pointed out that early_res has a problem finding node data on the
correct node in the cross-node case, i.e. when we have
node0: [0, 2g), [4g, 6g), [10g, 14g)
node1: [6g, 10g), [14g, 18g)

The problem exists in the x86 bits even before early_res is used as the bootmem replacement.
Once early_res has replaced bootmem, alloc_bootmem_node can still get a range on the correct node.

This patch fixes the problem at the stage before bootmem (or the early_res replacement for bootmem) is available.

For now the only user is x86 64-bit NUMA, which uses it to find node data.

The point is to make find_e820_area_node() use early_node_map.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
 arch/x86/include/asm/e820.h |    1 +
 arch/x86/kernel/e820.c      |   15 +++++++++++++++
 arch/x86/mm/numa_64.c       |    4 ++--
 include/linux/mm.h          |    2 ++
 mm/page_alloc.c             |   37 +++++++++++++++++++++++--------------
 5 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 0e22296..b48f371 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -111,6 +111,7 @@ extern unsigned long end_user_pfn;
 
 extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
 extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
+u64 find_e820_area_node(int nid, u64 start, u64 end, u64 size, u64 align);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
 #include <linux/early_res.h>
 
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 740b440..05ee724 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -787,6 +787,21 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
 	return -1ULL;
 }
 
+u64 __init find_e820_area_node(int nid, u64 start, u64 end, u64 size, u64 align)
+{
+	u64 addr;
+	/*
+	 * need to call this function after e820_register_active_regions
+	 * so early_node_map[] is set
+	 */
+	addr = find_memory_core_early(nid, size, align, start, end);
+	if (addr != -1ULL)
+		return addr;
+
+	/* fallback, should already have start end in the node range */
+	return find_e820_area(start, end, size, align);
+}
+
 /*
  * pre allocated 4k and reserved it in e820
  */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 8948f47..ffc5ad5 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -174,7 +174,7 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
 	if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) &&
 	    end > (MAX_DMA32_PFN<<PAGE_SHIFT))
 		start = MAX_DMA32_PFN<<PAGE_SHIFT;
-	mem = find_e820_area(start, end, size, align);
+	mem = find_e820_area_node(nodeid, start, end, size, align);
 	if (mem != -1L)
 		return __va(mem);
 
@@ -184,7 +184,7 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
 		start = MAX_DMA32_PFN<<PAGE_SHIFT;
 	else
 		start = MAX_DMA_PFN<<PAGE_SHIFT;
-	mem = find_e820_area(start, end, size, align);
+	mem = find_e820_area_node(nodeid, start, end, size, align);
 	if (mem != -1L)
 		return __va(mem);
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e70f21b..5c2d17e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1160,6 +1160,8 @@ extern void free_bootmem_with_active_regions(int nid,
 						unsigned long max_low_pfn);
 int add_from_early_node_map(struct range *range, int az,
 				   int nr_range, int nid);
+u64 __init find_memory_core_early(int nid, u64 size, u64 align,
+					u64 goal, u64 limit);
 void *__alloc_memory_core_early(int nodeid, u64 size, u64 align,
 				 u64 goal, u64 limit);
 typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d03c946..eef3757 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3408,12 +3408,11 @@ int __init add_from_early_node_map(struct range *range, int az,
 	return nr_range;
 }
 
-#ifdef CONFIG_NO_BOOTMEM
-void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
+#ifdef CONFIG_HAVE_EARLY_RES
+u64 __init find_memory_core_early(int nid, u64 size, u64 align,
 					u64 goal, u64 limit)
 {
 	int i;
-	void *ptr;
 
 	/* need to go over early_node_map to find out good range for node */
 	for_each_active_range_index_in_nid(i, nid) {
@@ -3430,20 +3429,30 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
 		if (addr == -1ULL)
 			continue;
 
-#if 0
-		printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n",
-				nid,
-				ei_start, ei_last, goal, limit, size,
-				align, addr);
+		return addr;
+	}
+
+	return -1ULL;
+}
 #endif
 
-		ptr = phys_to_virt(addr);
-		memset(ptr, 0, size);
-		reserve_early_without_check(addr, addr + size, "BOOTMEM");
-		return ptr;
-	}
+#ifdef CONFIG_NO_BOOTMEM
+void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
+					u64 goal, u64 limit)
+{
+	void *ptr;
 
-	return NULL;
+	u64 addr;
+
+	addr = find_memory_core_early(nid, size, align, goal, limit);
+
+	if (addr == -1ULL)
+		return NULL;
+
+	ptr = phys_to_virt(addr);
+	memset(ptr, 0, size);
+	reserve_early_without_check(addr, addr + size, "BOOTMEM");
+	return ptr;
 }
 #endif
 
-- 
1.6.4.2


  parent reply	other threads:[~2010-03-21  7:13 UTC|newest]

Thread overview: 102+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-03-21  7:13 [PATCH 00/20] x86: early_res and irq_desc Yinghai Lu
2010-03-21  7:13 ` Yinghai Lu
2010-03-21  7:13 ` Yinghai Lu [this message]
2010-03-21  7:13   ` [PATCH 01/20] x86: add find_e820_area_node Yinghai Lu
2010-03-21  7:13 ` [PATCH 02/20] x86: add get_centaur_ram_top Yinghai Lu
2010-03-21  7:13   ` Yinghai Lu
2010-03-21  7:13 ` [PATCH 03/20] x86: make e820 to be static Yinghai Lu
2010-03-21  7:13   ` Yinghai Lu
2010-03-21  7:13 ` [PATCH 04/20] x86: use wake_system_ram_range instead of e820_any_mapped in agp path Yinghai Lu
2010-03-21  7:13   ` Yinghai Lu
2010-03-21  7:13 ` [PATCH 05/20] x86: make e820 to be initdata Yinghai Lu
2010-03-21  7:13   ` Yinghai Lu
2010-03-21  7:13 ` [PATCH 06/20] early_res: seperate common memmap func from e820.c to fw_memmap.c Yinghai Lu
2010-03-21  7:13   ` Yinghai Lu
2010-03-22  2:37   ` Benjamin Herrenschmidt
2010-03-22  2:46     ` Questions about SMP bootup control Zhu, Yijun (NSN - CN/Beijing)
2010-03-22  2:46       ` Zhu, Yijun (NSN - CN/Beijing)
2010-03-22  3:29       ` Andi Kleen
2010-03-22  7:45         ` Zhu, Yijun (NSN - CN/Beijing)
2010-03-22  3:56     ` [PATCH 06/20] early_res: seperate common memmap func from e820.c to fw_memmap.c Yinghai Lu
2010-03-22  4:00       ` David Miller
2010-03-22  4:28         ` Yinghai Lu
2010-03-22  4:33           ` David Miller
2010-03-22  9:28             ` Ingo Molnar
2010-03-22  9:28               ` Ingo Molnar
2010-03-22 11:30               ` Paul Mackerras
2010-03-22 13:05                 ` Ingo Molnar
2010-03-22 13:05                   ` Ingo Molnar
2010-03-22 21:04                   ` Benjamin Herrenschmidt
2010-03-22 21:20                     ` Ingo Molnar
2010-03-22 21:52                       ` Benjamin Herrenschmidt
2010-03-22 22:14                         ` Yinghai Lu
2010-03-22 18:18               ` [PATCH 06/20] early_res: seperate common memmap func from e820.c to fw_memmap.cy Thomas Gleixner
2010-03-22 19:37                 ` Ingo Molnar
2010-03-22 20:07                   ` Yinghai Lu
2010-03-22 21:08                     ` Benjamin Herrenschmidt
2010-03-22 22:09                     ` Thomas Gleixner
2010-03-22 22:25                       ` Yinghai Lu
2010-03-22 22:53                         ` Thomas Gleixner
2010-03-22 23:41                           ` Yinghai Lu
2010-03-23  0:45                             ` Thomas Gleixner
2010-03-23  1:04                               ` Yinghai Lu
2010-03-23  1:36                                 ` Thomas Gleixner
2010-03-23  6:01                                   ` Yinghai Lu
2010-03-23  8:02                                     ` Ingo Molnar
2010-03-23  9:02                                       ` Yinghai Lu
2010-03-23  9:48                                         ` Ingo Molnar
2010-03-24  4:29                                           ` Benjamin Herrenschmidt
2010-03-24  4:44                                             ` Benjamin Herrenschmidt
2010-03-24  5:54                                               ` Yinghai Lu
2010-03-24  7:43                                                 ` Benjamin Herrenschmidt
2010-03-24 18:37                                                   ` Yinghai Lu
2010-03-24  9:00                                               ` Ingo Molnar
2010-03-24  9:32                                                 ` Benjamin Herrenschmidt
2010-03-24  4:24                                       ` Benjamin Herrenschmidt
2010-03-24  6:05                                         ` Yinghai Lu
2010-03-22 20:47               ` [PATCH 06/20] early_res: seperate common memmap func from e820.c to fw_memmap.c Benjamin Herrenschmidt
2010-03-22 20:57                 ` Ingo Molnar
2010-03-22 21:54                   ` Benjamin Herrenschmidt
2010-03-23  8:53                     ` Geert Uytterhoeven
2010-03-23 11:16                     ` Ingo Molnar
2010-03-24  4:50                       ` Benjamin Herrenschmidt
2010-03-24  5:47                       ` Kyle Moffett
2010-03-22 21:57                   ` Paul Mackerras
2010-03-22 21:07                 ` Benjamin Herrenschmidt
2010-03-22 21:07                   ` Benjamin Herrenschmidt
2010-03-22 21:01               ` Benjamin Herrenschmidt
2010-03-22  5:12       ` Benjamin Herrenschmidt
2010-03-22  6:09         ` Yinghai Lu
2010-03-22  7:05           ` Eric W. Biederman
2010-03-21  7:13 ` [PATCH 07/20] irq: move some interrupt arch_* functions into struct irq_chip Yinghai Lu
2010-03-21  7:13   ` Yinghai Lu
2010-03-21  7:13 ` [PATCH 08/20] x86: fix out of order of gsi - full Yinghai Lu
2010-03-21  7:13   ` Yinghai Lu
2010-03-21  7:13 ` [PATCH 09/20] x86: set nr_irqs_gsi only in probe_nr_irqs_gsi Yinghai Lu
2010-03-21  7:13   ` Yinghai Lu
2010-03-21  7:13 ` [PATCH 10/20] x86: kill smpboot_hooks.h Yinghai Lu
2010-03-21  7:13   ` Yinghai Lu
2010-03-21  7:13 ` [PATCH 11/20] x86: use vector_desc instead of vector_irq Yinghai Lu
2010-03-21  7:13   ` Yinghai Lu
2010-03-21  7:13 ` [PATCH 12/20] genericirq: change ack/mask in irq_chip to take irq_desc instead of irq -- x86 and core Yinghai Lu
2010-03-21  7:13 ` [PATCH 13/20] genericirq: change ack/mask in irq_chip to take irq_desc instead of irq -- other arch Yinghai Lu
2010-03-21  7:13 ` [PATCH 14/20] genericirq: add set_irq_desc_chip/data Yinghai Lu
2010-03-21  7:13   ` Yinghai Lu
2010-03-21  7:13 ` [PATCH 15/20] x86/iommu/dmar: update iommu/inter_remapping to use desc Yinghai Lu
2010-03-21  7:13   ` Yinghai Lu
2010-03-21  7:13 ` [PATCH 16/20] x86: use num_processors for possible cpus Yinghai Lu
2010-03-21  7:13   ` Yinghai Lu
2010-03-21  7:13 ` [PATCH 17/20] x86: make 32bit apic flat to physflat switch like 64bit Yinghai Lu
2010-03-21  7:13   ` Yinghai Lu
2010-03-21  7:13 ` [PATCH 18/20] x86: remove arch_probe_nr_irqs Yinghai Lu
2010-03-21  7:13   ` Yinghai Lu
2010-03-21  7:13 ` [PATCH 19/20] x86/pci: ioh new version read all at same time Yinghai Lu
2010-03-21  7:13   ` Yinghai Lu
2010-03-22 16:16   ` Jesse Barnes
2010-03-22 16:16     ` Jesse Barnes
2010-03-22 19:32     ` Yinghai Lu
2010-03-22 19:32       ` Yinghai Lu
2010-03-21  7:13 ` [PATCH 20/20] x86/pci: add mmconf range into e820 for when it is from MSR with amd faml0h Yinghai Lu
2010-03-21  7:13   ` Yinghai Lu
2010-03-22  2:35 ` [PATCH 00/20] x86: early_res and irq_desc Benjamin Herrenschmidt
2010-03-22  3:26   ` Yinghai Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1269155601-18247-2-git-send-email-yinghai@kernel.org \
    --to=yinghai@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=davem@davemloft.net \
    --cc=ebiederm@xmission.com \
    --cc=hpa@zytor.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).