linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] x86: early check if one system is numaq v2
@ 2008-06-03 17:25 Yinghai Lu
  2008-06-04  2:32 ` [PATCH] x86: numa32 make sure get kva space Yinghai Lu
                   ` (2 more replies)
  0 siblings, 3 replies; 19+ messages in thread
From: Yinghai Lu @ 2008-06-03 17:25 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
  Cc: linux-kernel@vger.kernel.org


Check early whether the system really is a NUMA-Q, so that we can fall back
to single-node NUMA when it is not.
v2: use found_numaq in MP_processor_info and MP_bus_info

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/mpparse.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/mpparse.c
+++ linux-2.6/arch/x86/kernel/mpparse.c
@@ -70,7 +70,10 @@ static void __cpuinit MP_processor_info(
 		return;
 	}
 #ifdef CONFIG_X86_NUMAQ
-	apicid = mpc_apic_id(m, translation_table[mpc_record]);
+	if (found_numaq)
+		apicid = mpc_apic_id(m, translation_table[mpc_record]);
+	else
+		apicid = m->mpc_apicid;
 #else
 	apicid = m->mpc_apicid;
 #endif
@@ -91,7 +94,8 @@ static void __init MP_bus_info(struct mp
 	str[6] = 0;
 
 #ifdef CONFIG_X86_NUMAQ
-	mpc_oem_bus_info(m, str, translation_table[mpc_record]);
+	if (found_numaq)
+		mpc_oem_bus_info(m, str, translation_table[mpc_record]);
 #else
 	printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
 #endif
@@ -112,7 +116,8 @@ static void __init MP_bus_info(struct mp
 #endif
 	} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
 #ifdef CONFIG_X86_NUMAQ
-		mpc_oem_pci_bus(m, translation_table[mpc_record]);
+		if (found_numaq)
+			mpc_oem_pci_bus(m, translation_table[mpc_record]);
 #endif
 		clear_bit(m->mpc_busid, mp_bus_not_pci);
 #if defined(CONFIG_EISA) || defined (CONFIG_MCA)
@@ -321,6 +326,9 @@ static inline void mps_oem_check(struct 
 {
 	if (strncmp(oem, "IBM NUMA", 8))
 		printk("Warning!  May not be a NUMA-Q system!\n");
+	else
+		found_numaq = 1;
+
 	if (mpc->mpc_oemptr)
 		smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
 				 mpc->mpc_oemsize);
Index: linux-2.6/arch/x86/kernel/numaq_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/numaq_32.c
+++ linux-2.6/arch/x86/kernel/numaq_32.c
@@ -31,9 +31,12 @@
 #include <asm/numaq.h>
 #include <asm/topology.h>
 #include <asm/processor.h>
+#include <asm/mpspec.h>
 
 #define	MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
 
+int found_numaq;
+
 /*
  * Function: smp_dump_qct()
  *
@@ -67,13 +70,24 @@ static void __init smp_dump_qct(void)
 	}
 }
 
-/*
- * Unlike Summit, we don't really care to let the NUMA-Q
- * fall back to flat mode.  Don't compile for NUMA-Q
- * unless you really need it!
- */
+static __init void early_check_numaq(void)
+{
+	/*
+	 * Find possible boot-time SMP configuration:
+	 */
+	early_find_smp_config();
+	/*
+	 * get boot-time SMP configuration:
+	 */
+	if (smp_found_config)
+		early_get_smp_config();
+}
+
 int __init get_memcfg_numaq(void)
 {
+	early_check_numaq();
+	if (!found_numaq)
+		return 0;
 	smp_dump_qct();
 	return 1;
 }
Index: linux-2.6/include/asm-x86/numaq.h
===================================================================
--- linux-2.6.orig/include/asm-x86/numaq.h
+++ linux-2.6/include/asm-x86/numaq.h
@@ -28,6 +28,7 @@
 
 #ifdef CONFIG_X86_NUMAQ
 
+extern int found_numaq;
 extern int get_memcfg_numaq(void);
 
 /*
Index: linux-2.6/include/asm-x86/mpspec.h
===================================================================
--- linux-2.6.orig/include/asm-x86/mpspec.h
+++ linux-2.6/include/asm-x86/mpspec.h
@@ -21,11 +21,11 @@ extern int pic_mode;
 /* Each PCI slot may be a combo card with its own bus.  4 IRQ pins per slot. */
 #define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4)
 
+#endif
+
 extern void early_find_smp_config(void);
 extern void early_get_smp_config(void);
 
-#endif
-
 #if defined(CONFIG_MCA) || defined(CONFIG_EISA)
 extern int mp_bus_id_to_type[MAX_MP_BUSSES];
 #endif

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] x86: numa32 make sure get kva space
  2008-06-03 17:25 [PATCH] x86: early check if one system is numaq v2 Yinghai Lu
@ 2008-06-04  2:32 ` Yinghai Lu
  2008-06-04 10:26   ` Ingo Molnar
  2008-06-04  2:34 ` [PATCH] x86: move e820_register_active to e820.c Yinghai Lu
  2008-06-04 10:25 ` [PATCH] x86: early check if one system is numaq v2 Ingo Molnar
  2 siblings, 1 reply; 19+ messages in thread
From: Yinghai Lu @ 2008-06-04  2:32 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
  Cc: linux-kernel@vger.kernel.org


When the 1G/3G user/kernel split is used and little memory is installed, or
there is a big hole below 4G, max_low_pfn still uses 3G-128M.

Try going down from max_low_pfn until the KVA space is found; otherwise panic.

The 32-bit code still needs to be converted to use
e820_register_active_regions; that will come later.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index e119c53..e8d9a0a 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -306,6 +306,7 @@ unsigned long __init setup_memory(void)
 {
 	int nid;
 	unsigned long system_start_pfn, system_max_low_pfn;
+	long kva_target_pfn;
 
 	/*
 	 * When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -322,11 +323,17 @@ unsigned long __init setup_memory(void)
 	system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
 
 	system_max_low_pfn = max_low_pfn = find_max_low_pfn();
-	kva_start_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
-	kva_start_pfn = find_e820_area(kva_start_pfn<<PAGE_SHIFT,
-				max_low_pfn<<PAGE_SHIFT,
-				kva_pages<<PAGE_SHIFT,
-				PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT;
+	kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
+	do {
+		kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT,
+					max_low_pfn<<PAGE_SHIFT,
+					kva_pages<<PAGE_SHIFT,
+					PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT;
+		kva_target_pfn -= PTRS_PER_PTE;
+	} while (kva_start_pfn == -1UL && kva_target_pfn > min_low_pfn);
+
+	if (kva_start_pfn == -1UL)
+		panic("Can not get kva space\n");
 
 	printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
 		kva_start_pfn, max_low_pfn);

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH] x86: move e820_register_active to e820.c
  2008-06-03 17:25 [PATCH] x86: early check if one system is numaq v2 Yinghai Lu
  2008-06-04  2:32 ` [PATCH] x86: numa32 make sure get kva space Yinghai Lu
@ 2008-06-04  2:34 ` Yinghai Lu
  2008-06-04  2:35   ` [PATCH] x86: 32 bit use e820_register_active_regions Yinghai Lu
  2008-06-04 10:26   ` [PATCH] x86: move e820_register_active to e820.c Ingo Molnar
  2008-06-04 10:25 ` [PATCH] x86: early check if one system is numaq v2 Ingo Molnar
  2 siblings, 2 replies; 19+ messages in thread
From: Yinghai Lu @ 2008-06-04  2:34 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
  Cc: linux-kernel@vger.kernel.org


Move e820_register_active_regions() and its helpers from e820_64.c to e820.c,
to prepare for 32-bit to use them.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -764,3 +764,112 @@ u64 __init early_reserve_e820(u64 startt
 	return addr;
 }
 
+#ifdef CONFIG_X86_32
+# ifdef CONFIG_X86_PAE
+#  define MAX_ARCH_PFN		(1ULL<<(36-PAGE_SHIFT))
+# else
+#  define MAX_ARCH_PFN		(1ULL<<(32-PAGE_SHIFT))
+# endif
+#else /* !CONFIG_X86_32 */
+# define MAX_ARCH_PFN		(MAXMEM>>PAGE_SHIFT)	/* same limit the 64-bit code used before the move */
+#endif
+
+/*
+ * Last pfn which the user wants to use.
+ */
+unsigned long __initdata end_user_pfn = MAX_ARCH_PFN;
+
+/*
+ * Find the highest page frame number we have available
+ */
+unsigned long __init e820_end_of_ram(void)
+{
+	unsigned long last_pfn;
+	unsigned long max_arch_pfn = MAX_ARCH_PFN;
+
+	last_pfn = find_max_pfn_with_active_regions();
+
+	if (last_pfn > max_arch_pfn)
+		last_pfn = max_arch_pfn;
+	if (last_pfn > end_user_pfn)
+		last_pfn = end_user_pfn;
+
+	printk(KERN_INFO "last_pfn = %lu max_arch_pfn = %lu\n",
+			 last_pfn, max_arch_pfn);
+	return last_pfn;
+}
+
+/*
+ * Finds an active region in the address range from start_pfn to last_pfn and
+ * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
+ */
+int __init e820_find_active_region(const struct e820entry *ei,
+				  unsigned long start_pfn,
+				  unsigned long last_pfn,
+				  unsigned long *ei_startpfn,
+				  unsigned long *ei_endpfn)
+{
+	u64 align = PAGE_SIZE;
+
+	*ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
+	*ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;
+
+	/* Skip map entries smaller than a page */
+	if (*ei_startpfn >= *ei_endpfn)
+		return 0;
+
+	/* Skip if map is outside the node */
+	if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
+				    *ei_startpfn >= last_pfn)
+		return 0;
+
+	/* Check for overlaps */
+	if (*ei_startpfn < start_pfn)
+		*ei_startpfn = start_pfn;
+	if (*ei_endpfn > last_pfn)
+		*ei_endpfn = last_pfn;
+
+	/* Obey end_user_pfn to save on memmap */
+	if (*ei_startpfn >= end_user_pfn)
+		return 0;
+	if (*ei_endpfn > end_user_pfn)
+		*ei_endpfn = end_user_pfn;
+
+	return 1;
+}
+
+/* Walk the e820 map and register active regions within a node */
+void __init e820_register_active_regions(int nid, unsigned long start_pfn,
+					 unsigned long last_pfn)
+{
+	unsigned long ei_startpfn;
+	unsigned long ei_endpfn;
+	int i;
+
+	for (i = 0; i < e820.nr_map; i++)
+		if (e820_find_active_region(&e820.map[i],
+					    start_pfn, last_pfn,
+					    &ei_startpfn, &ei_endpfn))
+			add_active_range(nid, ei_startpfn, ei_endpfn);
+}
+
+/*
+ * Find the hole size (in bytes) in the memory range.
+ * @start: starting address of the memory range to scan
+ * @end: ending address of the memory range to scan
+ */
+u64 __init e820_hole_size(u64 start, u64 end)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long last_pfn = end >> PAGE_SHIFT;
+	unsigned long ei_startpfn, ei_endpfn, ram = 0;
+	int i;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		if (e820_find_active_region(&e820.map[i],
+					    start_pfn, last_pfn,
+					    &ei_startpfn, &ei_endpfn))
+			ram += ei_endpfn - ei_startpfn;
+	}
+	return end - start - ((u64)ram << PAGE_SHIFT);
+}
Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6/arch/x86/kernel/e820_64.c
@@ -42,29 +42,6 @@ unsigned long end_pfn;
 unsigned long max_pfn_mapped;
 
 /*
- * Last pfn which the user wants to use.
- */
-static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
-
-/*
- * Find the highest page frame number we have available
- */
-unsigned long __init e820_end_of_ram(void)
-{
-	unsigned long last_pfn;
-
-	last_pfn = find_max_pfn_with_active_regions();
-
-	if (last_pfn > MAXMEM>>PAGE_SHIFT)
-		last_pfn = MAXMEM>>PAGE_SHIFT;
-	if (last_pfn > end_user_pfn)
-		last_pfn = end_user_pfn;
-
-	printk(KERN_INFO "last_pfn = %lu\n", last_pfn);
-	return last_pfn;
-}
-
-/*
  * Mark e820 reserved areas as busy for the resource manager.
  */
 void __init e820_reserve_resources(void)
@@ -88,80 +65,6 @@ void __init e820_reserve_resources(void)
 	}
 }
 
-/*
- * Finds an active region in the address range from start_pfn to last_pfn and
- * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
- */
-static int __init e820_find_active_region(const struct e820entry *ei,
-					  unsigned long start_pfn,
-					  unsigned long last_pfn,
-					  unsigned long *ei_startpfn,
-					  unsigned long *ei_endpfn)
-{
-	*ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
-	*ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT;
-
-	/* Skip map entries smaller than a page */
-	if (*ei_startpfn >= *ei_endpfn)
-		return 0;
-
-	/* Skip if map is outside the node */
-	if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
-				    *ei_startpfn >= last_pfn)
-		return 0;
-
-	/* Check for overlaps */
-	if (*ei_startpfn < start_pfn)
-		*ei_startpfn = start_pfn;
-	if (*ei_endpfn > last_pfn)
-		*ei_endpfn = last_pfn;
-
-	/* Obey end_user_pfn to save on memmap */
-	if (*ei_startpfn >= end_user_pfn)
-		return 0;
-	if (*ei_endpfn > end_user_pfn)
-		*ei_endpfn = end_user_pfn;
-
-	return 1;
-}
-
-/* Walk the e820 map and register active regions within a node */
-void __init
-e820_register_active_regions(int nid, unsigned long start_pfn,
-							unsigned long last_pfn)
-{
-	unsigned long ei_startpfn;
-	unsigned long ei_endpfn;
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++)
-		if (e820_find_active_region(&e820.map[i],
-					    start_pfn, last_pfn,
-					    &ei_startpfn, &ei_endpfn))
-			add_active_range(nid, ei_startpfn, ei_endpfn);
-}
-
-/*
- * Find the hole size (in bytes) in the memory range.
- * @start: starting address of the memory range to scan
- * @end: ending address of the memory range to scan
- */
-unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
-{
-	unsigned long start_pfn = start >> PAGE_SHIFT;
-	unsigned long last_pfn = end >> PAGE_SHIFT;
-	unsigned long ei_startpfn, ei_endpfn, ram = 0;
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		if (e820_find_active_region(&e820.map[i],
-					    start_pfn, last_pfn,
-					    &ei_startpfn, &ei_endpfn))
-			ram += ei_endpfn - ei_startpfn;
-	}
-	return end - start - (ram << PAGE_SHIFT);
-}
-
 static void early_panic(char *msg)
 {
 	early_printk(msg);
Index: linux-2.6/include/asm-x86/e820.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820.h
+++ linux-2.6/include/asm-x86/e820.h
@@ -79,6 +79,8 @@ static inline void e820_mark_nosave_regi
 }
 #endif
 
+extern unsigned long end_user_pfn;
+
 extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
 extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
 extern void reserve_early(u64 start, u64 end, char *name);
@@ -86,6 +88,15 @@ extern void free_early(u64 start, u64 en
 extern void early_res_to_bootmem(u64 start, u64 end);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
 
+extern unsigned long e820_end_of_ram(void);
+extern int e820_find_active_region(const struct e820entry *ei,
+				  unsigned long start_pfn,
+				  unsigned long last_pfn,
+				  unsigned long *ei_startpfn,
+				  unsigned long *ei_endpfn);
+extern void e820_register_active_regions(int nid, unsigned long start_pfn,
+					 unsigned long end_pfn);
+extern u64 e820_hole_size(u64 start, u64 end);
 #endif /* __ASSEMBLY__ */
 
 #define ISA_START_ADDRESS	0xa0000
Index: linux-2.6/include/asm-x86/e820_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_64.h
+++ linux-2.6/include/asm-x86/e820_64.h
@@ -16,16 +16,11 @@
 #ifndef __ASSEMBLY__
 extern void setup_memory_region(void);
 extern void contig_e820_setup(void);
-extern unsigned long e820_end_of_ram(void);
 extern void e820_reserve_resources(void);
 extern int e820_any_non_reserved(unsigned long start, unsigned long end);
 extern int is_memory_any_valid(unsigned long start, unsigned long end);
 extern int e820_all_non_reserved(unsigned long start, unsigned long end);
 extern int is_memory_all_valid(unsigned long start, unsigned long end);
-extern unsigned long e820_hole_size(unsigned long start, unsigned long end);
-
-extern void e820_register_active_regions(int nid, unsigned long start_pfn,
-					 unsigned long end_pfn);
 
 extern void finish_e820_parsing(void);
 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] x86: 32 bit use e820_register_active_regions
  2008-06-04  2:34 ` [PATCH] x86: move e820_register_active to e820.c Yinghai Lu
@ 2008-06-04  2:35   ` Yinghai Lu
  2008-06-04  7:39     ` [PATCH] x86: e820 merge parse mem/memmap Yinghai Lu
                       ` (4 more replies)
  2008-06-04 10:26   ` [PATCH] x86: move e820_register_active to e820.c Ingo Molnar
  1 sibling, 5 replies; 19+ messages in thread
From: Yinghai Lu @ 2008-06-04  2:35 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
  Cc: linux-kernel@vger.kernel.org


This makes 32-bit behave more like 64-bit, and makes it smarter about
combining e820 and NUMA information.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6/arch/x86/kernel/setup_32.c
@@ -406,11 +406,12 @@ static void __init zone_sizes_init(void)
 	max_zone_pfns[ZONE_DMA] =
 		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+	remove_all_active_ranges();
 #ifdef CONFIG_HIGHMEM
 	max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
-	add_active_range(0, 0, highend_pfn);
+	e820_register_active_regions(0, 0, highend_pfn);
 #else
-	add_active_range(0, 0, max_low_pfn);
+	e820_register_active_regions(0, 0, max_low_pfn);
 #endif
 
 	free_area_init_nodes(max_zone_pfns);
@@ -583,6 +584,7 @@ static void __init relocate_initrd(void)
 
 void __init setup_bootmem_allocator(void)
 {
+	int i;
 	unsigned long bootmap_size, bootmap;
 	/*
 	 * Initialize the boot-time allocator (with low memory only):
@@ -604,7 +606,8 @@ void __init setup_bootmem_allocator(void
 		 min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
 	printk(KERN_INFO "  bootmap %08lx - %08lx\n",
 		 bootmap, bootmap + bootmap_size);
-	register_bootmem_low_pages(max_low_pfn);
+	for_each_online_node(i)
+		free_bootmem_with_active_regions(i, max_low_pfn);
 	early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
 
 #ifdef CONFIG_ACPI_SLEEP
@@ -745,11 +748,20 @@ void __init setup_arch(char **cmdline_p)
 	if (efi_enabled)
 		efi_init();
 
+	e820_register_active_regions(0, 0, -1UL);
+	/*
+	 * partially used pages are not usable - thus
+	 * we are rounding upwards:
+	 */
+	max_pfn = e820_end_of_ram();
+
 	/* update e820 for memory not covered by WB MTRRs */
-	find_max_pfn();
 	mtrr_bp_init();
-	if (mtrr_trim_uncached_memory(max_pfn))
-		find_max_pfn();
+	if (mtrr_trim_uncached_memory(max_pfn)) {
+		remove_all_active_ranges();
+		e820_register_active_regions(0, 0, -1UL);
+		max_pfn = e820_end_of_ram();
+	}
 
 	max_low_pfn = setup_memory();
 
Index: linux-2.6/arch/x86/mm/discontig_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/discontig_32.c
+++ linux-2.6/arch/x86/mm/discontig_32.c
@@ -120,10 +120,9 @@ int __init get_memcfg_numa_flat(void)
 {
 	printk("NUMA - single node, flat memory mode\n");
 
-	/* Run the memory configuration and find the top of memory. */
-	find_max_pfn();
 	node_start_pfn[0] = 0;
 	node_end_pfn[0] = max_pfn;
+	e820_register_active_regions(0, 0, max_pfn);
 	memory_present(0, 0, max_pfn);
 	node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn);
 
@@ -315,6 +314,11 @@ unsigned long __init setup_memory(void)
 	 * this space and use it to adjust the boundary between ZONE_NORMAL
 	 * and ZONE_HIGHMEM.
 	 */
+
+	/* call find_max_low_pfn at first, it could update max_pfn */
+	system_max_low_pfn = max_low_pfn = find_max_low_pfn();
+
+	remove_all_active_ranges();
 	get_memcfg_numa();
 
 	kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE);
@@ -322,7 +326,6 @@ unsigned long __init setup_memory(void)
 	/* partially used pages are not usable - thus round upwards */
 	system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
 
-	system_max_low_pfn = max_low_pfn = find_max_low_pfn();
 	kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
 	do {
 		kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT,
@@ -380,7 +383,6 @@ unsigned long __init setup_memory(void)
 
 void __init zone_sizes_init(void)
 {
-	int nid;
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 	max_zone_pfns[ZONE_DMA] =
@@ -390,15 +392,6 @@ void __init zone_sizes_init(void)
 	max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
 #endif
 
-	/* If SRAT has not registered memory, register it now */
-	if (find_max_pfn_with_active_regions() == 0) {
-		for_each_online_node(nid) {
-			if (node_has_online_mem(nid))
-				add_active_range(nid, node_start_pfn[nid],
-							node_end_pfn[nid]);
-		}
-	}
-
 	free_area_init_nodes(max_zone_pfns);
 	return;
 }
Index: linux-2.6/arch/x86/kernel/numaq_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/numaq_32.c
+++ linux-2.6/arch/x86/kernel/numaq_32.c
@@ -32,6 +32,7 @@
 #include <asm/topology.h>
 #include <asm/processor.h>
 #include <asm/mpspec.h>
+#include <asm/e820.h>
 
 #define	MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
 
@@ -61,6 +62,8 @@ static void __init smp_dump_qct(void)
 			node_end_pfn[node] = MB_TO_PAGES(
 				eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
 
+			e820_register_active_regions(node, node_start_pfn[node],
+							node_end_pfn[node]);
 			memory_present(node,
 				node_start_pfn[node], node_end_pfn[node]);
 			node_remap_size[node] = node_memmap_size_bytes(node,
Index: linux-2.6/arch/x86/kernel/e820_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_32.c
+++ linux-2.6/arch/x86/kernel/e820_32.c
@@ -207,69 +207,6 @@ void __init init_iomem_resources(struct 
 	}
 }
 
-/*
- * Find the highest page frame number we have available
- */
-void __init find_max_pfn(void)
-{
-	int i;
-
-	max_pfn = 0;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		unsigned long start, end;
-		/* RAM? */
-		if (e820.map[i].type != E820_RAM)
-			continue;
-		start = PFN_UP(e820.map[i].addr);
-		end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
-		if (start >= end)
-			continue;
-		if (end > max_pfn)
-			max_pfn = end;
-	}
-}
-
-/*
- * Register fully available low RAM pages with the bootmem allocator.
- */
-void __init register_bootmem_low_pages(unsigned long max_low_pfn)
-{
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		unsigned long curr_pfn, last_pfn, size;
-		/*
-		 * Reserve usable low memory
-		 */
-		if (e820.map[i].type != E820_RAM)
-			continue;
-		/*
-		 * We are rounding up the start address of usable memory:
-		 */
-		curr_pfn = PFN_UP(e820.map[i].addr);
-		if (curr_pfn >= max_low_pfn)
-			continue;
-		/*
-		 * ... and at the end of the usable range downwards:
-		 */
-		last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
-
-		if (last_pfn > max_low_pfn)
-			last_pfn = max_low_pfn;
-
-		/*
-		 * .. finally, did all the rounding and playing
-		 * around just make the area go away?
-		 */
-		if (last_pfn <= curr_pfn)
-			continue;
-
-		size = last_pfn - curr_pfn;
-		free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
-	}
-}
-
 void __init limit_regions(unsigned long long size)
 {
 	unsigned long long current_addr;
@@ -360,8 +297,9 @@ static int __init parse_memmap(char *arg
 		 * size before original memory map is
 		 * reset.
 		 */
-		find_max_pfn();
-		saved_max_pfn = max_pfn;
+		e820_register_active_regions(0, 0, -1UL);
+		saved_max_pfn = e820_end_of_ram();
+		remove_all_active_ranges();
 #endif
 		e820.nr_map = 0;
 		user_defined_memmap = 1;
Index: linux-2.6/arch/x86/kernel/srat_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/srat_32.c
+++ linux-2.6/arch/x86/kernel/srat_32.c
@@ -31,6 +31,7 @@
 #include <asm/srat.h>
 #include <asm/topology.h>
 #include <asm/smp.h>
+#include <asm/e820.h>
 
 /*
  * proximity macros and definitions
@@ -244,7 +245,8 @@ static int __init acpi20_parse_srat(stru
 		printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
 		       j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
 		node_read_chunk(chunk->nid, chunk);
-		add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn);
+		e820_register_active_regions(chunk->nid, chunk->start_pfn,
+					     min(chunk->end_pfn, max_pfn));
 	}
  
 	for_each_online_node(nid) {
Index: linux-2.6/include/asm-x86/e820_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_32.h
+++ linux-2.6/include/asm-x86/e820_32.h
@@ -21,8 +21,6 @@
 extern void setup_memory_map(void);
 extern void finish_e820_parsing(void);
 
-extern void find_max_pfn(void);
-extern void register_bootmem_low_pages(unsigned long max_low_pfn);
 extern void limit_regions(unsigned long long size);
 extern void init_iomem_resources(struct resource *code_resource,
 				 struct resource *data_resource,

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] x86: e820 merge parse mem/memmap
  2008-06-04  2:35   ` [PATCH] x86: 32 bit use e820_register_active_regions Yinghai Lu
@ 2008-06-04  7:39     ` Yinghai Lu
  2008-06-04 10:27     ` [PATCH] x86: 32 bit use e820_register_active_regions Ingo Molnar
                       ` (3 subsequent siblings)
  4 siblings, 0 replies; 19+ messages in thread
From: Yinghai Lu @ 2008-06-04  7:39 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
  Cc: linux-kernel@vger.kernel.org


Now that 32-bit supports e820_register_active_regions(), we can merge the
32-bit and 64-bit parsing of the mem= and memmap= options into e820.c.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -873,3 +873,89 @@ u64 __init e820_hole_size(u64 start, u64
 	}
 	return end - start - ((u64)ram << PAGE_SHIFT);
 }
+
+static void early_panic(char *msg)
+{
+	early_printk(msg);
+	panic(msg);
+}
+
+/* "mem=nopentium" disables the 4MB page tables. */
+static int __init parse_memopt(char *p)
+{
+	u64 mem_size;
+
+	if (!p)
+		return -EINVAL;
+
+#ifdef CONFIG_X86_32
+	if (!strcmp(p, "nopentium")) {
+		setup_clear_cpu_cap(X86_FEATURE_PSE);
+		return 0;
+	}
+#endif
+
+	mem_size = memparse(p, &p);
+	end_user_pfn = mem_size>>PAGE_SHIFT;
+	return 0;
+}
+early_param("mem", parse_memopt);
+
+static int userdef __initdata;
+
+static int __init parse_memmap_opt(char *p)
+{
+	char *oldp;
+	u64 start_at, mem_size;
+
+	if (!strcmp(p, "exactmap")) {
+#ifdef CONFIG_CRASH_DUMP
+		/*
+		 * If we are doing a crash dump, we still need to know
+		 * the real mem size before original memory map is
+		 * reset.
+		 */
+		e820_register_active_regions(0, 0, -1UL);
+		saved_max_pfn = e820_end_of_ram();
+		remove_all_active_ranges();
+#endif
+		e820.nr_map = 0;
+		userdef = 1;
+		return 0;
+	}
+
+	oldp = p;
+	mem_size = memparse(p, &p);
+	if (p == oldp)
+		return -EINVAL;
+
+	userdef = 1;
+	if (*p == '@') {
+		start_at = memparse(p+1, &p);
+		add_memory_region(start_at, mem_size, E820_RAM);
+	} else if (*p == '#') {
+		start_at = memparse(p+1, &p);
+		add_memory_region(start_at, mem_size, E820_ACPI);
+	} else if (*p == '$') {
+		start_at = memparse(p+1, &p);
+		add_memory_region(start_at, mem_size, E820_RESERVED);
+	} else {
+		end_user_pfn = (mem_size >> PAGE_SHIFT);
+	}
+	return *p == '\0' ? 0 : -EINVAL;
+}
+early_param("memmap", parse_memmap_opt);
+
+void __init finish_e820_parsing(void)
+{
+	if (userdef) {
+		int nr = e820.nr_map;
+
+		if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
+			early_panic("Invalid user supplied memory map");
+		e820.nr_map = nr;
+
+		printk(KERN_INFO "user-defined physical RAM map:\n");
+		e820_print_map("user");
+	}
+}
Index: linux-2.6/arch/x86/kernel/e820_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_32.c
+++ linux-2.6/arch/x86/kernel/e820_32.c
@@ -207,36 +207,6 @@ void __init init_iomem_resources(struct 
 	}
 }
 
-void __init limit_regions(unsigned long long size)
-{
-	unsigned long long current_addr;
-	int i;
-
-	e820_print_map("limit_regions start");
-	for (i = 0; i < e820.nr_map; i++) {
-		current_addr = e820.map[i].addr + e820.map[i].size;
-		if (current_addr < size)
-			continue;
-
-		if (e820.map[i].type != E820_RAM)
-			continue;
-
-		if (e820.map[i].addr >= size) {
-			/*
-			 * This region starts past the end of the
-			 * requested size, skip it completely.
-			 */
-			e820.nr_map = i;
-		} else {
-			e820.nr_map = i + 1;
-			e820.map[i].size -= current_addr - size;
-		}
-		e820_print_map("limit_regions endfor");
-		return;
-	}
-	e820_print_map("limit_regions endfunc");
-}
-
 /* Overridden in paravirt.c if CONFIG_PARAVIRT */
 char * __init __attribute__((weak)) memory_setup(void)
 {
@@ -249,93 +219,3 @@ void __init setup_memory_map(void)
 	e820_print_map(memory_setup());
 }
 
-static int __initdata user_defined_memmap;
-
-/*
- * "mem=nopentium" disables the 4MB page tables.
- * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
- * to <mem>, overriding the bios size.
- * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
- * <start> to <start>+<mem>, overriding the bios size.
- *
- * HPA tells me bootloaders need to parse mem=, so no new
- * option should be mem=  [also see Documentation/i386/boot.txt]
- */
-static int __init parse_mem(char *arg)
-{
-	if (!arg)
-		return -EINVAL;
-
-	if (strcmp(arg, "nopentium") == 0) {
-		setup_clear_cpu_cap(X86_FEATURE_PSE);
-	} else {
-		/* If the user specifies memory size, we
-		 * limit the BIOS-provided memory map to
-		 * that size. exactmap can be used to specify
-		 * the exact map. mem=number can be used to
-		 * trim the existing memory map.
-		 */
-		unsigned long long mem_size;
-
-		mem_size = memparse(arg, &arg);
-		limit_regions(mem_size);
-		user_defined_memmap = 1;
-	}
-	return 0;
-}
-early_param("mem", parse_mem);
-
-static int __init parse_memmap(char *arg)
-{
-	if (!arg)
-		return -EINVAL;
-
-	if (strcmp(arg, "exactmap") == 0) {
-#ifdef CONFIG_CRASH_DUMP
-		/* If we are doing a crash dump, we
-		 * still need to know the real mem
-		 * size before original memory map is
-		 * reset.
-		 */
-		e820_register_active_regions(0, 0, -1UL);
-		saved_max_pfn = e820_end_of_ram();
-		remove_all_active_ranges();
-#endif
-		e820.nr_map = 0;
-		user_defined_memmap = 1;
-	} else {
-		/* If the user specifies memory size, we
-		 * limit the BIOS-provided memory map to
-		 * that size. exactmap can be used to specify
-		 * the exact map. mem=number can be used to
-		 * trim the existing memory map.
-		 */
-		unsigned long long start_at, mem_size;
-
-		mem_size = memparse(arg, &arg);
-		if (*arg == '@') {
-			start_at = memparse(arg+1, &arg);
-			add_memory_region(start_at, mem_size, E820_RAM);
-		} else if (*arg == '#') {
-			start_at = memparse(arg+1, &arg);
-			add_memory_region(start_at, mem_size, E820_ACPI);
-		} else if (*arg == '$') {
-			start_at = memparse(arg+1, &arg);
-			add_memory_region(start_at, mem_size, E820_RESERVED);
-		} else {
-			limit_regions(mem_size);
-			user_defined_memmap = 1;
-		}
-	}
-	return 0;
-}
-early_param("memmap", parse_memmap);
-
-void __init finish_e820_parsing(void)
-{
-	if (user_defined_memmap) {
-		printk(KERN_INFO "user-defined physical RAM map:\n");
-		e820_print_map("user");
-	}
-}
-
Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6/arch/x86/kernel/e820_64.c
@@ -96,75 +96,6 @@ char *__init machine_specific_memory_set
 	return who;
 }
 
-static int __init parse_memopt(char *p)
-{
-	if (!p)
-		return -EINVAL;
-	end_user_pfn = memparse(p, &p);
-	end_user_pfn >>= PAGE_SHIFT;
-	return 0;
-}
-early_param("mem", parse_memopt);
-
-static int userdef __initdata;
-
-static int __init parse_memmap_opt(char *p)
-{
-	char *oldp;
-	unsigned long long start_at, mem_size;
-
-	if (!strcmp(p, "exactmap")) {
-#ifdef CONFIG_CRASH_DUMP
-		/*
-		 * If we are doing a crash dump, we still need to know
-		 * the real mem size before original memory map is
-		 * reset.
-		 */
-		e820_register_active_regions(0, 0, -1UL);
-		saved_max_pfn = e820_end_of_ram();
-		remove_all_active_ranges();
-#endif
-		e820.nr_map = 0;
-		userdef = 1;
-		return 0;
-	}
-
-	oldp = p;
-	mem_size = memparse(p, &p);
-	if (p == oldp)
-		return -EINVAL;
-
-	userdef = 1;
-	if (*p == '@') {
-		start_at = memparse(p+1, &p);
-		add_memory_region(start_at, mem_size, E820_RAM);
-	} else if (*p == '#') {
-		start_at = memparse(p+1, &p);
-		add_memory_region(start_at, mem_size, E820_ACPI);
-	} else if (*p == '$') {
-		start_at = memparse(p+1, &p);
-		add_memory_region(start_at, mem_size, E820_RESERVED);
-	} else {
-		end_user_pfn = (mem_size >> PAGE_SHIFT);
-	}
-	return *p == '\0' ? 0 : -EINVAL;
-}
-early_param("memmap", parse_memmap_opt);
-
-void __init finish_e820_parsing(void)
-{
-	if (userdef) {
-		int nr = e820.nr_map;
-
-		if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
-			early_panic("Invalid user supplied memory map");
-		e820.nr_map = nr;
-
-		printk(KERN_INFO "user-defined physical RAM map:\n");
-		e820_print_map("user");
-	}
-}
-
 int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
 {
 	int i;
Index: linux-2.6/include/asm-x86/e820.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820.h
+++ linux-2.6/include/asm-x86/e820.h
@@ -97,6 +97,8 @@ extern int e820_find_active_region(const
 extern void e820_register_active_regions(int nid, unsigned long start_pfn,
 					 unsigned long end_pfn);
 extern u64 e820_hole_size(u64 start, u64 end);
+extern void finish_e820_parsing(void);
+
 #endif /* __ASSEMBLY__ */
 
 #define ISA_START_ADDRESS	0xa0000
Index: linux-2.6/include/asm-x86/e820_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_32.h
+++ linux-2.6/include/asm-x86/e820_32.h
@@ -19,9 +19,7 @@
 #ifndef __ASSEMBLY__
 
 extern void setup_memory_map(void);
-extern void finish_e820_parsing(void);
 
-extern void limit_regions(unsigned long long size);
 extern void init_iomem_resources(struct resource *code_resource,
 				 struct resource *data_resource,
 				 struct resource *bss_resource);
Index: linux-2.6/include/asm-x86/e820_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_64.h
+++ linux-2.6/include/asm-x86/e820_64.h
@@ -22,8 +22,6 @@ extern int is_memory_any_valid(unsigned 
 extern int e820_all_non_reserved(unsigned long start, unsigned long end);
 extern int is_memory_all_valid(unsigned long start, unsigned long end);
 
-extern void finish_e820_parsing(void);
-
 #endif/*!__ASSEMBLY__*/
 
 #endif/*__E820_HEADER*/


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] x86: early check if one system is numaq v2
  2008-06-03 17:25 [PATCH] x86: early check if one system is numaq v2 Yinghai Lu
  2008-06-04  2:32 ` [PATCH] x86: numa32 make sure get kva space Yinghai Lu
  2008-06-04  2:34 ` [PATCH] x86: move e820_register_active to e820.c Yinghai Lu
@ 2008-06-04 10:25 ` Ingo Molnar
  2 siblings, 0 replies; 19+ messages in thread
From: Ingo Molnar @ 2008-06-04 10:25 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	linux-kernel@vger.kernel.org


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> so could fallback one node numa
> v2: using found_numaq with MP_processor_info and MP_bus_info
> 
> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

applied, thanks.

	Ingo

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] x86: numa32 make sure get kva space
  2008-06-04  2:32 ` [PATCH] x86: numa32 make sure get kva space Yinghai Lu
@ 2008-06-04 10:26   ` Ingo Molnar
  0 siblings, 0 replies; 19+ messages in thread
From: Ingo Molnar @ 2008-06-04 10:26 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	linux-kernel@vger.kernel.org


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> when 1/3 user/kernel is used, and less memory is installed, or have big hole
> below 4g, max_low_pfn is still using 3g-128m
> 
> try to go down from max_low_pfn until get it. otherwise will panic.
> 
> need to make 32bit code to use register_e820_active_regions...later
> 
> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

applied, thanks.

	Ingo

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] x86: move e820_register_active to e820.c
  2008-06-04  2:34 ` [PATCH] x86: move e820_register_active to e820.c Yinghai Lu
  2008-06-04  2:35   ` [PATCH] x86: 32 bit use e820_register_active_regions Yinghai Lu
@ 2008-06-04 10:26   ` Ingo Molnar
  1 sibling, 0 replies; 19+ messages in thread
From: Ingo Molnar @ 2008-06-04 10:26 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	linux-kernel@vger.kernel.org


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> to prepare 32 bit to use it
> 
> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

applied, thanks.

	Ingo

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] x86: 32 bit use e820_register_active_regions
  2008-06-04  2:35   ` [PATCH] x86: 32 bit use e820_register_active_regions Yinghai Lu
  2008-06-04  7:39     ` [PATCH] x86: e820 merge parse mem/memmap Yinghai Lu
@ 2008-06-04 10:27     ` Ingo Molnar
  2008-06-04 20:21     ` [PATCH] x86: e820 max_arch_pfn typo fix for 64 bit Yinghai Lu
                       ` (2 subsequent siblings)
  4 siblings, 0 replies; 19+ messages in thread
From: Ingo Molnar @ 2008-06-04 10:27 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	linux-kernel@vger.kernel.org


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> so 32bit is more like 64bit, and more smart with combining e820 and numa.
> 
> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

applied, thanks.

	Ingo

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] x86: e820 max_arch_pfn typo fix for 64 bit
  2008-06-04  2:35   ` [PATCH] x86: 32 bit use e820_register_active_regions Yinghai Lu
  2008-06-04  7:39     ` [PATCH] x86: e820 merge parse mem/memmap Yinghai Lu
  2008-06-04 10:27     ` [PATCH] x86: 32 bit use e820_register_active_regions Ingo Molnar
@ 2008-06-04 20:21     ` Yinghai Lu
  2008-06-04 22:47       ` H. Peter Anvin
  2008-06-06 21:43     ` [PATCH] x86: shrink pages should check all Yinghai Lu
  2008-06-10 19:55     ` [PATCH] x86: e820 merge parse mem/memmap Yinghai Lu
  4 siblings, 1 reply; 19+ messages in thread
From: Yinghai Lu @ 2008-06-04 20:21 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
  Cc: linux-kernel@vger.kernel.org


should use right shift

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -771,7 +771,7 @@ u64 __init early_reserve_e820(u64 startt
 #  define MAX_ARCH_PFN		(1ULL<<(32-PAGE_SHIFT))
 # endif
 #else /* CONFIG_X86_32 */
-# define MAX_ARCH_PFN MAXMEM<<PAGE_SHIFT
+# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT
 #endif
 
 /*

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] x86: e820 max_arch_pfn typo fix for 64 bit
  2008-06-04 20:21     ` [PATCH] x86: e820 max_arch_pfn typo fix for 64 bit Yinghai Lu
@ 2008-06-04 22:47       ` H. Peter Anvin
  0 siblings, 0 replies; 19+ messages in thread
From: H. Peter Anvin @ 2008-06-04 22:47 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, Andrew Morton,
	linux-kernel@vger.kernel.org

Yinghai Lu wrote:
> should use right shift
> 
> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
> 
> Index: linux-2.6/arch/x86/kernel/e820.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/e820.c
> +++ linux-2.6/arch/x86/kernel/e820.c
> @@ -771,7 +771,7 @@ u64 __init early_reserve_e820(u64 startt
>  #  define MAX_ARCH_PFN		(1ULL<<(32-PAGE_SHIFT))
>  # endif
>  #else /* CONFIG_X86_32 */
> -# define MAX_ARCH_PFN MAXMEM<<PAGE_SHIFT
> +# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT
>  #endif
>  

Should also use parens (operator inside a macro).

	-hpa

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] x86: shrink pages should check all
  2008-06-04  2:35   ` [PATCH] x86: 32 bit use e820_register_active_regions Yinghai Lu
                       ` (2 preceding siblings ...)
  2008-06-04 20:21     ` [PATCH] x86: e820 max_arch_pfn typo fix for 64 bit Yinghai Lu
@ 2008-06-06 21:43     ` Yinghai Lu
  2008-06-07  1:53       ` [PATCH] x86: numa32 use find_e820_area to find KVA ram on node Yinghai Lu
                         ` (2 more replies)
  2008-06-10 19:55     ` [PATCH] x86: e820 merge parse mem/memmap Yinghai Lu
  4 siblings, 3 replies; 19+ messages in thread
From: Yinghai Lu @ 2008-06-06 21:43 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
  Cc: linux-kernel@vger.kernel.org


we are using register_e820_active_regions instead of add_active_range directly,
so the end_pfn value in early_node_map can differ from node_end_pfn.

need to make shrink_active_range smarter.

shrink_active_range is only used on x86 32 bit; should it be moved back into
some file in arch/x86?

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/mm/discontig_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/discontig_32.c
+++ linux-2.6/arch/x86/mm/discontig_32.c
@@ -279,7 +279,7 @@ static unsigned long calculate_numa_rema
 
 		node_end_pfn[nid] -= size;
 		node_remap_start_pfn[nid] = node_end_pfn[nid];
-		shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]);
+		shrink_active_range(nid, node_end_pfn[nid]);
 	}
 	printk("Reserving total of %ld pages for numa KVA remap\n",
 			reserve_pages);
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -997,8 +997,7 @@ extern void free_area_init_node(int nid,
 extern void free_area_init_nodes(unsigned long *max_zone_pfn);
 extern void add_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
-extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
-						unsigned long new_end_pfn);
+extern void shrink_active_range(unsigned int nid, unsigned long new_end_pfn);
 extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -3592,25 +3592,51 @@ void __init add_active_range(unsigned in
 /**
  * shrink_active_range - Shrink an existing registered range of PFNs
  * @nid: The node id the range is on that should be shrunk
- * @old_end_pfn: The old end PFN of the range
  * @new_end_pfn: The new PFN of the range
  *
  * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node.
- * The map is kept at the end physical page range that has already been
- * registered with add_active_range(). This function allows an arch to shrink
- * an existing registered range.
+ * The map is kept near the end physical page range that has already been
+ * registered. This function allows an arch to shrink an existing registered
+ * range.
  */
-void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
-						unsigned long new_end_pfn)
+void __init shrink_active_range(unsigned int nid, unsigned long new_end_pfn)
 {
-	int i;
+	int i,j;
+	int removed = 0;
 
 	/* Find the old active region end and shrink */
-	for_each_active_range_index_in_nid(i, nid)
-		if (early_node_map[i].end_pfn == old_end_pfn) {
+	for_each_active_range_index_in_nid(i, nid) {
+		if (early_node_map[i].start_pfn >= new_end_pfn) {
+			/* clear it */
+			early_node_map[i].end_pfn = 0;
+			removed = 1;
+			continue;
+		}
+		if (early_node_map[i].end_pfn > new_end_pfn) {
 			early_node_map[i].end_pfn = new_end_pfn;
-			break;
+			continue;
+		}
+	}
+
+	if (!removed)
+		return;
+
+	/* remove the blank ones */
+	for (i = nr_nodemap_entries - 1; i > 0; i--) {
+		if (early_node_map[i].nid != nid)
+			continue;
+		if (early_node_map[i].end_pfn)
+			continue;
+		/* we found it, get rid of it */
+		for (j = i; j < nr_nodemap_entries - 1; j++) {
+			early_node_map[j].nid = early_node_map[j+1].nid;
+			early_node_map[j].start_pfn = early_node_map[j+1].start_pfn;
+			early_node_map[j].end_pfn = early_node_map[j+1].end_pfn;
 		}
+		j = nr_nodemap_entries - 1;
+		memset(&early_node_map[j], 0, sizeof(early_node_map[j]));
+		nr_nodemap_entries--;
+	}
 }
 
 /**

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] x86: numa32 use find_e820_area to find KVA ram on node
  2008-06-06 21:43     ` [PATCH] x86: shrink pages should check all Yinghai Lu
@ 2008-06-07  1:53       ` Yinghai Lu
  2008-06-10  9:53         ` Ingo Molnar
  2008-06-07  1:54       ` [PATCH] x86: fix fail with 64g above system with numa32 Yinghai Lu
  2008-06-09  2:39       ` [PATCH] x86: shrink pages should check all v2 Yinghai Lu
  2 siblings, 1 reply; 19+ messages in thread
From: Yinghai Lu @ 2008-06-07  1:53 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
  Cc: linux-kernel@vger.kernel.org


don't assume we can use ram near the end of every node,
especially since some systems have less memory, and they could have
the kva address and kva ram all below max_low_pfn

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/mm/discontig_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/discontig_32.c
+++ linux-2.6/arch/x86/mm/discontig_32.c
@@ -225,17 +225,21 @@ static unsigned long calculate_numa_rema
 {
 	int nid;
 	unsigned long size, reserve_pages = 0;
-	unsigned long pfn;
 
 	for_each_online_node(nid) {
-		unsigned old_end_pfn = node_end_pfn[nid];
+		u64 node_end_target;
+		u64 node_end_final;
 
 		/*
 		 * The acpi/srat node info can show hot-add memroy zones
 		 * where memory could be added but not currently present.
 		 */
+		printk("node %d pfn: [%lx - %lx]\n",
+			nid, node_start_pfn[nid], node_end_pfn[nid]);
 		if (node_start_pfn[nid] > max_pfn)
 			continue;
+		if (!node_end_pfn[nid])
+			continue;
 		if (node_end_pfn[nid] > max_pfn)
 			node_end_pfn[nid] = max_pfn;
 
@@ -247,37 +251,40 @@ static unsigned long calculate_numa_rema
 		/* now the roundup is correct, convert to PAGE_SIZE pages */
 		size = size * PTRS_PER_PTE;
 
-		/*
-		 * Validate the region we are allocating only contains valid
-		 * pages.
-		 */
-		for (pfn = node_end_pfn[nid] - size;
-		     pfn < node_end_pfn[nid]; pfn++)
-			if (!page_is_ram(pfn))
-				break;
+		node_end_target = round_down(node_end_pfn[nid] - size,
+						 PTRS_PER_PTE);
+		node_end_target <<= PAGE_SHIFT;
+		do {
+			node_end_final = find_e820_area(node_end_target,
+					((u64)node_end_pfn[nid])<<PAGE_SHIFT,
+						((u64)size)<<PAGE_SHIFT,
+						LARGE_PAGE_BYTES);
+			node_end_target -= LARGE_PAGE_BYTES;
+		} while (node_end_final == -1ULL &&
+			 (node_end_target>>PAGE_SHIFT) > (node_start_pfn[nid]));
 
-		if (pfn != node_end_pfn[nid])
-			size = 0;
+		if (node_end_final == -1ULL)
+			panic("Can not get kva ram\n");
 
 		printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
 				size, nid);
 		node_remap_size[nid] = size;
 		node_remap_offset[nid] = reserve_pages;
 		reserve_pages += size;
-		printk("Shrinking node %d from %ld pages to %ld pages\n",
-			nid, node_end_pfn[nid], node_end_pfn[nid] - size);
+		printk("Shrinking node %d from %ld pages to %lld pages\n",
+			nid, node_end_pfn[nid], node_end_final>>PAGE_SHIFT);
 
-		if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) {
-			/*
-			 * Align node_end_pfn[] and node_remap_start_pfn[] to
-			 * pmd boundary. remap_numa_kva will barf otherwise.
-			 */
-			printk("Shrinking node %d further by %ld pages for proper alignment\n",
-				nid, node_end_pfn[nid] & (PTRS_PER_PTE-1));
-			size +=  node_end_pfn[nid] & (PTRS_PER_PTE-1);
-		}
+		/*
+		 *  prevent kva address below max_low_pfn want it on system
+		 *  with less memory later.
+		 *  layout will be: KVA address , KVA RAM
+		 */
+		if ((node_end_final>>PAGE_SHIFT) < max_low_pfn)
+			reserve_early(node_end_final,
+				      node_end_final+(((u64)size)<<PAGE_SHIFT),
+				      "KVA RAM");
 
-		node_end_pfn[nid] -= size;
+		node_end_pfn[nid] = node_end_final>>PAGE_SHIFT;
 		node_remap_start_pfn[nid] = node_end_pfn[nid];
 		shrink_active_range(nid, node_end_pfn[nid]);
 	}

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] x86: fix fail with 64g above system with numa32
  2008-06-06 21:43     ` [PATCH] x86: shrink pages should check all Yinghai Lu
  2008-06-07  1:53       ` [PATCH] x86: numa32 use find_e820_area to find KVA ram on node Yinghai Lu
@ 2008-06-07  1:54       ` Yinghai Lu
  2008-06-10  9:53         ` Ingo Molnar
  2008-06-09  2:39       ` [PATCH] x86: shrink pages should check all v2 Yinghai Lu
  2 siblings, 1 reply; 19+ messages in thread
From: Yinghai Lu @ 2008-06-07  1:54 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
  Cc: linux-kernel@vger.kernel.org


Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

--- a/arch/x86/kernel/srat_32.c	2008-06-06 17:37:51.000000000 -0700
+++ b/arch/x86/kernel/srat_32.c	2008-06-06 17:38:38.000000000 -0700
@@ -251,7 +251,7 @@ static int __init acpi20_parse_srat(stru
  
 	for_each_online_node(nid) {
 		unsigned long start = node_start_pfn[nid];
-		unsigned long end = node_end_pfn[nid];
+		unsigned long end = min(node_end_pfn[nid],max_pfn);
 
 		memory_present(nid, start, end);
 		node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] x86: shrink pages should check all v2
  2008-06-06 21:43     ` [PATCH] x86: shrink pages should check all Yinghai Lu
  2008-06-07  1:53       ` [PATCH] x86: numa32 use find_e820_area to find KVA ram on node Yinghai Lu
  2008-06-07  1:54       ` [PATCH] x86: fix fail with 64g above system with numa32 Yinghai Lu
@ 2008-06-09  2:39       ` Yinghai Lu
  2008-06-09 10:15         ` Ingo Molnar
  2 siblings, 1 reply; 19+ messages in thread
From: Yinghai Lu @ 2008-06-09  2:39 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
  Cc: linux-kernel@vger.kernel.org


we are using register_e820_active_regions instead of add_active_range directly,
so the end_pfn value in early_node_map can differ from node_end_pfn.

need to make shrink_active_range smarter.

shrink_active_range is only used on x86 32 bit; should it be moved back into
some file in arch/x86?

v2: use memcpy instead

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -997,8 +997,7 @@ extern void free_area_init_node(int nid,
 extern void free_area_init_nodes(unsigned long *max_zone_pfn);
 extern void add_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
-extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
-						unsigned long new_end_pfn);
+extern void shrink_active_range(unsigned int nid, unsigned long new_end_pfn);
 extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -3592,25 +3592,49 @@ void __init add_active_range(unsigned in
 /**
  * shrink_active_range - Shrink an existing registered range of PFNs
  * @nid: The node id the range is on that should be shrunk
- * @old_end_pfn: The old end PFN of the range
  * @new_end_pfn: The new PFN of the range
  *
  * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node.
- * The map is kept at the end physical page range that has already been
- * registered with add_active_range(). This function allows an arch to shrink
- * an existing registered range.
+ * The map is kept near the end physical page range that has already been
+ * registered. This function allows an arch to shrink an existing registered
+ * range.
  */
-void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
-						unsigned long new_end_pfn)
+void __init shrink_active_range(unsigned int nid, unsigned long new_end_pfn)
 {
-	int i;
+	int i, j;
+	int removed = 0;
 
 	/* Find the old active region end and shrink */
-	for_each_active_range_index_in_nid(i, nid)
-		if (early_node_map[i].end_pfn == old_end_pfn) {
+	for_each_active_range_index_in_nid(i, nid) {
+		if (early_node_map[i].start_pfn >= new_end_pfn) {
+			/* clear it */
+			early_node_map[i].end_pfn = 0;
+			removed = 1;
+			continue;
+		}
+		if (early_node_map[i].end_pfn > new_end_pfn) {
 			early_node_map[i].end_pfn = new_end_pfn;
-			break;
+			continue;
 		}
+	}
+
+	if (!removed)
+		return;
+
+	/* remove the blank ones */
+	for (i = nr_nodemap_entries - 1; i > 0; i--) {
+		if (early_node_map[i].nid != nid)
+			continue;
+		if (early_node_map[i].end_pfn)
+			continue;
+		/* we found it, get rid of it */
+		for (j = i; j < nr_nodemap_entries - 1; j++)
+			memcpy(&early_node_map[j], &early_node_map[j+1],
+				sizeof(early_node_map[j]));
+		j = nr_nodemap_entries - 1;
+		memset(&early_node_map[j], 0, sizeof(early_node_map[j]));
+		nr_nodemap_entries--;
+	}
 }
 
 /**
Index: linux-2.6/arch/x86/mm/discontig_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/discontig_32.c
+++ linux-2.6/arch/x86/mm/discontig_32.c
@@ -279,7 +279,7 @@ static unsigned long calculate_numa_rema
 
 		node_end_pfn[nid] -= size;
 		node_remap_start_pfn[nid] = node_end_pfn[nid];
-		shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]);
+		shrink_active_range(nid, node_end_pfn[nid]);
 	}
 	printk("Reserving total of %ld pages for numa KVA remap\n",
 			reserve_pages);

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] x86: shrink pages should check all v2
  2008-06-09  2:39       ` [PATCH] x86: shrink pages should check all v2 Yinghai Lu
@ 2008-06-09 10:15         ` Ingo Molnar
  0 siblings, 0 replies; 19+ messages in thread
From: Ingo Molnar @ 2008-06-09 10:15 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	linux-kernel@vger.kernel.org


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> we are using register_e820_active_regions instead of add_active_range 
> directly. so end_pfn could be different between the value in 
> early_node_map to node_end_pfn.
> 
> need to make shrink_active_range more smart.
> 
> shrink_active_range is only used x86 32 bit, or need to move back in 
> some file in arch/x86?
> 
> v2: use memcpy instead

applied, thanks.

	Ingo

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] x86: numa32 use find_e820_area to find KVA ram on node
  2008-06-07  1:53       ` [PATCH] x86: numa32 use find_e820_area to find KVA ram on node Yinghai Lu
@ 2008-06-10  9:53         ` Ingo Molnar
  0 siblings, 0 replies; 19+ messages in thread
From: Ingo Molnar @ 2008-06-10  9:53 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	linux-kernel@vger.kernel.org


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> don't assume we can use ram near end of every node.
> esp some system has less memory. and they could have
> kva address and kva ram all below max_low_pfn

applied to tip/x86/mpparse, thanks Yinghai.

	Ingo

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] x86: fix fail with 64g above system with numa32
  2008-06-07  1:54       ` [PATCH] x86: fix fail with 64g above system with numa32 Yinghai Lu
@ 2008-06-10  9:53         ` Ingo Molnar
  0 siblings, 0 replies; 19+ messages in thread
From: Ingo Molnar @ 2008-06-10  9:53 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton,
	linux-kernel@vger.kernel.org


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> +++ b/arch/x86/kernel/srat_32.c	2008-06-06 17:38:38.000000000 -0700
> @@ -251,7 +251,7 @@ static int __init acpi20_parse_srat(stru
>   
>  	for_each_online_node(nid) {
>  		unsigned long start = node_start_pfn[nid];
> -		unsigned long end = node_end_pfn[nid];
> +		unsigned long end = min(node_end_pfn[nid],max_pfn);
>  
>  		memory_present(nid, start, end);

applied to tip/x86/mpparse, thanks.

	Ingo

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH] x86: e820 merge parse mem/memmap
  2008-06-04  2:35   ` [PATCH] x86: 32 bit use e820_register_active_regions Yinghai Lu
                       ` (3 preceding siblings ...)
  2008-06-06 21:43     ` [PATCH] x86: shrink pages should check all Yinghai Lu
@ 2008-06-10 19:55     ` Yinghai Lu
  4 siblings, 0 replies; 19+ messages in thread
From: Yinghai Lu @ 2008-06-10 19:55 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
  Cc: linux-kernel@vger.kernel.org

resend. it seems Ingo missed it.

now that 32 bit supports e820_register_active_regions,

we can merge the mem/memmap parsing

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -873,3 +873,89 @@ u64 __init e820_hole_size(u64 start, u64
 	}
 	return end - start - ((u64)ram << PAGE_SHIFT);
 }
+
+static void early_panic(char *msg)
+{
+	early_printk(msg);
+	panic(msg);
+}
+
+/* "mem=nopentium" disables the 4MB page tables. */
+static int __init parse_memopt(char *p)
+{
+	u64 mem_size;
+
+	if (!p)
+		return -EINVAL;
+
+#ifdef CONFIG_X86_32
+	if (!strcmp(p, "nopentium")) {
+		setup_clear_cpu_cap(X86_FEATURE_PSE);
+		return 0;
+	}
+#endif
+
+	mem_size = memparse(p, &p);
+	end_user_pfn = mem_size>>PAGE_SHIFT;
+	return 0;
+}
+early_param("mem", parse_memopt);
+
+static int userdef __initdata;
+
+static int __init parse_memmap_opt(char *p)
+{
+	char *oldp;
+	u64 start_at, mem_size;
+
+	if (!strcmp(p, "exactmap")) {
+#ifdef CONFIG_CRASH_DUMP
+		/*
+		 * If we are doing a crash dump, we still need to know
+		 * the real mem size before original memory map is
+		 * reset.
+		 */
+		e820_register_active_regions(0, 0, -1UL);
+		saved_max_pfn = e820_end_of_ram();
+		remove_all_active_ranges();
+#endif
+		e820.nr_map = 0;
+		userdef = 1;
+		return 0;
+	}
+
+	oldp = p;
+	mem_size = memparse(p, &p);
+	if (p == oldp)
+		return -EINVAL;
+
+	userdef = 1;
+	if (*p == '@') {
+		start_at = memparse(p+1, &p);
+		add_memory_region(start_at, mem_size, E820_RAM);
+	} else if (*p == '#') {
+		start_at = memparse(p+1, &p);
+		add_memory_region(start_at, mem_size, E820_ACPI);
+	} else if (*p == '$') {
+		start_at = memparse(p+1, &p);
+		add_memory_region(start_at, mem_size, E820_RESERVED);
+	} else {
+		end_user_pfn = (mem_size >> PAGE_SHIFT);
+	}
+	return *p == '\0' ? 0 : -EINVAL;
+}
+early_param("memmap", parse_memmap_opt);
+
+void __init finish_e820_parsing(void)
+{
+	if (userdef) {
+		int nr = e820.nr_map;
+
+		if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
+			early_panic("Invalid user supplied memory map");
+		e820.nr_map = nr;
+
+		printk(KERN_INFO "user-defined physical RAM map:\n");
+		e820_print_map("user");
+	}
+}
Index: linux-2.6/arch/x86/kernel/e820_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_32.c
+++ linux-2.6/arch/x86/kernel/e820_32.c
@@ -207,36 +207,6 @@ void __init init_iomem_resources(struct 
 	}
 }
 
-void __init limit_regions(unsigned long long size)
-{
-	unsigned long long current_addr;
-	int i;
-
-	e820_print_map("limit_regions start");
-	for (i = 0; i < e820.nr_map; i++) {
-		current_addr = e820.map[i].addr + e820.map[i].size;
-		if (current_addr < size)
-			continue;
-
-		if (e820.map[i].type != E820_RAM)
-			continue;
-
-		if (e820.map[i].addr >= size) {
-			/*
-			 * This region starts past the end of the
-			 * requested size, skip it completely.
-			 */
-			e820.nr_map = i;
-		} else {
-			e820.nr_map = i + 1;
-			e820.map[i].size -= current_addr - size;
-		}
-		e820_print_map("limit_regions endfor");
-		return;
-	}
-	e820_print_map("limit_regions endfunc");
-}
-
 /* Overridden in paravirt.c if CONFIG_PARAVIRT */
 char * __init __attribute__((weak)) memory_setup(void)
 {
@@ -249,93 +219,3 @@ void __init setup_memory_map(void)
 	e820_print_map(memory_setup());
 }
 
-static int __initdata user_defined_memmap;
-
-/*
- * "mem=nopentium" disables the 4MB page tables.
- * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
- * to <mem>, overriding the bios size.
- * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
- * <start> to <start>+<mem>, overriding the bios size.
- *
- * HPA tells me bootloaders need to parse mem=, so no new
- * option should be mem=  [also see Documentation/i386/boot.txt]
- */
-static int __init parse_mem(char *arg)
-{
-	if (!arg)
-		return -EINVAL;
-
-	if (strcmp(arg, "nopentium") == 0) {
-		setup_clear_cpu_cap(X86_FEATURE_PSE);
-	} else {
-		/* If the user specifies memory size, we
-		 * limit the BIOS-provided memory map to
-		 * that size. exactmap can be used to specify
-		 * the exact map. mem=number can be used to
-		 * trim the existing memory map.
-		 */
-		unsigned long long mem_size;
-
-		mem_size = memparse(arg, &arg);
-		limit_regions(mem_size);
-		user_defined_memmap = 1;
-	}
-	return 0;
-}
-early_param("mem", parse_mem);
-
-static int __init parse_memmap(char *arg)
-{
-	if (!arg)
-		return -EINVAL;
-
-	if (strcmp(arg, "exactmap") == 0) {
-#ifdef CONFIG_CRASH_DUMP
-		/* If we are doing a crash dump, we
-		 * still need to know the real mem
-		 * size before original memory map is
-		 * reset.
-		 */
-		e820_register_active_regions(0, 0, -1UL);
-		saved_max_pfn = e820_end_of_ram();
-		remove_all_active_ranges();
-#endif
-		e820.nr_map = 0;
-		user_defined_memmap = 1;
-	} else {
-		/* If the user specifies memory size, we
-		 * limit the BIOS-provided memory map to
-		 * that size. exactmap can be used to specify
-		 * the exact map. mem=number can be used to
-		 * trim the existing memory map.
-		 */
-		unsigned long long start_at, mem_size;
-
-		mem_size = memparse(arg, &arg);
-		if (*arg == '@') {
-			start_at = memparse(arg+1, &arg);
-			add_memory_region(start_at, mem_size, E820_RAM);
-		} else if (*arg == '#') {
-			start_at = memparse(arg+1, &arg);
-			add_memory_region(start_at, mem_size, E820_ACPI);
-		} else if (*arg == '$') {
-			start_at = memparse(arg+1, &arg);
-			add_memory_region(start_at, mem_size, E820_RESERVED);
-		} else {
-			limit_regions(mem_size);
-			user_defined_memmap = 1;
-		}
-	}
-	return 0;
-}
-early_param("memmap", parse_memmap);
-
-void __init finish_e820_parsing(void)
-{
-	if (user_defined_memmap) {
-		printk(KERN_INFO "user-defined physical RAM map:\n");
-		e820_print_map("user");
-	}
-}
-
Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6/arch/x86/kernel/e820_64.c
@@ -96,75 +96,6 @@ char *__init machine_specific_memory_set
 	return who;
 }
 
-static int __init parse_memopt(char *p)
-{
-	if (!p)
-		return -EINVAL;
-	end_user_pfn = memparse(p, &p);
-	end_user_pfn >>= PAGE_SHIFT;
-	return 0;
-}
-early_param("mem", parse_memopt);
-
-static int userdef __initdata;
-
-static int __init parse_memmap_opt(char *p)
-{
-	char *oldp;
-	unsigned long long start_at, mem_size;
-
-	if (!strcmp(p, "exactmap")) {
-#ifdef CONFIG_CRASH_DUMP
-		/*
-		 * If we are doing a crash dump, we still need to know
-		 * the real mem size before original memory map is
-		 * reset.
-		 */
-		e820_register_active_regions(0, 0, -1UL);
-		saved_max_pfn = e820_end_of_ram();
-		remove_all_active_ranges();
-#endif
-		e820.nr_map = 0;
-		userdef = 1;
-		return 0;
-	}
-
-	oldp = p;
-	mem_size = memparse(p, &p);
-	if (p == oldp)
-		return -EINVAL;
-
-	userdef = 1;
-	if (*p == '@') {
-		start_at = memparse(p+1, &p);
-		add_memory_region(start_at, mem_size, E820_RAM);
-	} else if (*p == '#') {
-		start_at = memparse(p+1, &p);
-		add_memory_region(start_at, mem_size, E820_ACPI);
-	} else if (*p == '$') {
-		start_at = memparse(p+1, &p);
-		add_memory_region(start_at, mem_size, E820_RESERVED);
-	} else {
-		end_user_pfn = (mem_size >> PAGE_SHIFT);
-	}
-	return *p == '\0' ? 0 : -EINVAL;
-}
-early_param("memmap", parse_memmap_opt);
-
-void __init finish_e820_parsing(void)
-{
-	if (userdef) {
-		int nr = e820.nr_map;
-
-		if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
-			early_panic("Invalid user supplied memory map");
-		e820.nr_map = nr;
-
-		printk(KERN_INFO "user-defined physical RAM map:\n");
-		e820_print_map("user");
-	}
-}
-
 int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
 {
 	int i;
Index: linux-2.6/include/asm-x86/e820.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820.h
+++ linux-2.6/include/asm-x86/e820.h
@@ -97,6 +97,8 @@ extern int e820_find_active_region(const
 extern void e820_register_active_regions(int nid, unsigned long start_pfn,
 					 unsigned long end_pfn);
 extern u64 e820_hole_size(u64 start, u64 end);
+extern void finish_e820_parsing(void);
+
 #endif /* __ASSEMBLY__ */
 
 #define ISA_START_ADDRESS	0xa0000
Index: linux-2.6/include/asm-x86/e820_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_32.h
+++ linux-2.6/include/asm-x86/e820_32.h
@@ -19,9 +19,7 @@
 #ifndef __ASSEMBLY__
 
 extern void setup_memory_map(void);
-extern void finish_e820_parsing(void);
 
-extern void limit_regions(unsigned long long size);
 extern void init_iomem_resources(struct resource *code_resource,
 				 struct resource *data_resource,
 				 struct resource *bss_resource);
Index: linux-2.6/include/asm-x86/e820_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_64.h
+++ linux-2.6/include/asm-x86/e820_64.h
@@ -22,8 +22,6 @@ extern int is_memory_any_valid(unsigned 
 extern int e820_all_non_reserved(unsigned long start, unsigned long end);
 extern int is_memory_all_valid(unsigned long start, unsigned long end);
 
-extern void finish_e820_parsing(void);
-
 #endif/*!__ASSEMBLY__*/
 
 #endif/*__E820_HEADER*/

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2008-06-10 19:55 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-06-03 17:25 [PATCH] x86: early check if one system is numaq v2 Yinghai Lu
2008-06-04  2:32 ` [PATCH] x86: numa32 make sure get kva space Yinghai Lu
2008-06-04 10:26   ` Ingo Molnar
2008-06-04  2:34 ` [PATCH] x86: move e820_register_active to e820.c Yinghai Lu
2008-06-04  2:35   ` [PATCH] x86: 32 bit use e820_register_active_regions Yinghai Lu
2008-06-04  7:39     ` [PATCH] x86: e820 merge parse mem/memmap Yinghai Lu
2008-06-04 10:27     ` [PATCH] x86: 32 bit use e820_register_active_regions Ingo Molnar
2008-06-04 20:21     ` [PATCH] x86: e820 max_arch_pfn typo fix for 64 bit Yinghai Lu
2008-06-04 22:47       ` H. Peter Anvin
2008-06-06 21:43     ` [PATCH] x86: shrink pages should check all Yinghai Lu
2008-06-07  1:53       ` [PATCH] x86: numa32 use find_e820_area to find KVA ram on node Yinghai Lu
2008-06-10  9:53         ` Ingo Molnar
2008-06-07  1:54       ` [PATCH] x86: fix fail with 64g above system with numa32 Yinghai Lu
2008-06-10  9:53         ` Ingo Molnar
2008-06-09  2:39       ` [PATCH] x86: shrink pages should check all v2 Yinghai Lu
2008-06-09 10:15         ` Ingo Molnar
2008-06-10 19:55     ` [PATCH] x86: e820 merge parse mem/memmap Yinghai Lu
2008-06-04 10:26   ` [PATCH] x86: move e820_register_active to e820.c Ingo Molnar
2008-06-04 10:25 ` [PATCH] x86: early check if one system is numaq v2 Ingo Molnar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).