public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86: move some func calling from setup_arch to paging_init
@ 2008-06-24  2:51 Yinghai Lu
  2008-06-24  2:52 ` [PATCH] x86: setup_arch 64bit move efi_init calling early Yinghai Lu
  2008-06-24 11:10 ` [PATCH] x86: introduce init_memory_mapping for 32bit Yinghai Lu
  0 siblings, 2 replies; 17+ messages in thread
From: Yinghai Lu @ 2008-06-24  2:51 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin; +Cc: linux-kernel@vger.kernel.org


These functions depend on paging having set up the pagetables, so they can
access RAM in the bootmem region that has just been mapped.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/setup_32.c |   34 ++--------------------------------
 arch/x86/mm/init_32.c      |   29 +++++++++++++++++++++++++++++
 include/asm-x86/setup.h    |    1 +
 3 files changed, 32 insertions(+), 32 deletions(-)

Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6/arch/x86/kernel/setup_32.c
@@ -250,7 +250,7 @@ static void __init reserve_initrd(void)
 
 #define MAX_MAP_CHUNK	(NR_FIX_BTMAPS << PAGE_SHIFT)
 
-static void __init post_reserve_initrd(void)
+void __init post_reserve_initrd(void)
 {
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
@@ -308,29 +308,11 @@ static void __init post_reserve_initrd(v
 void __init reserve_initrd(void)
 {
 }
-static void __init post_reserve_initrd(void)
+void __init post_reserve_initrd(void)
 {
 }
 #endif /* CONFIG_BLK_DEV_INITRD */
 
-/*
- * The node 0 pgdat is initialized before all of these because
- * it's needed for bootmem.  node>0 pgdats have their virtual
- * space allocated before the pagetables are in place to access
- * them, so they can't be cleared then.
- *
- * This should all compile down to nothing when NUMA is off.
- */
-static void __init remapped_pgdat_init(void)
-{
-	int nid;
-
-	for_each_online_node(nid) {
-		if (nid != 0)
-			memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
-	}
-}
-
 #ifdef CONFIG_MCA
 static void set_mca_bus(int x)
 {
@@ -530,18 +512,6 @@ void __init setup_arch(char **cmdline_p)
 		init_ohci1394_dma_on_all_controllers();
 #endif
 
-	/*
-	 * NOTE: at this point the bootmem allocator is fully available.
-	 */
-
-	post_reserve_initrd();
-
-	remapped_pgdat_init();
-	sparse_init();
-	zone_sizes_init();
-
-	paravirt_post_allocator_init();
-
 #ifdef CONFIG_X86_GENERICARCH
 	generic_apic_probe();
 #endif
Index: linux-2.6/arch/x86/mm/init_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_32.c
+++ linux-2.6/arch/x86/mm/init_32.c
@@ -705,6 +705,23 @@ void __init setup_bootmem_allocator(void
 
 }
 
+/*
+ * The node 0 pgdat is initialized before all of these because
+ * it's needed for bootmem.  node>0 pgdats have their virtual
+ * space allocated before the pagetables are in place to access
+ * them, so they can't be cleared then.
+ *
+ * This should all compile down to nothing when NUMA is off.
+ */
+static void __init remapped_pgdat_init(void)
+{
+	int nid;
+
+	for_each_online_node(nid) {
+		if (nid != 0)
+			memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+	}
+}
 
 /*
  * paging_init() sets up the page tables - note that the first 8MB are
@@ -727,6 +744,18 @@ void __init paging_init(void)
 	__flush_tlb_all();
 
 	kmap_init();
+
+	/*
+	 * NOTE: at this point the bootmem allocator is fully available.
+	 */
+
+	post_reserve_initrd();
+
+	remapped_pgdat_init();
+	sparse_init();
+	zone_sizes_init();
+
+	paravirt_post_allocator_init();
 }
 
 /*
Index: linux-2.6/include/asm-x86/setup.h
===================================================================
--- linux-2.6.orig/include/asm-x86/setup.h
+++ linux-2.6/include/asm-x86/setup.h
@@ -39,6 +39,7 @@ void reserve_crashkernel(void);
 #include <asm/bootparam.h>
 
 void reserve_standard_io_resources(void);
+extern void post_reserve_initrd(void);
 
 #ifndef _SETUP
 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH] x86: setup_arch 64bit move efi_init calling early
  2008-06-24  2:51 [PATCH] x86: move some func calling from setup_arch to paging_init Yinghai Lu
@ 2008-06-24  2:52 ` Yinghai Lu
  2008-06-24  2:52   ` [PATCH] x86: setup_arch 64bit move kvmclock_init later Yinghai Lu
  2008-06-24 11:10 ` [PATCH] x86: introduce init_memory_mapping for 32bit Yinghai Lu
  1 sibling, 1 reply; 17+ messages in thread
From: Yinghai Lu @ 2008-06-24  2:52 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin; +Cc: linux-kernel@vger.kernel.org



Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/setup_32.c |   32 ++++++++++++++++++--------------
 arch/x86/kernel/setup_64.c |   14 +++++++-------
 2 files changed, 25 insertions(+), 21 deletions(-)

Index: linux-2.6/arch/x86/kernel/setup_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_64.c
+++ linux-2.6/arch/x86/kernel/setup_64.c
@@ -260,6 +260,9 @@ void __init setup_arch(char **cmdline_p)
 	insert_resource(&iomem_resource, &data_resource);
 	insert_resource(&iomem_resource, &bss_resource);
 
+	if (efi_enabled)
+		efi_init();
+
 	early_gart_iommu_check();
 
 	e820_register_active_regions(0, 0, -1UL);
@@ -286,8 +289,6 @@ void __init setup_arch(char **cmdline_p)
 	check_efer();
 
 	max_pfn_mapped = init_memory_mapping(0, (end_pfn << PAGE_SHIFT));
-	if (efi_enabled)
-		efi_init();
 
 	vsmp_init();
 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH] x86: setup_arch 64bit move kvmclock_init later
  2008-06-24  2:52 ` [PATCH] x86: setup_arch 64bit move efi_init calling early Yinghai Lu
@ 2008-06-24  2:52   ` Yinghai Lu
  2008-06-24  2:53     ` [PATCH] x86: setup_arch 32bit move efi check later Yinghai Lu
  0 siblings, 1 reply; 17+ messages in thread
From: Yinghai Lu @ 2008-06-24  2:52 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin; +Cc: linux-kernel@vger.kernel.org



Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/setup_32.c |   32 ++++++++++++++++++--------------
 arch/x86/kernel/setup_64.c |   14 +++++++-------
 2 files changed, 25 insertions(+), 21 deletions(-)

Index: linux-2.6/arch/x86/kernel/setup_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_64.c
+++ linux-2.6/arch/x86/kernel/setup_64.c
@@ -296,10 +296,6 @@ void __init setup_arch(char **cmdline_p)
 
 	io_delay_init();
 
-#ifdef CONFIG_KVM_CLOCK
-	kvmclock_init();
-#endif
-
 	/*
 	 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
 	 * Call this early for SRAT node setup.
@@ -342,6 +338,9 @@ void __init setup_arch(char **cmdline_p)
 
 	reserve_ibft_region();
 
+#ifdef CONFIG_KVM_CLOCK
+	kvmclock_init();
+#endif
 	paging_init();
 	map_vsyscall();
 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH] x86: setup_arch 32bit move efi check later
  2008-06-24  2:52   ` [PATCH] x86: setup_arch 64bit move kvmclock_init later Yinghai Lu
@ 2008-06-24  2:53     ` Yinghai Lu
  2008-06-24  2:54       ` [PATCH] x86: setup_arch 32bit move command line copying early Yinghai Lu
  0 siblings, 1 reply; 17+ messages in thread
From: Yinghai Lu @ 2008-06-24  2:53 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin; +Cc: linux-kernel@vger.kernel.org


Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/setup_32.c |   17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6/arch/x86/kernel/setup_32.c
@@ -337,14 +337,6 @@ void __init setup_arch(char **cmdline_p)
 	early_ioremap_init();
 	reserve_setup_data();
 
-#ifdef CONFIG_EFI
-	if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
-		     "EL32", 4)) {
-		efi_enabled = 1;
-		efi_reserve_early();
-	}
-#endif
-
 	ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
 	screen_info = boot_params.screen_info;
 	edid_info = boot_params.edid_info;
@@ -364,10 +356,17 @@ void __init setup_arch(char **cmdline_p)
 	rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
 	rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
 #endif
+#ifdef CONFIG_EFI
+	if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
+		     "EL32", 4)) {
+		efi_enabled = 1;
+		efi_reserve_early();
+	}
+#endif
+
 	ARCH_SETUP
 
 	setup_memory_map();
-
 	copy_edd();
 
 	if (!boot_params.hdr.root_flags)

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH] x86: setup_arch 32bit move command line copying early
  2008-06-24  2:53     ` [PATCH] x86: setup_arch 32bit move efi check later Yinghai Lu
@ 2008-06-24  2:54       ` Yinghai Lu
  2008-06-24  2:55         ` [PATCH] x86: setup_arch 32bit move kvm_guest_init later Yinghai Lu
  0 siblings, 1 reply; 17+ messages in thread
From: Yinghai Lu @ 2008-06-24  2:54 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin; +Cc: linux-kernel@vger.kernel.org


Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/setup_32.c |   32 ++++++++++++++++++--------------
 arch/x86/kernel/setup_64.c |   14 +++++++-------
 2 files changed, 25 insertions(+), 21 deletions(-)

Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6/arch/x86/kernel/setup_32.c
@@ -383,6 +383,9 @@ void __init setup_arch(char **cmdline_p)
 	bss_resource.start = virt_to_phys(&__bss_start);
 	bss_resource.end = virt_to_phys(&__bss_stop)-1;
 
+	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
+	*cmdline_p = command_line;
+
 	parse_setup_data();
 
 	parse_early_param();
@@ -408,9 +411,6 @@ void __init setup_arch(char **cmdline_p)
 	insert_resource(&iomem_resource, &data_resource);
 	insert_resource(&iomem_resource, &bss_resource);
 
-	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
-	*cmdline_p = command_line;
-
 	if (efi_enabled)
 		efi_init();
 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH] x86: setup_arch 32bit move kvm_guest_init later
  2008-06-24  2:54       ` [PATCH] x86: setup_arch 32bit move command line copying early Yinghai Lu
@ 2008-06-24  2:55         ` Yinghai Lu
  2008-06-24 10:53           ` Ingo Molnar
  0 siblings, 1 reply; 17+ messages in thread
From: Yinghai Lu @ 2008-06-24  2:55 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin; +Cc: linux-kernel@vger.kernel.org


Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/setup_32.c |   32 ++++++++++++++++++--------------
 arch/x86/kernel/setup_64.c |   14 +++++++-------
 2 files changed, 25 insertions(+), 21 deletions(-)

Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6/arch/x86/kernel/setup_32.c
@@ -488,8 +488,6 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	vmi_init();
 #endif
-	kvm_guest_init();
-
 	/*
 	 * NOTE: before this point _nobody_ is allowed to allocate
 	 * any memory using the bootmem allocator.  Although the
@@ -517,9 +515,15 @@ void __init setup_arch(char **cmdline_p)
 
 	early_quirks();
 
+	/*
+	 * Read APIC and some other early information from ACPI tables.
+	 */
 	acpi_boot_init();
 
 #if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
+	/*
+	 * get boot-time SMP configuration:
+	 */
 	if (smp_found_config)
 		get_smp_config();
 #endif
@@ -529,6 +533,7 @@ void __init setup_arch(char **cmdline_p)
 			"CONFIG_X86_PC cannot handle it.\nUse "
 			"CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
 #endif
+	kvm_guest_init();
 
 	e820_reserve_resources();
 	e820_mark_nosave_regions(max_low_pfn);

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] x86: setup_arch 32bit move kvm_guest_init later
  2008-06-24  2:55         ` [PATCH] x86: setup_arch 32bit move kvm_guest_init later Yinghai Lu
@ 2008-06-24 10:53           ` Ingo Molnar
  2008-06-24 11:10             ` Yinghai Lu
  0 siblings, 1 reply; 17+ messages in thread
From: Ingo Molnar @ 2008-06-24 10:53 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, linux-kernel@vger.kernel.org


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

applied all 9 patches to tip/x86/setup-memory. Thanks Yinghai,

	Ingo

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] x86: setup_arch 32bit move kvm_guest_init later
  2008-06-24 10:53           ` Ingo Molnar
@ 2008-06-24 11:10             ` Yinghai Lu
  0 siblings, 0 replies; 17+ messages in thread
From: Yinghai Lu @ 2008-06-24 11:10 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, H. Peter Anvin, linux-kernel@vger.kernel.org

On Tue, Jun 24, 2008 at 3:53 AM, Ingo Molnar <mingo@elte.hu> wrote:
>
> * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
>> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
>
> applied all 9 patches to tip/x86/setup-memory. Thanks Yinghai,

please check

[PATCH] x86: introduce init_memory_mapping for 32bit

that will make the 32-bit flow in setup_arch clearer.

YH

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH] x86: introduce init_memory_mapping for 32bit
  2008-06-24  2:51 [PATCH] x86: move some func calling from setup_arch to paging_init Yinghai Lu
  2008-06-24  2:52 ` [PATCH] x86: setup_arch 64bit move efi_init calling early Yinghai Lu
@ 2008-06-24 11:10 ` Yinghai Lu
  2008-06-24 11:30   ` Ingo Molnar
  2008-06-24 19:18   ` [PATCH] x86: introduce init_memory_mapping for 32bit #1 Yinghai Lu
  1 sibling, 2 replies; 17+ messages in thread
From: Yinghai Lu @ 2008-06-24 11:10 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin; +Cc: linux-kernel@vger.kernel.org



This lets us use memory below max_low_pfn early. Several functions can
be moved earlier instead of waiting until after paging_init, including
moving relocate_initrd early; the kva-related early setup is done
in initmem_init.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>


---
 arch/x86/kernel/setup_32.c |   32 +++------
 arch/x86/mm/discontig_32.c |    8 +-
 arch/x86/mm/init_32.c      |  154 +++++++++++++++++++++++++++++++--------------
 include/asm-x86/numa_32.h  |    5 -
 include/asm-x86/page_32.h  |    2 
 include/asm-x86/setup.h    |    1 
 6 files changed, 128 insertions(+), 74 deletions(-)

Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6/arch/x86/kernel/setup_32.c
@@ -187,7 +187,7 @@ static inline void copy_edd(void)
 
 #ifdef CONFIG_BLK_DEV_INITRD
 
-static bool do_relocate_initrd = false;
+static void __init relocate_initrd(void);
 
 static void __init reserve_initrd(void)
 {
@@ -196,7 +196,6 @@ static void __init reserve_initrd(void)
 	u64 ramdisk_end   = ramdisk_image + ramdisk_size;
 	u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
 	u64 ramdisk_here;
-	u64 ramdisk_target;
 
 	if (!boot_params.hdr.type_of_loader ||
 	    !ramdisk_image || !ramdisk_size)
@@ -227,10 +226,8 @@ static void __init reserve_initrd(void)
 	}
 
 	/* We need to move the initrd down into lowmem */
-	ramdisk_target = max_pfn_mapped<<PAGE_SHIFT;
-	ramdisk_here = find_e820_area(min(ramdisk_target, end_of_lowmem>>1),
-				 end_of_lowmem, ramdisk_size,
-				 PAGE_SIZE);
+	ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size,
+					 PAGE_SIZE);
 
 	if (ramdisk_here == -1ULL)
 		panic("Cannot find place for new RAMDISK of size %lld\n",
@@ -245,12 +242,12 @@ static void __init reserve_initrd(void)
 	printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
 			 ramdisk_here, ramdisk_here + ramdisk_size);
 
-	do_relocate_initrd = true;
+	relocate_initrd();
 }
 
 #define MAX_MAP_CHUNK	(NR_FIX_BTMAPS << PAGE_SHIFT)
 
-void __init post_reserve_initrd(void)
+static void __init relocate_initrd(void)
 {
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
@@ -259,9 +256,6 @@ void __init post_reserve_initrd(void)
 	unsigned long slop, clen, mapaddr;
 	char *p, *q;
 
-	if (!do_relocate_initrd)
-		return;
-
 	ramdisk_here = initrd_start - PAGE_OFFSET;
 
 	q = (char *)initrd_start;
@@ -272,10 +266,6 @@ void __init post_reserve_initrd(void)
 		p = (char *)__va(ramdisk_image);
 		memcpy(q, p, clen);
 		q += clen;
-		/* need to free these low pages...*/
-		printk(KERN_INFO "Freeing old partial RAMDISK %08llx-%08llx\n",
-			 ramdisk_image, ramdisk_image + clen - 1);
-		free_bootmem(ramdisk_image, clen);
 		ramdisk_image += clen;
 		ramdisk_size  -= clen;
 	}
@@ -301,16 +291,16 @@ void __init post_reserve_initrd(void)
 		ramdisk_image, ramdisk_image + ramdisk_size - 1,
 		ramdisk_here, ramdisk_here + ramdisk_size - 1);
 
-	/* need to free that, otherwise init highmem will reserve it again */
+	/*
+	 * need to free old one, otherwise init cross max_low_pfn could be
+	 * converted to bootmem
+	 */
 	free_early(ramdisk_image, ramdisk_image+ramdisk_size);
 }
 #else
 void __init reserve_initrd(void)
 {
 }
-void __init post_reserve_initrd(void)
-{
-}
 #endif /* CONFIG_BLK_DEV_INITRD */
 
 #ifdef CONFIG_MCA
@@ -439,8 +429,12 @@ void __init setup_arch(char **cmdline_p)
 		max_pfn = e820_end_of_ram();
 	}
 
+	/* max_low_pfn get updated here */
 	find_low_pfn_range();
 
+	/* max_pfn_mapped is updated here*/
+	init_memory_mapping(0, (max_low_pfn << PAGE_SHIFT));
+
 	reserve_initrd();
 
 	dmi_scan_machine();
Index: linux-2.6/arch/x86/mm/discontig_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/discontig_32.c
+++ linux-2.6/arch/x86/mm/discontig_32.c
@@ -200,7 +200,7 @@ void *alloc_remap(int nid, unsigned long
 	return allocation;
 }
 
-void __init remap_numa_kva(void)
+static void __init remap_numa_kva(void)
 {
 	void *vaddr;
 	unsigned long pfn;
@@ -373,12 +373,16 @@ void __init initmem_init(unsigned long s
 
 		allocate_pgdat(nid);
 	}
+	remap_numa_kva();
+
 	printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
 	for_each_online_node(nid)
 		propagate_e820_map_node(nid);
 
-	memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
+	for_each_online_node(nid)
+		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+
 	NODE_DATA(0)->bdata = &node0_bdata;
 	setup_bootmem_allocator();
 }
Index: linux-2.6/arch/x86/mm/init_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_32.c
+++ linux-2.6/arch/x86/mm/init_32.c
@@ -57,6 +57,27 @@ unsigned long highstart_pfn, highend_pfn
 
 static noinline int do_test_wp_bit(void);
 
+
+static unsigned long __initdata table_start;
+static unsigned long __meminitdata table_end;
+static unsigned long __meminitdata table_top;
+
+static int __initdata after_init_bootmem;
+
+static __init void *alloc_low_page(unsigned long *phys)
+{
+	unsigned long pfn = table_end++;
+	void *adr;
+
+	if (pfn >= table_top)
+		panic("alloc_low_page: ran out of memory");
+
+	adr = __va(pfn * PAGE_SIZE);
+	memset(adr, 0, PAGE_SIZE);
+	*phys  = pfn * PAGE_SIZE;
+	return adr;
+}
+
 /*
  * Creates a middle page table and puts a pointer to it in the
  * given global directory entry. This only returns the gd entry
@@ -68,9 +89,12 @@ static pmd_t * __init one_md_table_init(
 	pmd_t *pmd_table;
 
 #ifdef CONFIG_X86_PAE
+	unsigned long phys;
 	if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
-		pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
-
+		if (after_init_bootmem)
+			pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+		else
+			pmd_table = (pmd_t *)alloc_low_page(&phys);
 		paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
 		set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
 		pud = pud_offset(pgd, 0);
@@ -92,12 +116,16 @@ static pte_t * __init one_page_table_ini
 	if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
 		pte_t *page_table = NULL;
 
+		if (after_init_bootmem) {
 #ifdef CONFIG_DEBUG_PAGEALLOC
-		page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
+			page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
 #endif
-		if (!page_table) {
-			page_table =
+			if (!page_table)
+				page_table =
 				(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+		} else {
+			unsigned long phys;
+			page_table = (pte_t *)alloc_low_page(&phys);
 		}
 
 		paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
@@ -155,7 +183,9 @@ static inline int is_kernel_text(unsigne
  * of max_low_pfn pages, by creating page tables starting from address
  * PAGE_OFFSET:
  */
-static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
+static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
+						unsigned long start,
+						unsigned long end)
 {
 	int pgd_idx, pmd_idx, pte_ofs;
 	unsigned long pfn;
@@ -163,18 +193,19 @@ static void __init kernel_physical_mappi
 	pmd_t *pmd;
 	pte_t *pte;
 	unsigned pages_2m = 0, pages_4k = 0;
+	unsigned limit_pfn = end >> PAGE_SHIFT;
 
 	pgd_idx = pgd_index(PAGE_OFFSET);
 	pgd = pgd_base + pgd_idx;
-	pfn = 0;
+	pfn = start >> PAGE_SHIFT;
 
 	for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
 		pmd = one_md_table_init(pgd);
-		if (pfn >= max_low_pfn)
+		if (pfn >= limit_pfn)
 			continue;
 
 		for (pmd_idx = 0;
-		     pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn;
+		     pmd_idx < PTRS_PER_PMD && pfn < limit_pfn;
 		     pmd++, pmd_idx++) {
 			unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;
 
@@ -418,20 +449,6 @@ static void __init pagetable_init(void)
 
 	paravirt_pagetable_setup_start(pgd_base);
 
-	/* Enable PSE if available */
-	if (cpu_has_pse)
-		set_in_cr4(X86_CR4_PSE);
-
-	/* Enable PGE if available */
-	if (cpu_has_pge) {
-		set_in_cr4(X86_CR4_PGE);
-		__PAGE_KERNEL |= _PAGE_GLOBAL;
-		__PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
-	}
-
-	kernel_physical_mapping_init(pgd_base);
-	remap_numa_kva();
-
 	/*
 	 * Fixed mappings, only the page table structure has to be
 	 * created - mappings will be set by set_fixmap():
@@ -703,24 +720,78 @@ void __init setup_bootmem_allocator(void
 		free_bootmem_with_active_regions(i, max_low_pfn);
 	early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
 
+	after_init_bootmem = 1;
 }
 
-/*
- * The node 0 pgdat is initialized before all of these because
- * it's needed for bootmem.  node>0 pgdats have their virtual
- * space allocated before the pagetables are in place to access
- * them, so they can't be cleared then.
- *
- * This should all compile down to nothing when NUMA is off.
- */
-static void __init remapped_pgdat_init(void)
+static void __init find_early_table_space(unsigned long end)
 {
-	int nid;
+	unsigned long puds, pmds, tables, start;
+
+	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
+	tables = PAGE_ALIGN(puds * sizeof(pud_t));
+
+	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+	tables += PAGE_ALIGN(pmds * sizeof(pmd_t));
+
+	/*
+	 * RED-PEN putting page tables only on node 0 could
+	 * cause a hotspot and fill up ZONE_DMA. The page tables
+	 * need roughly 0.5KB per GB.
+	 */
+	start = 0x7000;
+	table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
+					tables, PAGE_SIZE);
+	if (table_start == -1UL)
+		panic("Cannot find space for the kernel page tables");
+
+	table_start >>= PAGE_SHIFT;
+	table_end = table_start;
+	table_top = table_start + (tables>>PAGE_SHIFT);
+
+	printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
+		end, table_start << PAGE_SHIFT,
+		(table_start << PAGE_SHIFT) + tables);
+}
+
+unsigned long __init_refok init_memory_mapping(unsigned long start,
+						unsigned long end)
+{
+	pgd_t *pgd_base = swapper_pg_dir;
 
-	for_each_online_node(nid) {
-		if (nid != 0)
-			memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+	/*
+	 * Find space for the kernel direct mapping tables.
+	 */
+	if (!after_init_bootmem)
+		find_early_table_space(end);
+
+#ifdef CONFIG_X86_PAE
+	set_nx();
+	if (nx_enabled)
+		printk(KERN_INFO "NX (Execute Disable) protection: active\n");
+#endif
+
+	/* Enable PSE if available */
+	if (cpu_has_pse)
+		set_in_cr4(X86_CR4_PSE);
+
+	/* Enable PGE if available */
+	if (cpu_has_pge) {
+		set_in_cr4(X86_CR4_PGE);
+		__PAGE_KERNEL |= _PAGE_GLOBAL;
+		__PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
 	}
+
+	kernel_physical_mapping_init(pgd_base, start, end);
+
+	load_cr3(swapper_pg_dir);
+
+	__flush_tlb_all();
+
+	if (!after_init_bootmem)
+		reserve_early(table_start << PAGE_SHIFT,
+				 table_end << PAGE_SHIFT, "PGTABLE");
+
+	return end >> PAGE_SHIFT;
 }
 
 /*
@@ -732,15 +803,8 @@ static void __init remapped_pgdat_init(v
  */
 void __init paging_init(void)
 {
-#ifdef CONFIG_X86_PAE
-	set_nx();
-	if (nx_enabled)
-		printk(KERN_INFO "NX (Execute Disable) protection: active\n");
-#endif
 	pagetable_init();
 
-	load_cr3(swapper_pg_dir);
-
 	__flush_tlb_all();
 
 	kmap_init();
@@ -748,10 +812,6 @@ void __init paging_init(void)
 	/*
 	 * NOTE: at this point the bootmem allocator is fully available.
 	 */
-
-	post_reserve_initrd();
-
-	remapped_pgdat_init();
 	sparse_init();
 	zone_sizes_init();
 
Index: linux-2.6/include/asm-x86/numa_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/numa_32.h
+++ linux-2.6/include/asm-x86/numa_32.h
@@ -5,12 +5,7 @@ extern int pxm_to_nid(int pxm);
 extern void numa_remove_cpu(int cpu);
 
 #ifdef CONFIG_NUMA
-extern void __init remap_numa_kva(void);
 extern void set_highmem_pages_init(void);
-#else
-static inline void remap_numa_kva(void)
-{
-}
 #endif
 
 #endif /* _ASM_X86_32_NUMA_H */
Index: linux-2.6/include/asm-x86/page_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/page_32.h
+++ linux-2.6/include/asm-x86/page_32.h
@@ -93,6 +93,8 @@ extern int sysctl_legacy_va_layout;
 #define MAXMEM			(-__PAGE_OFFSET - __VMALLOC_RESERVE)
 
 extern void find_low_pfn_range(void);
+extern unsigned long init_memory_mapping(unsigned long start,
+					 unsigned long end);
 extern void initmem_init(unsigned long, unsigned long);
 extern void zone_sizes_init(void);
 extern void setup_bootmem_allocator(void);
Index: linux-2.6/include/asm-x86/setup.h
===================================================================
--- linux-2.6.orig/include/asm-x86/setup.h
+++ linux-2.6/include/asm-x86/setup.h
@@ -39,7 +39,6 @@ void reserve_crashkernel(void);
 #include <asm/bootparam.h>
 
 void reserve_standard_io_resources(void);
-extern void post_reserve_initrd(void);
 
 #ifndef _SETUP
 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] x86: introduce init_memory_mapping for 32bit
  2008-06-24 11:10 ` [PATCH] x86: introduce init_memory_mapping for 32bit Yinghai Lu
@ 2008-06-24 11:30   ` Ingo Molnar
  2008-06-24 17:16     ` Yinghai Lu
  2008-06-24 19:22     ` Yinghai Lu
  2008-06-24 19:18   ` [PATCH] x86: introduce init_memory_mapping for 32bit #1 Yinghai Lu
  1 sibling, 2 replies; 17+ messages in thread
From: Ingo Molnar @ 2008-06-24 11:30 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, linux-kernel@vger.kernel.org


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> so could use mem below max_low_pfn as early. could move several 
> function more early instead of waiting after paging_init including 
> moving relocate_initrd early, and kva related early done in 
> initmem_init

applied to tip/x86/setup-memory - thanks Yinghai.

a sidenote:

>  6 files changed, 128 insertions(+), 74 deletions(-)

this patch is too large - if it causes any problems it will not be very 
easy to figure out which exact change caused the problems.

Lets hope it goes all fine - but in the future lets try doing 
more+smaller patches, especially if they change some known-dangerous 
area of the kernel.

For example here a better splitup would have been to do 5 or more 
patches:

 1) first introduce init_memory_mapping() [but dont use it anywhere]
 2) add the init_memory_mapping() call to setup_arch()
 3) move remap_numa_kva()
 4) move relocate_initrd()
 5) remove the now unnecessary setup from paging_init()

... or something like that. The point is to manage risk: if there's 
multiple problem areas that a change is touching, try to isolate them 
from each other and introduce the change gradually.

The end result is still exactly the same, but much more 
reviewable/debuggable/bisectable.

	Ingo

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] x86: introduce init_memory_mapping for 32bit
  2008-06-24 11:30   ` Ingo Molnar
@ 2008-06-24 17:16     ` Yinghai Lu
  2008-06-24 19:22     ` Yinghai Lu
  1 sibling, 0 replies; 17+ messages in thread
From: Yinghai Lu @ 2008-06-24 17:16 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, H. Peter Anvin, linux-kernel@vger.kernel.org

On Tue, Jun 24, 2008 at 4:30 AM, Ingo Molnar <mingo@elte.hu> wrote:
>
> * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
>> so could use mem below max_low_pfn as early. could move several
>> function more early instead of waiting after paging_init including
>> moving relocate_initrd early, and kva related early done in
>> initmem_init
>
> applied to tip/x86/setup-memory - thanks Yinghai.
>
> a sidenote:
>
>>  6 files changed, 128 insertions(+), 74 deletions(-)
>
> this patch is too large - if it causes any problems it will not be very
> easy to figure out which exact change caused the problems.
>
> Lets hope it goes all fine - but in the future lets try doing
> more+smaller patches, especially if they change some known-dangerous
> area of the kernel.
>
> For example here a better splitup would have been to do 5 or more
> patches:
>
>  1) first introduce init_memory_mapping() [but dont use it anywhere]
>  2) add the init_memory_mapping() call to setup_arch()
>  3) move remap_numa_kva()
>  4) move relocate_initrd()
>  5) remove the now unnecessary setup from paging_init()
>
> ... or something like that. The point is to manage risk: if there's
> multiple problem areas that a change is touching, try to isolate them
> from each other and introduce the change gradually.
>
> The end result is still exactly the same, but much more
> reviewable/debuggable/bisectable.

actually i added and tested that one by one. let me check if i split
that into 5.

YH

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH] x86: introduce init_memory_mapping for 32bit #1
  2008-06-24 11:10 ` [PATCH] x86: introduce init_memory_mapping for 32bit Yinghai Lu
  2008-06-24 11:30   ` Ingo Molnar
@ 2008-06-24 19:18   ` Yinghai Lu
  2008-06-24 19:18     ` [PATCH] x86: introduce init_memory_mapping for 32bit #2 Yinghai Lu
  1 sibling, 1 reply; 17+ messages in thread
From: Yinghai Lu @ 2008-06-24 19:18 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin; +Cc: linux-kernel@vger.kernel.org


This lets us use memory below max_low_pfn early.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/setup_32.c |   10 +--
 arch/x86/mm/init_32.c      |  141 +++++++++++++++++++++++++++++++++++----------
 include/asm-x86/page_32.h  |    2 
 3 files changed, 120 insertions(+), 33 deletions(-)

Index: xy/arch/x86/kernel/setup_32.c
===================================================================
--- xy.orig/arch/x86/kernel/setup_32.c
+++ xy/arch/x86/kernel/setup_32.c
@@ -227,10 +227,8 @@ static void __init reserve_initrd(void)
 	}
 
 	/* We need to move the initrd down into lowmem */
-	ramdisk_target = max_pfn_mapped<<PAGE_SHIFT;
-	ramdisk_here = find_e820_area(min(ramdisk_target, end_of_lowmem>>1),
-				 end_of_lowmem, ramdisk_size,
-				 PAGE_SIZE);
+	ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size,
+					 PAGE_SIZE);
 
 	if (ramdisk_here == -1ULL)
 		panic("Cannot find place for new RAMDISK of size %lld\n",
@@ -440,8 +438,12 @@ void __init setup_arch(char **cmdline_p)
 		max_pfn = e820_end_of_ram();
 	}
 
+	/* max_low_pfn get updated here */
 	find_low_pfn_range();
 
+	/* max_pfn_mapped is updated here*/
+	init_memory_mapping(0, (max_low_pfn << PAGE_SHIFT));
+
 	reserve_initrd();
 
 	dmi_scan_machine();
Index: xy/arch/x86/mm/init_32.c
===================================================================
--- xy.orig/arch/x86/mm/init_32.c
+++ xy/arch/x86/mm/init_32.c
@@ -57,6 +57,27 @@ unsigned long highstart_pfn, highend_pfn
 
 static noinline int do_test_wp_bit(void);
 
+
+static unsigned long __initdata table_start;
+static unsigned long __meminitdata table_end;
+static unsigned long __meminitdata table_top;
+
+static int __initdata after_init_bootmem;
+
+static __init void *alloc_low_page(unsigned long *phys)
+{
+	unsigned long pfn = table_end++;
+	void *adr;
+
+	if (pfn >= table_top)
+		panic("alloc_low_page: ran out of memory");
+
+	adr = __va(pfn * PAGE_SIZE);
+	memset(adr, 0, PAGE_SIZE);
+	*phys  = pfn * PAGE_SIZE;
+	return adr;
+}
+
 /*
  * Creates a middle page table and puts a pointer to it in the
  * given global directory entry. This only returns the gd entry
@@ -68,9 +89,12 @@ static pmd_t * __init one_md_table_init(
 	pmd_t *pmd_table;
 
 #ifdef CONFIG_X86_PAE
+	unsigned long phys;
 	if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
-		pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
-
+		if (after_init_bootmem)
+			pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+		else
+			pmd_table = (pmd_t *)alloc_low_page(&phys);
 		paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
 		set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
 		pud = pud_offset(pgd, 0);
@@ -92,12 +116,16 @@ static pte_t * __init one_page_table_ini
 	if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
 		pte_t *page_table = NULL;
 
+		if (after_init_bootmem) {
 #ifdef CONFIG_DEBUG_PAGEALLOC
-		page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
+			page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
 #endif
-		if (!page_table) {
-			page_table =
+			if (!page_table)
+				page_table =
 				(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+		} else {
+			unsigned long phys;
+			page_table = (pte_t *)alloc_low_page(&phys);
 		}
 
 		paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
@@ -155,7 +183,9 @@ static inline int is_kernel_text(unsigne
  * of max_low_pfn pages, by creating page tables starting from address
  * PAGE_OFFSET:
  */
-static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
+static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
+						unsigned long start,
+						unsigned long end)
 {
 	int pgd_idx, pmd_idx, pte_ofs;
 	unsigned long pfn;
@@ -163,18 +193,19 @@ static void __init kernel_physical_mappi
 	pmd_t *pmd;
 	pte_t *pte;
 	unsigned pages_2m = 0, pages_4k = 0;
+	unsigned limit_pfn = end >> PAGE_SHIFT;
 
 	pgd_idx = pgd_index(PAGE_OFFSET);
 	pgd = pgd_base + pgd_idx;
-	pfn = 0;
+	pfn = start >> PAGE_SHIFT;
 
 	for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
 		pmd = one_md_table_init(pgd);
-		if (pfn >= max_low_pfn)
+		if (pfn >= limit_pfn)
 			continue;
 
 		for (pmd_idx = 0;
-		     pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn;
+		     pmd_idx < PTRS_PER_PMD && pfn < limit_pfn;
 		     pmd++, pmd_idx++) {
 			unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;
 
@@ -418,20 +449,7 @@ static void __init pagetable_init(void)
 
 	paravirt_pagetable_setup_start(pgd_base);
 
-	/* Enable PSE if available */
-	if (cpu_has_pse)
-		set_in_cr4(X86_CR4_PSE);
-
-	/* Enable PGE if available */
-	if (cpu_has_pge) {
-		set_in_cr4(X86_CR4_PGE);
-		__PAGE_KERNEL |= _PAGE_GLOBAL;
-		__PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
-	}
-
-	kernel_physical_mapping_init(pgd_base);
 	remap_numa_kva();
-
 	/*
 	 * Fixed mappings, only the page table structure has to be
 	 * created - mappings will be set by set_fixmap():
@@ -703,6 +721,7 @@ void __init setup_bootmem_allocator(void
 		free_bootmem_with_active_regions(i, max_low_pfn);
 	early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
 
+	after_init_bootmem = 1;
 }
 
 /*
@@ -723,6 +742,77 @@ static void __init remapped_pgdat_init(v
 	}
 }
 
+static void __init find_early_table_space(unsigned long end)
+{
+	unsigned long puds, pmds, tables, start;
+
+	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
+	tables = PAGE_ALIGN(puds * sizeof(pud_t));
+
+	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+	tables += PAGE_ALIGN(pmds * sizeof(pmd_t));
+
+	/*
+	 * RED-PEN putting page tables only on node 0 could
+	 * cause a hotspot and fill up ZONE_DMA. The page tables
+	 * need roughly 0.5KB per GB.
+	 */
+	start = 0x7000;
+	table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
+					tables, PAGE_SIZE);
+	if (table_start == -1UL)
+		panic("Cannot find space for the kernel page tables");
+
+	table_start >>= PAGE_SHIFT;
+	table_end = table_start;
+	table_top = table_start + (tables>>PAGE_SHIFT);
+
+	printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
+		end, table_start << PAGE_SHIFT,
+		(table_start << PAGE_SHIFT) + tables);
+}
+
+unsigned long __init_refok init_memory_mapping(unsigned long start,
+						unsigned long end)
+{
+	pgd_t *pgd_base = swapper_pg_dir;
+
+	/*
+	 * Find space for the kernel direct mapping tables.
+	 */
+	if (!after_init_bootmem)
+		find_early_table_space(end);
+
+#ifdef CONFIG_X86_PAE
+	set_nx();
+	if (nx_enabled)
+		printk(KERN_INFO "NX (Execute Disable) protection: active\n");
+#endif
+
+	/* Enable PSE if available */
+	if (cpu_has_pse)
+		set_in_cr4(X86_CR4_PSE);
+
+	/* Enable PGE if available */
+	if (cpu_has_pge) {
+		set_in_cr4(X86_CR4_PGE);
+		__PAGE_KERNEL |= _PAGE_GLOBAL;
+		__PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
+	}
+
+	kernel_physical_mapping_init(pgd_base, start, end);
+
+	load_cr3(swapper_pg_dir);
+
+	__flush_tlb_all();
+
+	if (!after_init_bootmem)
+		reserve_early(table_start << PAGE_SHIFT,
+				 table_end << PAGE_SHIFT, "PGTABLE");
+
+	return end >> PAGE_SHIFT;
+}
+
 /*
  * paging_init() sets up the page tables - note that the first 8MB are
  * already mapped by head.S.
@@ -732,15 +822,8 @@ static void __init remapped_pgdat_init(v
  */
 void __init paging_init(void)
 {
-#ifdef CONFIG_X86_PAE
-	set_nx();
-	if (nx_enabled)
-		printk(KERN_INFO "NX (Execute Disable) protection: active\n");
-#endif
 	pagetable_init();
 
-	load_cr3(swapper_pg_dir);
-
 	__flush_tlb_all();
 
 	kmap_init();
Index: xy/include/asm-x86/page_32.h
===================================================================
--- xy.orig/include/asm-x86/page_32.h
+++ xy/include/asm-x86/page_32.h
@@ -93,6 +93,8 @@ extern int sysctl_legacy_va_layout;
 #define MAXMEM			(-__PAGE_OFFSET - __VMALLOC_RESERVE)
 
 extern void find_low_pfn_range(void);
+extern unsigned long init_memory_mapping(unsigned long start,
+					 unsigned long end);
 extern void initmem_init(unsigned long, unsigned long);
 extern void zone_sizes_init(void);
 extern void setup_bootmem_allocator(void);

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH] x86: introduce init_memory_mapping for 32bit #2
  2008-06-24 19:18   ` [PATCH] x86: introduce init_memory_mapping for 32bit #1 Yinghai Lu
@ 2008-06-24 19:18     ` Yinghai Lu
  2008-06-24 19:19       ` [PATCH] x86: introduce init_memory_mapping for 32bit #3 Yinghai Lu
  0 siblings, 1 reply; 17+ messages in thread
From: Yinghai Lu @ 2008-06-24 19:18 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin; +Cc: linux-kernel@vger.kernel.org


Move relocate_initrd earlier.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>


---
 arch/x86/kernel/setup_32.c |   22 +++++++---------------
 arch/x86/mm/init_32.c      |    3 ---
 include/asm-x86/setup.h    |    1 -
 3 files changed, 7 insertions(+), 19 deletions(-)

Index: xy/arch/x86/kernel/setup_32.c
===================================================================
--- xy.orig/arch/x86/kernel/setup_32.c
+++ xy/arch/x86/kernel/setup_32.c
@@ -187,7 +187,7 @@ static inline void copy_edd(void)
 
 #ifdef CONFIG_BLK_DEV_INITRD
 
-static bool do_relocate_initrd = false;
+static void __init relocate_initrd(void);
 
 static void __init reserve_initrd(void)
 {
@@ -196,7 +196,6 @@ static void __init reserve_initrd(void)
 	u64 ramdisk_end   = ramdisk_image + ramdisk_size;
 	u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
 	u64 ramdisk_here;
-	u64 ramdisk_target;
 
 	if (!boot_params.hdr.type_of_loader ||
 	    !ramdisk_image || !ramdisk_size)
@@ -243,12 +242,12 @@ static void __init reserve_initrd(void)
 	printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
 			 ramdisk_here, ramdisk_here + ramdisk_size);
 
-	do_relocate_initrd = true;
+	relocate_initrd();
 }
 
 #define MAX_MAP_CHUNK	(NR_FIX_BTMAPS << PAGE_SHIFT)
 
-void __init post_reserve_initrd(void)
+static void __init relocate_initrd(void)
 {
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
@@ -257,9 +256,6 @@ void __init post_reserve_initrd(void)
 	unsigned long slop, clen, mapaddr;
 	char *p, *q;
 
-	if (!do_relocate_initrd)
-		return;
-
 	ramdisk_here = initrd_start - PAGE_OFFSET;
 
 	q = (char *)initrd_start;
@@ -270,10 +266,6 @@ void __init post_reserve_initrd(void)
 		p = (char *)__va(ramdisk_image);
 		memcpy(q, p, clen);
 		q += clen;
-		/* need to free these low pages...*/
-		printk(KERN_INFO "Freeing old partial RAMDISK %08llx-%08llx\n",
-			 ramdisk_image, ramdisk_image + clen - 1);
-		free_bootmem(ramdisk_image, clen);
 		ramdisk_image += clen;
 		ramdisk_size  -= clen;
 	}
@@ -299,16 +291,16 @@ void __init post_reserve_initrd(void)
 		ramdisk_image, ramdisk_image + ramdisk_size - 1,
 		ramdisk_here, ramdisk_here + ramdisk_size - 1);
 
-	/* need to free that, otherwise init highmem will reserve it again */
+	/*
+	 * need to free old one, otherwise init cross max_low_pfn could be
+	 * converted to bootmem
+	 */
 	free_early(ramdisk_image, ramdisk_image+ramdisk_size);
 }
 #else
 void __init reserve_initrd(void)
 {
 }
-void __init post_reserve_initrd(void)
-{
-}
 #endif /* CONFIG_BLK_DEV_INITRD */
 
 #ifdef CONFIG_MCA
Index: xy/arch/x86/mm/init_32.c
===================================================================
--- xy.orig/arch/x86/mm/init_32.c
+++ xy/arch/x86/mm/init_32.c
@@ -831,9 +831,6 @@ void __init paging_init(void)
 	/*
 	 * NOTE: at this point the bootmem allocator is fully available.
 	 */
-
-	post_reserve_initrd();
-
 	remapped_pgdat_init();
 	sparse_init();
 	zone_sizes_init();
Index: xy/include/asm-x86/setup.h
===================================================================
--- xy.orig/include/asm-x86/setup.h
+++ xy/include/asm-x86/setup.h
@@ -39,7 +39,6 @@ void reserve_crashkernel(void);
 #include <asm/bootparam.h>
 
 void reserve_standard_io_resources(void);
-extern void post_reserve_initrd(void);
 
 #ifndef _SETUP
 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH] x86: introduce init_memory_mapping for 32bit #3
  2008-06-24 19:18     ` [PATCH] x86: introduce init_memory_mapping for 32bit #2 Yinghai Lu
@ 2008-06-24 19:19       ` Yinghai Lu
  0 siblings, 0 replies; 17+ messages in thread
From: Yinghai Lu @ 2008-06-24 19:19 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin; +Cc: linux-kernel@vger.kernel.org


move kva-related early setup back to initmem_init for numa32

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>


---
 arch/x86/mm/discontig_32.c |    8 ++++++--
 arch/x86/mm/init_32.c      |   20 --------------------
 include/asm-x86/numa_32.h  |    5 -----
 3 files changed, 6 insertions(+), 27 deletions(-)

Index: xy/arch/x86/mm/discontig_32.c
===================================================================
--- xy.orig/arch/x86/mm/discontig_32.c
+++ xy/arch/x86/mm/discontig_32.c
@@ -200,7 +200,7 @@ void *alloc_remap(int nid, unsigned long
 	return allocation;
 }
 
-void __init remap_numa_kva(void)
+static void __init remap_numa_kva(void)
 {
 	void *vaddr;
 	unsigned long pfn;
@@ -373,12 +373,16 @@ void __init initmem_init(unsigned long s
 
 		allocate_pgdat(nid);
 	}
+	remap_numa_kva();
+
 	printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
 	for_each_online_node(nid)
 		propagate_e820_map_node(nid);
 
-	memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
+	for_each_online_node(nid)
+		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+
 	NODE_DATA(0)->bdata = &node0_bdata;
 	setup_bootmem_allocator();
 }
Index: xy/arch/x86/mm/init_32.c
===================================================================
--- xy.orig/arch/x86/mm/init_32.c
+++ xy/arch/x86/mm/init_32.c
@@ -449,7 +449,6 @@ static void __init pagetable_init(void)
 
 	paravirt_pagetable_setup_start(pgd_base);
 
-	remap_numa_kva();
 	/*
 	 * Fixed mappings, only the page table structure has to be
 	 * created - mappings will be set by set_fixmap():
@@ -724,24 +723,6 @@ void __init setup_bootmem_allocator(void
 	after_init_bootmem = 1;
 }
 
-/*
- * The node 0 pgdat is initialized before all of these because
- * it's needed for bootmem.  node>0 pgdats have their virtual
- * space allocated before the pagetables are in place to access
- * them, so they can't be cleared then.
- *
- * This should all compile down to nothing when NUMA is off.
- */
-static void __init remapped_pgdat_init(void)
-{
-	int nid;
-
-	for_each_online_node(nid) {
-		if (nid != 0)
-			memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
-	}
-}
-
 static void __init find_early_table_space(unsigned long end)
 {
 	unsigned long puds, pmds, tables, start;
@@ -831,7 +812,6 @@ void __init paging_init(void)
 	/*
 	 * NOTE: at this point the bootmem allocator is fully available.
 	 */
-	remapped_pgdat_init();
 	sparse_init();
 	zone_sizes_init();
 
Index: xy/include/asm-x86/numa_32.h
===================================================================
--- xy.orig/include/asm-x86/numa_32.h
+++ xy/include/asm-x86/numa_32.h
@@ -5,12 +5,7 @@ extern int pxm_to_nid(int pxm);
 extern void numa_remove_cpu(int cpu);
 
 #ifdef CONFIG_NUMA
-extern void __init remap_numa_kva(void);
 extern void set_highmem_pages_init(void);
-#else
-static inline void remap_numa_kva(void)
-{
-}
 #endif
 
 #endif /* _ASM_X86_32_NUMA_H */

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] x86: introduce init_memory_mapping for 32bit
  2008-06-24 11:30   ` Ingo Molnar
  2008-06-24 17:16     ` Yinghai Lu
@ 2008-06-24 19:22     ` Yinghai Lu
  2008-06-24 22:02       ` Yinghai Lu
  2008-06-25 14:45       ` Ingo Molnar
  1 sibling, 2 replies; 17+ messages in thread
From: Yinghai Lu @ 2008-06-24 19:22 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, H. Peter Anvin, linux-kernel@vger.kernel.org

On Tue, Jun 24, 2008 at 4:30 AM, Ingo Molnar <mingo@elte.hu> wrote:
>
> * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
>> so could use mem below max_low_pfn as early. could move several
>> function more early instead of waiting after paging_init including
>> moving relocate_initrd early, and kva related early done in
>> initmem_init
>
> applied to tip/x86/setup-memory - thanks Yinghai.
>
> a sidenote:
>
>>  6 files changed, 128 insertions(+), 74 deletions(-)
>
> this patch is too large - if it causes any problems it will not be very
> easy to figure out which exact change caused the problems.
>
> Lets hope it goes all fine - but in the future lets try doing
> more+smaller patches, especially if they change some known-dangerous
> area of the kernel.
>
> For example here a better splitup would have been to do 5 or more
> patches:
>
>  1) first introduce init_memory_mapping() [but dont use it anywhere]
>  2) add the init_memory_mapping() call to setup_arch()
>  3) move remap_numa_kva()
>  4) move relocate_initrd()
>  5) remove the now unnecessary setup from paging_init()
>
> ... or something like that. The point is to manage risk: if there's
> multiple problem areas that a change is touching, try to isolate them
> from each other and introduce the change gradually.
>
> The end result is still exactly the same, but much more
> reviewable/debuggable/bisectable.

split into 3 patches, please use them to replace the old one if you like.

YH

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] x86: introduce init_memory_mapping for 32bit
  2008-06-24 19:22     ` Yinghai Lu
@ 2008-06-24 22:02       ` Yinghai Lu
  2008-06-25 14:45       ` Ingo Molnar
  1 sibling, 0 replies; 17+ messages in thread
From: Yinghai Lu @ 2008-06-24 22:02 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, H. Peter Anvin, linux-kernel@vger.kernel.org

On Tue, Jun 24, 2008 at 12:22 PM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
> On Tue, Jun 24, 2008 at 4:30 AM, Ingo Molnar <mingo@elte.hu> wrote:
>>
>> * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>>
>>> so could use mem below max_low_pfn as early. could move several
>>> function more early instead of waiting after paging_init including
>>> moving relocate_initrd early, and kva related early done in
>>> initmem_init
>>
>> applied to tip/x86/setup-memory - thanks Yinghai.
>>
>> a sidenote:
>>
>>>  6 files changed, 128 insertions(+), 74 deletions(-)
>>
>> this patch is too large - if it causes any problems it will not be very
>> easy to figure out which exact change caused the problems.
>>
>> Lets hope it goes all fine - but in the future lets try doing
>> more+smaller patches, especially if they change some known-dangerous
>> area of the kernel.
>>
>> For example here a better splitup would have been to do 5 or more
>> patches:
>>
>>  1) first introduce init_memory_mapping() [but dont use it anywhere]
>>  2) add the init_memory_mapping() call to setup_arch()
>>  3) move remap_numa_kva()
>>  4) move relocate_initrd()
>>  5) remove the now unnecessary setup from paging_init()
>>
>> ... or something like that. The point is to manage risk: if there's
>> multiple problem areas that a change is touching, try to isolate them
>> from each other and introduce the change gradually.
>>
>> The end result is still exactly the same, but much more
>> reviewable/debuggable/bisectable.
>
> split into 3 patches, please use them to replace the old one if you like.
>
and
[PATCH] x86: fix e820_update_range size when overlapping
[PATCH] x86: get max_pfn_mapped in init_memory_mapping
[PATCH] x86: add table_top check for alloc_low_page in 64 bit
[PATCH] x86: change size if e820_update/remove_range

YH

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] x86: introduce init_memory_mapping for 32bit
  2008-06-24 19:22     ` Yinghai Lu
  2008-06-24 22:02       ` Yinghai Lu
@ 2008-06-25 14:45       ` Ingo Molnar
  1 sibling, 0 replies; 17+ messages in thread
From: Ingo Molnar @ 2008-06-25 14:45 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, linux-kernel@vger.kernel.org


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> On Tue, Jun 24, 2008 at 4:30 AM, Ingo Molnar <mingo@elte.hu> wrote:
> >
> > * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
> >
> >> so could use mem below max_low_pfn as early. could move several
> >> function more early instead of waiting after paging_init including
> >> moving relocate_initrd early, and kva related early done in
> >> initmem_init
> >
> > applied to tip/x86/setup-memory - thanks Yinghai.
> >
> > a sidenote:
> >
> >>  6 files changed, 128 insertions(+), 74 deletions(-)
> >
> > this patch is too large - if it causes any problems it will not be very
> > easy to figure out which exact change caused the problems.
> >
> > Lets hope it goes all fine - but in the future lets try doing
> > more+smaller patches, especially if they change some known-dangerous
> > area of the kernel.
> >
> > For example here a better splitup would have been to do 5 or more
> > patches:
> >
> >  1) first introduce init_memory_mapping() [but dont use it anywhere]
> >  2) add the init_memory_mapping() call to setup_arch()
> >  3) move remap_numa_kva()
> >  4) move relocate_initrd()
> >  5) remove the now unnecessary setup from paging_init()
> >
> > ... or something like that. The point is to manage risk: if there's
> > multiple problem areas that a change is touching, try to isolate them
> > from each other and introduce the change gradually.
> >
> > The end result is still exactly the same, but much more
> > reviewable/debuggable/bisectable.
> 
> split into 3 patches, please use them to replace the old one if you 
> like.

i've applied them to tip/x86/setup-memory, replacing the larger patch - 
thanks Yinghai.

	Ingo

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2008-06-25 14:46 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-06-24  2:51 [PATCH] x86: move some func calling from setup_arch to paging_init Yinghai Lu
2008-06-24  2:52 ` [PATCH] x86: setup_arch 64bit move efi_init calling early Yinghai Lu
2008-06-24  2:52   ` [PATCH] x86: setup_arch 64bit move kvmclock_init later Yinghai Lu
2008-06-24  2:53     ` [PATCH] x86: setup_arch 32bit move efi check later Yinghai Lu
2008-06-24  2:54       ` [PATCH] x86: setup_arch 32bit move command line copying early Yinghai Lu
2008-06-24  2:55         ` [PATCH] x86: setup_arch 32bit move kvm_guest_init later Yinghai Lu
2008-06-24 10:53           ` Ingo Molnar
2008-06-24 11:10             ` Yinghai Lu
2008-06-24 11:10 ` [PATCH] x86: introduce init_memory_mapping for 32bit Yinghai Lu
2008-06-24 11:30   ` Ingo Molnar
2008-06-24 17:16     ` Yinghai Lu
2008-06-24 19:22     ` Yinghai Lu
2008-06-24 22:02       ` Yinghai Lu
2008-06-25 14:45       ` Ingo Molnar
2008-06-24 19:18   ` [PATCH] x86: introduce init_memory_mapping for 32bit #1 Yinghai Lu
2008-06-24 19:18     ` [PATCH] x86: introduce init_memory_mapping for 32bit #2 Yinghai Lu
2008-06-24 19:19       ` [PATCH] x86: introduce init_memory_mapping for 32bit #3 Yinghai Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox