public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* Early memory patch, revised
@ 2004-02-24  5:10 H. Peter Anvin
  2004-02-25 18:26 ` Eric W. Biederman
  2004-02-26  6:16 ` Early memory patch, revised Eric W. Biederman
  0 siblings, 2 replies; 8+ messages in thread
From: H. Peter Anvin @ 2004-02-24  5:10 UTC (permalink / raw)
  To: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 378 bytes --]

Hi all,

This is the latest version of the i386 early memory cleanup patch.  It 
has the additional advantage that it removes some of the special casing 
for VISWS -- that part is still untested; if you have access to a VISWS 
machine, *please* test this out.

The main difference, other than the VISWS code, is that it now always sets 
up the GDT.  I agree with Eric that this is a lot cleaner.

	-hpa

[-- Attachment #2: earlymem-4.diff --]
[-- Type: text/plain, Size: 10553 bytes --]

===================================================================
RCS file: /home/hpa/kernel/bkcvs/linux-2.5/arch/i386/boot/tools/build.c,v
retrieving revision 1.4
diff -u -r1.4 build.c
--- linux-2.5/arch/i386/boot/tools/build.c	7 Mar 2003 15:39:16 -0000	1.4
+++ linux-2.5/arch/i386/boot/tools/build.c	21 Feb 2004 01:44:11 -0000
@@ -150,10 +150,8 @@
 	sz = sb.st_size;
 	fprintf (stderr, "System is %d kB\n", sz/1024);
 	sys_size = (sz + 15) / 16;
-	/* 0x40000*16 = 4.0 MB, reasonable estimate for the current maximum */
-	if (sys_size > (is_big_kernel ? 0x40000 : DEF_SYSSIZE))
-		die("System is too big. Try using %smodules.",
-			is_big_kernel ? "" : "bzImage or ");
+	if (!is_big_kernel && sys_size > DEF_SYSSIZE)
+		die("System is too big. Try using bzImage or modules.");
 	while (sz > 0) {
 		int l, n;
 
===================================================================
RCS file: /home/hpa/kernel/bkcvs/linux-2.5/arch/i386/kernel/head.S,v
retrieving revision 1.30
diff -u -r1.30 head.S
--- linux-2.5/arch/i386/kernel/head.S	19 Feb 2004 04:55:53 -0000	1.30
+++ linux-2.5/arch/i386/kernel/head.S	23 Feb 2004 01:07:18 -0000
@@ -39,42 +39,38 @@
 #define X86_CAPABILITY	CPU_PARAMS+12
 #define X86_VENDOR_ID	CPU_PARAMS+36	/* offset dependent on NCAPINTS */
 
-/*
- * Initialize page tables
+/* 
+ * This is how much memory *in addition to the memory covered up to
+ * and including _end* we need mapped initially.  We need one bit for
+ * each possible page, which currently means 2^36/4096/8 = 2 MB
+ * (64-bit-capable chips can do more, but if you have more than 64 GB
+ * of memory you *really* should be running a 64-bit kernel.  However,
+ * if this really bothers someone we could query this dynamically.)
+ *
+ * The other thing we may want to do dynamically in the future is to
+ * detect PSE and skip generating the PTEs.
+ *
+ * Modulo rounding, each megabyte assigned here requires a kilobyte of
+ * memory, which is currently unreclaimed.
+ *
+ * This should be a multiple of a page.
  */
-#define INIT_PAGE_TABLES \
-	movl $pg0 - __PAGE_OFFSET, %edi; \
-	/* "007" doesn't mean with license to kill, but	PRESENT+RW+USER */ \
-	movl $007, %eax; \
-2:	stosl; \
-	add $0x1000, %eax; \
-	cmp $empty_zero_page - __PAGE_OFFSET, %edi; \
-	jne 2b;
-
+#define INIT_MAP_BEYOND_END	(2*1024*1024)
+	
 /*
  * swapper_pg_dir is the main page directory, address 0x00101000
  *
- * On entry, %esi points to the real-mode code as a 32-bit pointer.
+ * On entry, %esi points to the real-mode code as a 32-bit pointer,
+ * and %bx is zero iff this is the boot CPU.
  */
 ENTRY(startup_32)
-
-#ifdef CONFIG_X86_VISWS
-/*
- * On SGI Visual Workstations boot CPU starts in protected mode.
- */
-	orw %bx, %bx
-	jnz 1f
-	INIT_PAGE_TABLES
-	movl $swapper_pg_dir - __PAGE_OFFSET, %eax
-	movl %eax, %cr3
-	lgdt boot_gdt
-1:
-#endif
-
 /*
- * Set segments to known values
+ * Set segments to known values.  Note that __BOOT_CS and __BOOT_DS
+ * must be the appropriate selectors; this is an entry condition to
+ * this function.
  */
 	cld
+	lgdt boot_gdt - __PAGE_OFFSET
 	movl $(__BOOT_DS),%eax
 	movl %eax,%ds
 	movl %eax,%es
@@ -107,7 +103,37 @@
 	jmp 3f
 1:
 #endif
-	INIT_PAGE_TABLES
+
+/*
+ * Initialize page tables.  This creates a PDE and a set of page
+ * tables, which are located immediately beyond _end.  The variable
+ * init_pg_tables_end is set up to point to the first "safe" location.
+ *
+ * Warning: don't use %ebx, %esi or the stack in this code!
+ */
+page_pde_offset = (__PAGE_OFFSET >> 20);
+	
+	movl $((_end-__PAGE_OFFSET) + 0xfff), %edi
+	andl $(~0xfff), %edi
+	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
+	movl $0x007, %eax	/* 0x007 = PRESENT+RW+USER */
+10:
+	leal 0x007(%edi),%ecx	/* Create PDE entry */
+	movl %ecx,(%edx)	/* Store identity PDE entry */
+	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
+	addl $4,%edx
+	movl $1024, %ecx
+11:
+	stosl
+	addl $0x1000,%eax
+	loop 11b
+	/* End condition: we must map up to and including INIT_MAP_BEYOND_END */
+	/* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
+	leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
+	cmpl %ebp,%eax
+	jb 10b
+	movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
+
 /*
  * Enable paging
  */
@@ -117,10 +143,7 @@
 	movl %cr0,%eax
 	orl $0x80000000,%eax
 	movl %eax,%cr0		/* ..and set paging (PG) bit */
-	jmp 1f			/* flush the prefetch-queue */
-1:
-	movl $1f,%eax
-	jmp *%eax		/* make sure eip is relocated */
+	ljmp $__BOOT_CS,$1f	/* Clear prefetch and normalize %eip */
 1:
 	/* Set up the stack pointer */
 	lss stack_start,%esp
@@ -142,8 +165,8 @@
 	movl $__bss_start,%edi
 	movl $__bss_stop,%ecx
 	subl %edi,%ecx
-	rep
-	stosb
+	shrl $2,%ecx
+	rep ; stosl
 
 /*
  * start system 32-bit setup. We need to re-do some of the things done
@@ -379,41 +402,20 @@
 	.fill NR_CPUS-1,8,0		# space for the other GDT descriptors
 
 /*
- * This is initialized to create an identity-mapping at 0-8M (for bootup
- * purposes) and another mapping of the 0-8M area at virtual address
- * PAGE_OFFSET.
+ * This is initialized to create an identity-mapping at 0-_end (for bootup
+ * purposes) and another mapping of the 0-_end area at virtual address
+ * PAGE_OFFSET.  The values put here should be all invalid (zero); the valid
+ * entries are created by the "Initialize page tables" code in startup_32.
  */
 .org 0x1000
 ENTRY(swapper_pg_dir)
-	.long 0x00102007
-	.long 0x00103007
-	.fill BOOT_USER_PGD_PTRS-2,4,0
-	/* default: 766 entries */
-	.long 0x00102007
-	.long 0x00103007
-	/* default: 254 entries */
-	.fill BOOT_KERNEL_PGD_PTRS-2,4,0
+	.fill 1024,4,0
 
-/*
- * The page tables are initialized to only 8MB here - the final page
- * tables are set up later depending on memory size.
- */
 .org 0x2000
-ENTRY(pg0)
-
-.org 0x3000
-ENTRY(pg1)
-
-/*
- * empty_zero_page must immediately follow the page tables ! (The
- * initialization loop counts until empty_zero_page)
- */
-
-.org 0x4000
 ENTRY(empty_zero_page)
+	.fill 4096,1,0
 
-.org 0x5000
-
+.org 0x3000
 /*
  * Real beginning of normal "text" segment
  */
@@ -428,19 +430,22 @@
 .data
 
 /*
- * The Global Descriptor Table contains 28 quadwords, per-CPU.
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_X86_VISWS)
-/*
  * The boot_gdt_table must mirror the equivalent in setup.S and is
- * used only by the trampoline for booting other CPUs
+ * used only for booting.
  */
+ENTRY(boot_gdt)
+	.word	__BOOT_DS + 7			# gdt limit
+	.long	boot_gdt_table-__PAGE_OFFSET	# gdt base
+
 	.align L1_CACHE_BYTES
 ENTRY(boot_gdt_table)
 	.fill GDT_ENTRY_BOOT_CS,8,0
 	.quad 0x00cf9a000000ffff	/* kernel 4GB code at 0x00000000 */
 	.quad 0x00cf92000000ffff	/* kernel 4GB data at 0x00000000 */
-#endif
+
+/*
+ * The Global Descriptor Table contains 28 quadwords, per-CPU.
+ */
 	.align L1_CACHE_BYTES
 ENTRY(cpu_gdt_table)
 	.quad 0x0000000000000000	/* NULL descriptor */
@@ -488,4 +493,3 @@
 #ifdef CONFIG_SMP
 	.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
 #endif
-
===================================================================
RCS file: /home/hpa/kernel/bkcvs/linux-2.5/arch/i386/kernel/setup.c,v
retrieving revision 1.107
diff -u -r1.107 setup.c
--- linux-2.5/arch/i386/kernel/setup.c	19 Feb 2004 04:45:13 -0000	1.107
+++ linux-2.5/arch/i386/kernel/setup.c	21 Feb 2004 04:44:02 -0000
@@ -50,6 +50,11 @@
 #include "setup_arch_pre.h"
 #include "mach_resources.h"
 
+/* This variable is set up by the early boot code to hold the *physical*
+   address immediately after the boot time page tables.  It must not be in
+   the .bss segment, because .bss is cleared after this value is stored! */
+unsigned long init_pg_tables_end __initdata = ~0UL;
+
 int disable_pse __initdata = 0;
 
 static inline char * __init machine_specific_memory_setup(void);
@@ -115,7 +120,6 @@
 extern void dmi_scan_machine(void);
 extern void generic_apic_probe(char *);
 extern int root_mountflags;
-extern char _end[];
 
 unsigned long saved_videomode;
 
@@ -785,7 +789,7 @@
 	 * partially used pages are not usable - thus
 	 * we are rounding upwards:
 	 */
-	start_pfn = PFN_UP(__pa(_end));
+	start_pfn = PFN_UP(init_pg_tables_end);
 
 	find_max_pfn();
 
@@ -1097,7 +1101,7 @@
 	init_mm.start_code = (unsigned long) _text;
 	init_mm.end_code = (unsigned long) _etext;
 	init_mm.end_data = (unsigned long) _edata;
-	init_mm.brk = (unsigned long) _end;
+	init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
 
 	code_resource.start = virt_to_phys(_text);
 	code_resource.end = virt_to_phys(_etext)-1;
===================================================================
RCS file: /home/hpa/kernel/bkcvs/linux-2.5/arch/i386/kernel/trampoline.S,v
retrieving revision 1.9
diff -u -r1.9 trampoline.S
--- linux-2.5/arch/i386/kernel/trampoline.S	26 May 2003 23:59:47 -0000	1.9
+++ linux-2.5/arch/i386/kernel/trampoline.S	23 Feb 2004 01:00:23 -0000
@@ -63,13 +63,5 @@
 	.word	0			# idt limit = 0
 	.word	0, 0			# idt base = 0L
 
-#
-# NOTE: here we actually use CPU#0's GDT - but that is OK, we reload
-# the proper GDT shortly after booting up the secondary CPUs.
-#
-ENTRY(boot_gdt)
-	.word	__BOOT_DS + 7			# gdt limit
-	.long	boot_gdt_table-__PAGE_OFFSET	# gdt base = gdt (first SMP CPU)
-
 .globl trampoline_end
 trampoline_end:
===================================================================
RCS file: /home/hpa/kernel/bkcvs/linux-2.5/arch/i386/kernel/vmlinux.lds.S,v
retrieving revision 1.2
diff -u -r1.2 vmlinux.lds.S
--- linux-2.5/arch/i386/kernel/vmlinux.lds.S	18 Aug 2003 18:17:01 -0000	1.2
+++ linux-2.5/arch/i386/kernel/vmlinux.lds.S	21 Feb 2004 05:03:08 -0000
@@ -105,6 +105,7 @@
 	
   __bss_start = .;		/* BSS */
   .bss : { *(.bss) }
+  . = ALIGN(4);
   __bss_stop = .; 
 
   _end = . ;
===================================================================
RCS file: /home/hpa/kernel/bkcvs/linux-2.5/arch/i386/mm/discontig.c,v
retrieving revision 1.16
diff -u -r1.16 discontig.c
--- linux-2.5/arch/i386/mm/discontig.c	21 Sep 2003 22:39:20 -0000	1.16
+++ linux-2.5/arch/i386/mm/discontig.c	21 Feb 2004 01:48:30 -0000
@@ -66,7 +66,7 @@
 extern void one_highpage_init(struct page *, int, int);
 
 extern struct e820map e820;
-extern char _end;
+extern unsigned long init_pg_tables_end;
 extern unsigned long highend_pfn, highstart_pfn;
 extern unsigned long max_low_pfn;
 extern unsigned long totalram_pages;
@@ -237,7 +237,7 @@
 	reserve_pages = calculate_numa_remap_pages();
 
 	/* partially used pages are not usable - thus round upwards */
-	system_start_pfn = min_low_pfn = PFN_UP(__pa(&_end));
+	system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
 
 	find_max_pfn();
 	system_max_low_pfn = max_low_pfn = find_max_low_pfn();

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2004-03-01  7:15 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-02-24  5:10 Early memory patch, revised H. Peter Anvin
2004-02-25 18:26 ` Eric W. Biederman
2004-02-25 18:38   ` H. Peter Anvin
2004-02-25 18:49   ` H. Peter Anvin
2004-02-26 14:56     ` [PATCH] Early memory patch, +accounting Eric W. Biederman
2004-02-26 18:02       ` H. Peter Anvin
2004-03-01  7:06         ` Eric W. Biederman
2004-02-26  6:16 ` Early memory patch, revised Eric W. Biederman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox