* [PATCH] x86: make e820.c to have common functions
@ 2008-05-11 7:30 Yinghai Lu
2008-05-13 13:05 ` Ingo Molnar
2008-05-18 8:18 ` [PATCH] x86: extend e820 ealy_res support 32bit Yinghai Lu
0 siblings, 2 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-05-11 7:30 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
Cc: linux-kernel@vger.kernel.org
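Move the e820 functions shared by the 32-bit and 64-bit code into a new common file, arch/x86/kernel/e820.c, and remove the duplicated copies from e820_32.c and e820_64.c.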
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
arch/x86/kernel/Makefile | 2
arch/x86/kernel/e820.c | 475 +++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kernel/e820_32.c | 411 --------------------------------------
arch/x86/kernel/e820_64.c | 444 ------------------------------------------
arch/x86/kernel/setup_32.c | 2
include/asm-x86/e820.h | 14 +
include/asm-x86/e820_32.h | 12 -
include/asm-x86/e820_64.h | 12 -
8 files changed, 496 insertions(+), 876 deletions(-)
Index: linux-2.6/arch/x86/kernel/Makefile
===================================================================
--- linux-2.6.orig/arch/x86/kernel/Makefile
+++ linux-2.6/arch/x86/kernel/Makefile
@@ -31,7 +31,7 @@ obj-y += setup_$(BITS).o i8259_$(BITS)
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o
-obj-y += bootflag.o e820_$(BITS).o
+obj-y += bootflag.o e820_$(BITS).o e820.o
obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
obj-y += alternative.o i8253.o pci-nommu.o
obj-$(CONFIG_X86_64) += bugs_64.o
Index: linux-2.6/arch/x86/kernel/e820_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_32.c
+++ linux-2.6/arch/x86/kernel/e820_32.c
@@ -16,21 +16,6 @@
#include <asm/e820.h>
#include <asm/setup.h>
-struct e820map e820;
-struct change_member {
- struct e820entry *pbios; /* pointer to original bios entry */
- unsigned long long addr; /* address for this change point */
-};
-static struct change_member change_point_list[2*E820MAX] __initdata;
-static struct change_member *change_point[2*E820MAX] __initdata;
-static struct e820entry *overlap_list[E820MAX] __initdata;
-static struct e820entry new_bios[E820MAX] __initdata;
-/* For PCI or other memory-mapped resources */
-unsigned long pci_mem_start = 0x10000000;
-#ifdef CONFIG_PCI
-EXPORT_SYMBOL(pci_mem_start);
-#endif
-
static struct resource system_rom_resource = {
.name = "System ROM",
.start = 0xf0000,
@@ -254,223 +239,6 @@ void __init e820_mark_nosave_regions(voi
}
#endif
-void __init add_memory_region(unsigned long long start,
- unsigned long long size, int type)
-{
- int x;
-
- x = e820.nr_map;
-
- if (x == E820MAX) {
- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
- return;
- }
-
- e820.map[x].addr = start;
- e820.map[x].size = size;
- e820.map[x].type = type;
- e820.nr_map++;
-} /* add_memory_region */
-
-/*
- * Sanitize the BIOS e820 map.
- *
- * Some e820 responses include overlapping entries. The following
- * replaces the original e820 map with a new one, removing overlaps.
- *
- */
-int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
-{
- struct change_member *change_tmp;
- unsigned long current_type, last_type;
- unsigned long long last_addr;
- int chgidx, still_changing;
- int overlap_entries;
- int new_bios_entry;
- int old_nr, new_nr, chg_nr;
- int i;
-
- /*
- Visually we're performing the following (1,2,3,4 = memory types)...
-
- Sample memory map (w/overlaps):
- ____22__________________
- ______________________4_
- ____1111________________
- _44_____________________
- 11111111________________
- ____________________33__
- ___________44___________
- __________33333_________
- ______________22________
- ___________________2222_
- _________111111111______
- _____________________11_
- _________________4______
-
- Sanitized equivalent (no overlap):
- 1_______________________
- _44_____________________
- ___1____________________
- ____22__________________
- ______11________________
- _________1______________
- __________3_____________
- ___________44___________
- _____________33_________
- _______________2________
- ________________1_______
- _________________4______
- ___________________2____
- ____________________33__
- ______________________4_
- */
- /* if there's only one memory region, don't bother */
- if (*pnr_map < 2) {
- return -1;
- }
-
- old_nr = *pnr_map;
-
- /* bail out if we find any unreasonable addresses in bios map */
- for (i=0; i<old_nr; i++)
- if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
- return -1;
- }
-
- /* create pointers for initial change-point information (for sorting) */
- for (i=0; i < 2*old_nr; i++)
- change_point[i] = &change_point_list[i];
-
- /* record all known change-points (starting and ending addresses),
- omitting those that are for empty memory regions */
- chgidx = 0;
- for (i=0; i < old_nr; i++) {
- if (biosmap[i].size != 0) {
- change_point[chgidx]->addr = biosmap[i].addr;
- change_point[chgidx++]->pbios = &biosmap[i];
- change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
- change_point[chgidx++]->pbios = &biosmap[i];
- }
- }
- chg_nr = chgidx; /* true number of change-points */
-
- /* sort change-point list by memory addresses (low -> high) */
- still_changing = 1;
- while (still_changing) {
- still_changing = 0;
- for (i=1; i < chg_nr; i++) {
- /* if <current_addr> > <last_addr>, swap */
- /* or, if current=<start_addr> & last=<end_addr>, swap */
- if ((change_point[i]->addr < change_point[i-1]->addr) ||
- ((change_point[i]->addr == change_point[i-1]->addr) &&
- (change_point[i]->addr == change_point[i]->pbios->addr) &&
- (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
- )
- {
- change_tmp = change_point[i];
- change_point[i] = change_point[i-1];
- change_point[i-1] = change_tmp;
- still_changing=1;
- }
- }
- }
-
- /* create a new bios memory map, removing overlaps */
- overlap_entries=0; /* number of entries in the overlap table */
- new_bios_entry=0; /* index for creating new bios map entries */
- last_type = 0; /* start with undefined memory type */
- last_addr = 0; /* start with 0 as last starting address */
- /* loop through change-points, determining affect on the new bios map */
- for (chgidx=0; chgidx < chg_nr; chgidx++)
- {
- /* keep track of all overlapping bios entries */
- if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
- {
- /* add map entry to overlap list (> 1 entry implies an overlap) */
- overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
- }
- else
- {
- /* remove entry from list (order independent, so swap with last) */
- for (i=0; i<overlap_entries; i++)
- {
- if (overlap_list[i] == change_point[chgidx]->pbios)
- overlap_list[i] = overlap_list[overlap_entries-1];
- }
- overlap_entries--;
- }
- /* if there are overlapping entries, decide which "type" to use */
- /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
- current_type = 0;
- for (i=0; i<overlap_entries; i++)
- if (overlap_list[i]->type > current_type)
- current_type = overlap_list[i]->type;
- /* continue building up new bios map based on this information */
- if (current_type != last_type) {
- if (last_type != 0) {
- new_bios[new_bios_entry].size =
- change_point[chgidx]->addr - last_addr;
- /* move forward only if the new size was non-zero */
- if (new_bios[new_bios_entry].size != 0)
- if (++new_bios_entry >= E820MAX)
- break; /* no more space left for new bios entries */
- }
- if (current_type != 0) {
- new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
- new_bios[new_bios_entry].type = current_type;
- last_addr=change_point[chgidx]->addr;
- }
- last_type = current_type;
- }
- }
- new_nr = new_bios_entry; /* retain count for new bios entries */
-
- /* copy new bios mapping into original location */
- memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
- *pnr_map = new_nr;
-
- return 0;
-}
-
-/*
- * Copy the BIOS e820 map into a safe place.
- *
- * Sanity-check it while we're at it..
- *
- * If we're lucky and live on a modern system, the setup code
- * will have given us a memory map that we can use to properly
- * set up memory. If we aren't, we'll fake a memory map.
- *
- * We check to see that the memory map contains at least 2 elements
- * before we'll use it, because the detection code in setup.S may
- * not be perfect and most every PC known to man has two memory
- * regions: one from 0 to 640k, and one from 1mb up. (The IBM
- * thinkpad 560x, for example, does not cooperate with the memory
- * detection code.)
- */
-int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
-{
- /* Only one memory region (or negative)? Ignore it */
- if (nr_map < 2)
- return -1;
-
- do {
- u64 start = biosmap->addr;
- u64 size = biosmap->size;
- u64 end = start + size;
- u32 type = biosmap->type;
-
- /* Overflow in 64 bits? Ignore the memory map. */
- if (start > end)
- return -1;
-
- add_memory_region(start, size, type);
- } while (biosmap++, --nr_map);
-
- return 0;
-}
-
/*
* Find the highest page frame number we have available
*/
@@ -535,86 +303,12 @@ void __init register_bootmem_low_pages(u
}
}
-void __init e820_register_memory(void)
-{
- unsigned long gapstart, gapsize, round;
- unsigned long long last;
- int i;
-
- /*
- * Search for the biggest gap in the low 32 bits of the e820
- * memory space.
- */
- last = 0x100000000ull;
- gapstart = 0x10000000;
- gapsize = 0x400000;
- i = e820.nr_map;
- while (--i >= 0) {
- unsigned long long start = e820.map[i].addr;
- unsigned long long end = start + e820.map[i].size;
-
- /*
- * Since "last" is at most 4GB, we know we'll
- * fit in 32 bits if this condition is true
- */
- if (last > end) {
- unsigned long gap = last - end;
-
- if (gap > gapsize) {
- gapsize = gap;
- gapstart = end;
- }
- }
- if (start < last)
- last = start;
- }
-
- /*
- * See how much we want to round up: start off with
- * rounding to the next 1MB area.
- */
- round = 0x100000;
- while ((gapsize >> 4) > round)
- round += round;
- /* Fun with two's complement */
- pci_mem_start = (gapstart + round) & -round;
-
- printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
- pci_mem_start, gapstart, gapsize);
-}
-
-static void __init print_memory_map(char *who)
-{
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- printk(" %s: %016Lx - %016Lx ", who,
- e820.map[i].addr,
- e820.map[i].addr + e820.map[i].size);
- switch (e820.map[i].type) {
- case E820_RAM: printk("(usable)\n");
- break;
- case E820_RESERVED:
- printk("(reserved)\n");
- break;
- case E820_ACPI:
- printk("(ACPI data)\n");
- break;
- case E820_NVS:
- printk("(ACPI NVS)\n");
- break;
- default: printk("type %u\n", e820.map[i].type);
- break;
- }
- }
-}
-
void __init limit_regions(unsigned long long size)
{
unsigned long long current_addr;
int i;
- print_memory_map("limit_regions start");
+ e820_print_map("limit_regions start");
for (i = 0; i < e820.nr_map; i++) {
current_addr = e820.map[i].addr + e820.map[i].size;
if (current_addr < size)
@@ -633,62 +327,10 @@ void __init limit_regions(unsigned long
e820.nr_map = i + 1;
e820.map[i].size -= current_addr - size;
}
- print_memory_map("limit_regions endfor");
+ e820_print_map("limit_regions endfor");
return;
}
- print_memory_map("limit_regions endfunc");
-}
-
-/*
- * This function checks if any part of the range <start,end> is mapped
- * with type.
- */
-int
-e820_any_mapped(u64 start, u64 end, unsigned type)
-{
- int i;
- for (i = 0; i < e820.nr_map; i++) {
- const struct e820entry *ei = &e820.map[i];
- if (type && ei->type != type)
- continue;
- if (ei->addr >= end || ei->addr + ei->size <= start)
- continue;
- return 1;
- }
- return 0;
-}
-EXPORT_SYMBOL_GPL(e820_any_mapped);
-
- /*
- * This function checks if the entire range <start,end> is mapped with type.
- *
- * Note: this function only works correct if the e820 table is sorted and
- * not-overlapping, which is the case
- */
-int __init
-e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
-{
- u64 start = s;
- u64 end = e;
- int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- if (type && ei->type != type)
- continue;
- /* is the region (part) in overlap with the current region ?*/
- if (ei->addr >= end || ei->addr + ei->size <= start)
- continue;
- /* if the region is at the beginning of <start,end> we move
- * start to the end of the region since it's ok until there
- */
- if (ei->addr <= start)
- start = ei->addr + ei->size;
- /* if start is now at or beyond end, we're done, full
- * coverage */
- if (start >= end)
- return 1; /* we're done */
- }
- return 0;
+ e820_print_map("limit_regions endfunc");
}
/* Overridden in paravirt.c if CONFIG_PARAVIRT */
@@ -700,7 +342,7 @@ char * __init __attribute__((weak)) memo
void __init setup_memory_map(void)
{
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
- print_memory_map(memory_setup());
+ e820_print_map(memory_setup());
}
static int __initdata user_defined_memmap;
@@ -783,55 +425,12 @@ static int __init parse_memmap(char *arg
return 0;
}
early_param("memmap", parse_memmap);
-u64 __init update_memory_range(u64 start, u64 size, unsigned old_type,
- unsigned new_type)
-{
- int i;
- u64 real_updated_size = 0;
-
- BUG_ON(old_type == new_type);
-
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- u64 final_start, final_end;
- if (ei->type != old_type)
- continue;
- /* totally covered? */
- if (ei->addr >= start &&
- (ei->addr + ei->size) <= (start + size)) {
- ei->type = new_type;
- real_updated_size += ei->size;
- continue;
- }
- /* partially covered */
- final_start = max(start, ei->addr);
- final_end = min(start + size, ei->addr + ei->size);
- if (final_start >= final_end)
- continue;
- add_memory_region(final_start, final_end - final_start,
- new_type);
- real_updated_size += final_end - final_start;
- }
-
- return real_updated_size;
-}
void __init finish_e820_parsing(void)
{
if (user_defined_memmap) {
printk(KERN_INFO "user-defined physical RAM map:\n");
- print_memory_map("user");
+ e820_print_map("user");
}
}
-void __init update_e820(void)
-{
- u8 nr_map;
-
- nr_map = e820.nr_map;
- if (sanitize_e820_map(e820.map, &nr_map))
- return;
- e820.nr_map = nr_map;
- printk(KERN_INFO "modified physical RAM map:\n");
- print_memory_map("modified");
-}
Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6/arch/x86/kernel/e820_64.c
@@ -30,8 +30,6 @@
#include <asm/kdebug.h>
#include <asm/trampoline.h>
-struct e820map e820;
-
/*
* PFN of last memory page.
*/
@@ -177,62 +175,6 @@ again:
}
return changed;
}
-/*
- * This function checks if any part of the range <start,end> is mapped
- * with type.
- */
-int
-e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
-{
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
-
- if (type && ei->type != type)
- continue;
- if (ei->addr >= end || ei->addr + ei->size <= start)
- continue;
- return 1;
- }
- return 0;
-}
-EXPORT_SYMBOL_GPL(e820_any_mapped);
-
-/*
- * This function checks if the entire range <start,end> is mapped with type.
- *
- * Note: this function only works correct if the e820 table is sorted and
- * not-overlapping, which is the case
- */
-int __init e820_all_mapped(unsigned long start, unsigned long end,
- unsigned type)
-{
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
-
- if (type && ei->type != type)
- continue;
- /* is the region (part) in overlap with the current region ?*/
- if (ei->addr >= end || ei->addr + ei->size <= start)
- continue;
-
- /* if the region is at the beginning of <start,end> we move
- * start to the end of the region since it's ok until there
- */
- if (ei->addr <= start)
- start = ei->addr + ei->size;
- /*
- * if start is now at or beyond end, we're done, full
- * coverage
- */
- if (start >= end)
- return 1;
- }
- return 0;
-}
/*
* Find a free area with specified alignment in a specific range.
@@ -436,24 +378,6 @@ e820_register_active_regions(int nid, un
}
/*
- * Add a memory region to the kernel e820 map.
- */
-void __init add_memory_region(unsigned long start, unsigned long size, int type)
-{
- int x = e820.nr_map;
-
- if (x == E820MAX) {
- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
- return;
- }
-
- e820.map[x].addr = start;
- e820.map[x].size = size;
- e820.map[x].type = type;
- e820.nr_map++;
-}
-
-/*
* Find the hole size (in bytes) in the memory range.
* @start: starting address of the memory range to scan
* @end: ending address of the memory range to scan
@@ -474,266 +398,6 @@ unsigned long __init e820_hole_size(unsi
return end - start - (ram << PAGE_SHIFT);
}
-static void __init e820_print_map(char *who)
-{
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
- (unsigned long long) e820.map[i].addr,
- (unsigned long long)
- (e820.map[i].addr + e820.map[i].size));
- switch (e820.map[i].type) {
- case E820_RAM:
- printk(KERN_CONT "(usable)\n");
- break;
- case E820_RESERVED:
- printk(KERN_CONT "(reserved)\n");
- break;
- case E820_ACPI:
- printk(KERN_CONT "(ACPI data)\n");
- break;
- case E820_NVS:
- printk(KERN_CONT "(ACPI NVS)\n");
- break;
- default:
- printk(KERN_CONT "type %u\n", e820.map[i].type);
- break;
- }
- }
-}
-
-/*
- * Sanitize the BIOS e820 map.
- *
- * Some e820 responses include overlapping entries. The following
- * replaces the original e820 map with a new one, removing overlaps.
- *
- */
-static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
-{
- struct change_member {
- struct e820entry *pbios; /* pointer to original bios entry */
- unsigned long long addr; /* address for this change point */
- };
- static struct change_member change_point_list[2*E820MAX] __initdata;
- static struct change_member *change_point[2*E820MAX] __initdata;
- static struct e820entry *overlap_list[E820MAX] __initdata;
- static struct e820entry new_bios[E820MAX] __initdata;
- struct change_member *change_tmp;
- unsigned long current_type, last_type;
- unsigned long long last_addr;
- int chgidx, still_changing;
- int overlap_entries;
- int new_bios_entry;
- int old_nr, new_nr, chg_nr;
- int i;
-
- /*
- Visually we're performing the following
- (1,2,3,4 = memory types)...
-
- Sample memory map (w/overlaps):
- ____22__________________
- ______________________4_
- ____1111________________
- _44_____________________
- 11111111________________
- ____________________33__
- ___________44___________
- __________33333_________
- ______________22________
- ___________________2222_
- _________111111111______
- _____________________11_
- _________________4______
-
- Sanitized equivalent (no overlap):
- 1_______________________
- _44_____________________
- ___1____________________
- ____22__________________
- ______11________________
- _________1______________
- __________3_____________
- ___________44___________
- _____________33_________
- _______________2________
- ________________1_______
- _________________4______
- ___________________2____
- ____________________33__
- ______________________4_
- */
-
- /* if there's only one memory region, don't bother */
- if (*pnr_map < 2)
- return -1;
-
- old_nr = *pnr_map;
-
- /* bail out if we find any unreasonable addresses in bios map */
- for (i = 0; i < old_nr; i++)
- if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
- return -1;
-
- /* create pointers for initial change-point information (for sorting) */
- for (i = 0; i < 2 * old_nr; i++)
- change_point[i] = &change_point_list[i];
-
- /* record all known change-points (starting and ending addresses),
- omitting those that are for empty memory regions */
- chgidx = 0;
- for (i = 0; i < old_nr; i++) {
- if (biosmap[i].size != 0) {
- change_point[chgidx]->addr = biosmap[i].addr;
- change_point[chgidx++]->pbios = &biosmap[i];
- change_point[chgidx]->addr = biosmap[i].addr +
- biosmap[i].size;
- change_point[chgidx++]->pbios = &biosmap[i];
- }
- }
- chg_nr = chgidx;
-
- /* sort change-point list by memory addresses (low -> high) */
- still_changing = 1;
- while (still_changing) {
- still_changing = 0;
- for (i = 1; i < chg_nr; i++) {
- unsigned long long curaddr, lastaddr;
- unsigned long long curpbaddr, lastpbaddr;
-
- curaddr = change_point[i]->addr;
- lastaddr = change_point[i - 1]->addr;
- curpbaddr = change_point[i]->pbios->addr;
- lastpbaddr = change_point[i - 1]->pbios->addr;
-
- /*
- * swap entries, when:
- *
- * curaddr > lastaddr or
- * curaddr == lastaddr and curaddr == curpbaddr and
- * lastaddr != lastpbaddr
- */
- if (curaddr < lastaddr ||
- (curaddr == lastaddr && curaddr == curpbaddr &&
- lastaddr != lastpbaddr)) {
- change_tmp = change_point[i];
- change_point[i] = change_point[i-1];
- change_point[i-1] = change_tmp;
- still_changing = 1;
- }
- }
- }
-
- /* create a new bios memory map, removing overlaps */
- overlap_entries = 0; /* number of entries in the overlap table */
- new_bios_entry = 0; /* index for creating new bios map entries */
- last_type = 0; /* start with undefined memory type */
- last_addr = 0; /* start with 0 as last starting address */
-
- /* loop through change-points, determining affect on the new bios map */
- for (chgidx = 0; chgidx < chg_nr; chgidx++) {
- /* keep track of all overlapping bios entries */
- if (change_point[chgidx]->addr ==
- change_point[chgidx]->pbios->addr) {
- /*
- * add map entry to overlap list (> 1 entry
- * implies an overlap)
- */
- overlap_list[overlap_entries++] =
- change_point[chgidx]->pbios;
- } else {
- /*
- * remove entry from list (order independent,
- * so swap with last)
- */
- for (i = 0; i < overlap_entries; i++) {
- if (overlap_list[i] ==
- change_point[chgidx]->pbios)
- overlap_list[i] =
- overlap_list[overlap_entries-1];
- }
- overlap_entries--;
- }
- /*
- * if there are overlapping entries, decide which
- * "type" to use (larger value takes precedence --
- * 1=usable, 2,3,4,4+=unusable)
- */
- current_type = 0;
- for (i = 0; i < overlap_entries; i++)
- if (overlap_list[i]->type > current_type)
- current_type = overlap_list[i]->type;
- /*
- * continue building up new bios map based on this
- * information
- */
- if (current_type != last_type) {
- if (last_type != 0) {
- new_bios[new_bios_entry].size =
- change_point[chgidx]->addr - last_addr;
- /*
- * move forward only if the new size
- * was non-zero
- */
- if (new_bios[new_bios_entry].size != 0)
- /*
- * no more space left for new
- * bios entries ?
- */
- if (++new_bios_entry >= E820MAX)
- break;
- }
- if (current_type != 0) {
- new_bios[new_bios_entry].addr =
- change_point[chgidx]->addr;
- new_bios[new_bios_entry].type = current_type;
- last_addr = change_point[chgidx]->addr;
- }
- last_type = current_type;
- }
- }
- /* retain count for new bios entries */
- new_nr = new_bios_entry;
-
- /* copy new bios mapping into original location */
- memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
- *pnr_map = new_nr;
-
- return 0;
-}
-
-/*
- * Copy the BIOS e820 map into a safe place.
- *
- * Sanity-check it while we're at it..
- *
- * If we're lucky and live on a modern system, the setup code
- * will have given us a memory map that we can use to properly
- * set up memory. If we aren't, we'll fake a memory map.
- */
-static int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
-{
- /* Only one memory region (or negative)? Ignore it */
- if (nr_map < 2)
- return -1;
-
- do {
- u64 start = biosmap->addr;
- u64 size = biosmap->size;
- u64 end = start + size;
- u32 type = biosmap->type;
-
- /* Overflow in 64 bits? Ignore the memory map. */
- if (start > end)
- return -1;
-
- add_memory_region(start, size, type);
- } while (biosmap++, --nr_map);
- return 0;
-}
-
static void early_panic(char *msg)
{
early_printk(msg);
@@ -830,114 +494,6 @@ void __init finish_e820_parsing(void)
}
}
-u64 __init update_memory_range(u64 start, u64 size, unsigned old_type,
- unsigned new_type)
-{
- int i;
- u64 real_updated_size = 0;
-
- BUG_ON(old_type == new_type);
-
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- u64 final_start, final_end;
- if (ei->type != old_type)
- continue;
- /* totally covered? */
- if (ei->addr >= start &&
- (ei->addr + ei->size) <= (start + size)) {
- ei->type = new_type;
- real_updated_size += ei->size;
- continue;
- }
- /* partially covered */
- final_start = max(start, ei->addr);
- final_end = min(start + size, ei->addr + ei->size);
- if (final_start >= final_end)
- continue;
- add_memory_region(final_start, final_end - final_start,
- new_type);
- real_updated_size += final_end - final_start;
- }
- return real_updated_size;
-}
-
-void __init update_e820(void)
-{
- u8 nr_map;
-
- nr_map = e820.nr_map;
- if (sanitize_e820_map(e820.map, &nr_map))
- return;
- e820.nr_map = nr_map;
- printk(KERN_INFO "modified physical RAM map:\n");
- e820_print_map("modified");
-}
-
-unsigned long pci_mem_start = 0xaeedbabe;
-EXPORT_SYMBOL(pci_mem_start);
-
-/*
- * Search for the biggest gap in the low 32 bits of the e820
- * memory space. We pass this space to PCI to assign MMIO resources
- * for hotplug or unconfigured devices in.
- * Hopefully the BIOS let enough space left.
- */
-__init void e820_setup_gap(void)
-{
- unsigned long gapstart, gapsize, round;
- unsigned long last;
- int i;
- int found = 0;
-
- last = 0x100000000ull;
- gapstart = 0x10000000;
- gapsize = 0x400000;
- i = e820.nr_map;
- while (--i >= 0) {
- unsigned long long start = e820.map[i].addr;
- unsigned long long end = start + e820.map[i].size;
-
- /*
- * Since "last" is at most 4GB, we know we'll
- * fit in 32 bits if this condition is true
- */
- if (last > end) {
- unsigned long gap = last - end;
-
- if (gap > gapsize) {
- gapsize = gap;
- gapstart = end;
- found = 1;
- }
- }
- if (start < last)
- last = start;
- }
-
- if (!found) {
- gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
- printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
- "address range\n"
- KERN_ERR "PCI: Unassigned devices with 32bit resource "
- "registers may break!\n");
- }
-
- /*
- * See how much we want to round up: start off with
- * rounding to the next 1MB area.
- */
- round = 0x100000;
- while ((gapsize >> 4) > round)
- round += round;
- /* Fun with two's complement */
- pci_mem_start = (gapstart + round) & -round;
-
- printk(KERN_INFO
- "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
- pci_mem_start, gapstart, gapsize);
-}
-
int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
{
int i;
Index: linux-2.6/include/asm-x86/e820.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820.h
+++ linux-2.6/include/asm-x86/e820.h
@@ -20,6 +20,20 @@ struct e820map {
__u32 nr_map;
struct e820entry map[E820MAX];
};
+
+extern struct e820map e820;
+
+extern int e820_any_mapped(u64 start, u64 end, unsigned type);
+extern int e820_all_mapped(u64 start, u64 end, unsigned type);
+extern void add_memory_region(u64 start, u64 size, int type);
+extern void e820_print_map(char *who);
+extern int sanitize_e820_map(struct e820entry *biosmap, char *pnr_map);
+extern int copy_e820_map(struct e820entry *biosmap, int nr_map);
+extern u64 update_memory_range(u64 start, u64 size, unsigned old_type,
+ unsigned new_type);
+extern void update_e820(void);
+extern void e820_setup_gap(void);
+
#endif /* __ASSEMBLY__ */
#define ISA_START_ADDRESS 0xa0000
Index: linux-2.6/include/asm-x86/e820_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_32.h
+++ linux-2.6/include/asm-x86/e820_32.h
@@ -21,19 +21,8 @@
extern void setup_memory_map(void);
extern void finish_e820_parsing(void);
-extern struct e820map e820;
-extern void update_e820(void);
-
-extern int e820_all_mapped(unsigned long start, unsigned long end,
- unsigned type);
-extern int e820_any_mapped(u64 start, u64 end, unsigned type);
extern void propagate_e820_map(void);
extern void register_bootmem_low_pages(unsigned long max_low_pfn);
-extern void add_memory_region(unsigned long long start,
- unsigned long long size, int type);
-extern u64 update_memory_range(u64 start, u64 size, unsigned old_type,
- unsigned new_type);
-extern void e820_register_memory(void);
extern void limit_regions(unsigned long long size);
extern void init_iomem_resources(struct resource *code_resource,
struct resource *data_resource,
@@ -47,6 +36,5 @@ static inline void e820_mark_nosave_regi
}
#endif
-
#endif/*!__ASSEMBLY__*/
#endif/*__E820_HEADER*/
Index: linux-2.6/include/asm-x86/e820_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_64.h
+++ linux-2.6/include/asm-x86/e820_64.h
@@ -19,34 +19,22 @@ extern unsigned long find_e820_area(unsi
extern unsigned long find_e820_area_size(unsigned long start,
unsigned long *sizep,
unsigned long align);
-extern void add_memory_region(unsigned long start, unsigned long size,
- int type);
-extern u64 update_memory_range(u64 start, u64 size, unsigned old_type,
- unsigned new_type);
extern void setup_memory_region(void);
extern void contig_e820_setup(void);
extern unsigned long e820_end_of_ram(void);
extern void e820_reserve_resources(void);
extern void e820_mark_nosave_regions(void);
-extern int e820_any_mapped(unsigned long start, unsigned long end,
- unsigned type);
-extern int e820_all_mapped(unsigned long start, unsigned long end,
- unsigned type);
extern int e820_any_non_reserved(unsigned long start, unsigned long end);
extern int is_memory_any_valid(unsigned long start, unsigned long end);
extern int e820_all_non_reserved(unsigned long start, unsigned long end);
extern int is_memory_all_valid(unsigned long start, unsigned long end);
extern unsigned long e820_hole_size(unsigned long start, unsigned long end);
-extern void e820_setup_gap(void);
extern void e820_register_active_regions(int nid, unsigned long start_pfn,
unsigned long end_pfn);
extern void finish_e820_parsing(void);
-extern struct e820map e820;
-extern void update_e820(void);
-
extern void reserve_early(unsigned long start, unsigned long end, char *name);
extern void free_early(unsigned long start, unsigned long end);
extern void early_res_to_bootmem(unsigned long start, unsigned long end);
Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -0,0 +1,475 @@
+/*
+ * Handle the memory map.
+ * The functions here do the job until bootmem takes over.
+ *
+ * Getting sanitize_e820_map() in sync with i386 version by applying change:
+ * - Provisions for empty E820 memory regions (reported by certain BIOSes).
+ * Alex Achenbach <xela@slit.de>, December 2002.
+ * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/ioport.h>
+#include <linux/string.h>
+#include <linux/kexec.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/pfn.h>
+
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/e820.h>
+#include <asm/setup.h>
+
+struct e820map e820;
+
+/* For PCI or other memory-mapped resources */
+unsigned long pci_mem_start = 0xaeedbabe;
+#ifdef CONFIG_PCI
+EXPORT_SYMBOL(pci_mem_start);
+#endif
+
+/*
+ * This function checks if any part of the range <start,end> is mapped
+ * with type.
+ */
+int
+e820_any_mapped(u64 start, u64 end, unsigned type)
+{
+ int i;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+
+ if (type && ei->type != type)
+ continue;
+ if (ei->addr >= end || ei->addr + ei->size <= start)
+ continue;
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(e820_any_mapped);
+
+/*
+ * This function checks if the entire range <start,end> is mapped with type.
+ *
+ * Note: this function only works correctly if the e820 table is sorted and
+ * not-overlapping, which is the case
+ */
+int __init e820_all_mapped(u64 start, u64 end, unsigned type)
+{
+ int i;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+
+ if (type && ei->type != type)
+ continue;
+ /* is the region (part) in overlap with the current region ?*/
+ if (ei->addr >= end || ei->addr + ei->size <= start)
+ continue;
+
+ /* if the region is at the beginning of <start,end> we move
+ * start to the end of the region since it's ok until there
+ */
+ if (ei->addr <= start)
+ start = ei->addr + ei->size;
+ /*
+ * if start is now at or beyond end, we're done, full
+ * coverage
+ */
+ if (start >= end)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Add a memory region to the kernel e820 map.
+ */
+void __init add_memory_region(u64 start, u64 size, int type)
+{
+ int x = e820.nr_map;
+
+ if (x == E820MAX) {
+ printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
+ return;
+ }
+
+ e820.map[x].addr = start;
+ e820.map[x].size = size;
+ e820.map[x].type = type;
+ e820.nr_map++;
+}
+
+void __init e820_print_map(char *who)
+{
+ int i;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
+ (unsigned long long) e820.map[i].addr,
+ (unsigned long long)
+ (e820.map[i].addr + e820.map[i].size));
+ switch (e820.map[i].type) {
+ case E820_RAM:
+ printk(KERN_CONT "(usable)\n");
+ break;
+ case E820_RESERVED:
+ printk(KERN_CONT "(reserved)\n");
+ break;
+ case E820_ACPI:
+ printk(KERN_CONT "(ACPI data)\n");
+ break;
+ case E820_NVS:
+ printk(KERN_CONT "(ACPI NVS)\n");
+ break;
+ default:
+ printk(KERN_CONT "type %u\n", e820.map[i].type);
+ break;
+ }
+ }
+}
+
+/*
+ * Sanitize the BIOS e820 map.
+ *
+ * Some e820 responses include overlapping entries. The following
+ * replaces the original e820 map with a new one, removing overlaps.
+ *
+ */
+int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
+{
+ struct change_member {
+ struct e820entry *pbios; /* pointer to original bios entry */
+ unsigned long long addr; /* address for this change point */
+ };
+ static struct change_member change_point_list[2*E820MAX] __initdata;
+ static struct change_member *change_point[2*E820MAX] __initdata;
+ static struct e820entry *overlap_list[E820MAX] __initdata;
+ static struct e820entry new_bios[E820MAX] __initdata;
+ struct change_member *change_tmp;
+ unsigned long current_type, last_type;
+ unsigned long long last_addr;
+ int chgidx, still_changing;
+ int overlap_entries;
+ int new_bios_entry;
+ int old_nr, new_nr, chg_nr;
+ int i;
+
+ /*
+ Visually we're performing the following
+ (1,2,3,4 = memory types)...
+
+ Sample memory map (w/overlaps):
+ ____22__________________
+ ______________________4_
+ ____1111________________
+ _44_____________________
+ 11111111________________
+ ____________________33__
+ ___________44___________
+ __________33333_________
+ ______________22________
+ ___________________2222_
+ _________111111111______
+ _____________________11_
+ _________________4______
+
+ Sanitized equivalent (no overlap):
+ 1_______________________
+ _44_____________________
+ ___1____________________
+ ____22__________________
+ ______11________________
+ _________1______________
+ __________3_____________
+ ___________44___________
+ _____________33_________
+ _______________2________
+ ________________1_______
+ _________________4______
+ ___________________2____
+ ____________________33__
+ ______________________4_
+ */
+
+ /* if there's only one memory region, don't bother */
+ if (*pnr_map < 2)
+ return -1;
+
+ old_nr = *pnr_map;
+
+ /* bail out if we find any unreasonable addresses in bios map */
+ for (i = 0; i < old_nr; i++)
+ if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
+ return -1;
+
+ /* create pointers for initial change-point information (for sorting) */
+ for (i = 0; i < 2 * old_nr; i++)
+ change_point[i] = &change_point_list[i];
+
+ /* record all known change-points (starting and ending addresses),
+ omitting those that are for empty memory regions */
+ chgidx = 0;
+ for (i = 0; i < old_nr; i++) {
+ if (biosmap[i].size != 0) {
+ change_point[chgidx]->addr = biosmap[i].addr;
+ change_point[chgidx++]->pbios = &biosmap[i];
+ change_point[chgidx]->addr = biosmap[i].addr +
+ biosmap[i].size;
+ change_point[chgidx++]->pbios = &biosmap[i];
+ }
+ }
+ chg_nr = chgidx;
+
+ /* sort change-point list by memory addresses (low -> high) */
+ still_changing = 1;
+ while (still_changing) {
+ still_changing = 0;
+ for (i = 1; i < chg_nr; i++) {
+ unsigned long long curaddr, lastaddr;
+ unsigned long long curpbaddr, lastpbaddr;
+
+ curaddr = change_point[i]->addr;
+ lastaddr = change_point[i - 1]->addr;
+ curpbaddr = change_point[i]->pbios->addr;
+ lastpbaddr = change_point[i - 1]->pbios->addr;
+
+ /*
+ * swap entries, when:
+ *
+ * curaddr < lastaddr or
+ * curaddr == lastaddr and curaddr == curpbaddr and
+ * lastaddr != lastpbaddr
+ */
+ if (curaddr < lastaddr ||
+ (curaddr == lastaddr && curaddr == curpbaddr &&
+ lastaddr != lastpbaddr)) {
+ change_tmp = change_point[i];
+ change_point[i] = change_point[i-1];
+ change_point[i-1] = change_tmp;
+ still_changing = 1;
+ }
+ }
+ }
+
+ /* create a new bios memory map, removing overlaps */
+ overlap_entries = 0; /* number of entries in the overlap table */
+ new_bios_entry = 0; /* index for creating new bios map entries */
+ last_type = 0; /* start with undefined memory type */
+ last_addr = 0; /* start with 0 as last starting address */
+
+ /* loop through change-points, determining effect on the new bios map */
+ for (chgidx = 0; chgidx < chg_nr; chgidx++) {
+ /* keep track of all overlapping bios entries */
+ if (change_point[chgidx]->addr ==
+ change_point[chgidx]->pbios->addr) {
+ /*
+ * add map entry to overlap list (> 1 entry
+ * implies an overlap)
+ */
+ overlap_list[overlap_entries++] =
+ change_point[chgidx]->pbios;
+ } else {
+ /*
+ * remove entry from list (order independent,
+ * so swap with last)
+ */
+ for (i = 0; i < overlap_entries; i++) {
+ if (overlap_list[i] ==
+ change_point[chgidx]->pbios)
+ overlap_list[i] =
+ overlap_list[overlap_entries-1];
+ }
+ overlap_entries--;
+ }
+ /*
+ * if there are overlapping entries, decide which
+ * "type" to use (larger value takes precedence --
+ * 1=usable, 2,3,4,4+=unusable)
+ */
+ current_type = 0;
+ for (i = 0; i < overlap_entries; i++)
+ if (overlap_list[i]->type > current_type)
+ current_type = overlap_list[i]->type;
+ /*
+ * continue building up new bios map based on this
+ * information
+ */
+ if (current_type != last_type) {
+ if (last_type != 0) {
+ new_bios[new_bios_entry].size =
+ change_point[chgidx]->addr - last_addr;
+ /*
+ * move forward only if the new size
+ * was non-zero
+ */
+ if (new_bios[new_bios_entry].size != 0)
+ /*
+ * no more space left for new
+ * bios entries ?
+ */
+ if (++new_bios_entry >= E820MAX)
+ break;
+ }
+ if (current_type != 0) {
+ new_bios[new_bios_entry].addr =
+ change_point[chgidx]->addr;
+ new_bios[new_bios_entry].type = current_type;
+ last_addr = change_point[chgidx]->addr;
+ }
+ last_type = current_type;
+ }
+ }
+ /* retain count for new bios entries */
+ new_nr = new_bios_entry;
+
+ /* copy new bios mapping into original location */
+ memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
+ *pnr_map = new_nr;
+
+ return 0;
+}
+
+/*
+ * Copy the BIOS e820 map into a safe place.
+ *
+ * Sanity-check it while we're at it..
+ *
+ * If we're lucky and live on a modern system, the setup code
+ * will have given us a memory map that we can use to properly
+ * set up memory. If we aren't, we'll fake a memory map.
+ */
+int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
+{
+ /* Only one memory region (or negative)? Ignore it */
+ if (nr_map < 2)
+ return -1;
+
+ do {
+ u64 start = biosmap->addr;
+ u64 size = biosmap->size;
+ u64 end = start + size;
+ u32 type = biosmap->type;
+
+ /* Overflow in 64 bits? Ignore the memory map. */
+ if (start > end)
+ return -1;
+
+ add_memory_region(start, size, type);
+ } while (biosmap++, --nr_map);
+ return 0;
+}
+
+u64 __init update_memory_range(u64 start, u64 size, unsigned old_type,
+ unsigned new_type)
+{
+ int i;
+ u64 real_updated_size = 0;
+
+ BUG_ON(old_type == new_type);
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ u64 final_start, final_end;
+ if (ei->type != old_type)
+ continue;
+ /* totally covered? */
+ if (ei->addr >= start &&
+ (ei->addr + ei->size) <= (start + size)) {
+ ei->type = new_type;
+ real_updated_size += ei->size;
+ continue;
+ }
+ /* partially covered */
+ final_start = max(start, ei->addr);
+ final_end = min(start + size, ei->addr + ei->size);
+ if (final_start >= final_end)
+ continue;
+ add_memory_region(final_start, final_end - final_start,
+ new_type);
+ real_updated_size += final_end - final_start;
+ }
+ return real_updated_size;
+}
+
+void __init update_e820(void)
+{
+ u8 nr_map;
+
+ nr_map = e820.nr_map;
+ if (sanitize_e820_map(e820.map, &nr_map))
+ return;
+ e820.nr_map = nr_map;
+ printk(KERN_INFO "modified physical RAM map:\n");
+ e820_print_map("modified");
+}
+
+/*
+ * Search for the biggest gap in the low 32 bits of the e820
+ * memory space. We pass this space to PCI to assign MMIO resources
+ * for hotplug or unconfigured devices in.
+ * Hopefully the BIOS left enough space.
+ */
+__init void e820_setup_gap(void)
+{
+ unsigned long gapstart, gapsize, round;
+ unsigned long long last;
+ int i;
+ int found = 0;
+
+ last = 0x100000000ull;
+ gapstart = 0x10000000;
+ gapsize = 0x400000;
+ i = e820.nr_map;
+ while (--i >= 0) {
+ unsigned long long start = e820.map[i].addr;
+ unsigned long long end = start + e820.map[i].size;
+
+ /*
+ * Since "last" is at most 4GB, we know we'll
+ * fit in 32 bits if this condition is true
+ */
+ if (last > end) {
+ unsigned long gap = last - end;
+
+ if (gap > gapsize) {
+ gapsize = gap;
+ gapstart = end;
+ found = 1;
+ }
+ }
+ if (start < last)
+ last = start;
+ }
+
+#ifdef CONFIG_X86_64
+ if (!found) {
+ gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
+ printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
+ "address range\n"
+ KERN_ERR "PCI: Unassigned devices with 32bit resource "
+ "registers may break!\n");
+ }
+#endif
+
+ /*
+ * See how much we want to round up: start off with
+ * rounding to the next 1MB area.
+ */
+ round = 0x100000;
+ while ((gapsize >> 4) > round)
+ round += round;
+ /* Fun with two's complement */
+ pci_mem_start = (gapstart + round) & -round;
+
+ printk(KERN_INFO
+ "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
+ pci_mem_start, gapstart, gapsize);
+}
+
Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6/arch/x86/kernel/setup_32.c
@@ -851,7 +851,7 @@ void __init setup_arch(char **cmdline_p)
get_smp_config();
#endif
- e820_register_memory();
+ e820_setup_gap();
e820_mark_nosave_regions();
#ifdef CONFIG_VT
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: make e820.c to have common functions
2008-05-11 7:30 [PATCH] x86: make e820.c to have common functions Yinghai Lu
@ 2008-05-13 13:05 ` Ingo Molnar
2008-05-13 17:35 ` Yinghai Lu
2008-05-18 8:18 ` [PATCH] x86: extend e820 ealy_res support 32bit Yinghai Lu
1 sibling, 1 reply; 51+ messages in thread
From: Ingo Molnar @ 2008-05-13 13:05 UTC (permalink / raw)
To: Yinghai Lu
Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
* Yinghai Lu <yhlu.kernel@gmail.com> wrote:
> remove the duplicated copy of these functions.
applied, thanks. What a low-key commit message for the first step such a
much-needed unification! :-)
any thoughts (or patches) about how to attack the rest of e820_32/64.c?
Ingo
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: make e820.c to have common functions
2008-05-13 13:05 ` Ingo Molnar
@ 2008-05-13 17:35 ` Yinghai Lu
0 siblings, 0 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-05-13 17:35 UTC (permalink / raw)
To: Ingo Molnar
Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
On Tue, May 13, 2008 at 6:05 AM, Ingo Molnar <mingo@elte.hu> wrote:
>
> * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
> > remove the duplicated copy of these functions.
>
> applied, thanks. What a low-key commit message for the first step such a
> much-needed unification! :-)
>
> any thoughts (or patches) about how to attack the rest of e820_32/64.c?
reserve_early related to 32 bit. but could cause some regression to subarch
but it seems hpa is doing sth about e820 related too?
YH
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] x86: extend e820 ealy_res support 32bit
2008-05-11 7:30 [PATCH] x86: make e820.c to have common functions Yinghai Lu
2008-05-13 13:05 ` Ingo Molnar
@ 2008-05-18 8:18 ` Yinghai Lu
2008-05-21 3:10 ` [PATCH] x86: move e820_mark_nosave_regions to e820.c Yinghai Lu
2008-05-22 1:40 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix Yinghai Lu
1 sibling, 2 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-05-18 8:18 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
Cc: linux-kernel@vger.kernel.org
move early_res related code from e820_64.c to e820.c
make EBDA detection be done in head32.c
remove smp_alloc_memory, because we have a fixed trampoline address now.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
arch/x86/kernel/e820.c | 214 ++++++++++++++++++++++++++++++++++++
arch/x86/kernel/e820_64.c | 196 --------------------------------
arch/x86/kernel/head32.c | 76 ++++++++++++
arch/x86/kernel/setup_32.c | 109 +++---------------
arch/x86/kernel/smpboot.c | 17 --
arch/x86/kernel/trampoline.c | 2
arch/x86/mach-voyager/voyager_smp.c | 9 -
include/asm-x86/e820.h | 6 +
include/asm-x86/e820_64.h | 9 -
include/asm-x86/smp.h | 1
10 files changed, 320 insertions(+), 319 deletions(-)
Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -22,7 +22,9 @@
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/e820.h>
+#include <asm/proto.h>
#include <asm/setup.h>
+#include <asm/trampoline.h>
struct e820map e820;
@@ -493,3 +495,215 @@ __init void e820_setup_gap(void)
pci_mem_start, gapstart, gapsize);
}
+
+/*
+ * Early reserved memory areas.
+ */
+#define MAX_EARLY_RES 20
+
+struct early_res {
+ u64 start, end;
+ char name[16];
+};
+static struct early_res early_res[MAX_EARLY_RES] __initdata = {
+ { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
+#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE)
+ { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
+#endif
+#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
+ /*
+ * But first pinch a few for the stack/trampoline stuff
+ * FIXME: Don't need the extra page at 4K, but need to fix
+ * trampoline before removing it. (see the GDT stuff)
+ */
+ { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" },
+ /*
+ * Has to be in very low memory so we can execute
+ * real-mode AP code.
+ */
+ { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" },
+#endif
+ {}
+};
+
+void __init reserve_early(u64 start, u64 end, char *name)
+{
+ int i;
+ struct early_res *r;
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+ r = &early_res[i];
+ if (end > r->start && start < r->end)
+ panic("Overlapping early reservations %llx-%llx %s to %llx-%llx %s\n",
+ start, end - 1, name?name:"", r->start,
+ r->end - 1, r->name);
+ }
+ if (i >= MAX_EARLY_RES)
+ panic("Too many early reservations");
+ r = &early_res[i];
+ r->start = start;
+ r->end = end;
+ if (name)
+ strncpy(r->name, name, sizeof(r->name) - 1);
+}
+
+void __init free_early(u64 start, u64 end)
+{
+ struct early_res *r;
+ int i, j;
+
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+ r = &early_res[i];
+ if (start == r->start && end == r->end)
+ break;
+ }
+ if (i >= MAX_EARLY_RES || !early_res[i].end)
+ panic("free_early on not reserved area: %llx-%llx!",
+ start, end);
+
+ for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
+ ;
+
+ memmove(&early_res[i], &early_res[i + 1],
+ (j - 1 - i) * sizeof(struct early_res));
+
+ early_res[j - 1].end = 0;
+}
+
+void __init early_res_to_bootmem(u64 start, u64 end)
+{
+ int i;
+ u64 final_start, final_end;
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+ struct early_res *r = &early_res[i];
+ final_start = max(start, r->start);
+ final_end = min(end, r->end);
+ if (final_start >= final_end)
+ continue;
+ printk(KERN_INFO " early res: %d [%llx-%llx] %s\n", i,
+ final_start, final_end - 1, r->name);
+#ifdef CONFIG_X86_64
+ reserve_bootmem_generic(final_start, final_end - final_start);
+#else
+ reserve_bootmem(final_start, final_end - final_start,
+ BOOTMEM_DEFAULT);
+#endif
+ }
+}
+
+/* Check for already reserved areas */
+static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
+{
+ int i;
+ u64 addr = *addrp, last;
+ int changed = 0;
+again:
+ last = addr + size;
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+ struct early_res *r = &early_res[i];
+ if (last >= r->start && addr < r->end) {
+ *addrp = addr = round_up(r->end, align);
+ changed = 1;
+ goto again;
+ }
+ }
+ return changed;
+}
+
+/* Check for already reserved areas */
+static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
+{
+ int i;
+ u64 addr = *addrp, last;
+ u64 size = *sizep;
+ int changed = 0;
+again:
+ last = addr + size;
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+ struct early_res *r = &early_res[i];
+ if (last > r->start && addr < r->start) {
+ size = r->start - addr;
+ changed = 1;
+ goto again;
+ }
+ if (last > r->end && addr < r->end) {
+ addr = round_up(r->end, align);
+ size = last - addr;
+ changed = 1;
+ goto again;
+ }
+ if (last <= r->end && addr >= r->start) {
+ (*sizep)++;
+ return 0;
+ }
+ }
+ if (changed) {
+ *addrp = addr;
+ *sizep = size;
+ }
+ return changed;
+}
+
+/*
+ * Find a free area with specified alignment in a specific range.
+ */
+u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
+{
+ int i;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ u64 addr, last;
+ u64 ei_last;
+
+ if (ei->type != E820_RAM)
+ continue;
+ addr = round_up(ei->addr, align);
+ ei_last = ei->addr + ei->size;
+ if (addr < start)
+ addr = round_up(start, align);
+ if (addr >= ei_last)
+ continue;
+ while (bad_addr(&addr, size, align) && addr+size <= ei_last)
+ ;
+ last = addr + size;
+ if (last > ei_last)
+ continue;
+ if (last > end)
+ continue;
+ return addr;
+ }
+ return -1ULL;
+}
+
+/*
+ * Find the next free range at or after start; its length goes to *sizep.
+ * Returns -1ULL (all ones in u64, on 32-bit too) when there is none.
+ */
+u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
+{
+ int i;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ u64 addr, last;
+ u64 ei_last;
+
+ if (ei->type != E820_RAM)
+ continue;
+ addr = round_up(ei->addr, align);
+ ei_last = ei->addr + ei->size;
+ if (addr < start)
+ addr = round_up(start, align);
+ if (addr >= ei_last)
+ continue;
+ *sizep = ei_last - addr;
+ while (bad_addr_size(&addr, sizep, align) &&
+ addr + *sizep <= ei_last)
+ ;
+ last = addr + *sizep;
+ if (last > ei_last)
+ continue;
+ return addr;
+ }
+ return -1ULL;
+}
Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6/arch/x86/kernel/e820_64.c
@@ -48,202 +48,6 @@ unsigned long max_pfn_mapped;
static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
/*
- * Early reserved memory areas.
- */
-#define MAX_EARLY_RES 20
-
-struct early_res {
- unsigned long start, end;
- char name[16];
-};
-static struct early_res early_res[MAX_EARLY_RES] __initdata = {
- { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
-#ifdef CONFIG_X86_TRAMPOLINE
- { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
-#endif
- {}
-};
-
-void __init reserve_early(unsigned long start, unsigned long end, char *name)
-{
- int i;
- struct early_res *r;
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
- r = &early_res[i];
- if (end > r->start && start < r->end)
- panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n",
- start, end - 1, name?name:"", r->start, r->end - 1, r->name);
- }
- if (i >= MAX_EARLY_RES)
- panic("Too many early reservations");
- r = &early_res[i];
- r->start = start;
- r->end = end;
- if (name)
- strncpy(r->name, name, sizeof(r->name) - 1);
-}
-
-void __init free_early(unsigned long start, unsigned long end)
-{
- struct early_res *r;
- int i, j;
-
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
- r = &early_res[i];
- if (start == r->start && end == r->end)
- break;
- }
- if (i >= MAX_EARLY_RES || !early_res[i].end)
- panic("free_early on not reserved area: %lx-%lx!", start, end);
-
- for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
- ;
-
- memmove(&early_res[i], &early_res[i + 1],
- (j - 1 - i) * sizeof(struct early_res));
-
- early_res[j - 1].end = 0;
-}
-
-void __init early_res_to_bootmem(unsigned long start, unsigned long end)
-{
- int i;
- unsigned long final_start, final_end;
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
- struct early_res *r = &early_res[i];
- final_start = max(start, r->start);
- final_end = min(end, r->end);
- if (final_start >= final_end)
- continue;
- printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
- final_start, final_end - 1, r->name);
- reserve_bootmem_generic(final_start, final_end - final_start);
- }
-}
-
-/* Check for already reserved areas */
-static inline int __init
-bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
-{
- int i;
- unsigned long addr = *addrp, last;
- int changed = 0;
-again:
- last = addr + size;
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
- struct early_res *r = &early_res[i];
- if (last >= r->start && addr < r->end) {
- *addrp = addr = round_up(r->end, align);
- changed = 1;
- goto again;
- }
- }
- return changed;
-}
-
-/* Check for already reserved areas */
-static inline int __init
-bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
-{
- int i;
- unsigned long addr = *addrp, last;
- unsigned long size = *sizep;
- int changed = 0;
-again:
- last = addr + size;
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
- struct early_res *r = &early_res[i];
- if (last > r->start && addr < r->start) {
- size = r->start - addr;
- changed = 1;
- goto again;
- }
- if (last > r->end && addr < r->end) {
- addr = round_up(r->end, align);
- size = last - addr;
- changed = 1;
- goto again;
- }
- if (last <= r->end && addr >= r->start) {
- (*sizep)++;
- return 0;
- }
- }
- if (changed) {
- *addrp = addr;
- *sizep = size;
- }
- return changed;
-}
-
-/*
- * Find a free area with specified alignment in a specific range.
- */
-unsigned long __init find_e820_area(unsigned long start, unsigned long end,
- unsigned long size, unsigned long align)
-{
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- unsigned long addr, last;
- unsigned long ei_last;
-
- if (ei->type != E820_RAM)
- continue;
- addr = round_up(ei->addr, align);
- ei_last = ei->addr + ei->size;
- if (addr < start)
- addr = round_up(start, align);
- if (addr >= ei_last)
- continue;
- while (bad_addr(&addr, size, align) && addr+size <= ei_last)
- ;
- last = addr + size;
- if (last > ei_last)
- continue;
- if (last > end)
- continue;
- return addr;
- }
- return -1UL;
-}
-
-/*
- * Find next free range after *start
- */
-unsigned long __init find_e820_area_size(unsigned long start,
- unsigned long *sizep,
- unsigned long align)
-{
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- unsigned long addr, last;
- unsigned long ei_last;
-
- if (ei->type != E820_RAM)
- continue;
- addr = round_up(ei->addr, align);
- ei_last = ei->addr + ei->size;
- if (addr < start)
- addr = round_up(start, align);
- if (addr >= ei_last)
- continue;
- *sizep = ei_last - addr;
- while (bad_addr_size(&addr, sizep, align) &&
- addr + *sizep <= ei_last)
- ;
- last = addr + *sizep;
- if (last > ei_last)
- continue;
- return addr;
- }
- return -1UL;
-
-}
-/*
* Find the highest page frame number we have available
*/
unsigned long __init e820_end_of_ram(void)
Index: linux-2.6/arch/x86/kernel/head32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head32.c
+++ linux-2.6/arch/x86/kernel/head32.c
@@ -8,7 +8,83 @@
#include <linux/init.h>
#include <linux/start_kernel.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/e820.h>
+#include <asm/bios_ebda.h>
+
+#define BIOS_LOWMEM_KILOBYTES 0x413
+
+/*
+ * The BIOS places the EBDA/XBDA at the top of conventional
+ * memory, and usually decreases the reported amount of
+ * conventional memory (int 0x12) too. This also contains a
+ * workaround for Dell systems that neglect to reserve EBDA.
+ * The same workaround also avoids a problem with the AMD768MPX
+ * chipset: reserve a page before VGA to prevent PCI prefetch
+ * into it (errata #56). Usually the page is reserved anyways,
+ * unless you have no PS/2 mouse plugged in.
+ */
+static void __init reserve_ebda_region(void)
+{
+ unsigned int lowmem, ebda_addr;
+
+ /* To determine the position of the EBDA and the */
+ /* end of conventional memory, we need to look at */
+ /* the BIOS data area. In a paravirtual environment */
+ /* that area is absent. We'll just have to assume */
+ /* that the paravirt case can handle memory setup */
+ /* correctly, without our help. */
+ if (paravirt_enabled())
+ return;
+
+ /* end of low (conventional) memory */
+ lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
+ lowmem <<= 10;
+
+ /* start of EBDA area */
+ ebda_addr = get_bios_ebda();
+
+ /* Fixup: bios puts an EBDA in the top 64K segment */
+ /* of conventional memory, but does not adjust lowmem. */
+ if ((lowmem - ebda_addr) <= 0x10000)
+ lowmem = ebda_addr;
+
+ /* Fixup: bios does not report an EBDA at all. */
+ /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
+ if ((ebda_addr == 0) && (lowmem >= 0x9f000))
+ lowmem = 0x9f000;
+
+ /* Paranoia: should never happen, but... */
+ if ((lowmem == 0) || (lowmem >= 0x100000))
+ lowmem = 0x9f000;
+
+ /* reserve all memory between lowmem and the 1MB mark */
+ reserve_early(lowmem, 0x100000, "BIOS reserved");
+}
+
void __init i386_start_kernel(void)
{
+ reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
+
+#ifdef CONFIG_BLK_DEV_INITRD
+ /* Reserve INITRD */
+ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
+ u64 ramdisk_image = boot_params.hdr.ramdisk_image;
+ u64 ramdisk_size = boot_params.hdr.ramdisk_size;
+ u64 ramdisk_end = ramdisk_image + ramdisk_size;
+ reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
+ }
+#endif
+ reserve_early(__pa_symbol(&_end), init_pg_tables_end, "INIT_PG_TABLE");
+
+ reserve_ebda_region();
+
+ /*
+ * At this point everything still needed from the boot loader
+ * or BIOS or kernel text should be early reserved or marked not
+ * RAM in e820. All other memory is free game.
+ */
+
start_kernel();
}
Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6/arch/x86/kernel/setup_32.c
@@ -359,56 +359,6 @@ unsigned long __init find_max_low_pfn(vo
return max_low_pfn;
}
-#define BIOS_LOWMEM_KILOBYTES 0x413
-
-/*
- * The BIOS places the EBDA/XBDA at the top of conventional
- * memory, and usually decreases the reported amount of
- * conventional memory (int 0x12) too. This also contains a
- * workaround for Dell systems that neglect to reserve EBDA.
- * The same workaround also avoids a problem with the AMD768MPX
- * chipset: reserve a page before VGA to prevent PCI prefetch
- * into it (errata #56). Usually the page is reserved anyways,
- * unless you have no PS/2 mouse plugged in.
- */
-static void __init reserve_ebda_region(void)
-{
- unsigned int lowmem, ebda_addr;
-
- /* To determine the position of the EBDA and the */
- /* end of conventional memory, we need to look at */
- /* the BIOS data area. In a paravirtual environment */
- /* that area is absent. We'll just have to assume */
- /* that the paravirt case can handle memory setup */
- /* correctly, without our help. */
- if (paravirt_enabled())
- return;
-
- /* end of low (conventional) memory */
- lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
- lowmem <<= 10;
-
- /* start of EBDA area */
- ebda_addr = get_bios_ebda();
-
- /* Fixup: bios puts an EBDA in the top 64K segment */
- /* of conventional memory, but does not adjust lowmem. */
- if ((lowmem - ebda_addr) <= 0x10000)
- lowmem = ebda_addr;
-
- /* Fixup: bios does not report an EBDA at all. */
- /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
- if ((ebda_addr == 0) && (lowmem >= 0x9f000))
- lowmem = 0x9f000;
-
- /* Paranoia: should never happen, but... */
- if ((lowmem == 0) || (lowmem >= 0x100000))
- lowmem = 0x9f000;
-
- /* reserve all memory between lowmem and the 1MB mark */
- reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT);
-}
-
#ifndef CONFIG_NEED_MULTIPLE_NODES
static void __init setup_bootmem_allocator(void);
static unsigned long __init setup_memory(void)
@@ -522,25 +472,32 @@ static void __init reserve_initrd(void)
unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
unsigned long ramdisk_here;
- initrd_start = 0;
-
if (!boot_params.hdr.type_of_loader ||
!ramdisk_image || !ramdisk_size)
return; /* No initrd provided by bootloader */
+ initrd_start = 0;
+
if (ramdisk_end < ramdisk_image) {
+ free_bootmem(ramdisk_image, ramdisk_size);
printk(KERN_ERR "initrd wraps around end of memory, "
"disabling initrd\n");
return;
}
if (ramdisk_size >= end_of_lowmem/2) {
+ free_bootmem(ramdisk_image, ramdisk_size);
printk(KERN_ERR "initrd too large to handle, "
"disabling initrd\n");
return;
}
+
if (ramdisk_end <= end_of_lowmem) {
/* All in lowmem, easy case */
- reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT);
+ /*
+ * don't need to reserve again, already reserved early
+ * in i386_start_kernel, and early_res_to_bootmem
+ * converts that to reserved in bootmem
+ */
initrd_start = ramdisk_image + PAGE_OFFSET;
initrd_end = initrd_start+ramdisk_size;
return;
@@ -582,6 +539,8 @@ static void __init relocate_initrd(void)
p = (char *)__va(ramdisk_image);
memcpy(q, p, clen);
q += clen;
+ /* need to free these low pages...*/
+ free_bootmem(ramdisk_image, clen);
ramdisk_image += clen;
ramdisk_size -= clen;
}
@@ -600,47 +559,28 @@ static void __init relocate_initrd(void)
ramdisk_image += clen;
ramdisk_size -= clen;
}
+ /* high pages is not converted by early_res_to_bootmem */
}
#endif /* CONFIG_BLK_DEV_INITRD */
void __init setup_bootmem_allocator(void)
{
- unsigned long bootmap_size;
+ unsigned long bootmap_size, bootmap;
/*
* Initialize the boot-time allocator (with low memory only):
*/
- bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
-
+ bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT;
+ bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT,
+ max_low_pfn<<PAGE_SHIFT, bootmap_size,
+ PAGE_SIZE);
+ if (bootmap == -1L)
+ panic("Cannot find bootmem map of size %ld\n", bootmap_size);
+ bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn);
register_bootmem_low_pages(max_low_pfn);
+ early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
+ reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
- /*
- * Reserve the bootmem bitmap itself as well. We do this in two
- * steps (first step was init_bootmem()) because this catches
- * the (very unlikely) case of us accidentally initializing the
- * bootmem allocator with an invalid RAM area.
- */
- reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
- bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text),
- BOOTMEM_DEFAULT);
-
- /*
- * reserve physical page 0 - it's a special BIOS page on many boxes,
- * enabling clean reboots, SMP operation, laptop functions.
- */
- reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
-
- /* reserve EBDA region */
- reserve_ebda_region();
-
-#ifdef CONFIG_SMP
- /*
- * But first pinch a few for the stack/trampoline stuff
- * FIXME: Don't need the extra page at 4K, but need to fix
- * trampoline before removing it. (see the GDT stuff)
- */
- reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT);
-#endif
#ifdef CONFIG_ACPI_SLEEP
/*
* Reserve low memory region for sleep support.
@@ -791,9 +731,6 @@ void __init setup_arch(char **cmdline_p)
* not to exceed the 8Mb limit.
*/
-#ifdef CONFIG_SMP
- smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
-#endif
paging_init();
/*
Index: linux-2.6/arch/x86/kernel/smpboot.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/smpboot.c
+++ linux-2.6/arch/x86/kernel/smpboot.c
@@ -538,23 +538,6 @@ cpumask_t cpu_coregroup_map(int cpu)
return c->llc_shared_map;
}
-#ifdef CONFIG_X86_32
-/*
- * We are called very early to get the low memory for the
- * SMP bootup trampoline page.
- */
-void __init smp_alloc_memory(void)
-{
- trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
- /*
- * Has to be in very low memory so we can execute
- * real-mode AP code.
- */
- if (__pa(trampoline_base) >= 0x9F000)
- BUG();
-}
-#endif
-
static void impress_friends(void)
{
int cpu;
Index: linux-2.6/arch/x86/kernel/trampoline.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/trampoline.c
+++ linux-2.6/arch/x86/kernel/trampoline.c
@@ -2,7 +2,7 @@
#include <asm/trampoline.h>
-/* ready for x86_64, no harm for x86, since it will overwrite after alloc */
+/* ready for x86_64 and x86 */
unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
/*
Index: linux-2.6/arch/x86/mach-voyager/voyager_smp.c
===================================================================
--- linux-2.6.orig/arch/x86/mach-voyager/voyager_smp.c
+++ linux-2.6/arch/x86/mach-voyager/voyager_smp.c
@@ -1137,15 +1137,6 @@ void flush_tlb_all(void)
on_each_cpu(do_flush_tlb_all, 0, 1, 1);
}
-/* used to set up the trampoline for other CPUs when the memory manager
- * is sorted out */
-void __init smp_alloc_memory(void)
-{
- trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
- if (__pa(trampoline_base) >= 0x93000)
- BUG();
-}
-
/* send a reschedule CPI to one CPU by physical CPU number*/
static void voyager_smp_send_reschedule(int cpu)
{
Index: linux-2.6/include/asm-x86/e820.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820.h
+++ linux-2.6/include/asm-x86/e820.h
@@ -70,6 +70,12 @@ extern u64 update_memory_range(u64 start
extern void update_e820(void);
extern void e820_setup_gap(void);
+extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
+extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
+extern void reserve_early(u64 start, u64 end, char *name);
+extern void free_early(u64 start, u64 end);
+extern void early_res_to_bootmem(u64 start, u64 end);
+
#endif /* __ASSEMBLY__ */
#define ISA_START_ADDRESS 0xa0000
Index: linux-2.6/include/asm-x86/e820_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_64.h
+++ linux-2.6/include/asm-x86/e820_64.h
@@ -14,11 +14,6 @@
#include <linux/ioport.h>
#ifndef __ASSEMBLY__
-extern unsigned long find_e820_area(unsigned long start, unsigned long end,
- unsigned long size, unsigned long align);
-extern unsigned long find_e820_area_size(unsigned long start,
- unsigned long *sizep,
- unsigned long align);
extern void setup_memory_region(void);
extern void contig_e820_setup(void);
extern unsigned long e820_end_of_ram(void);
@@ -35,10 +30,6 @@ extern void e820_register_active_regions
extern void finish_e820_parsing(void);
-extern void reserve_early(unsigned long start, unsigned long end, char *name);
-extern void free_early(unsigned long start, unsigned long end);
-extern void early_res_to_bootmem(unsigned long start, unsigned long end);
-
#endif/*!__ASSEMBLY__*/
#endif/*__E820_HEADER*/
Index: linux-2.6/include/asm-x86/smp.h
===================================================================
--- linux-2.6.orig/include/asm-x86/smp.h
+++ linux-2.6/include/asm-x86/smp.h
@@ -192,7 +192,6 @@ extern void cpu_exit_clear(void);
extern void cpu_uninit(void);
#endif
-extern void smp_alloc_memory(void);
extern void lock_ipi_call_lock(void);
extern void unlock_ipi_call_lock(void);
#endif /* __ASSEMBLY__ */
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] x86: move e820_mark_nosave_regions to e820.c
2008-05-18 8:18 ` [PATCH] x86: extend e820 ealy_res support 32bit Yinghai Lu
@ 2008-05-21 3:10 ` Yinghai Lu
2008-05-22 1:40 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix Yinghai Lu
1 sibling, 0 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-05-21 3:10 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
Cc: linux-kernel@vger.kernel.org
and make e820_mark_nosave_regions() take a limit_pfn argument, so that
32-bit passes max_low_pfn and 64-bit passes end_pfn
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -18,6 +18,7 @@
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/pfn.h>
+#include <linux/suspend.h>
#include <asm/pgtable.h>
#include <asm/page.h>
@@ -495,6 +496,36 @@ __init void e820_setup_gap(void)
pci_mem_start, gapstart, gapsize);
}
+#if defined(CONFIG_X86_64) || (defined(CONFIG_X86_32) && defined(CONFIG_PM) && defined(CONFIG_HIBERNATION))
+/**
+ * Find the ranges of physical addresses that do not correspond to
+ * e820 RAM areas and mark the corresponding pages as nosave for
+ * hibernation (32 bit) or software suspend and suspend to RAM (64 bit).
+ *
+ * This function requires the e820 map to be sorted and without any
+ * overlapping entries and assumes the first e820 area to be RAM.
+ */
+void __init e820_mark_nosave_regions(unsigned long limit_pfn)
+{
+ int i;
+ unsigned long pfn;
+
+ pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
+ for (i = 1; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+
+ if (pfn < PFN_UP(ei->addr))
+ register_nosave_region(pfn, PFN_UP(ei->addr));
+
+ pfn = PFN_DOWN(ei->addr + ei->size);
+ if (ei->type != E820_RAM)
+ register_nosave_region(PFN_UP(ei->addr), pfn);
+
+ if (pfn >= limit_pfn)
+ break;
+ }
+}
+#endif
/*
* Early reserved memory areas.
Index: linux-2.6/arch/x86/kernel/e820_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_32.c
+++ linux-2.6/arch/x86/kernel/e820_32.c
@@ -9,7 +9,6 @@
#include <linux/mm.h>
#include <linux/pfn.h>
#include <linux/uaccess.h>
-#include <linux/suspend.h>
#include <asm/pgtable.h>
#include <asm/page.h>
@@ -208,37 +207,6 @@ void __init init_iomem_resources(struct
}
}
-#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
-/**
- * e820_mark_nosave_regions - Find the ranges of physical addresses that do not
- * correspond to e820 RAM areas and mark the corresponding pages as nosave for
- * hibernation.
- *
- * This function requires the e820 map to be sorted and without any
- * overlapping entries and assumes the first e820 area to be RAM.
- */
-void __init e820_mark_nosave_regions(void)
-{
- int i;
- unsigned long pfn;
-
- pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
- for (i = 1; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
-
- if (pfn < PFN_UP(ei->addr))
- register_nosave_region(pfn, PFN_UP(ei->addr));
-
- pfn = PFN_DOWN(ei->addr + ei->size);
- if (ei->type != E820_RAM)
- register_nosave_region(PFN_UP(ei->addr), pfn);
-
- if (pfn >= max_low_pfn)
- break;
- }
-}
-#endif
-
/*
* Find the highest page frame number we have available
*/
Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6/arch/x86/kernel/e820_64.c
@@ -17,7 +17,6 @@
#include <linux/kexec.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/suspend.h>
#include <linux/pfn.h>
#include <linux/pci.h>
@@ -94,37 +93,6 @@ void __init e820_reserve_resources(void)
}
/*
- * Find the ranges of physical addresses that do not correspond to
- * e820 RAM areas and mark the corresponding pages as nosave for software
- * suspend and suspend to RAM.
- *
- * This function requires the e820 map to be sorted and without any
- * overlapping entries and assumes the first e820 area to be RAM.
- */
-void __init e820_mark_nosave_regions(void)
-{
- int i;
- unsigned long paddr;
-
- paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
- for (i = 1; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
-
- if (paddr < ei->addr)
- register_nosave_region(PFN_DOWN(paddr),
- PFN_UP(ei->addr));
-
- paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
- if (ei->type != E820_RAM)
- register_nosave_region(PFN_UP(ei->addr),
- PFN_DOWN(paddr));
-
- if (paddr >= (end_pfn << PAGE_SHIFT))
- break;
- }
-}
-
-/*
* Finds an active region in the address range from start_pfn to last_pfn and
* returns its range in ei_startpfn and ei_endpfn for the e820 entry.
*/
Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6/arch/x86/kernel/setup_32.c
@@ -789,7 +789,7 @@ void __init setup_arch(char **cmdline_p)
#endif
e820_setup_gap();
- e820_mark_nosave_regions();
+ e820_mark_nosave_regions(max_low_pfn);
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
Index: linux-2.6/arch/x86/kernel/setup_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_64.c
+++ linux-2.6/arch/x86/kernel/setup_64.c
@@ -499,7 +499,7 @@ void __init setup_arch(char **cmdline_p)
* We trust e820 completely. No explicit ROM probing in memory.
*/
e820_reserve_resources();
- e820_mark_nosave_regions();
+ e820_mark_nosave_regions(end_pfn);
/* request I/O space for devices used on all i[345]86 PCs */
for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
Index: linux-2.6/include/asm-x86/e820.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820.h
+++ linux-2.6/include/asm-x86/e820.h
@@ -70,6 +70,14 @@ extern u64 update_memory_range(u64 start
extern void update_e820(void);
extern void e820_setup_gap(void);
+#if defined(CONFIG_X86_64) || (defined(CONFIG_X86_32) && defined(CONFIG_PM) && defined(CONFIG_HIBERNATION))
+extern void e820_mark_nosave_regions(unsigned long limit_pfn);
+#else
+static inline void e820_mark_nosave_regions(unsigned long limit_pfn)
+{
+}
+#endif
+
extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
extern void reserve_early(u64 start, u64 end, char *name);
Index: linux-2.6/include/asm-x86/e820_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_32.h
+++ linux-2.6/include/asm-x86/e820_32.h
@@ -28,13 +28,5 @@ extern void init_iomem_resources(struct
struct resource *data_resource,
struct resource *bss_resource);
-#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
-extern void e820_mark_nosave_regions(void);
-#else
-static inline void e820_mark_nosave_regions(void)
-{
-}
-#endif
-
#endif/*!__ASSEMBLY__*/
#endif/*__E820_HEADER*/
Index: linux-2.6/include/asm-x86/e820_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_64.h
+++ linux-2.6/include/asm-x86/e820_64.h
@@ -18,7 +18,6 @@ extern void setup_memory_region(void);
extern void contig_e820_setup(void);
extern unsigned long e820_end_of_ram(void);
extern void e820_reserve_resources(void);
-extern void e820_mark_nosave_regions(void);
extern int e820_any_non_reserved(unsigned long start, unsigned long end);
extern int is_memory_any_valid(unsigned long start, unsigned long end);
extern int e820_all_non_reserved(unsigned long start, unsigned long end);
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] x86: extend e820 ealy_res support 32bit - fix
2008-05-18 8:18 ` [PATCH] x86: extend e820 ealy_res support 32bit Yinghai Lu
2008-05-21 3:10 ` [PATCH] x86: move e820_mark_nosave_regions to e820.c Yinghai Lu
@ 2008-05-22 1:40 ` Yinghai Lu
2008-05-22 10:12 ` Jeremy Fitzhardinge
` (2 more replies)
1 sibling, 3 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-05-22 1:40 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
Jeremy Fitzhardinge
Cc: linux-kernel@vger.kernel.org
use find_e820_area() to find an address for the new RAMDISK, instead of using RAM blindly
also print out low ram and bootmap info
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6/arch/x86/kernel/setup_32.c
@@ -466,11 +466,11 @@ static bool do_relocate_initrd = false;
static void __init reserve_initrd(void)
{
- unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
- unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
- unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
- unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
- unsigned long ramdisk_here;
+ u64 ramdisk_image = boot_params.hdr.ramdisk_image;
+ u64 ramdisk_size = boot_params.hdr.ramdisk_size;
+ u64 ramdisk_end = ramdisk_image + ramdisk_size;
+ u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+ u64 ramdisk_here;
if (!boot_params.hdr.type_of_loader ||
!ramdisk_image || !ramdisk_size)
@@ -478,14 +478,8 @@ static void __init reserve_initrd(void)
initrd_start = 0;
- if (ramdisk_end < ramdisk_image) {
- free_bootmem(ramdisk_image, ramdisk_size);
- printk(KERN_ERR "initrd wraps around end of memory, "
- "disabling initrd\n");
- return;
- }
if (ramdisk_size >= end_of_lowmem/2) {
- free_bootmem(ramdisk_image, ramdisk_size);
+ free_early(ramdisk_image, ramdisk_image + ramdisk_size - 1);
printk(KERN_ERR "initrd too large to handle, "
"disabling initrd\n");
return;
@@ -495,8 +489,7 @@ static void __init reserve_initrd(void)
/* All in lowmem, easy case */
/*
* don't need to reserve again, already reserved early
- * in i386_start_kernel, and early_res_to_bootmem
- * convert that to reserved in bootmem
+ * in i386_start_kernel
*/
initrd_start = ramdisk_image + PAGE_OFFSET;
initrd_end = initrd_start+ramdisk_size;
@@ -504,11 +497,14 @@ static void __init reserve_initrd(void)
}
/* We need to move the initrd down into lowmem */
- ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK;
+ ramdisk_here = find_e820_area(min_low_pfn<<PAGE_SHIFT,
+ end_of_lowmem, ramdisk_size,
+ PAGE_SIZE);
/* Note: this includes all the lowmem currently occupied by
the initrd, we rely on that fact to keep the data intact. */
- reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT);
+ reserve_early(ramdisk_here, ramdisk_here + ramdisk_size - 1,
+ "NEW RAMDISK");
initrd_start = ramdisk_here + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
@@ -519,10 +515,10 @@ static void __init reserve_initrd(void)
static void __init relocate_initrd(void)
{
- unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
- unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
- unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
- unsigned long ramdisk_here;
+ u64 ramdisk_image = boot_params.hdr.ramdisk_image;
+ u64 ramdisk_size = boot_params.hdr.ramdisk_size;
+ u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+ u64 ramdisk_here;
unsigned long slop, clen, mapaddr;
char *p, *q;
@@ -540,6 +536,8 @@ static void __init relocate_initrd(void)
memcpy(q, p, clen);
q += clen;
/* need to free these low pages...*/
+ printk(KERN_INFO "Freeing old partial RAMDISK %08llx-%08llx\n",
+ ramdisk_image, ramdisk_image + clen - 1);
free_bootmem(ramdisk_image, clen);
ramdisk_image += clen;
ramdisk_size -= clen;
@@ -560,6 +558,11 @@ static void __init relocate_initrd(void)
ramdisk_size -= clen;
}
/* high pages is not converted by early_res_to_bootmem */
+ ramdisk_image = boot_params.hdr.ramdisk_image;
+ ramdisk_size = boot_params.hdr.ramdisk_size;
+ printk(KERN_INFO "Copied RAMDISK from %016llx - %016llx to %08llx - %08llx\n",
+ ramdisk_image, ramdisk_image + ramdisk_size - 1,
+ ramdisk_here, ramdisk_here + ramdisk_size - 1);
}
#endif /* CONFIG_BLK_DEV_INITRD */
@@ -576,10 +579,17 @@ void __init setup_bootmem_allocator(void
PAGE_SIZE);
if (bootmap == -1L)
panic("Cannot find bootmem map of size %ld\n", bootmap_size);
+ reserve_early(bootmap, bootmap + bootmap_size - 1, "BOOTMAP");
+#ifdef CONFIG_BLK_DEV_INITRD
+ reserve_initrd();
+#endif
bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn);
+ printk(KERN_INFO " low ram: %08lx - %08lx\n",
+ min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
+ printk(KERN_INFO " bootmap [%08lx - %08lx]\n",
+ bootmap, bootmap + bootmap_size - 1);
register_bootmem_low_pages(max_low_pfn);
early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
- reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
#ifdef CONFIG_ACPI_SLEEP
/*
@@ -593,9 +603,6 @@ void __init setup_bootmem_allocator(void
*/
find_smp_config();
#endif
-#ifdef CONFIG_BLK_DEV_INITRD
- reserve_initrd();
-#endif
numa_kva_reserve();
reserve_crashkernel();
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix
2008-05-22 1:40 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix Yinghai Lu
@ 2008-05-22 10:12 ` Jeremy Fitzhardinge
2008-05-22 17:58 ` Yinghai Lu
2008-05-22 22:20 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix v2 Yinghai Lu
2008-05-25 17:00 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix #2 Yinghai Lu
2 siblings, 1 reply; 51+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-22 10:12 UTC (permalink / raw)
To: Yinghai Lu
Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
Yinghai Lu wrote:
> use find_e820_area to find addess for new RAMDISK, instead of using ram blindly
>
Unfortunately this patch does not resolve the problem for me either.
J
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix
2008-05-22 10:12 ` Jeremy Fitzhardinge
@ 2008-05-22 17:58 ` Yinghai Lu
0 siblings, 0 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-05-22 17:58 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
On Thu, May 22, 2008 at 3:12 AM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> Yinghai Lu wrote:
>>
>> use find_e820_area to find addess for new RAMDISK, instead of using ram
>> blindly
>>
>
> Unfortunately this patch does not resolve the problem for me either.
Do you have a boot log from the serial console?
Does it fail as dom0 or as domU? Or fully virtualized (FV)?
YH
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] x86: extend e820 ealy_res support 32bit - fix v2
2008-05-22 1:40 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix Yinghai Lu
2008-05-22 10:12 ` Jeremy Fitzhardinge
@ 2008-05-22 22:20 ` Yinghai Lu
2008-05-23 23:08 ` Yinghai Lu
2008-05-24 8:54 ` Jeremy Fitzhardinge
2008-05-25 17:00 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix #2 Yinghai Lu
2 siblings, 2 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-05-22 22:20 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
Jeremy Fitzhardinge
Cc: linux-kernel@vger.kernel.org
[PATCH] x86: extend e820 ealy_res support 32bit - fix v2
use find_e820_area() to find an address for the new RAMDISK, instead of using RAM blindly
also print out low ram and bootmap info
v2: remove the extra -1 in the reserve_early() calls
panic if we cannot find space for the new RAMDISK
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6/arch/x86/kernel/setup_32.c
@@ -466,11 +466,11 @@ static bool do_relocate_initrd = false;
static void __init reserve_initrd(void)
{
- unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
- unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
- unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
- unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
- unsigned long ramdisk_here;
+ u64 ramdisk_image = boot_params.hdr.ramdisk_image;
+ u64 ramdisk_size = boot_params.hdr.ramdisk_size;
+ u64 ramdisk_end = ramdisk_image + ramdisk_size;
+ u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+ u64 ramdisk_here;
if (!boot_params.hdr.type_of_loader ||
!ramdisk_image || !ramdisk_size)
@@ -478,14 +478,8 @@ static void __init reserve_initrd(void)
initrd_start = 0;
- if (ramdisk_end < ramdisk_image) {
- free_bootmem(ramdisk_image, ramdisk_size);
- printk(KERN_ERR "initrd wraps around end of memory, "
- "disabling initrd\n");
- return;
- }
if (ramdisk_size >= end_of_lowmem/2) {
- free_bootmem(ramdisk_image, ramdisk_size);
+ free_early(ramdisk_image, ramdisk_end);
printk(KERN_ERR "initrd too large to handle, "
"disabling initrd\n");
return;
@@ -495,8 +489,7 @@ static void __init reserve_initrd(void)
/* All in lowmem, easy case */
/*
* don't need to reserve again, already reserved early
- * in i386_start_kernel, and early_res_to_bootmem
- * convert that to reserved in bootmem
+ * in i386_start_kernel
*/
initrd_start = ramdisk_image + PAGE_OFFSET;
initrd_end = initrd_start+ramdisk_size;
@@ -504,11 +497,18 @@ static void __init reserve_initrd(void)
}
/* We need to move the initrd down into lowmem */
- ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK;
+ ramdisk_here = find_e820_area(min_low_pfn<<PAGE_SHIFT,
+ end_of_lowmem, ramdisk_size,
+ PAGE_SIZE);
+
+ if (ramdisk_here == -1ULL)
+ panic("Cannot find placce for new RAMDISK of size %lld\n",
+ ramdisk_size);
/* Note: this includes all the lowmem currently occupied by
the initrd, we rely on that fact to keep the data intact. */
- reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT);
+ reserve_early(ramdisk_here, ramdisk_here + ramdisk_size,
+ "NEW RAMDISK");
initrd_start = ramdisk_here + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
@@ -519,10 +519,10 @@ static void __init reserve_initrd(void)
static void __init relocate_initrd(void)
{
- unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
- unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
- unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
- unsigned long ramdisk_here;
+ u64 ramdisk_image = boot_params.hdr.ramdisk_image;
+ u64 ramdisk_size = boot_params.hdr.ramdisk_size;
+ u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+ u64 ramdisk_here;
unsigned long slop, clen, mapaddr;
char *p, *q;
@@ -540,6 +540,8 @@ static void __init relocate_initrd(void)
memcpy(q, p, clen);
q += clen;
/* need to free these low pages...*/
+ printk(KERN_INFO "Freeing old partial RAMDISK %08llx-%08llx\n",
+ ramdisk_image, ramdisk_image + clen - 1);
free_bootmem(ramdisk_image, clen);
ramdisk_image += clen;
ramdisk_size -= clen;
@@ -560,6 +562,11 @@ static void __init relocate_initrd(void)
ramdisk_size -= clen;
}
/* high pages is not converted by early_res_to_bootmem */
+ ramdisk_image = boot_params.hdr.ramdisk_image;
+ ramdisk_size = boot_params.hdr.ramdisk_size;
+ printk(KERN_INFO "Copied RAMDISK from %016llx - %016llx to %08llx - %08llx\n",
+ ramdisk_image, ramdisk_image + ramdisk_size - 1,
+ ramdisk_here, ramdisk_here + ramdisk_size - 1);
}
#endif /* CONFIG_BLK_DEV_INITRD */
@@ -576,10 +583,17 @@ void __init setup_bootmem_allocator(void
PAGE_SIZE);
if (bootmap == -1L)
panic("Cannot find bootmem map of size %ld\n", bootmap_size);
+ reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
+#ifdef CONFIG_BLK_DEV_INITRD
+ reserve_initrd();
+#endif
bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn);
+ printk(KERN_INFO " low ram: %08lx - %08lx\n",
+ min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
+ printk(KERN_INFO " bootmap %08lx - %08lx\n",
+ bootmap, bootmap + bootmap_size);
register_bootmem_low_pages(max_low_pfn);
early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
- reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
#ifdef CONFIG_ACPI_SLEEP
/*
@@ -593,9 +607,6 @@ void __init setup_bootmem_allocator(void
*/
find_smp_config();
#endif
-#ifdef CONFIG_BLK_DEV_INITRD
- reserve_initrd();
-#endif
numa_kva_reserve();
reserve_crashkernel();
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix v2
2008-05-22 22:20 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix v2 Yinghai Lu
@ 2008-05-23 23:08 ` Yinghai Lu
2008-05-23 23:32 ` Jeremy Fitzhardinge
2008-05-23 23:38 ` Jeremy Fitzhardinge
2008-05-24 8:54 ` Jeremy Fitzhardinge
1 sibling, 2 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-05-23 23:08 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
Jeremy Fitzhardinge
Cc: linux-kernel@vger.kernel.org
[-- Attachment #1: Type: text/plain, Size: 443 bytes --]
On Thu, May 22, 2008 at 3:20 PM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
> [PATCH] x86: extend e820 ealy_res support 32bit - fix v2
>
> use find_e820_area to find address for new RAMDISK, instead of using ram blindly
>
> also print out low ram and bootmap info
>
> v2: remove extra -1 in reserve_early calling
> panic if can not find space for new RAMDISK
>
Jeremy,
your config works on XEN of 5.2 as domU
( I added ext2 support...)
YH
[-- Attachment #2: dmesg_domu.txt --]
[-- Type: text/plain, Size: 15921 bytes --]
Linux version 2.6.26-rc3-sched-devel.git (yhlu@mpk12-office-77-136) (gcc version 4.2.1 (SUSE Linux)) #65 SMP PREEMPT Fri May 23 15:55:55 PDT 2008
BIOS-provided physical RAM map:
BIOS-e820: 0000000000000000 - 000000000009fc00 (usable)
BIOS-e820: 000000000009fc00 - 00000000000a0000 (reserved)
BIOS-e820: 00000000000e0000 - 0000000000100000 (reserved)
BIOS-e820: 0000000000100000 - 000000001fffac00 (usable)
BIOS-e820: 000000001fffac00 - 0000000020000000 (reserved)
Early serial console at I/O port 0x3f8 (options '9600n8')
console [uart0] enabled
x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
0MB HIGHMEM available.
511MB LOWMEM available.
low ram: 00622000 - 1fffa000
bootmap 00622000 - 00626000
early res: 0 [0-fff] BIOS data page
early res: 1 [1000-1fff] EX TRAMPOLINE
early res: 2 [6000-6fff] TRAMPOLINE
early res: 3 [100000-619487] TEXT DATA BSS
early res: 4 [1ea1f000-1ffe9e56] RAMDISK
early res: 5 [619488-621fff] INIT_PG_TABLE
early res: 6 [9fc00-fffff] BIOS reserved
early res: 7 [622000-625fff] BOOTMAP
Scan SMP from c0000000 for 1024 bytes.
Scan SMP from c009fc00 for 1024 bytes.
Scan SMP from c00f0000 for 65536 bytes.
found SMP MP-table at [c00fccd0] 000fccd0
NX (Execute Disable) protection: active
Entering add_active_range(0, 0, 131066) 0 entries of 256 used
Zone PFN ranges:
DMA 0 -> 4096
Normal 4096 -> 131066
HighMem 131066 -> 131066
Movable zone start PFN for each node
early_node_map[1] active PFN ranges
0: 0 -> 131066
On node 0 totalpages: 131066
DMA zone: 32 pages used for memmap
DMA zone: 0 pages reserved
DMA zone: 4064 pages, LIFO batch:0
Normal zone: 991 pages used for memmap
Normal zone: 125979 pages, LIFO batch:31
HighMem zone: 0 pages used for memmap
Movable zone: 0 pages used for memmap
DMI 2.4 present.
ACPI: RSDP 000EB0B0, 0024 (r2 Xen)
ACPI: XSDT 000EB020, 0044 (r1 Xen HVM 0 HVML 0)
ACPI: FACP 000EAE30, 00F4 (r4 Xen HVM 0 HVML 0)
ACPI: DSDT 000EA040, 0D67 (r2 Xen HVM 0 INTL 20060707)
ACPI: FACS 000EA000, 0040
ACPI: APIC 000EAF30, 0072 (r2 Xen HVM 0 HVML 0)
ACPI: HPET 000EAFB0, 0038 (r1 Xen HVM 0 HVML 0)
ACPI: SSDT 000EAFE8, 0038 (r2 Xen HVM 0 HVML 0)
ACPI: DMI BIOS year==0, assuming ACPI-capable machine
ACPI: PM-Timer IO Port: 0x1f48
ACPI: Local APIC address 0xfee00000
ACPI: LAPIC (acpi_id[0x00] lapic_id[0x00] enabled)
ACPI: IOAPIC (id[0x01] address[0xfec00000] gsi_base[0])
IOAPIC[0]: apic_id 1, version 17, address 0xfec00000, GSI 0-47
ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
ACPI: INT_SRC_OVR (bus 0 bus_irq 5 global_irq 5 low level)
ACPI: INT_SRC_OVR (bus 0 bus_irq 7 global_irq 7 low level)
ACPI: INT_SRC_OVR (bus 0 bus_irq 10 global_irq 10 low level)
ACPI: INT_SRC_OVR (bus 0 bus_irq 11 global_irq 11 low level)
ACPI: IRQ0 used by override.
ACPI: IRQ2 used by override.
ACPI: IRQ5 used by override.
ACPI: IRQ7 used by override.
ACPI: IRQ9 used by override.
ACPI: IRQ10 used by override.
ACPI: IRQ11 used by override.
Enabling APIC mode: Flat. Using 1 I/O APICs
ACPI: HPET id: 0x8086a201 base: 0xfed00000
Using ACPI (MADT) for SMP configuration information
Allocating PCI resources starting at 30000000 (gap: 20000000:e0000000)
SMP: Allowing 1 CPUs, 0 hotplug CPUs
PERCPU: Allocating 37096 bytes of per cpu data
NR_CPUS: 4, nr_cpu_ids: 1, nr_node_ids 1
Built 1 zonelists in Zone order, mobility grouping on. Total pages: 130043
Kernel command line: rw root=/dev/ram0 console=tty0 console=uart8250,io,0x3f8,9600n8 apic=verbose pci=routeirq ip=dhcp ramdisk_size=131072
mapped APIC to ffffb000 (fee00000)
mapped IOAPIC to ffffa000 (fec00000)
Enabling fast FPU save and restore... done.
Enabling unmasked SIMD FPU exception support... done.
Initializing CPU#0
PID hash table entries: 2048 (order: 11, 8192 bytes)
Detected 2493.395 MHz processor.
Console: colour VGA+ 80x25
console [tty0] enabled
console handover: boot [uart0] -> real [ttyS0]
Dentry cache hash table entries: 65536 (order: 6, 262144 bytes)
Inode-cache hash table entries: 32768 (order: 5, 131072 bytes)
Memory: 491344k/524264k available (3066k kernel code, 32304k reserved, 1436k data, 280k init, 0k highmem)
virtual kernel memory layout:
fixmap : 0xfff84000 - 0xfffff000 ( 492 kB)
pkmap : 0xffa00000 - 0xffc00000 (2048 kB)
vmalloc : 0xe0800000 - 0xff9fe000 ( 497 MB)
lowmem : 0xc0000000 - 0xdfffa000 ( 511 MB)
.init : 0xc056c000 - 0xc05b2000 ( 280 kB)
.data : 0xc03febff - 0xc0565ffc (1436 kB)
.text : 0xc0100000 - 0xc03febff (3066 kB)
Checking if this processor honours the WP bit even in supervisor mode...Ok.
CPA: page pool initialized 1 of 1 pages preallocated
SLUB: Genslabs=12, HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
hpet clockevent registered
Calibrating delay using timer specific routine.. 5014.51 BogoMIPS (lpj=10029037)
Security Framework initialized
Capability LSM initialized
Mount-cache hash table entries: 512
CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
CPU: L2 Cache: 512K (64 bytes/line)
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#0.
Checking 'hlt' instruction... OK.
SMP alternatives: switching to UP code
Freeing SMP alternatives: 14k freed
ACPI: Core revision 20080321
enabled ExtINT on CPU#0
ENABLING IO-APIC IRQs
init IO_APIC IRQs
IO-APIC (apicid-pin) 1-0 not connected.
IO-APIC (apicid-pin) 1-16, 1-17, 1-18, 1-19, 1-20, 1-21, 1-22, 1-23, 1-24, 1-25, 1-26, 1-27, 1-28, 1-29, 1-30, 1-31, 1-32, 1-33, 1-34, 1-35, 1-36, 1-37, 1-38, 1-39, 1-40, 1-41, 1-42, 1-43, 1-44, 1-45, 1-46, 1-47 not connected.
..TIMER: vector=0x31 apic1=0 pin1=2 apic2=-1 pin2=-1
CPU0: AMD Quad-Core AMD Opteron(tm) Processor 8360 SE stepping 03
Using local APIC timer interrupts.
calibrating APIC timer ...
... lapic delta = 625666
... PM timer delta = 358317
... PM timer result ok
..... delta 625666
..... mult: 26870470
..... calibration result: 400426
..... CPU clock speed is 2496.0229 MHz.
..... host bus clock speed is 100.0426 MHz.
Brought up 1 CPUs
Total of 1 processors activated (5014.51 BogoMIPS).
CPU0 attaching NULL sched-domain.
net_namespace: 632 bytes
Booting paravirtualized kernel on bare hardware
NET: Registered protocol family 16
No dock devices found.
ACPI: bus type pci registered
PCI: PCI BIOS revision 2.10 entry at 0xfa3b0, last bus=0
PCI: Using configuration type 1 for base access
Setting up standard PCI resources
ACPI: EC: Look up EC in DSDT
ACPI: Interpreter enabled
ACPI: (supports S0 S5)
ACPI: Using IOAPIC for interrupt routing
ACPI: PCI Root Bridge [PCI0] (0000:00)
PCI: 0000:00:01.1 reg 20 io port: [c000, c00f]
* Found PM-Timer Bug on the chipset. Due to workarounds for a bug,
* this clock source is slow. Consider trying other clock sources
pci 0000:00:01.2: quirk: region 1f40-1f7f claimed by PIIX4 ACPI
PCI: 0000:00:01.3 reg 20 io port: [c020, c03f]
PCI: 0000:00:02.0 reg 10 32bit mmio: [f0000000, f1ffffff]
PCI: 0000:00:02.0 reg 14 32bit mmio: [f2000000, f2000fff]
PCI: 0000:00:03.0 reg 10 io port: [c100, c1ff]
PCI: 0000:00:03.0 reg 14 32bit mmio: [f3000000, f3ffffff]
PCI: 0000:00:04.0 reg 10 io port: [c200, c2ff]
PCI: 0000:00:04.0 reg 14 32bit mmio: [f4000000, f40000ff]
bus 00 -> node 0
ACPI: PCI Interrupt Routing Table [\_SB_.PCI0._PRT]
ACPI: PCI Interrupt Link [LNKA] (IRQs *5 7 10 11)
ACPI: PCI Interrupt Link [LNKB] (IRQs 5 *7 10 11)
ACPI: PCI Interrupt Link [LNKC] (IRQs 5 7 *10 11)
ACPI: PCI Interrupt Link [LNKD] (IRQs 5 7 10 *11)
Linux Plug and Play Support v0.97 (c) Adam Belay
pnp: PnP ACPI init
ACPI: bus type pnp registered
pnp: PnP ACPI: found 11 devices
ACPI: ACPI bus type pnp unregistered
SCSI subsystem initialized
libata version 3.00 loaded.
usbcore: registered new interface driver usbfs
usbcore: registered new interface driver hub
usbcore: registered new device driver usb
PCI: Using ACPI for IRQ routing
PCI: Routing PCI interrupts for all devices because "pci=routeirq" specified
ACPI: PCI Interrupt 0000:00:01.2[A] -> GSI 20 (level, low) -> IRQ 20
Int: type 0, pol 3, trig 3, bus 00, IRQ 04, APIC ID 1, APIC INT 14
ACPI: PCI Interrupt 0000:00:01.3[D] -> GSI 23 (level, low) -> IRQ 23
Int: type 0, pol 3, trig 3, bus 00, IRQ 07, APIC ID 1, APIC INT 17
ACPI: PCI Interrupt 0000:00:03.0[A] -> GSI 28 (level, low) -> IRQ 28
Int: type 0, pol 3, trig 3, bus 00, IRQ 0c, APIC ID 1, APIC INT 1c
ACPI: PCI Interrupt 0000:00:04.0[A] -> GSI 32 (level, low) -> IRQ 32
Int: type 0, pol 3, trig 3, bus 00, IRQ 10, APIC ID 1, APIC INT 20
number of MP IRQ sources: 19.
number of IO-APIC #1 registers: 48.
testing the IO APIC.......................
IO APIC #1......
.... register #00: 00000000
....... : physical APIC id: 00
....... : Delivery Type: 0
....... : LTS : 0
.... register #01: 002F0011
....... : max redirection entries: 002F
....... : PRQ implemented: 0
....... : IO APIC version: 0011
.... register #02: 00000000
....... : arbitration: 00
.... IRQ redirection table:
NR Log Phy Mask Trig IRR Pol Stat Dest Deli Vect:
00 000 00 1 0 0 0 0 0 0 00
01 001 01 1 0 0 0 0 1 1 39
02 001 01 0 0 0 0 0 1 1 31
03 001 01 0 0 0 0 0 1 1 41
04 001 01 1 0 0 0 0 1 1 49
05 001 01 1 1 0 1 0 1 1 51
06 001 01 1 0 0 0 0 1 1 59
07 001 01 1 1 0 1 0 1 1 61
08 001 01 1 0 0 0 0 1 1 69
09 001 01 0 1 0 1 0 1 1 71
0a 001 01 1 1 0 1 0 1 1 79
0b 001 01 1 1 0 1 0 1 1 81
0c 001 01 1 0 0 0 0 1 1 89
0d 001 01 0 0 0 0 0 1 1 91
0e 001 01 0 0 0 0 0 1 1 99
0f 001 01 0 0 0 0 0 1 1 A1
10 000 00 1 0 0 0 0 0 0 00
11 000 00 1 0 0 0 0 0 0 00
12 000 00 1 0 0 0 0 0 0 00
13 000 00 1 0 0 0 0 0 0 00
14 001 01 1 1 0 1 0 1 1 A9
15 000 00 1 0 0 0 0 0 0 00
16 000 00 1 0 0 0 0 0 0 00
17 001 01 1 1 0 1 0 1 1 B1
18 000 00 1 0 0 0 0 0 0 00
19 000 00 1 0 0 0 0 0 0 00
1a 000 00 1 0 0 0 0 0 0 00
1b 000 00 1 0 0 0 0 0 0 00
1c 001 01 1 1 0 1 0 1 1 B9
1d 000 00 1 0 0 0 0 0 0 00
1e 000 00 1 0 0 0 0 0 0 00
1f 000 00 1 0 0 0 0 0 0 00
20 001 01 1 1 0 1 0 1 1 C1
21 000 00 1 0 0 0 0 0 0 00
22 000 00 1 0 0 0 0 0 0 00
23 000 00 1 0 0 0 0 0 0 00
24 000 00 1 0 0 0 0 0 0 00
25 000 00 1 0 0 0 0 0 0 00
26 000 00 1 0 0 0 0 0 0 00
27 000 00 1 0 0 0 0 0 0 00
28 000 00 1 0 0 0 0 0 0 00
29 000 00 1 0 0 0 0 0 0 00
2a 000 00 1 0 0 0 0 0 0 00
2b 000 00 1 0 0 0 0 0 0 00
2c 000 00 1 0 0 0 0 0 0 00
2d 000 00 1 0 0 0 0 0 0 00
2e 000 00 1 0 0 0 0 0 0 00
2f 000 00 1 0 0 0 0 0 0 00
IRQ to pin mappings:
IRQ0 -> 0:2
IRQ1 -> 0:1
IRQ3 -> 0:3
IRQ4 -> 0:4
IRQ5 -> 0:5
IRQ6 -> 0:6
IRQ7 -> 0:7
IRQ8 -> 0:8
IRQ9 -> 0:9
IRQ10 -> 0:10
IRQ11 -> 0:11
IRQ12 -> 0:12
IRQ13 -> 0:13
IRQ14 -> 0:14
IRQ15 -> 0:15
IRQ20 -> 0:20
IRQ23 -> 0:23
IRQ28 -> 0:28
IRQ32 -> 0:32
.................................... done.
hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0
hpet0: 3 64-bit timers, 77918844 Hz
system 00:00: iomem range 0x0-0x9ffff could not be reserved
Switched to high resolution mode on CPU 0
system 00:03: ioport range 0x8a0-0x8a3 has been reserved
system 00:03: ioport range 0xcc0-0xccf has been reserved
system 00:03: ioport range 0x4d0-0x4d1 has been reserved
bus: 00 index 0 io port: [0, ffff]
bus: 00 index 1 mmio: [0, ffffffffffffffff]
NET: Registered protocol family 2
IP route cache hash table entries: 4096 (order: 2, 16384 bytes)
TCP established hash table entries: 16384 (order: 5, 131072 bytes)
TCP bind hash table entries: 16384 (order: 5, 131072 bytes)
TCP: Hash tables configured (established 16384 bind 16384)
TCP reno registered
NET: Registered protocol family 1
checking if image is initramfs...it isn't (no cpio magic); looks like an initrd
Freeing initrd memory: 22315k freed
kvm: no hardware support
Machine check exception polling timer started.
Total HugeTLB memory allocated, 0
SGI XFS with security attributes, no debug enabled
msgmni has been set to 1003 for ipc namespace c053cec8
Block layer SCSI generic (bsg) driver version 0.4 loaded (major 253)
io scheduler noop registered
io scheduler anticipatory registered
io scheduler deadline registered
io scheduler cfq registered (default)
pci 0000:00:00.0: Limiting direct PCI/PCI transfers
pci 0000:00:01.0: PIIX3: Enabling Passive Release
pci 0000:00:01.0: Activating ISA DMA hang workarounds
pci 0000:00:02.0: Boot video device
pci_hotplug: PCI Hot Plug PCI Core version: 0.5
acpiphp: ACPI Hot Plug PCI Controller Driver version: 0.5
acpiphp_ibm: ibm_acpiphp_init: acpi_walk_namespace failed
pciehp: PCI Express Hot Plug Controller Driver version: 0.4
ACPI: ACPI0007:00 is registered as cooling_device0
hpet_resources: 0xfed00000 is busy
Non-volatile memory driver v1.2
Linux agpgart interface v0.103
Serial: 8250/16550 driver $Revision: 1.90 $ 4 ports, IRQ sharing disabled
serial8250: ttyS0 at I/O 0x3f8 (irq = 4) is a 16450
00:0a: ttyS0 at I/O 0x3f8 (irq = 4) is a 16450
brd: module loaded
pcnet32.c:v1.35 21.Apr.2008 tsbogend@alpha.franken.de
PPP generic driver version 2.4.2
PPP Deflate Compression module registered
PPP BSD Compression module registered
8139too Fast Ethernet driver 0.9.28
8139too 0000:00:04.0: This (id 10ec:8139 rev 20) is an enhanced 8139C+ chip
8139too 0000:00:04.0: Use the "8139cp" driver for improved performance and stability.
ACPI: PCI Interrupt 0000:00:04.0[A] -> GSI 32 (level, low) -> IRQ 32
Int: type 0, pol 3, trig 3, bus 00, IRQ 10, APIC ID 1, APIC INT 20
PCI: Setting latency timer of device 0000:00:04.0 to 64
8139too 0000:00:04.0: unknown chip version, assuming RTL-8139
8139too 0000:00:04.0: TxConfig = 0x74800000
eth0: RealTek RTL8139 at 0xc200, 00:16:3e:23:81:db, IRQ 32
eth0: Identified 8139 chip type 'RTL-8139'
Driver 'sd' needs updating - please use bus_type methods
usbcore: registered new interface driver libusual
PNP: PS/2 Controller [PNP0303:PS2K,PNP0f13:PS2M] at 0x60,0x64 irq 1,12
serio: i8042 KBD port at 0x60,0x64 irq 1
serio: i8042 AUX port at 0x60,0x64 irq 12
mice: PS/2 mouse device common for all mice
input: AT Translated Set 2 keyboard as /devices/platform/i8042/serio0/input/input0
rtc_cmos 00:05: rtc core: registered rtc_cmos as rtc0
rtc0: alarms up to one day
device-mapper: uevent: version 1.0.3
device-mapper: ioctl: 4.13.0-ioctl (2007-10-18) initialised: dm-devel@redhat.com
usbcore: registered new interface driver hiddev
usbcore: registered new interface driver usbhid
usbhid: v2.6:USB HID core driver
TCP cubic registered
Initializing XFRM netlink socket
NET: Registered protocol family 10
lo: Disabled Privacy Extensions
IPv6 over IPv4 tunneling driver
sit0: Disabled Privacy Extensions
NET: Registered protocol family 17
RPC: Registered udp transport module.
RPC: Registered tcp transport module.
Using IPI No-Shortcut mode
registered taskstats version 1
rtc_cmos 00:05: setting system clock to 2008-05-23 22:50:51 UTC (1211583051)
input: ImExPS/2 Generic Explorer Mouse as /devices/platform/i8042/serio1/input/input1
RAMDISK: Compressed image found at block 0
VFS: Mounted root (ext2 filesystem).
Freeing unused kernel memory: 280k freed
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix v2
2008-05-23 23:08 ` Yinghai Lu
@ 2008-05-23 23:32 ` Jeremy Fitzhardinge
2008-05-23 23:38 ` Jeremy Fitzhardinge
1 sibling, 0 replies; 51+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-23 23:32 UTC (permalink / raw)
To: Yinghai Lu
Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
Yinghai Lu wrote:
> On Thu, May 22, 2008 at 3:20 PM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
>> [PATCH] x86: extend e820 ealy_res support 32bit - fix v2
>>
>> use find_e820_area to find address for new RAMDISK, instead of using ram blindly
>>
>> also print out low ram and bootmap info
>>
>> v2: remove extra -1 in reserve_early calling
>> panic if can not find space for new RAMDISK
>>
>>
>
> Jeremy,
>
> your config works on XEN of 5.2 as domU
> ( I added ext2 support...)
>
Hm, still crashing for me. I'm not getting any output; I need to work
out why earlyprintk=xen isn't producing any output.
J
> YH
>
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix v2
2008-05-23 23:08 ` Yinghai Lu
2008-05-23 23:32 ` Jeremy Fitzhardinge
@ 2008-05-23 23:38 ` Jeremy Fitzhardinge
2008-05-24 0:01 ` Yinghai Lu
1 sibling, 1 reply; 51+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-23 23:38 UTC (permalink / raw)
To: Yinghai Lu
Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
Yinghai Lu wrote:
> Jeremy,
>
> your config works on XEN of 5.2 as domU
> ( I added ext2 support...)
>
The boot log you attached is for a kernel booting either natively or in
an hvm domain ("Booting paravirtualized kernel on bare hardware"). Were
you booting it as an hvm guest rather than a pv one?
J
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix v2
2008-05-23 23:38 ` Jeremy Fitzhardinge
@ 2008-05-24 0:01 ` Yinghai Lu
2008-05-24 0:09 ` Yinghai Lu
0 siblings, 1 reply; 51+ messages in thread
From: Yinghai Lu @ 2008-05-24 0:01 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
On Fri, May 23, 2008 at 4:38 PM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> Yinghai Lu wrote:
>>
>> Jeremy,
>>
>> your config works on XEN of 5.2 as domU
>> ( I added ext2 support...)
>>
>
> The boot log you attached is for a kernel booting either natively or in an
> hvm domain ("Booting paravirtualized kernel on bare hardware"). Were you
> booting it as an hvm guest rather than a pv one?
I ran it as an FV (fully virtualized) guest.
YH
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix v2
2008-05-24 0:01 ` Yinghai Lu
@ 2008-05-24 0:09 ` Yinghai Lu
0 siblings, 0 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-05-24 0:09 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
On Fri, May 23, 2008 at 5:01 PM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
> On Fri, May 23, 2008 at 4:38 PM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
>> Yinghai Lu wrote:
>>>
>>> Jeremy,
>>>
>>> your config works on XEN of 5.2 as domU
>>> ( I added ext2 support...)
>>>
>>
>> The boot log you attached is for a kernel booting either natively or in an
>> hvm domain ("Booting paravirtualized kernel on bare hardware"). Were you
>> booting it as an hvm guest rather than a pv one?
>
> run it as FV guest.
I moved reserve_ebda_region() from setup_arch to i386_start_kernel,
and reserve_ebda_region() checks paravirt_enabled(); I wonder if
that causes the problem.
But 64bit already did that.
YH
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix v2
2008-05-22 22:20 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix v2 Yinghai Lu
2008-05-23 23:08 ` Yinghai Lu
@ 2008-05-24 8:54 ` Jeremy Fitzhardinge
2008-05-24 9:49 ` [PATCH] xen: boot via i386_start_kernel to get early reservations Jeremy Fitzhardinge
2008-05-24 19:57 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix v2 Yinghai Lu
1 sibling, 2 replies; 51+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-24 8:54 UTC (permalink / raw)
To: Yinghai Lu
Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
Yinghai Lu wrote:
> [PATCH] x86: extend e820 ealy_res support 32bit - fix v2
>
> use find_e820_area to find address for new RAMDISK, instead of using ram blindly
>
> also print out low ram and bootmap info
>
> v2: remove extra -1 in reserve_early calling
> panic if can not find space for new RAMDISK
>
OK, I've fixed earlyprintk=xen, so I can finally get some useful
debugging information.
With this patch it still crashes, but outputs:
(early) Reserving virtual address space above 0xf57fe000
(early) Linux version 2.6.26-rc3-sched-devel.git (jeremy@victim.goop.org) (gcc version 4.1.2 20070925 (Red Hat 4.1.2-33)) #466 SMP PREEMPT Sat May 24 01:05:41 PDT 2008
(early) ACPI in unprivileged domain disabled
(early) BIOS-provided physical RAM map:
(early) Xen: 0000000000000000 - 000000000009f000 (usable)
(early) Xen: 0000000000100000 - 0000000010000000 (usable)
(early) console [xenboot0] enabled
(early) debug: ignoring loglevel setting.
(early) limit_regions start: 0000000000000000 - 000000000009f000 (early) (usable)
(early) limit_regions start: 0000000000100000 - 0000000010000000 (early) (usable)
(early) limit_regions endfunc: 0000000000000000 - 000000000009f000 (early) (usable)
(early) limit_regions endfunc: 0000000000100000 - 0000000010000000 (early) (usable)
(early) user-defined physical RAM map:
(early) user: 0000000000000000 - 000000000009f000 (early) (usable)
(early) user: 0000000000100000 - 0000000010000000 (early) (usable)
(early) 0MB HIGHMEM available.
(early) 256MB LOWMEM available.
(early) low ram: 0102c000 - 10000000
(early) bootmap 0102c000 - 0102e000
(early) early res: 0 [0-fff] BIOS data page
(early) early res: 1 [1000-1fff] EX TRAMPOLINE
(early) early res: 2 [6000-6fff] TRAMPOLINE
(early) early res: 3 [102c000-102dfff] BOOTMAP
(early) Scan SMP from c0000000 for 1024 bytes.
(early) Scan SMP from c009fc00 for 1024 bytes.
(early) Scan SMP from c00f0000 for 65536 bytes.
(early) Scan SMP from c00c2c20 for 1024 bytes.
(early) NX (Execute Disable) protection: active
[crash]
Hm, I think this is the problem:
/*
* don't need to reserve again, already reserved early
* in i386_start_kernel
*/
A Xen pv boot doesn't presently go via i386_start_kernel; it goes
directly from xen_start_kernel to start_kernel (you can see that the
"early res" lines are missing important things like the kernel code and
pagetables).
I tried making xen_start_kernel directly call i386_start_kernel, and it
nearly works. The problem is that the initial address space layout for
a Xen domain is:
kernel
ramdisk
init pagetable
which means that in
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */
if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
u64 ramdisk_end = ramdisk_image + ramdisk_size;
reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
reserve_early(__pa_symbol(&_end), init_pg_tables_end, "INIT_PG_TABLE");
the INIT_PG_TABLE reserve_early() will panic because it partially
overlaps with the RAMDISK reservation.
J
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] xen: boot via i386_start_kernel to get early reservations
2008-05-24 8:54 ` Jeremy Fitzhardinge
@ 2008-05-24 9:49 ` Jeremy Fitzhardinge
2008-05-24 22:04 ` Yinghai Lu
2008-05-24 19:57 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix v2 Yinghai Lu
1 sibling, 1 reply; 51+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-24 9:49 UTC (permalink / raw)
To: Yinghai Lu
Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
Boot Xen via i386_start_kernel so that all the early reservations are
made properly; without these, it will start using the kernel and
pagetables as early heap memory, which is a bit suboptimal.
One tricky part is that reserve_early() will just panic if any of the
early reservations overlap any others. When a Xen domain is built, it
constructs the initial address space as:
kernel text+data+bss
initrd
initial pagetable
Therefore, when reserving the pagetable (from &_end to
init_pg_tables_end), it covers the whole initrd area. If it then
tries to reserve the initrd, it will panic because of the overlap.
The simple fix here is to reserve INIT_PG_TABLE first, and then only
reserve the ramdisk if it doesn't overlap with the previous
reservations. A better/more complex fix might be to make
reserve_early() deal with overlapping reservations.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/kernel/head32.c | 6 ++++--
arch/x86/xen/enlighten.c | 2 +-
include/asm-x86/setup.h | 1 +
3 files changed, 6 insertions(+), 3 deletions(-)
===================================================================
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -66,6 +66,7 @@ void __init i386_start_kernel(void)
void __init i386_start_kernel(void)
{
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
+ reserve_early(__pa_symbol(&_end), init_pg_tables_end, "INIT_PG_TABLE");
#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */
@@ -73,10 +74,11 @@ void __init i386_start_kernel(void)
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
u64 ramdisk_end = ramdisk_image + ramdisk_size;
- reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
+ if (ramdisk_end <= __pa_symbol(&_text) ||
+ ramdisk_image > init_pg_tables_end)
+ reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
- reserve_early(__pa_symbol(&_end), init_pg_tables_end, "INIT_PG_TABLE");
reserve_ebda_region();
===================================================================
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1264,5 +1264,5 @@ asmlinkage void __init xen_start_kernel(
}
/* Start the world */
- start_kernel();
+ i386_start_kernel();
}
===================================================================
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -58,6 +58,7 @@ int __init copy_e820_map(struct e820entr
int __init copy_e820_map(struct e820entry *biosmap, int nr_map);
void __init add_memory_region(unsigned long long start,
unsigned long long size, int type);
+void __init i386_start_kernel(void);
extern unsigned long init_pg_tables_end;
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix v2
2008-05-24 8:54 ` Jeremy Fitzhardinge
2008-05-24 9:49 ` [PATCH] xen: boot via i386_start_kernel to get early reservations Jeremy Fitzhardinge
@ 2008-05-24 19:57 ` Yinghai Lu
1 sibling, 0 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-05-24 19:57 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
On Sat, May 24, 2008 at 1:54 AM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> Yinghai Lu wrote:
>>
>> [PATCH] x86: extend e820 ealy_res support 32bit - fix v2
>>
>> use find_e820_area to find address for new RAMDISK, instead of using ram
>> blindly
>>
>> also print out low ram and bootmap info
>>
>> v2: remove extra -1 in reserve_early calling
>> panic if can not find space for new RAMDISK
>>
>
> OK, I've fixed earlyprintk=xen, so I can finally get some useful debugging
> information.
>
> With this patch it still crashes, but outputs:
>
> (early) Reserving virtual address space above 0xf57fe000
> (early) Linux version 2.6.26-rc3-sched-devel.git (jeremy@victim.goop.org)
> (gcc version 4.1.2 20070925 (Red Hat 4.1.2-33)) #466 SMP PREEMPT Sat May 24
> 01:05:41 PDT 2008
> (early) ACPI in unprivileged domain disabled
> (early) BIOS-provided physical RAM map:
> (early) Xen: 0000000000000000 - 000000000009f000 (usable)
> (early) Xen: 0000000000100000 - 0000000010000000 (usable)
> (early) console [xenboot0] enabled
> (early) debug: ignoring loglevel setting.
> (early) limit_regions start: 0000000000000000 - 000000000009f000 (early)
> (usable)
> (early) limit_regions start: 0000000000100000 - 0000000010000000 (early)
> (usable)
> (early) limit_regions endfunc: 0000000000000000 - 000000000009f000 (early)
> (usable)
> (early) limit_regions endfunc: 0000000000100000 - 0000000010000000 (early)
> (usable)
> (early) user-defined physical RAM map:
> (early) user: 0000000000000000 - 000000000009f000 (early) (usable)
> (early) user: 0000000000100000 - 0000000010000000 (early) (usable)
> (early) 0MB HIGHMEM available.
> (early) 256MB LOWMEM available.
> (early) low ram: 0102c000 - 10000000
> (early) bootmap 0102c000 - 0102e000
> (early) early res: 0 [0-fff] BIOS data page
> (early) early res: 1 [1000-1fff] EX TRAMPOLINE
> (early) early res: 2 [6000-6fff] TRAMPOLINE
> (early) early res: 3 [102c000-102dfff] BOOTMAP
> (early) Scan SMP from c0000000 for 1024 bytes.
> (early) Scan SMP from c009fc00 for 1024 bytes.
> (early) Scan SMP from c00f0000 for 65536 bytes.
> (early) Scan SMP from c00c2c20 for 1024 bytes.
> (early) NX (Execute Disable) protection: active
> [crash]
>
>
> Hm, I think this is the problem:
>
> /*
> * don't need to reserve again, already reserved early
> * in i386_start_kernel
> */
>
> A Xen pv boot doesn't presently go via i386_start_kernel; it goes directly
> from xen_start_kernel to start_kernel (you can see that the "early res"
> lines are missing important things like the kernel code and pagetables).
>
> I tried making xen_start_kernel directly call i386_start_kernel, and it
> nearly works. The problem is that the initial address space layout for a
> Xen domain is:
>
> kernel
> ramdisk
> init pagetable
>
> which means that in
>
> reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA
> BSS");
>
> #ifdef CONFIG_BLK_DEV_INITRD
> /* Reserve INITRD */
> if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image)
> {
> u64 ramdisk_image = boot_params.hdr.ramdisk_image;
> u64 ramdisk_size = boot_params.hdr.ramdisk_size;
> u64 ramdisk_end = ramdisk_image + ramdisk_size;
> reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
> }
> #endif
> reserve_early(__pa_symbol(&_end), init_pg_tables_end,
> "INIT_PG_TABLE");
>
> the INIT_PG_TABLE reserve_early() will panic because it partially overlaps
> with the RAMDISK reservation.
Great. I guess 64bit XEN pv will call x86_64_start_kernel.
INIT_PG_TABLE is right after "TEXT DATA BSS".
So your bootloader doesn't leave space between the kernel and the ramdisk?
Or do we need to put INIT_PG_TABLE before the end of BSS, like 64bit did?
YH
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] xen: boot via i386_start_kernel to get early reservations
2008-05-24 9:49 ` [PATCH] xen: boot via i386_start_kernel to get early reservations Jeremy Fitzhardinge
@ 2008-05-24 22:04 ` Yinghai Lu
0 siblings, 0 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-05-24 22:04 UTC (permalink / raw)
To: Jeremy Fitzhardinge, Rusty Russell, Ingo Molnar
Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
On Sat, May 24, 2008 at 2:49 AM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> Boot Xen via i386_start_kernel so that all the early reservations are
> made properly; without these, it will start using the kernel and
> pagetables as early heap memory, which is a bit suboptimal.
>
> One tricky part is that reserve_early() will just panic if any of the
> early reservations overlap any others. When a Xen domain is built, it
> constructs the initial address space as:
>
> kernel text+data+bss
> initrd
> initial pagetable
>
> Therefore, when reserving the pagetable (from &_end to
> init_pg_tables_end), it covers the whole initrd area. If it then
> tries to reserve the initrd, it will panic because of the overlap.
>
> The simple fix here is to reserve INIT_PG_TABLE first, and then only
> reserve the ramdisk if it doesn't overlap with the previous
> reservations. A better/more complex fix might be to make
> reserve_early() deal with overlapping reservations.
>
> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
> ---
...
> ===================================================================
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -1264,5 +1264,5 @@ asmlinkage void __init xen_start_kernel(
> }
>
> /* Start the world */
> - start_kernel();
> + i386_start_kernel();
> }
..
need to do the same thing in arch/x86/lguest.c::lguest_init
YH
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-22 1:40 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix Yinghai Lu
2008-05-22 10:12 ` Jeremy Fitzhardinge
2008-05-22 22:20 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix v2 Yinghai Lu
@ 2008-05-25 17:00 ` Yinghai Lu
2008-05-27 15:44 ` Thomas Gleixner
` (4 more replies)
2 siblings, 5 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-05-25 17:00 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
Jeremy Fitzhardinge
Cc: linux-kernel@vger.kernel.org
remove the extra -1 in the reserve_early()/free_early() calls
panic if no space can be found for the new RAMDISK
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/setup_32.c
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 361e5c9..eed7121 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -479,7 +479,7 @@ static void __init reserve_initrd(void)
initrd_start = 0;
if (ramdisk_size >= end_of_lowmem/2) {
- free_early(ramdisk_image, ramdisk_image + ramdisk_size - 1);
+ free_early(ramdisk_image, ramdisk_end);
printk(KERN_ERR "initrd too large to handle, "
"disabling initrd\n");
return;
@@ -501,9 +501,13 @@ static void __init reserve_initrd(void)
end_of_lowmem, ramdisk_size,
PAGE_SIZE);
+ if (ramdisk_here == -1ULL)
+ panic("Cannot find place for new RAMDISK of size %lld\n",
+ ramdisk_size);
+
/* Note: this includes all the lowmem currently occupied by
the initrd, we rely on that fact to keep the data intact. */
- reserve_early(ramdisk_here, ramdisk_here + ramdisk_size - 1,
+ reserve_early(ramdisk_here, ramdisk_here + ramdisk_size,
"NEW RAMDISK");
initrd_start = ramdisk_here + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
@@ -579,15 +583,15 @@ void __init setup_bootmem_allocator(void)
PAGE_SIZE);
if (bootmap == -1L)
panic("Cannot find bootmem map of size %ld\n", bootmap_size);
- reserve_early(bootmap, bootmap + bootmap_size - 1, "BOOTMAP");
+ reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
#ifdef CONFIG_BLK_DEV_INITRD
reserve_initrd();
#endif
bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn);
printk(KERN_INFO " low ram: %08lx - %08lx\n",
min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
- printk(KERN_INFO " bootmap [%08lx - %08lx]\n",
- bootmap, bootmap + bootmap_size - 1);
+ printk(KERN_INFO " bootmap %08lx - %08lx\n",
+ bootmap, bootmap + bootmap_size);
register_bootmem_low_pages(max_low_pfn);
early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
^ permalink raw reply related [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-25 17:00 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix #2 Yinghai Lu
@ 2008-05-27 15:44 ` Thomas Gleixner
2008-05-27 20:37 ` Jeremy Fitzhardinge
2008-05-29 19:56 ` [PATCH] x86: extend e820 early_res support 32bit -fix #3 Yinghai Lu
` (3 subsequent siblings)
4 siblings, 1 reply; 51+ messages in thread
From: Thomas Gleixner @ 2008-05-27 15:44 UTC (permalink / raw)
To: Yinghai Lu
Cc: Ingo Molnar, H. Peter Anvin, Andrew Morton, Jeremy Fitzhardinge,
linux-kernel@vger.kernel.org
On Sun, 25 May 2008, Yinghai Lu wrote:
>
> remove extra -1 in reseve_early calling
> panic if can not find space for new RAMDISK
>
Thanks, applied on top of the v2 update
tglx
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-27 15:44 ` Thomas Gleixner
@ 2008-05-27 20:37 ` Jeremy Fitzhardinge
2008-05-27 20:58 ` Thomas Gleixner
2008-05-27 21:06 ` Yinghai Lu
0 siblings, 2 replies; 51+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-27 20:37 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Yinghai Lu, Ingo Molnar, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
Thomas Gleixner wrote:
> On Sun, 25 May 2008, Yinghai Lu wrote:
>
>> remove extra -1 in reseve_early calling
>> panic if can not find space for new RAMDISK
>>
>>
>
> Thanks, applied on top of the v2 update
>
> tglx
>
This patch is still required to make Xen boot with the e820 changes:
Subject: xen: boot via i386_start_kernel to get early reservations
Boot Xen via i386_start_kernel so that all the early reservations are
made properly; without these, it will start using the kernel and
pagetables as early heap memory, which is a bit suboptimal.
One tricky part is that reserve_early() will just panic if any of the
early reservations overlap any others. When a Xen domain is built, it
constructs the initial address space as:
kernel text+data+bss
initrd
initial pagetable
Therefore, when reserving the pagetable (from &_end to
init_pg_tables_end), it covers the whole initrd area. If it then
tries to reserve the initrd, it will panic because of the overlap.
The simple fix here is to reserve INIT_PG_TABLE first, and then only
reserve the ramdisk if it doesn't overlap with the previous
reservations. A better/more complex fix might be to make
reserve_early() deal with overlapping reservations.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/kernel/head32.c | 6 ++++--
arch/x86/xen/enlighten.c | 2 +-
include/asm-x86/setup.h | 1 +
3 files changed, 6 insertions(+), 3 deletions(-)
===================================================================
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -66,6 +66,7 @@ void __init i386_start_kernel(void)
void __init i386_start_kernel(void)
{
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
+ reserve_early(__pa_symbol(&_end), init_pg_tables_end, "INIT_PG_TABLE");
#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */
@@ -73,10 +74,11 @@ void __init i386_start_kernel(void)
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
u64 ramdisk_end = ramdisk_image + ramdisk_size;
- reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
+ if (ramdisk_end < __pa_symbol(&_text) ||
+ ramdisk_image >= init_pg_tables_end)
+ reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
- reserve_early(__pa_symbol(&_end), init_pg_tables_end, "INIT_PG_TABLE");
reserve_ebda_region();
===================================================================
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1264,5 +1264,5 @@ asmlinkage void __init xen_start_kernel(
}
/* Start the world */
- start_kernel();
+ i386_start_kernel();
}
===================================================================
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -58,6 +58,7 @@ int __init copy_e820_map(struct e820entr
int __init copy_e820_map(struct e820entry *biosmap, int nr_map);
void __init add_memory_region(unsigned long long start,
unsigned long long size, int type);
+void __init i386_start_kernel(void);
extern unsigned long init_pg_tables_end;
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-27 20:37 ` Jeremy Fitzhardinge
@ 2008-05-27 20:58 ` Thomas Gleixner
2008-05-27 21:06 ` Jeremy Fitzhardinge
2008-05-27 21:06 ` Yinghai Lu
1 sibling, 1 reply; 51+ messages in thread
From: Thomas Gleixner @ 2008-05-27 20:58 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Yinghai Lu, Ingo Molnar, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
On Tue, 27 May 2008, Jeremy Fitzhardinge wrote:
> Thomas Gleixner wrote:
> > On Sun, 25 May 2008, Yinghai Lu wrote:
> >
> > > remove extra -1 in reseve_early calling
> > > panic if can not find space for new RAMDISK
> > >
> > >
> >
> > Thanks, applied on top of the v2 update
> >
> > tglx
> >
> This patch is still required to make Xen boot with the e820 changes:
Ok. This one was not in the queue you sent ?
Will pick it up.
Thanks,
tglx
> Subject: xen: boot via i386_start_kernel to get early reservations
>
> Boot Xen via i386_start_kernel so that all the early reservations are
> made properly; without these, it will start using the kernel and
> pagetables as early heap memory, which is a bit suboptimal.
>
> One tricky part is that reserve_early() will just panic if any of the
> early reservations overlap any others. When a Xen domain is built, it
> constructs the initial address space as:
>
> kernel text+data+bss
> initrd
> inital pagetable
>
> Therefore, when reserving the pagetable (from &_end to
> init_pg_tables_end), it covers the whole initrd area. If it then
> tries to reserve the initrd, it will panic because of the overlap.
>
> The simple fix here is to reserve INIT_PG_TABLE first, and then only
> reserve the ramdisk if it doesn't overlap with the previous
> reservations. A better/more complex fix might be to make
> reserve_early() deal with overlapping reservations.
>
> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
> ---
> arch/x86/kernel/head32.c | 6 ++++--
> arch/x86/xen/enlighten.c | 2 +-
> include/asm-x86/setup.h | 1 +
> 3 files changed, 6 insertions(+), 3 deletions(-)
>
> ===================================================================
> --- a/arch/x86/kernel/head32.c
> +++ b/arch/x86/kernel/head32.c
> @@ -66,6 +66,7 @@ void __init i386_start_kernel(void)
> void __init i386_start_kernel(void)
> {
> reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA
> BSS");
> + reserve_early(__pa_symbol(&_end), init_pg_tables_end,
> "INIT_PG_TABLE");
>
> #ifdef CONFIG_BLK_DEV_INITRD
> /* Reserve INITRD */
> @@ -73,10 +74,11 @@ void __init i386_start_kernel(void)
> u64 ramdisk_image = boot_params.hdr.ramdisk_image;
> u64 ramdisk_size = boot_params.hdr.ramdisk_size;
> u64 ramdisk_end = ramdisk_image + ramdisk_size;
> - reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
> + if (ramdisk_end < __pa_symbol(&_text) ||
> + ramdisk_image >= init_pg_tables_end)
> + reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
> }
> #endif
> - reserve_early(__pa_symbol(&_end), init_pg_tables_end,
> "INIT_PG_TABLE");
>
> reserve_ebda_region();
>
> ===================================================================
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -1264,5 +1264,5 @@ asmlinkage void __init xen_start_kernel(
> }
>
> /* Start the world */
> - start_kernel();
> + i386_start_kernel();
> }
> ===================================================================
> --- a/include/asm-x86/setup.h
> +++ b/include/asm-x86/setup.h
> @@ -58,6 +58,7 @@ int __init copy_e820_map(struct e820entr
> int __init copy_e820_map(struct e820entry *biosmap, int nr_map);
> void __init add_memory_region(unsigned long long start,
> unsigned long long size, int type);
> +void __init i386_start_kernel(void);
>
> extern unsigned long init_pg_tables_end;
>
>
>
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-27 20:58 ` Thomas Gleixner
@ 2008-05-27 21:06 ` Jeremy Fitzhardinge
0 siblings, 0 replies; 51+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-27 21:06 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Yinghai Lu, Ingo Molnar, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
Thomas Gleixner wrote:
> On Tue, 27 May 2008, Jeremy Fitzhardinge wrote:
>
>> Thomas Gleixner wrote:
>>
>>> On Sun, 25 May 2008, Yinghai Lu wrote:
>>>
>>>
>>>> remove extra -1 in reseve_early calling
>>>> panic if can not find space for new RAMDISK
>>>>
>>>>
>>>>
>>> Thanks, applied on top of the v2 update
>>>
>>> tglx
>>>
>>>
>> This patch is still required to make Xen boot with the e820 changes:
>>
>
> Ok. This one was not in the queue you sent ?
>
It wasn't, partly because I wasn't sure of the state of the e820 patches.
> Will pick it up.
>
Thanks.
J
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-27 20:37 ` Jeremy Fitzhardinge
2008-05-27 20:58 ` Thomas Gleixner
@ 2008-05-27 21:06 ` Yinghai Lu
2008-05-27 21:22 ` Jeremy Fitzhardinge
1 sibling, 1 reply; 51+ messages in thread
From: Yinghai Lu @ 2008-05-27 21:06 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
On Tue, May 27, 2008 at 1:37 PM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> Thomas Gleixner wrote:
>>
>> On Sun, 25 May 2008, Yinghai Lu wrote:
>>
>>>
>>> remove extra -1 in reseve_early calling
>>> panic if can not find space for new RAMDISK
>>>
>>>
>>
>> Thanks, applied on top of the v2 update
>>
>> tglx
>>
>
> This patch is still required to make Xen boot with the e820 changes:
>
> Subject: xen: boot via i386_start_kernel to get early reservations
>
> Boot Xen via i386_start_kernel so that all the early reservations are
> made properly; without these, it will start using the kernel and
> pagetables as early heap memory, which is a bit suboptimal.
>
> One tricky part is that reserve_early() will just panic if any of the
> early reservations overlap any others. When a Xen domain is built, it
> constructs the initial address space as:
>
> kernel text+data+bss
> initrd
> inital pagetable
>
> Therefore, when reserving the pagetable (from &_end to
> init_pg_tables_end), it covers the whole initrd area. If it then
> tries to reserve the initrd, it will panic because of the overlap.
>
> The simple fix here is to reserve INIT_PG_TABLE first, and then only
> reserve the ramdisk if it doesn't overlap with the previous
> reservations. A better/more complex fix might be to make
> reserve_early() deal with overlapping reservations.
>
> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
> ---
> arch/x86/kernel/head32.c | 6 ++++--
> arch/x86/xen/enlighten.c | 2 +-
> include/asm-x86/setup.h | 1 +
> 3 files changed, 6 insertions(+), 3 deletions(-)
>
> ===================================================================
> --- a/arch/x86/kernel/head32.c
> +++ b/arch/x86/kernel/head32.c
> @@ -66,6 +66,7 @@ void __init i386_start_kernel(void)
> void __init i386_start_kernel(void)
> {
> reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA
> BSS");
> + reserve_early(__pa_symbol(&_end), init_pg_tables_end,
> "INIT_PG_TABLE");
>
> #ifdef CONFIG_BLK_DEV_INITRD
> /* Reserve INITRD */
> @@ -73,10 +74,11 @@ void __init i386_start_kernel(void)
> u64 ramdisk_image = boot_params.hdr.ramdisk_image;
> u64 ramdisk_size = boot_params.hdr.ramdisk_size;
> u64 ramdisk_end = ramdisk_image + ramdisk_size;
> - reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
> + if (ramdisk_end < __pa_symbol(&_text) ||
> + ramdisk_image >= init_pg_tables_end)
> + reserve_early(ramdisk_image, ramdisk_end,
> "RAMDISK");
> }
> #endif
> - reserve_early(__pa_symbol(&_end), init_pg_tables_end,
> "INIT_PG_TABLE");
>
> reserve_ebda_region();
>
> ===================================================================
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -1264,5 +1264,5 @@ asmlinkage void __init xen_start_kernel(
> }
>
> /* Start the world */
> - start_kernel();
> + i386_start_kernel();
> }
> ===================================================================
> --- a/include/asm-x86/setup.h
> +++ b/include/asm-x86/setup.h
> @@ -58,6 +58,7 @@ int __init copy_e820_map(struct e820entr
> int __init copy_e820_map(struct e820entry *biosmap, int nr_map);
> void __init add_memory_region(unsigned long long start,
> unsigned long long size, int type);
> +void __init i386_start_kernel(void);
>
> extern unsigned long init_pg_tables_end;
Jeremy,
Can you send out your boot log?
I still cannot figure out how INIT_PG_TABLE and RAMDISK could overlap.
Is it by only one byte? Or within the same page?
YH
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-27 21:06 ` Yinghai Lu
@ 2008-05-27 21:22 ` Jeremy Fitzhardinge
2008-05-27 21:35 ` Yinghai Lu
0 siblings, 1 reply; 51+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-27 21:22 UTC (permalink / raw)
To: Yinghai Lu
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
Yinghai Lu wrote:
> Can you send out your boot log?
>
I've included it below.
> I still can not figure out INIT_PG_TABLE and RAMDISK could overlap? or
> only one byte? or the same page?
>
Perhaps my explanation in the patch description wasn't clear enough.
When Xen builds the domain, it's responsible for layout of the kernel,
the initrd and the initial pagetable (the kernel does not create its own
pagetable like it does when booting directly on x86).
The layout is (from low to high addresses):
kernel start ->+-------+
|.text |
+ - - - +
|.data |
+ - - - +
|.bss |
_end->+-------+
:padding:
+-------+<-ramdisk_image
: :
|initrd |
: :
+-------+<-ramdisk_end
:padding:
+-------+
|pgtable|
init_pg_tables_end->+-------+
Therefore, when you call reserve_early(&_end, init_pg_tables_end) to
reserve the initial pagetable, you also implicitly reserve the whole
ramdisk area. Since your code currently reserves the ramdisk first, the
reservation of the pagetable fails because it overlaps the ramdisk.
My patch does two things:
1. reserve the initial pagetable first
2. skip reserving the ramdisk if it is within the pagetable range
This makes sure that all the important memory is reserved from early in
boot.
There are two alternate fixes:
1. try to precisely reserve *just* the pagetable, rather than
assuming it starts at &_end. I'm not sure there's currently a way
to do this, but it would be easy enough to add. Or,
2. Make early reservation cope with overlapping ranges, and deal with
them appropriately.
But for now, my patch prevents your code from causing a regression when
booting under Xen.
J
(early) Linux version 2.6.26-rc4 (jeremy@victim.goop.org) (gcc version 4.1.2 20070925 (Red Hat 4.1.2-33)) #502 SMP PREEMPT Tue May 27 13:37:29 PDT 2008
(early) ACPI in unprivileged domain disabled
(early) BIOS-provided physical RAM map:
(early) Xen: 0000000000000000 - 000000000009f000 (usable)
(early) Xen: 0000000000100000 - 0000000010000000 (usable)
(early) console [xenboot0] enabled
(early) debug: ignoring loglevel setting.
(early) limit_regions start: 0000000000000000 - 000000000009f000 (usable)
(early) limit_regions start: 0000000000100000 - 0000000010000000 (usable)
(early) limit_regions endfunc: 0000000000000000 - 000000000009f000 (usable)
(early) limit_regions endfunc: 0000000000100000 - 0000000010000000 (usable)
(early) user-defined physical RAM map:
(early) user: 0000000000000000 - 000000000009f000 (usable)
(early) user: 0000000000100000 - 0000000010000000 (usable)
(early) 0MB HIGHMEM available.
(early) 256MB LOWMEM available.
(early) low ram: 01036000 - 10000000
(early) bootmap 01036000 - 01038000
(early) early res: 0 [0-fff] BIOS data page
(early) early res: 1 [1000-1fff] EX TRAMPOLINE
(early) early res: 2 [6000-6fff] TRAMPOLINE
(early) early res: 3 [100000-970c1b] TEXT DATA BSS
(early) early res: 4 [970c1c-1035fff] INIT_PG_TABLE
(early) early res: 5 [1036000-1037fff] BOOTMAP
(early) Scan SMP from c0000000 for 1024 bytes.
(early) Scan SMP from c009fc00 for 1024 bytes.
(early) Scan SMP from c00f0000 for 65536 bytes.
(early) Scan SMP from c00c2c20 for 1024 bytes.
(early) NX (Execute Disable) protection: active
(early) Entering add_active_range(0, 0, 65536) 0 entries of 256 used
(early) Zone PFN ranges:
(early) DMA 0 -> 4096
(early) Normal 4096 -> 65536
(early) HighMem 65536 -> 65536
(early) Movable zone start PFN for each node
(early) early_node_map[1] active PFN ranges
(early) 0: 0 -> 65536
(early) On node 0 totalpages: 65536
(early) DMA zone: 56 pages used for memmap
(early) DMA zone: 0 pages reserved
(early) DMA zone: 4040 pages, LIFO batch:0
(early) Normal zone: 840 pages used for memmap
(early) Normal zone: 60600 pages, LIFO batch:15
(early) HighMem zone: 0 pages used for memmap
(early) Movable zone: 0 pages used for memmap
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-27 21:22 ` Jeremy Fitzhardinge
@ 2008-05-27 21:35 ` Yinghai Lu
2008-05-27 21:47 ` Jeremy Fitzhardinge
0 siblings, 1 reply; 51+ messages in thread
From: Yinghai Lu @ 2008-05-27 21:35 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
On Tue, May 27, 2008 at 2:22 PM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> Yinghai Lu wrote:
>>
>> Can you send out your boot log?
>>
>
> I've included it below.
>
>> I still can not figure out INIT_PG_TABLE and RAMDISK could overlap? or
>> only one byte? or the same page?
>>
>
> Perhaps my explanation in the patch description wasn't clear enough.
>
> When Xen builds the domain, it's responsible for layout of the kernel, the
> initrd and the initial pagetable (the kernel does not create its own
> pagetable like it does when booting directly on x86).
>
> The layout is (from low to high addresses):
>
> kernel start ->+-------+
> |.text |
> + - - - +
> |.data |
> + - - - +
> |.bss |
> _end->+-------+
> :padding:
> +-------+<-ramdisk_image
> : :
> |initrd |
> : :
> +-------+<-ramdisk_end
> :padding:
> +-------+
> |pgtable|
> init_pg_tables_end->+-------+
>
>
> Therefore, when you call early_reserve(&end, init_pg_tables_end) to reserve
> the inital pagetable, you also implicitly reserve the whole ramdisk area.
> Since your code currently reserves the ramdisk first, the reservation of
> the pagetable fails because it overlaps the ramdisk.
>
> My patch does two things:
>
> 1. reserve the initial pagetable first
> 2. skip reserving the ramdisk if it is within the pagetable range
>
> This makes sure that all the important memory is reserved from early in
> boot.
>
> There are two alternate fixes:
>
> 1. try to precisely reserve *just* the pagetable, rather than
> assuming it starts at &_end. I'm not sure there's currently a way
> to do this, but it would be easy enough to add. Or,
> 2. Make early reservation cope with overlapping ranges, and deal with
> them appropriately.
thanks for the explanation...
Method 1 needs to be done.
BTW, does Xen PV only support 32-bit?
YH
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-27 21:35 ` Yinghai Lu
@ 2008-05-27 21:47 ` Jeremy Fitzhardinge
2008-05-27 22:52 ` Yinghai Lu
0 siblings, 1 reply; 51+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-27 21:47 UTC (permalink / raw)
To: Yinghai Lu
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
Yinghai Lu wrote:
> thanks for the explanation...
>
> method 1 need to be done.
>
> BTW, XEN PV only support 32 bit?
>
At the moment, but 64-bit is being worked on. Any solution must work
for both.
J
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-27 21:47 ` Jeremy Fitzhardinge
@ 2008-05-27 22:52 ` Yinghai Lu
2008-05-28 10:01 ` Jeremy Fitzhardinge
0 siblings, 1 reply; 51+ messages in thread
From: Yinghai Lu @ 2008-05-27 22:52 UTC (permalink / raw)
To: Jeremy Fitzhardinge, Rusty Russell
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
[-- Attachment #1: Type: text/plain, Size: 337 bytes --]
On Tue, May 27, 2008 at 2:47 PM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> Yinghai Lu wrote:
>>
>> thanks for the explanation...
>>
>> method 1 need to be done.
>>
>> BTW, XEN PV only support 32 bit?
>>
>
> At the moment, but 64-bit is being worked on. Any solution must work for
> both.
>
Can you try the attached patch?
Thanks
YH
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: init_pg_table_32.patch --]
[-- Type: text/x-patch; name=init_pg_table_32.patch, Size: 3510 bytes --]
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index c216d3c..36af003 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -76,7 +76,7 @@ void __init i386_start_kernel(void)
reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
- reserve_early(__pa_symbol(&_end), init_pg_tables_end, "INIT_PG_TABLE");
+ reserve_early(init_pg_tables_start, init_pg_tables_end, "INIT_PG_TABLE");
reserve_ebda_region();
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index b2cc737..bef4618 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -194,6 +194,7 @@ default_entry:
xorl %ebx,%ebx /* %ebx is kept at zero */
movl $pa(pg0), %edi
+ movl %edi, pa(init_pg_tables_start)
movl $pa(swapper_pg_pmd), %edx
movl $PTE_ATTR, %eax
10:
@@ -228,6 +229,7 @@ default_entry:
page_pde_offset = (__PAGE_OFFSET >> 20);
movl $pa(pg0), %edi
+ movl %edi, pa(init_pg_tables_start)
movl $pa(swapper_pg_dir), %edx
movl $PTE_ATTR, %eax
10:
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 896ec59..340c153 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -71,6 +71,7 @@
/* This value is set up by the early boot code to point to the value
immediately after the boot time page tables. It contains a *physical*
address, and must not be in the .bss segment! */
+unsigned long init_pg_tables_start __initdata = ~0UL;
unsigned long init_pg_tables_end __initdata = ~0UL;
/*
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index f5cbb74..c3af6c6 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1012,6 +1012,7 @@ __init void lguest_init(void)
* clobbered. The Launcher places our initial pagetables somewhere at
* the top of our physical memory, so we don't need extra space: set
* init_pg_tables_end to the end of the kernel. */
+ init_pg_tables_start = __pa(pg0);
init_pg_tables_end = __pa(pg0);
/* Load the %fs segment register (the per-cpu segment register) with
@@ -1065,9 +1066,9 @@ __init void lguest_init(void)
pm_power_off = lguest_power_off;
machine_ops.restart = lguest_restart;
- /* Now we're set up, call start_kernel() in init/main.c and we proceed
+ /* Now we're set up, call i386_start_kernel() in head32.c and we proceed
* to boot as normal. It never returns. */
- start_kernel();
+ i386_start_kernel();
}
/*
* This marks the end of stage II of our journey, The Guest.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 691d282..57e96a6 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1228,6 +1228,7 @@ asmlinkage void __init xen_start_kernel(void)
pgd = (pgd_t *)xen_start_info->pt_base;
+ init_pg_tables_start = __pa(pgd);
init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
init_mm.pgd = pgd; /* use the Xen pagetables to start */
@@ -1266,5 +1267,5 @@ asmlinkage void __init xen_start_kernel(void)
}
/* Start the world */
- start_kernel();
+ i386_start_kernel();
}
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index ffa0f54..920307e 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -59,6 +59,7 @@ int __init copy_e820_map(struct e820entry *biosmap, int nr_map);
void __init add_memory_region(unsigned long long start,
unsigned long long size, int type);
+extern unsigned long init_pg_tables_start;
extern unsigned long init_pg_tables_end;
^ permalink raw reply related [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-27 22:52 ` Yinghai Lu
@ 2008-05-28 10:01 ` Jeremy Fitzhardinge
2008-05-28 20:48 ` Yinghai Lu
0 siblings, 1 reply; 51+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-28 10:01 UTC (permalink / raw)
To: Yinghai Lu
Cc: Rusty Russell, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
Andrew Morton, linux-kernel@vger.kernel.org
Yinghai Lu wrote:
>
> can you try attached patch?
>
Thanks, that nearly works. I needed the patch below to get it to
successfully boot.
For reference, this is what the early reserve map looks like now:
(early) 256MB LOWMEM available.
(early) low ram: 02634000 - 10000000
(early) bootmap 02634000 - 02636000
(early) early res: 0 [0-fff] BIOS data page
(early) early res: 1 [1000-1fff] EX TRAMPOLINE
(early) early res: 2 [6000-6fff] TRAMPOLINE
(early) early res: 3 [25db000-261dfff] XEN
(early) early res: 4 [1000000-18a8303] TEXT DATA BSS
(early) early res: 5 [18a9000-25dafff] RAMDISK
(early) early res: 6 [261e000-2633fff] INIT_PG_TABLE
(early) early res: 7 [2634000-2635fff] BOOTMAP
J
Subject: xen: reserve_early Xen-specific memory
Make sure that the start_info and pfn->mfn translation array are reserved.
i386_start_kernel will arrange to reserve the kernel code/data, initrd
and pagetable.
Also, add prototype for i386_start_kernel.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/enlighten.c | 9 +++++++++
include/asm-x86/setup.h | 1 +
2 files changed, 10 insertions(+)
===================================================================
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1266,6 +1266,15 @@
add_preferred_console("hvc", 0, NULL);
}
+ /*
+ * Reserve Xen bits:
+ * - mfn_list
+ * - xen_start_info
+ * See comment above "struct start_info" in <xen/interface/xen.h>
+ */
+ reserve_early(__pa(xen_start_info->mfn_list),
+ __pa(xen_start_info->pt_base), "XEN");
+
/* Start the world */
i386_start_kernel();
}
===================================================================
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -58,6 +58,7 @@
int __init copy_e820_map(struct e820entry *biosmap, int nr_map);
void __init add_memory_region(unsigned long long start,
unsigned long long size, int type);
+void __init i386_start_kernel(void);
extern unsigned long init_pg_tables_start;
extern unsigned long init_pg_tables_end;
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-28 10:01 ` Jeremy Fitzhardinge
@ 2008-05-28 20:48 ` Yinghai Lu
2008-05-28 21:24 ` Jeremy Fitzhardinge
2008-05-29 13:37 ` Jeremy Fitzhardinge
0 siblings, 2 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-05-28 20:48 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Rusty Russell, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
Andrew Morton, linux-kernel@vger.kernel.org
On Wed, May 28, 2008 at 3:01 AM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> Yinghai Lu wrote:
>>
>> can you try attached patch?
>>
>
> Thanks, that nearly works. I needed the patch below to get it to
> successfully boot.
>
> For reference, this is what the early reserve map looks like now:
>
> (early) 256MB LOWMEM available.
> (early) low ram: 02634000 - 10000000
> (early) bootmap 02634000 - 02636000
> (early) early res: 0 [0-fff] BIOS data page
> (early) early res: 1 [1000-1fff] EX TRAMPOLINE
> (early) early res: 2 [6000-6fff] TRAMPOLINE
> (early) early res: 3 [25db000-261dfff] XEN
> (early) early res: 4 [1000000-18a8303] TEXT DATA BSS
> (early) early res: 5 [18a9000-25dafff] RAMDISK
> (early) early res: 6 [261e000-2633fff] INIT_PG_TABLE
> (early) early res: 7 [2634000-2635fff] BOOTMAP
>
>
> J
>
>
> Subject: xen: reserve_early Xen-specific memory
>
> Make sure that the start_info and pfn->mfn translation array are reserved.
> i386_start_kernel will arrange to reserve the kernel code/data, initrd
> and pagetable.
>
> Also, add prototype for i386_start_kernel.
>
> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
> ---
> arch/x86/xen/enlighten.c | 9 +++++++++
> include/asm-x86/setup.h | 1 +
> 2 files changed, 10 insertions(+)
>
> ===================================================================
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -1266,6 +1266,15 @@
> add_preferred_console("hvc", 0, NULL);
> }
>
> + /*
> + * Reserve Xen bits:
> + * - mfn_list
> + * - xen_start_info
> + * See comment above "struct start_info" in <xen/interface/xen.h>
> + */
> + reserve_early(__pa(xen_start_info->mfn_list),
> + __pa(xen_start_info->pt_base), "XEN");
> +
can we use e820 entries for that? So the domain builder could have
several entries for E820_RAM and E820_RESERVED...
> /* Start the world */
> i386_start_kernel();
> }
> ===================================================================
> --- a/include/asm-x86/setup.h
> +++ b/include/asm-x86/setup.h
> @@ -58,6 +58,7 @@
> int __init copy_e820_map(struct e820entry *biosmap, int nr_map);
> void __init add_memory_region(unsigned long long start,
> unsigned long long size, int type);
> +void __init i386_start_kernel(void);
We will need x86_64_start_kernel there too; maybe we should rename them
all to x86_start_kernel.
YH
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-28 20:48 ` Yinghai Lu
@ 2008-05-28 21:24 ` Jeremy Fitzhardinge
2008-05-29 13:37 ` Jeremy Fitzhardinge
1 sibling, 0 replies; 51+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-28 21:24 UTC (permalink / raw)
To: Yinghai Lu
Cc: Rusty Russell, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
Andrew Morton, linux-kernel@vger.kernel.org
Yinghai Lu wrote:
>> ===================================================================
>> --- a/arch/x86/xen/enlighten.c
>> +++ b/arch/x86/xen/enlighten.c
>> @@ -1266,6 +1266,15 @@
>> add_preferred_console("hvc", 0, NULL);
>> }
>>
>> + /*
>> + * Reserve Xen bits:
>> + * - mfn_list
>> + * - xen_start_info
>> + * See comment above "struct start_info" in <xen/interface/xen.h>
>> + */
>> + reserve_early(__pa(xen_start_info->mfn_list),
>> + __pa(xen_start_info->pt_base), "XEN");
>> +
>>
>
> can we use e820 entries for that? So the domain builder could have
> several entries for E820_RAM and E820_RESERVED...
>
Yeah, seems reasonable. I'll try it out.
> will need x86_64_start_kernel there, maybe we should change all name
> to x86_start_kernel
We can do that later. For now it's better to give them distinct names.
J
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-28 20:48 ` Yinghai Lu
2008-05-28 21:24 ` Jeremy Fitzhardinge
@ 2008-05-29 13:37 ` Jeremy Fitzhardinge
2008-05-29 18:41 ` Yinghai Lu
2008-05-29 18:52 ` Yinghai Lu
1 sibling, 2 replies; 51+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-29 13:37 UTC (permalink / raw)
To: Yinghai Lu
Cc: Rusty Russell, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
Andrew Morton, linux-kernel@vger.kernel.org
Yinghai Lu wrote:
> can we use e820 entries for that? So the domain builder could have
> several entries for E820_RAM and E820_RESERVED...
>
I tried this, but it doesn't work; the kernel crashes during boot,
presumably because it's trying to use the reserved memory as heap. I
suspect the e820 maps are not registered early enough or something...
(One thought: if reserving in the E820 map were enough, then couldn't we
use it for all the early reservations?)
I've attached the non-working patch below.
The working kernel reports:
(early) BIOS-provided physical RAM map:
(early) Xen: 0000000000000000 - 000000000009f000 (usable)
(early) Xen: 0000000000100000 - 0000000010000000 (usable)
(early) console [xenboot0] enabled
(early) debug: ignoring loglevel setting.
(early) 0MB HIGHMEM available.
(early) 256MB LOWMEM available.
(early) low ram: 018fd000 - 10000000
(early) bootmap 018fd000 - 018ff000
(early) early res: 0 [0-fff] BIOS data page
(early) early res: 1 [1000-1fff] EX TRAMPOLINE
(early) early res: 2 [6000-6fff] TRAMPOLINE
(early) early res: 3 [18aa000-18ecfff] XEN
(early) early res: 4 [1000000-18a9303] TEXT DATA BSS
(early) early res: 5 [18ed000-18fcfff] INIT_PG_TABLE
(early) early res: 6 [18fd000-18fefff] BOOTMAP
But the non-working one says:
(early) BIOS-provided physical RAM map:
(early) Xen: 0000000000000000 - 000000000009f000 (usable)
(early) Xen: 0000000000100000 - 0000000010000000 (usable)
(early) Xen: 00000000018aa000 - 00000000018ed000 (reserved)
(early) console [xenboot0] enabled
(early) debug: ignoring loglevel setting.
(early) 0MB HIGHMEM available.
(early) 256MB LOWMEM available.
(early) low ram: 018fd000 - 10000000
(early) bootmap 018fd000 - 018ff000
(early) early res: 0 [0-fff] BIOS data page
(early) early res: 1 [1000-1fff] EX TRAMPOLINE
(early) early res: 2 [6000-6fff] TRAMPOLINE
(early) early res: 3 [1000000-18a9303] TEXT DATA BSS
(early) early res: 4 [18ed000-18fcfff] INIT_PG_TABLE
(early) early res: 5 [18fd000-18fefff] BOOTMAP
J
Subject: xen: reserve Xen-specific memory in e820 map
Make sure that the start_info and pfn->mfn translation array are reserved.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/setup.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff -r ad372188bccf arch/x86/xen/setup.c
--- a/arch/x86/xen/setup.c Thu May 29 13:56:09 2008 +0100
+++ b/arch/x86/xen/setup.c Thu May 29 14:34:35 2008 +0100
@@ -40,9 +40,19 @@
max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
e820.nr_map = 0;
+
add_memory_region(0, LOWMEMSIZE(), E820_RAM);
add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM);
+ /*
+ * Reserve Xen bits:
+ * - mfn_list
+ * - xen_start_info
+ * See comment above "struct start_info" in <xen/interface/xen.h>
+ */
+ add_memory_region(__pa(xen_start_info->mfn_list),
+ xen_start_info->pt_base - xen_start_info->mfn_list,
+ E820_RESERVED);
return "Xen";
}
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-29 13:37 ` Jeremy Fitzhardinge
@ 2008-05-29 18:41 ` Yinghai Lu
2008-05-29 18:58 ` H. Peter Anvin
2008-05-29 18:52 ` Yinghai Lu
1 sibling, 1 reply; 51+ messages in thread
From: Yinghai Lu @ 2008-05-29 18:41 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Rusty Russell, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
Andrew Morton, linux-kernel@vger.kernel.org
On Thu, May 29, 2008 at 6:37 AM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> Yinghai Lu wrote:
>>
>> can we use e820 entries for that? So the domain builder could have
>> several entries for E820_RAM and E820_RESERVED...
>>
>
> I tried this, but it doesn't work; the kernel crashes during boot,
> presumably because it's trying to use the reserved memory as heap. I
> suspect the e820 maps are not registered early enough or something...
>
> (One thought: if reserving in the E820 map were enough, then couldn't we use
> it for all the early reservations?)
"e820 reserved entries" means kernel will only read it.
Ranges reserved with reserve_early are converted to bootmem reservations via
reserve_bootmem[_generic], and can later be freed and reused.
YH
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-29 13:37 ` Jeremy Fitzhardinge
2008-05-29 18:41 ` Yinghai Lu
@ 2008-05-29 18:52 ` Yinghai Lu
2008-05-29 19:14 ` Yinghai Lu
1 sibling, 1 reply; 51+ messages in thread
From: Yinghai Lu @ 2008-05-29 18:52 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Rusty Russell, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
Andrew Morton, linux-kernel@vger.kernel.org
On Thu, May 29, 2008 at 6:37 AM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> Yinghai Lu wrote:
>>
>> can we use e820 entries for that? So the domain builder could have
>> several entries for E820_RAM and E820_RESERVED...
>>
>
> I tried this, but it doesn't work; the kernel crashes during boot,
> presumably because it's trying to use the reserved memory as heap. I
> suspect the e820 maps are not registered early enough or something...
>
> (One thought: if reserving in the E820 map were enough, then couldn't we use
> it for all the early reservations?)
>
> I've attached the non-working patch below.
>
> The working kernel reports:
>
> (early) BIOS-provided physical RAM map:
> (early) Xen: 0000000000000000 - 000000000009f000 (usable)
> (early) Xen: 0000000000100000 - 0000000010000000 (usable)
> (early) console [xenboot0] enabled
> (early) debug: ignoring loglevel setting.
> (early) 0MB HIGHMEM available.
> (early) 256MB LOWMEM available.
> (early) low ram: 018fd000 - 10000000
> (early) bootmap 018fd000 - 018ff000
> (early) early res: 0 [0-fff] BIOS data page
> (early) early res: 1 [1000-1fff] EX TRAMPOLINE
> (early) early res: 2 [6000-6fff] TRAMPOLINE
> (early) early res: 3 [18aa000-18ecfff] XEN
> (early) early res: 4 [1000000-18a9303] TEXT DATA BSS
> (early) early res: 5 [18ed000-18fcfff] INIT_PG_TABLE
> (early) early res: 6 [18fd000-18fefff] BOOTMAP
>
> But the non-working one says:
>
> (early) BIOS-provided physical RAM map:
> (early) Xen: 0000000000000000 - 000000000009f000 (usable)
> (early) Xen: 0000000000100000 - 0000000010000000 (usable)
> (early) Xen: 00000000018aa000 - 00000000018ed000 (reserved)
It seems we fail to call sanitize_e820_map on 32-bit. We should get:
(early) Xen: 0000000000000000 - 000000000009f000 (usable)
(early) Xen: 0000000000100000 - 00000000018aa000 (usable)
(early) Xen: 00000000018aa000 - 00000000018ed000 (reserved)
(early) Xen: 00000000018ed000 - 0000000010000000 (usable)
YH
> (early) console [xenboot0] enabled
> (early) debug: ignoring loglevel setting.
> (early) 0MB HIGHMEM available.
> (early) 256MB LOWMEM available.
> (early) low ram: 018fd000 - 10000000
> (early) bootmap 018fd000 - 018ff000
> (early) early res: 0 [0-fff] BIOS data page
> (early) early res: 1 [1000-1fff] EX TRAMPOLINE
> (early) early res: 2 [6000-6fff] TRAMPOLINE
> (early) early res: 3 [1000000-18a9303] TEXT DATA BSS
> (early) early res: 4 [18ed000-18fcfff] INIT_PG_TABLE
> (early) early res: 5 [18fd000-18fefff] BOOTMAP
>
> J
>
> Subject: xen: reserve Xen-specific memory in e820 map
>
> Make sure that the start_info and pfn->mfn translation array are reserved.
>
> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
> ---
> arch/x86/xen/setup.c | 9 +++++++++
> 1 file changed, 9 insertions(+)
>
> diff -r ad372188bccf arch/x86/xen/setup.c
> --- a/arch/x86/xen/setup.c Thu May 29 13:56:09 2008 +0100
> +++ b/arch/x86/xen/setup.c Thu May 29 14:34:35 2008 +0100
> @@ -40,9 +40,19 @@
> max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
>
> e820.nr_map = 0;
> +
> add_memory_region(0, LOWMEMSIZE(), E820_RAM);
> add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY,
> E820_RAM);
>
> + /*
> + * Reserve Xen bits:
> + * - mfn_list
> + * - xen_start_info
> + * See comment above "struct start_info" in <xen/interface/xen.h>
> + */
> + add_memory_region(__pa(xen_start_info->mfn_list),
> + xen_start_info->pt_base -
> xen_start_info->mfn_list,
> + E820_RESERVED);
> return "Xen";
> }
>
>
>
>
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-29 18:41 ` Yinghai Lu
@ 2008-05-29 18:58 ` H. Peter Anvin
0 siblings, 0 replies; 51+ messages in thread
From: H. Peter Anvin @ 2008-05-29 18:58 UTC (permalink / raw)
To: Yinghai Lu
Cc: Jeremy Fitzhardinge, Rusty Russell, x86, Andrew Morton,
linux-kernel@vger.kernel.org
Yinghai Lu wrote:
> On Thu, May 29, 2008 at 6:37 AM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
>> Yinghai Lu wrote:
>>> can we use e820 entries for that? So the domain builder could have
>>> several entries for E820_RAM and E820_RESERVED...
>>>
>> I tried this, but it doesn't work; the kernel crashes during boot,
>> presumably because it's trying to use the reserved memory as heap. I
>> suspect the e820 maps are not registered early enough or something...
>>
>> (One thought: if reserving in the E820 map were enough, then couldn't we use
>> it for all the early reservations?)
>
> "e820 reserved entries" means kernel will only read it.
>
> reserve_early will be converted to bootmem via reserve_bootmem[_generic]
> and later it could be freed and be reused.
>
Hang on...
E820_RESERVED is supposed to mean "nothing touches this address space;
the kernel cannot use it either as RAM or for I/O allocations."
The kernel *cannot* assume it is safe to read. There might be an I/O
device there.
Now, to support using the E820 map for early reservations, we can simply
define new "fake" E820 types. The easiest way to do that, is to
pre-sanitize the map so that all unknown-type entries are collapsed into
a "fake" type E820_UNKNOWN:
/* Real E820 types */
#define E820_NONE 0
#define E820_RAM 1
#define E820_RESERVED 2
#define E820_ACPI 3
#define E820_NVS 4
/* Fake E820 types */
#define E820_UNKNOWN 5
#define E820_BOOTMEM 6 /* Pre-bootmem allocation in kernel */
/* .... */
/* Look for unknown types */
if (e820->type >= E820_UNKNOWN)
e820->type = E820_UNKNOWN;
/* Now all numbers above E820_UNKNOWN are available for the kernel */
We can either do this sanitization in the kernel proper, in which case
the values we pick have no real importance, as they can change from one
version to the next, or in the boot code (Xen domain builder, ELILO,
etc.) The latter case would allow us to pass in new types, but in that
case, we want to pick a Linux-specific range much higher up the
numberspace, for obvious reasons.
-hpa
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-29 18:52 ` Yinghai Lu
@ 2008-05-29 19:14 ` Yinghai Lu
2008-05-30 15:50 ` Jeremy Fitzhardinge
0 siblings, 1 reply; 51+ messages in thread
From: Yinghai Lu @ 2008-05-29 19:14 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Rusty Russell, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
Andrew Morton, linux-kernel@vger.kernel.org
On Thu, May 29, 2008 at 11:52 AM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
> On Thu, May 29, 2008 at 6:37 AM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
>> Yinghai Lu wrote:
>>>
>>> can we use e820 entries for that? So the domain builder could have
>>> several entries for E820_RAM and E820_RESERVED...
>>>
>>
>> I tried this, but it doesn't work; the kernel crashes during boot,
>> presumably because it's trying to use the reserved memory as heap. I
>> suspect the e820 maps are not registered early enough or something...
>>
>> (One thought: if reserving in the E820 map were enough, then couldn't we use
>> it for all the early reservations?)
>>
>> I've attached the non-working patch below.
>>
>> The working kernel reports:
>>
>> (early) BIOS-provided physical RAM map:
>> (early) Xen: 0000000000000000 - 000000000009f000 (usable)
>> (early) Xen: 0000000000100000 - 0000000010000000 (usable)
>> (early) console [xenboot0] enabled
>> (early) debug: ignoring loglevel setting.
>> (early) 0MB HIGHMEM available.
>> (early) 256MB LOWMEM available.
>> (early) low ram: 018fd000 - 10000000
>> (early) bootmap 018fd000 - 018ff000
>> (early) early res: 0 [0-fff] BIOS data page
>> (early) early res: 1 [1000-1fff] EX TRAMPOLINE
>> (early) early res: 2 [6000-6fff] TRAMPOLINE
>> (early) early res: 3 [18aa000-18ecfff] XEN
>> (early) early res: 4 [1000000-18a9303] TEXT DATA BSS
>> (early) early res: 5 [18ed000-18fcfff] INIT_PG_TABLE
>> (early) early res: 6 [18fd000-18fefff] BOOTMAP
>>
>> But the non-working one says:
>>
>> (early) BIOS-provided physical RAM map:
>> (early) Xen: 0000000000000000 - 000000000009f000 (usable)
>> (early) Xen: 0000000000100000 - 0000000010000000 (usable)
>> (early) Xen: 00000000018aa000 - 00000000018ed000 (reserved)
>
> it seems we miss to call sanitize_e820_map for 32 bit. we should get
> (early) Xen: 0000000000000000 - 000000000009f000 (usable)
> (early) Xen: 0000000000100000 - 00000000018aa000 (usable)
> (early) Xen: 00000000018aa000 - 00000000018ed000 (reserved)
> (early) Xen: 00000000018ed000 - 0000000010000000 (usable)
Xen PV overrides memory_setup. The default one is
machine_specific_memory_setup(), and it does call sanitize_e820_map...
So you may need to add a sanitize_e820_map call to xen_memory_setup, or
rearrange your add_memory_region parameters...
YH
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] x86: extend e820 early_res support 32bit -fix #3
2008-05-25 17:00 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix #2 Yinghai Lu
2008-05-27 15:44 ` Thomas Gleixner
@ 2008-05-29 19:56 ` Yinghai Lu
2008-05-29 19:57 ` [PATCH] x86: extend e820 early_res support 32bit -fix #4 Yinghai Lu
` (2 subsequent siblings)
4 siblings, 0 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-05-29 19:56 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
Jeremy Fitzhardinge
Cc: linux-kernel@vger.kernel.org
Introduce init_pg_tables_start, so that Xen PV can specify the value.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/head32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head32.c
+++ linux-2.6/arch/x86/kernel/head32.c
@@ -76,7 +76,8 @@ void __init i386_start_kernel(void)
reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
- reserve_early(__pa_symbol(&_end), init_pg_tables_end, "INIT_PG_TABLE");
+ reserve_early(init_pg_tables_start, init_pg_tables_end,
+ "INIT_PG_TABLE");
reserve_ebda_region();
Index: linux-2.6/arch/x86/kernel/head_32.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head_32.S
+++ linux-2.6/arch/x86/kernel/head_32.S
@@ -194,6 +194,7 @@ default_entry:
xorl %ebx,%ebx /* %ebx is kept at zero */
movl $pa(pg0), %edi
+ movl %edi, pa(init_pg_tables_start)
movl $pa(swapper_pg_pmd), %edx
movl $PTE_ATTR, %eax
10:
@@ -228,6 +229,7 @@ default_entry:
page_pde_offset = (__PAGE_OFFSET >> 20);
movl $pa(pg0), %edi
+ movl %edi, pa(init_pg_tables_start)
movl $pa(swapper_pg_dir), %edx
movl $PTE_ATTR, %eax
10:
Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6/arch/x86/kernel/setup_32.c
@@ -72,6 +72,7 @@
/* This value is set up by the early boot code to point to the value
immediately after the boot time page tables. It contains a *physical*
address, and must not be in the .bss segment! */
+unsigned long init_pg_tables_start __initdata = ~0UL;
unsigned long init_pg_tables_end __initdata = ~0UL;
/*
@@ -486,6 +487,10 @@ static void __init reserve_initrd(void)
return;
}
+ printk(KERN_INFO "old RAMDISK: %08llx - %08llx\n", ramdisk_image,
+ ramdisk_end);
+
+
if (ramdisk_end <= end_of_lowmem) {
/* All in lowmem, easy case */
/*
@@ -512,6 +517,8 @@ static void __init reserve_initrd(void)
"NEW RAMDISK");
initrd_start = ramdisk_here + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
+ printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
+ ramdisk_here, ramdisk_here + ramdisk_size);
do_relocate_initrd = true;
}
Index: linux-2.6/arch/x86/lguest/boot.c
===================================================================
--- linux-2.6.orig/arch/x86/lguest/boot.c
+++ linux-2.6/arch/x86/lguest/boot.c
@@ -1012,6 +1012,7 @@ __init void lguest_init(void)
* clobbered. The Launcher places our initial pagetables somewhere at
* the top of our physical memory, so we don't need extra space: set
* init_pg_tables_end to the end of the kernel. */
+ init_pg_tables_start = __pa(pg0);
init_pg_tables_end = __pa(pg0);
/* Load the %fs segment register (the per-cpu segment register) with
@@ -1065,9 +1066,9 @@ __init void lguest_init(void)
pm_power_off = lguest_power_off;
machine_ops.restart = lguest_restart;
- /* Now we're set up, call start_kernel() in init/main.c and we proceed
+ /* Now we're set up, call i386_start_kernel() in head32.c and we proceed
* to boot as normal. It never returns. */
- start_kernel();
+ i386_start_kernel();
}
/*
* This marks the end of stage II of our journey, The Guest.
Index: linux-2.6/arch/x86/xen/enlighten.c
===================================================================
--- linux-2.6.orig/arch/x86/xen/enlighten.c
+++ linux-2.6/arch/x86/xen/enlighten.c
@@ -1228,6 +1228,7 @@ asmlinkage void __init xen_start_kernel(
pgd = (pgd_t *)xen_start_info->pt_base;
+ init_pg_tables_start = __pa(pgd);
init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
init_mm.pgd = pgd; /* use the Xen pagetables to start */
@@ -1266,5 +1267,5 @@ asmlinkage void __init xen_start_kernel(
}
/* Start the world */
- start_kernel();
+ i386_start_kernel();
}
Index: linux-2.6/include/asm-x86/setup.h
===================================================================
--- linux-2.6.orig/include/asm-x86/setup.h
+++ linux-2.6/include/asm-x86/setup.h
@@ -53,9 +53,11 @@ extern struct boot_params boot_params;
char * __init machine_specific_memory_setup(void);
char *memory_setup(void);
-extern unsigned long init_pg_tables_end;
+void __init i386_start_kernel(void);
+extern unsigned long init_pg_tables_start;
+extern unsigned long init_pg_tables_end;
#endif /* __i386__ */
#endif /* _SETUP */
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] x86: extend e820 early_res support 32bit -fix #4
2008-05-25 17:00 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix #2 Yinghai Lu
2008-05-27 15:44 ` Thomas Gleixner
2008-05-29 19:56 ` [PATCH] x86: extend e820 early_res support 32bit -fix #3 Yinghai Lu
@ 2008-05-29 19:57 ` Yinghai Lu
2008-05-29 19:58 ` [PATCH] x86: extend e820 early_res support 32bit -fix #5 Yinghai Lu
2008-05-29 23:25 ` [PATCH] x86: 32bit numa srat fix early_ioremap leak Yinghai Lu
4 siblings, 0 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-05-29 19:57 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
Jeremy Fitzhardinge
Cc: linux-kernel@vger.kernel.org
Use reserve_early for the pgdat (pg_data_t) on 32-bit NUMA.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/mm/discontig_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/discontig_32.c
+++ linux-2.6/arch/x86/mm/discontig_32.c
@@ -159,8 +159,13 @@ static void __init allocate_pgdat(int ni
if (nid && node_has_online_mem(nid) && node_remap_start_vaddr[nid])
NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
else {
- NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn));
- min_low_pfn += PFN_UP(sizeof(pg_data_t));
+ unsigned long pgdat_phys;
+ pgdat_phys = find_e820_area(min_low_pfn<<PAGE_SHIFT,
+ max_low_pfn<<PAGE_SHIFT, sizeof(pg_data_t),
+ PAGE_SIZE);
+ NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT));
+ reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t),
+ "NODE_DATA");
}
}
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] x86: extend e820 early_res support 32bit -fix #5
2008-05-25 17:00 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix #2 Yinghai Lu
` (2 preceding siblings ...)
2008-05-29 19:57 ` [PATCH] x86: extend e820 early_res support 32bit -fix #4 Yinghai Lu
@ 2008-05-29 19:58 ` Yinghai Lu
2008-05-29 23:25 ` [PATCH] x86: 32bit numa srat fix early_ioremap leak Yinghai Lu
4 siblings, 0 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-05-29 19:58 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
Jeremy Fitzhardinge
Cc: linux-kernel@vger.kernel.org
reserve early numa kva, so it will not clash with new RAMDISK
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6/arch/x86/kernel/setup_32.c
@@ -615,7 +615,6 @@ void __init setup_bootmem_allocator(void
*/
find_smp_config();
#endif
- numa_kva_reserve();
reserve_crashkernel();
reserve_ibft_region();
Index: linux-2.6/arch/x86/mm/discontig_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/discontig_32.c
+++ linux-2.6/arch/x86/mm/discontig_32.c
@@ -334,6 +334,11 @@ unsigned long __init setup_memory(void)
printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
kva_start_pfn, max_low_pfn);
printk("max_pfn = %ld\n", max_pfn);
+
+ /* avoid clash with initrd */
+ reserve_early(kva_start_pfn<<PAGE_SHIFT,
+ (kva_start_pfn + kva_pages)<<PAGE_SHIFT,
+ "KVA PG");
#ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn;
if (max_pfn > system_max_low_pfn)
@@ -369,13 +374,6 @@ unsigned long __init setup_memory(void)
return max_low_pfn;
}
-void __init numa_kva_reserve(void)
-{
- if (kva_pages)
- reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages),
- BOOTMEM_DEFAULT);
-}
-
void __init zone_sizes_init(void)
{
int nid;
Index: linux-2.6/include/asm-x86/mmzone_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/mmzone_32.h
+++ linux-2.6/include/asm-x86/mmzone_32.h
@@ -38,16 +38,12 @@ static inline void get_memcfg_numa(void)
}
extern int early_pfn_to_nid(unsigned long pfn);
-extern void numa_kva_reserve(void);
#else /* !CONFIG_NUMA */
#define get_memcfg_numa get_memcfg_numa_flat
#define get_zholes_size(n) (0)
-static inline void numa_kva_reserve(void)
-{
-}
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DISCONTIGMEM
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] x86: 32bit numa srat fix early_ioremap leak
2008-05-25 17:00 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix #2 Yinghai Lu
` (3 preceding siblings ...)
2008-05-29 19:58 ` [PATCH] x86: extend e820 early_res support 32bit -fix #5 Yinghai Lu
@ 2008-05-29 23:25 ` Yinghai Lu
2008-05-31 8:01 ` Ingo Molnar
` (2 more replies)
4 siblings, 3 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-05-29 23:25 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
Cc: linux-kernel@vger.kernel.org
On a two-node system (16 GB RAM) with a NUMA config, I got:
get_memcfg_from_srat: assigning address to rsdp
RSD PTR v0 [ACPIAM]
ACPI: Too big length in RSDT: 92
failed to get NUMA memory information from SRAT table
NUMA - single node, flat memory mode
Node: 0, start_pfn: 0, end_pfn: 153
Setting physnode_map array to node 0 for pfns:
0
...
Pid: 0, comm: swapper Not tainted 2.6.26-rc4 #4
[<80b41289>] hlt_loop+0x0/0x3
[<8011efa0>] ? alloc_remap+0x50/0x70
[<8079e32e>] alloc_node_mem_map+0x5e/0xa0
[<8012e77b>] ? printk+0x1b/0x20
[<80b590f6>] free_area_init_node+0xc6/0x470
[<80b588fc>] ? __alloc_bootmem_node+0x2c/0x50
[<80b58ad8>] ? find_min_pfn_for_node+0x38/0x70
[<8012e77b>] ? printk+0x1b/0x20
[<80b597c4>] free_area_init_nodes+0x254/0x2d0
[<80b544d7>] zone_sizes_init+0x97/0xa0
[<80b48a03>] setup_arch+0x383/0x530
[<8012e77b>] ? printk+0x1b/0x20
[<80b41aa4>] start_kernel+0x64/0x350
[<80b412d8>] i386_start_kernel+0x8/0x10
=======================
This patch increases the ACPI table limit to 32 and
matches each early_ioremap with an early_iounmap.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c
index 70e4a37..88971ee 100644
--- a/arch/x86/kernel/srat_32.c
+++ b/arch/x86/kernel/srat_32.c
@@ -261,7 +261,7 @@ out_fail:
struct acpi_static_rsdt {
struct acpi_table_rsdt table;
- u32 padding[7]; /* Allow for 7 more table entries */
+ u32 padding[32]; /* Allow for 32 more table entries */
};
int __init get_memcfg_from_srat(void)
@@ -297,7 +297,7 @@ int __init get_memcfg_from_srat(void)
}
rsdt = (struct acpi_table_rsdt *)
- early_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt));
+ early_ioremap(rsdp->rsdt_physical_address, sizeof(saved_rsdt));
if (!rsdt) {
printk(KERN_WARNING
@@ -310,6 +310,7 @@ int __init get_memcfg_from_srat(void)
if (strncmp(header->signature, ACPI_SIG_RSDT, strlen(ACPI_SIG_RSDT))) {
printk(KERN_WARNING "ACPI: RSDT signature incorrect\n");
+ early_iounmap(rsdt, sizeof(saved_rsdt));
goto out_err;
}
@@ -319,37 +320,51 @@ int __init get_memcfg_from_srat(void)
* size of RSDT) divided by the size of each entry
* (4-byte table pointers).
*/
- tables = (header->length - sizeof(struct acpi_table_header)) / 4;
+ tables = (header->length - sizeof(struct acpi_table_header)) / sizeof(u32);
if (!tables)
goto out_err;
memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt));
-
+ early_iounmap(rsdt, sizeof(saved_rsdt));
if (saved_rsdt.table.header.length > sizeof(saved_rsdt)) {
printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n",
saved_rsdt.table.header.length);
goto out_err;
}
- printk("Begin SRAT table scan....\n");
+ printk("Begin SRAT table scan....%d\n", tables);
- for (i = 0; i < tables; i++) {
+ for (i = 0; i < tables; i++){
+ int result;
+ u32 length;
/* Map in header, then map in full table length. */
header = (struct acpi_table_header *)
early_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header));
if (!header)
break;
+
+ printk(KERN_INFO "ACPI: %4.4s %08lX, %04X\n",
+ header->signature,
+ (unsigned long)saved_rsdt.table.table_offset_entry[i],
+ header->length);
+
+ if (strncmp((char *) &header->signature, ACPI_SIG_SRAT, 4)) {
+ early_iounmap(header, sizeof(struct acpi_table_header));
+ continue;
+ }
+
+ length = header->length;
+ early_iounmap(header, sizeof(struct acpi_table_header));
header = (struct acpi_table_header *)
- early_ioremap(saved_rsdt.table.table_offset_entry[i], header->length);
+ early_ioremap(saved_rsdt.table.table_offset_entry[i], length);
if (!header)
break;
- if (strncmp((char *) &header->signature, ACPI_SIG_SRAT, 4))
- continue;
-
/* we've found the srat table. don't need to look at any more tables */
- return acpi20_parse_srat((struct acpi_table_srat *)header);
+ result = acpi20_parse_srat((struct acpi_table_srat *)header);
+ early_iounmap(header, length);
+ return result;
}
out_err:
remove_all_active_ranges();
^ permalink raw reply related [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: extend e820 ealy_res support 32bit - fix #2
2008-05-29 19:14 ` Yinghai Lu
@ 2008-05-30 15:50 ` Jeremy Fitzhardinge
0 siblings, 0 replies; 51+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-30 15:50 UTC (permalink / raw)
To: Yinghai Lu
Cc: Rusty Russell, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
Andrew Morton, linux-kernel@vger.kernel.org
Yinghai Lu wrote:
> On Thu, May 29, 2008 at 11:52 AM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
>> On Thu, May 29, 2008 at 6:37 AM, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
>>
>>> Yinghai Lu wrote:
>>>
>>>> can we use e820 entries for that? So the domain builder could have
>>>> several entries for E820_RAM and E820_RESERVED...
>>>>
>>>>
>>> I tried this, but it doesn't work; the kernel crashes during boot,
>>> presumably because it's trying to use the reserved memory as heap. I
>>> suspect the e820 maps are not registered early enough or something...
>>>
>>> (One thought: if reserving in the E820 map were enough, then couldn't we use
>>> it for all the early reservations?)
>>>
>>> I've attached the non-working patch below.
>>>
>>> The working kernel reports:
>>>
>>> (early) BIOS-provided physical RAM map:
>>> (early) Xen: 0000000000000000 - 000000000009f000 (usable)
>>> (early) Xen: 0000000000100000 - 0000000010000000 (usable)
>>> (early) console [xenboot0] enabled
>>> (early) debug: ignoring loglevel setting.
>>> (early) 0MB HIGHMEM available.
>>> (early) 256MB LOWMEM available.
>>> (early) low ram: 018fd000 - 10000000
>>> (early) bootmap 018fd000 - 018ff000
>>> (early) early res: 0 [0-fff] BIOS data page
>>> (early) early res: 1 [1000-1fff] EX TRAMPOLINE
>>> (early) early res: 2 [6000-6fff] TRAMPOLINE
>>> (early) early res: 3 [18aa000-18ecfff] XEN
>>> (early) early res: 4 [1000000-18a9303] TEXT DATA BSS
>>> (early) early res: 5 [18ed000-18fcfff] INIT_PG_TABLE
>>> (early) early res: 6 [18fd000-18fefff] BOOTMAP
>>>
>>> But the non-working one says:
>>>
>>> (early) BIOS-provided physical RAM map:
>>> (early) Xen: 0000000000000000 - 000000000009f000 (usable)
>>> (early) Xen: 0000000000100000 - 0000000010000000 (usable)
>>> (early) Xen: 00000000018aa000 - 00000000018ed000 (reserved)
>>>
>> it seems we miss to call sanitize_e820_map for 32 bit. we should get
>> (early) Xen: 0000000000000000 - 000000000009f000 (usable)
>> (early) Xen: 0000000000100000 - 00000000018aa000 (usable)
>> (early) Xen: 00000000018aa000 - 00000000018ed000 (reserved)
>> (early) Xen: 00000000018ed000 - 0000000010000000 (usable)
>>
>
> xen pv override the memory_setup. and default one is
> machine_specific_memory_setup(), and it does call sanitize_e820_map...
>
> so you may need to add sanitize_e820_map to xen_memory_setup, or re
> arrange your add_memory_range parameter...
Yes, adding
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
after the Xen reservation made everything happy.
J
Subject: xen: reserve Xen-specific memory in e820 map
Make sure that the start_info and pfn->mfn translation array are reserved.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/setup.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
===================================================================
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -40,8 +40,21 @@
max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
e820.nr_map = 0;
+
add_memory_region(0, LOWMEMSIZE(), E820_RAM);
add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM);
+
+ /*
+ * Reserve Xen bits:
+ * - mfn_list
+ * - xen_start_info
+ * See comment above "struct start_info" in <xen/interface/xen.h>
+ */
+ add_memory_region(__pa(xen_start_info->mfn_list),
+ xen_start_info->pt_base - xen_start_info->mfn_list,
+ E820_RESERVED);
+
+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
return "Xen";
}
^ permalink raw reply [flat|nested] 51+ messages in thread
* Re: [PATCH] x86: 32bit numa srat fix early_ioremap leak
2008-05-29 23:25 ` [PATCH] x86: 32bit numa srat fix early_ioremap leak Yinghai Lu
@ 2008-05-31 8:01 ` Ingo Molnar
2008-06-01 5:51 ` [PATCH] x86: 32bit numa increase max_elements to 1024 Yinghai Lu
2008-06-02 4:06 ` [PATCH] x86: numa_32 avoid clash between ramdisk and kva Yinghai Lu
2 siblings, 0 replies; 51+ messages in thread
From: Ingo Molnar @ 2008-05-31 8:01 UTC (permalink / raw)
To: Yinghai Lu
Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton,
linux-kernel@vger.kernel.org
* Yinghai Lu <yhlu.kernel@gmail.com> wrote:
> Pid: 0, comm: swapper Not tainted 2.6.26-rc4 #4
> [<80b41289>] hlt_loop+0x0/0x3
> [<8011efa0>] ? alloc_remap+0x50/0x70
> [<8079e32e>] alloc_node_mem_map+0x5e/0xa0
> [<8012e77b>] ? printk+0x1b/0x20
> [<80b590f6>] free_area_init_node+0xc6/0x470
> [<80b588fc>] ? __alloc_bootmem_node+0x2c/0x50
> [<80b58ad8>] ? find_min_pfn_for_node+0x38/0x70
> [<8012e77b>] ? printk+0x1b/0x20
> [<80b597c4>] free_area_init_nodes+0x254/0x2d0
> [<80b544d7>] zone_sizes_init+0x97/0xa0
> [<80b48a03>] setup_arch+0x383/0x530
> [<8012e77b>] ? printk+0x1b/0x20
> [<80b41aa4>] start_kernel+0x64/0x350
> [<80b412d8>] i386_start_kernel+0x8/0x10
> =======================
>
> this patch increase the acpi table limit to 32
> match early_ioremap with early_iounmap
applied all 4 early-reserve related fixes from you - thanks Yinghai!
Ingo
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] x86: 32bit numa increase max_elements to 1024
2008-05-29 23:25 ` [PATCH] x86: 32bit numa srat fix early_ioremap leak Yinghai Lu
2008-05-31 8:01 ` Ingo Molnar
@ 2008-06-01 5:51 ` Yinghai Lu
2008-06-01 5:52 ` [PATCH] x86: change propagate_e820_map back to find_max_pfn -32bit Yinghai Lu
2008-06-02 4:06 ` [PATCH] x86: numa_32 avoid clash between ramdisk and kva Yinghai Lu
2 siblings, 1 reply; 51+ messages in thread
From: Yinghai Lu @ 2008-06-01 5:51 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
Cc: linux-kernel@vger.kernel.org
so every element will represent 64M instead of 256M.
Later AMD Opteron based systems could have HW memory hole remapping, so they
could have [0, 8g + 64M) on node0. Reducing the element size to 64M keeps that
range on node 0.
Later we need to use find_e820_area to allocate memory_node_map like 64bit,
but that needs memory_present to be moved out of populate_mem_map...
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 0c1a1bf..0ea4854 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -59,14 +59,14 @@ unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly;
/*
* 4) physnode_map - the mapping between a pfn and owning node
* physnode_map keeps track of the physical memory layout of a generic
- * numa node on a 256Mb break (each element of the array will
- * represent 256Mb of memory and will be marked by the node id. so,
+ * numa node on a 64Mb break (each element of the array will
+ * represent 64Mb of memory and will be marked by the node id. so,
* if the first gig is on node 0, and the second gig is on node 1
* physnode_map will contain:
*
- * physnode_map[0-3] = 0;
- * physnode_map[4-7] = 1;
- * physnode_map[8- ] = -1;
+ * physnode_map[0-15] = 0;
+ * physnode_map[16-31] = 1;
+ * physnode_map[32- ] = -1;
*/
s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1};
EXPORT_SYMBOL(physnode_map);
@@ -81,9 +81,9 @@ void memory_present(int nid, unsigned long start, unsigned long end)
printk(KERN_DEBUG " ");
for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) {
physnode_map[pfn / PAGES_PER_ELEMENT] = nid;
- printk("%ld ", pfn);
+ printk(KERN_CONT "%ld ", pfn);
}
- printk("\n");
+ printk(KERN_CONT "\n");
}
unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h
index faef751..ab00128 100644
--- a/include/asm-x86/mmzone_32.h
+++ b/include/asm-x86/mmzone_32.h
@@ -51,14 +51,14 @@ extern int early_pfn_to_nid(unsigned long pfn);
/*
* generic node memory support, the following assumptions apply:
*
- * 1) memory comes in 256Mb contigious chunks which are either present or not
+ * 1) memory comes in 64Mb contigious chunks which are either present or not
* 2) we will not have more than 64Gb in total
*
* for now assume that 64Gb is max amount of RAM for whole system
* 64Gb / 4096bytes/page = 16777216 pages
*/
#define MAX_NR_PAGES 16777216
-#define MAX_ELEMENTS 256
+#define MAX_ELEMENTS 1024
#define PAGES_PER_ELEMENT (MAX_NR_PAGES/MAX_ELEMENTS)
extern s8 physnode_map[];
^ permalink raw reply related [flat|nested] 51+ messages in thread
* [PATCH] x86: change propagate_e820_map back to find_max_pfn -32bit
2008-06-01 5:51 ` [PATCH] x86: 32bit numa increase max_elements to 1024 Yinghai Lu
@ 2008-06-01 5:52 ` Yinghai Lu
2008-06-01 5:53 ` [PATCH] x86: set node_remap_size[0] in fallback path Yinghai Lu
2008-06-03 2:16 ` [PATCH] x86: change propagate_e820_map back to find_max_pfn -32bit -v2 Yinghai Lu
0 siblings, 2 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-06-01 5:52 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
Cc: linux-kernel@vger.kernel.org
we don't need to call memory_present that early.
numa dist and sparse will call memory_present later
and even if that fails, it will call memory_present for the whole range.
also for sparse it will call alloc_bootmem ... before we set up bootmem
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/e820_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_32.c
+++ linux-2.6/arch/x86/kernel/e820_32.c
@@ -210,7 +210,7 @@ void __init init_iomem_resources(struct
/*
* Find the highest page frame number we have available
*/
-void __init propagate_e820_map(void)
+void __init find_max_pfn(void)
{
int i;
@@ -227,7 +227,6 @@ void __init propagate_e820_map(void)
continue;
if (end > max_pfn)
max_pfn = end;
- memory_present(0, start, end);
}
}
@@ -361,7 +360,7 @@ static int __init parse_memmap(char *arg
* size before original memory map is
* reset.
*/
- propagate_e820_map();
+ find_max_pfn();
saved_max_pfn = max_pfn;
#endif
e820.nr_map = 0;
Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6/arch/x86/kernel/setup_32.c
@@ -742,10 +742,10 @@ void __init setup_arch(char **cmdline_p)
efi_init();
/* update e820 for memory not covered by WB MTRRs */
- propagate_e820_map();
+ find_max_pfn();
mtrr_bp_init();
if (mtrr_trim_uncached_memory(max_pfn))
- propagate_e820_map();
+ find_max_pfn();
max_low_pfn = setup_memory();
Index: linux-2.6/arch/x86/mm/discontig_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/discontig_32.c
+++ linux-2.6/arch/x86/mm/discontig_32.c
@@ -120,7 +120,7 @@ int __init get_memcfg_numa_flat(void)
printk("NUMA - single node, flat memory mode\n");
/* Run the memory configuration and find the top of memory. */
- propagate_e820_map();
+ find_max_pfn();
node_start_pfn[0] = 0;
node_end_pfn[0] = max_pfn;
memory_present(0, 0, max_pfn);
Index: linux-2.6/include/asm-x86/e820_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_32.h
+++ linux-2.6/include/asm-x86/e820_32.h
@@ -21,7 +21,7 @@
extern void setup_memory_map(void);
extern void finish_e820_parsing(void);
-extern void propagate_e820_map(void);
+extern void find_max_pfn(void);
extern void register_bootmem_low_pages(unsigned long max_low_pfn);
extern void limit_regions(unsigned long long size);
extern void init_iomem_resources(struct resource *code_resource,
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] x86: set node_remap_size[0] in fallback path
2008-06-01 5:52 ` [PATCH] x86: change propagate_e820_map back to find_max_pfn -32bit Yinghai Lu
@ 2008-06-01 5:53 ` Yinghai Lu
2008-06-01 5:56 ` [PATCH] x86: numa_32 print out debug info all kva Yinghai Lu
2008-06-03 2:16 ` [PATCH] x86: change propagate_e820_map back to find_max_pfn -32bit -v2 Yinghai Lu
1 sibling, 1 reply; 51+ messages in thread
From: Yinghai Lu @ 2008-06-01 5:53 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
Cc: linux-kernel@vger.kernel.org
otherwise alloc_remap will not get node_mem_map from the kva area, and
alloc_node_mem_map has to call alloc_bootmem_node to get mem_map.
it will use two copies low address...
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
--- a/arch/x86/mm/discontig_32.c 2008-05-31 21:46:14.000000000 -0700
+++ b/arch/x86/mm/discontig_32.c 2008-05-31 21:49:02.000000000 -0700
@@ -124,6 +124,7 @@ int __init get_memcfg_numa_flat(void)
node_start_pfn[0] = 0;
node_end_pfn[0] = max_pfn;
memory_present(0, 0, max_pfn);
+ node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn);
/* Indicate there is one node available. */
nodes_clear(node_online_map);
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] x86: numa_32 print out debug info all kva
2008-06-01 5:53 ` [PATCH] x86: set node_remap_size[0] in fallback path Yinghai Lu
@ 2008-06-01 5:56 ` Yinghai Lu
2008-06-01 20:15 ` [PATCH] x86: numa_32 print out debug info all kva v2 Yinghai Lu
0 siblings, 1 reply; 51+ messages in thread
From: Yinghai Lu @ 2008-06-01 5:56 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
Cc: linux-kernel@vger.kernel.org
also fix the print out of node_remap_end_vaddr
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/mm/discontig_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/discontig_32.c
+++ linux-2.6/arch/x86/mm/discontig_32.c
@@ -168,6 +168,8 @@ static void __init allocate_pgdat(int ni
reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t),
"NODE_DATA");
}
+ printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n",
+ nid, (unsigned long)NODE_DATA(nid));
}
/*
@@ -205,8 +207,12 @@ void __init remap_numa_kva(void)
int node;
for_each_online_node(node) {
+ printk(KERN_DEBUG "remap_numa_kva: node %d\n", node);
for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
+ printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n",
+ (unsigned long)vaddr,
+ node_remap_start_pfn[node] + pfn);
set_pmd_pfn((ulong) vaddr,
node_remap_start_pfn[node] + pfn,
PAGE_KERNEL_LARGE);
@@ -290,8 +296,7 @@ static void init_remap_allocator(int nid
printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
(ulong) node_remap_start_vaddr[nid],
- (ulong) pfn_to_kaddr(highstart_pfn
- + node_remap_offset[nid] + node_remap_size[nid]));
+ (ulong) node_remap_end_vaddr[nid]);
}
extern void setup_bootmem_allocator(void);
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -3499,6 +3499,9 @@ void __paginginit free_area_init_node(in
calculate_node_totalpages(pgdat, zones_size, zholes_size);
alloc_node_mem_map(pgdat);
+ printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
+ nid, (unsigned long)pgdat,
+ (unsigned long)pgdat->node_mem_map);
free_area_init_core(pgdat, zones_size, zholes_size);
}
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] x86: numa_32 print out debug info all kva v2
2008-06-01 5:56 ` [PATCH] x86: numa_32 print out debug info all kva Yinghai Lu
@ 2008-06-01 20:15 ` Yinghai Lu
0 siblings, 0 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-06-01 20:15 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
Cc: linux-kernel@vger.kernel.org
also fix the print out of node_remap_end_vaddr
v2: fix non numa flat compiling
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/mm/discontig_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/discontig_32.c
+++ linux-2.6/arch/x86/mm/discontig_32.c
@@ -168,6 +168,8 @@ static void __init allocate_pgdat(int ni
reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t),
"NODE_DATA");
}
+ printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n",
+ nid, (unsigned long)NODE_DATA(nid));
}
/*
@@ -205,8 +207,12 @@ void __init remap_numa_kva(void)
int node;
for_each_online_node(node) {
+ printk(KERN_DEBUG "remap_numa_kva: node %d\n", node);
for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
+ printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n",
+ (unsigned long)vaddr,
+ node_remap_start_pfn[node] + pfn);
set_pmd_pfn((ulong) vaddr,
node_remap_start_pfn[node] + pfn,
PAGE_KERNEL_LARGE);
@@ -290,8 +296,7 @@ static void init_remap_allocator(int nid
printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
(ulong) node_remap_start_vaddr[nid],
- (ulong) pfn_to_kaddr(highstart_pfn
- + node_remap_offset[nid] + node_remap_size[nid]));
+ (ulong) node_remap_end_vaddr[nid]);
}
extern void setup_bootmem_allocator(void);
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -3499,6 +3499,11 @@ void __paginginit free_area_init_node(in
calculate_node_totalpages(pgdat, zones_size, zholes_size);
alloc_node_mem_map(pgdat);
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
+ printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
+ nid, (unsigned long)pgdat,
+ (unsigned long)pgdat->node_mem_map);
+#endif
free_area_init_core(pgdat, zones_size, zholes_size);
}
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] x86: numa_32 avoid clash between ramdisk and kva
2008-05-29 23:25 ` [PATCH] x86: 32bit numa srat fix early_ioremap leak Yinghai Lu
2008-05-31 8:01 ` Ingo Molnar
2008-06-01 5:51 ` [PATCH] x86: 32bit numa increase max_elements to 1024 Yinghai Lu
@ 2008-06-02 4:06 ` Yinghai Lu
2008-06-02 6:53 ` [PATCH] x86: cleanup max_pfn_mapped usage - 32bit Yinghai Lu
2008-06-02 6:55 ` [PATCH] x86: cleanup max_pfn_mapped usage - 64bit Yinghai Lu
2 siblings, 2 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-06-02 4:06 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
Cc: linux-kernel@vger.kernel.org
use find_e820_area to get address space...
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/mm/discontig_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/discontig_32.c
+++ linux-2.6/arch/x86/mm/discontig_32.c
@@ -38,6 +38,7 @@
#include <asm/setup.h>
#include <asm/mmzone.h>
#include <asm/bios_ebda.h>
+#include <asm/proto.h>
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
@@ -304,7 +305,6 @@ unsigned long __init setup_memory(void)
{
int nid;
unsigned long system_start_pfn, system_max_low_pfn;
- unsigned long wasted_pages;
/*
* When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -315,29 +315,18 @@ unsigned long __init setup_memory(void)
*/
get_memcfg_numa();
- kva_pages = calculate_numa_remap_pages();
+ kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE);
/* partially used pages are not usable - thus round upwards */
system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
- kva_start_pfn = find_max_low_pfn() - kva_pages;
-
-#ifdef CONFIG_BLK_DEV_INITRD
- /* Numa kva area is below the initrd */
- if (initrd_start)
- kva_start_pfn = PFN_DOWN(initrd_start - PAGE_OFFSET)
- - kva_pages;
-#endif
-
- /*
- * We waste pages past at the end of the KVA for no good reason other
- * than how it is located. This is bad.
- */
- wasted_pages = kva_start_pfn & (PTRS_PER_PTE-1);
- kva_start_pfn -= wasted_pages;
- kva_pages += wasted_pages;
-
system_max_low_pfn = max_low_pfn = find_max_low_pfn();
+ kva_start_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
+ kva_start_pfn = find_e820_area(kva_start_pfn<<PAGE_SHIFT,
+ max_low_pfn<<PAGE_SHIFT,
+ kva_pages<<PAGE_SHIFT,
+ PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT;
+
printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
kva_start_pfn, max_low_pfn);
printk("max_pfn = %ld\n", max_pfn);
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] x86: cleanup max_pfn_mapped usage - 32bit
2008-06-02 4:06 ` [PATCH] x86: numa_32 avoid clash between ramdisk and kva Yinghai Lu
@ 2008-06-02 6:53 ` Yinghai Lu
2008-06-02 6:55 ` [PATCH] x86: cleanup max_pfn_mapped usage - 64bit Yinghai Lu
1 sibling, 0 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-06-02 6:53 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton,
Jeremy Fitzhardinge
Cc: linux-kernel@vger.kernel.org
On 32bit, in head_32.S, after the initial page table is done, we get the
initial max_pfn_mapped, and then kernel_physical_mapping_init will give us
the final one.
We need to use that to make sure find_e820_area gets a valid address for
boot_map and NODE_DATA(0) for numa32.
XEN PV and lguest may need to assign max_pfn_mapped too.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/head_32.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head_32.S
+++ linux-2.6/arch/x86/kernel/head_32.S
@@ -220,6 +220,8 @@ default_entry:
jb 10b
1:
movl %edi,pa(init_pg_tables_end)
+ shrl $12, %eax
+ movl %eax, pa(max_pfn_mapped)
/* Do early initialization of the fixmap area */
movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
@@ -251,6 +253,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
cmpl %ebp,%eax
jb 10b
movl %edi,pa(init_pg_tables_end)
+ shrl $12, %eax
+ movl %eax, pa(max_pfn_mapped)
/* Do early initialization of the fixmap area */
movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6/arch/x86/kernel/setup_32.c
@@ -587,7 +587,7 @@ void __init setup_bootmem_allocator(void
*/
bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT;
bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT,
- max_low_pfn<<PAGE_SHIFT, bootmap_size,
+ max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
PAGE_SIZE);
if (bootmap == -1L)
panic("Cannot find bootmem map of size %ld\n", bootmap_size);
@@ -596,6 +596,8 @@ void __init setup_bootmem_allocator(void
reserve_initrd();
#endif
bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn);
+ printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
+ max_pfn_mapped<<PAGE_SHIFT);
printk(KERN_INFO " low ram: %08lx - %08lx\n",
min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
printk(KERN_INFO " bootmap %08lx - %08lx\n",
Index: linux-2.6/arch/x86/mm/discontig_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/discontig_32.c
+++ linux-2.6/arch/x86/mm/discontig_32.c
@@ -163,7 +163,8 @@ static void __init allocate_pgdat(int ni
else {
unsigned long pgdat_phys;
pgdat_phys = find_e820_area(min_low_pfn<<PAGE_SHIFT,
- max_low_pfn<<PAGE_SHIFT, sizeof(pg_data_t),
+ (nid ? max_low_pfn:max_pfn_mapped)<<PAGE_SHIFT,
+ sizeof(pg_data_t),
PAGE_SIZE);
NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT));
reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t),
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] x86: cleanup max_pfn_mapped usage - 64bit
2008-06-02 4:06 ` [PATCH] x86: numa_32 avoid clash between ramdisk and kva Yinghai Lu
2008-06-02 6:53 ` [PATCH] x86: cleanup max_pfn_mapped usage - 32bit Yinghai Lu
@ 2008-06-02 6:55 ` Yinghai Lu
1 sibling, 0 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-06-02 6:55 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
Cc: linux-kernel@vger.kernel.org
On 64bit, we get a valid max_pfn_mapped only after init_memory_mapping.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6/arch/x86/kernel/e820_64.c
@@ -55,16 +55,12 @@ unsigned long __init e820_end_of_ram(voi
last_pfn = find_max_pfn_with_active_regions();
- if (last_pfn > max_pfn_mapped)
- max_pfn_mapped = last_pfn;
- if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT)
- max_pfn_mapped = MAXMEM>>PAGE_SHIFT;
+ if (last_pfn > MAXMEM>>PAGE_SHIFT)
+ last_pfn = MAXMEM>>PAGE_SHIFT;
if (last_pfn > end_user_pfn)
last_pfn = end_user_pfn;
- if (last_pfn > max_pfn_mapped)
- last_pfn = max_pfn_mapped;
- printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped);
+ printk(KERN_INFO "last_pfn = %lu\n", last_pfn);
return last_pfn;
}
@@ -109,10 +105,6 @@ static int __init e820_find_active_regio
if (*ei_startpfn >= *ei_endpfn)
return 0;
- /* Check if max_pfn_mapped should be updated */
- if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped)
- max_pfn_mapped = *ei_endpfn;
-
/* Skip if map is outside the node */
if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
*ei_startpfn >= last_pfn)
@@ -229,7 +221,6 @@ static int __init parse_memmap_opt(char
saved_max_pfn = e820_end_of_ram();
remove_all_active_ranges();
#endif
- max_pfn_mapped = 0;
e820.nr_map = 0;
userdef = 1;
return 0;
Index: linux-2.6/arch/x86/kernel/setup_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_64.c
+++ linux-2.6/arch/x86/kernel/setup_64.c
@@ -388,7 +388,7 @@ void __init setup_arch(char **cmdline_p)
check_efer();
- max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT));
+ max_pfn_mapped = init_memory_mapping(0, (end_pfn << PAGE_SHIFT));
if (efi_enabled)
efi_init();
^ permalink raw reply [flat|nested] 51+ messages in thread
* [PATCH] x86: change propagate_e820_map back to find_max_pfn -32bit -v2
2008-06-01 5:52 ` [PATCH] x86: change propagate_e820_map back to find_max_pfn -32bit Yinghai Lu
2008-06-01 5:53 ` [PATCH] x86: set node_remap_size[0] in fallback path Yinghai Lu
@ 2008-06-03 2:16 ` Yinghai Lu
1 sibling, 0 replies; 51+ messages in thread
From: Yinghai Lu @ 2008-06-03 2:16 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
Cc: linux-kernel@vger.kernel.org
we don't need to call memory_present that early.
numa dist and sparse will call memory_present later
and even if that fails, it will call memory_present for the whole range.
v2: add memory_present calling for sparse and non numa
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/e820_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_32.c
+++ linux-2.6/arch/x86/kernel/e820_32.c
@@ -210,7 +210,7 @@ void __init init_iomem_resources(struct
/*
* Find the highest page frame number we have available
*/
-void __init propagate_e820_map(void)
+void __init find_max_pfn(void)
{
int i;
@@ -227,7 +227,6 @@ void __init propagate_e820_map(void)
continue;
if (end > max_pfn)
max_pfn = end;
- memory_present(0, start, end);
}
}
@@ -361,7 +360,7 @@ static int __init parse_memmap(char *arg
* size before original memory map is
* reset.
*/
- propagate_e820_map();
+ find_max_pfn();
saved_max_pfn = max_pfn;
#endif
e820.nr_map = 0;
Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6/arch/x86/kernel/setup_32.c
@@ -378,11 +378,13 @@ static unsigned long __init setup_memory
if (max_pfn > max_low_pfn) {
highstart_pfn = max_low_pfn;
}
+ memory_present(0, 0, highend_pfn);
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
pages_to_mb(highend_pfn - highstart_pfn));
num_physpages = highend_pfn;
high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
+ memory_present(0, 0, max_low_pfn);
num_physpages = max_low_pfn;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif
@@ -742,10 +744,10 @@ void __init setup_arch(char **cmdline_p)
efi_init();
/* update e820 for memory not covered by WB MTRRs */
- propagate_e820_map();
+ find_max_pfn();
mtrr_bp_init();
if (mtrr_trim_uncached_memory(max_pfn))
- propagate_e820_map();
+ find_max_pfn();
max_low_pfn = setup_memory();
Index: linux-2.6/arch/x86/mm/discontig_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/discontig_32.c
+++ linux-2.6/arch/x86/mm/discontig_32.c
@@ -120,7 +120,7 @@ int __init get_memcfg_numa_flat(void)
printk("NUMA - single node, flat memory mode\n");
/* Run the memory configuration and find the top of memory. */
- propagate_e820_map();
+ find_max_pfn();
node_start_pfn[0] = 0;
node_end_pfn[0] = max_pfn;
memory_present(0, 0, max_pfn);
Index: linux-2.6/include/asm-x86/e820_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_32.h
+++ linux-2.6/include/asm-x86/e820_32.h
@@ -21,7 +21,7 @@
extern void setup_memory_map(void);
extern void finish_e820_parsing(void);
-extern void propagate_e820_map(void);
+extern void find_max_pfn(void);
extern void register_bootmem_low_pages(unsigned long max_low_pfn);
extern void limit_regions(unsigned long long size);
extern void init_iomem_resources(struct resource *code_resource,
^ permalink raw reply [flat|nested] 51+ messages in thread
end of thread, other threads:[~2008-06-03 2:17 UTC | newest]
Thread overview: 51+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-05-11 7:30 [PATCH] x86: make e820.c to have common functions Yinghai Lu
2008-05-13 13:05 ` Ingo Molnar
2008-05-13 17:35 ` Yinghai Lu
2008-05-18 8:18 ` [PATCH] x86: extend e820 ealy_res support 32bit Yinghai Lu
2008-05-21 3:10 ` [PATCH] x86: move e820_mark_nosave_regions to e820.c Yinghai Lu
2008-05-22 1:40 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix Yinghai Lu
2008-05-22 10:12 ` Jeremy Fitzhardinge
2008-05-22 17:58 ` Yinghai Lu
2008-05-22 22:20 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix v2 Yinghai Lu
2008-05-23 23:08 ` Yinghai Lu
2008-05-23 23:32 ` Jeremy Fitzhardinge
2008-05-23 23:38 ` Jeremy Fitzhardinge
2008-05-24 0:01 ` Yinghai Lu
2008-05-24 0:09 ` Yinghai Lu
2008-05-24 8:54 ` Jeremy Fitzhardinge
2008-05-24 9:49 ` [PATCH] xen: boot via i386_start_kernel to get early reservations Jeremy Fitzhardinge
2008-05-24 22:04 ` Yinghai Lu
2008-05-24 19:57 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix v2 Yinghai Lu
2008-05-25 17:00 ` [PATCH] x86: extend e820 ealy_res support 32bit - fix #2 Yinghai Lu
2008-05-27 15:44 ` Thomas Gleixner
2008-05-27 20:37 ` Jeremy Fitzhardinge
2008-05-27 20:58 ` Thomas Gleixner
2008-05-27 21:06 ` Jeremy Fitzhardinge
2008-05-27 21:06 ` Yinghai Lu
2008-05-27 21:22 ` Jeremy Fitzhardinge
2008-05-27 21:35 ` Yinghai Lu
2008-05-27 21:47 ` Jeremy Fitzhardinge
2008-05-27 22:52 ` Yinghai Lu
2008-05-28 10:01 ` Jeremy Fitzhardinge
2008-05-28 20:48 ` Yinghai Lu
2008-05-28 21:24 ` Jeremy Fitzhardinge
2008-05-29 13:37 ` Jeremy Fitzhardinge
2008-05-29 18:41 ` Yinghai Lu
2008-05-29 18:58 ` H. Peter Anvin
2008-05-29 18:52 ` Yinghai Lu
2008-05-29 19:14 ` Yinghai Lu
2008-05-30 15:50 ` Jeremy Fitzhardinge
2008-05-29 19:56 ` [PATCH] x86: extend e820 early_res support 32bit -fix #3 Yinghai Lu
2008-05-29 19:57 ` [PATCH] x86: extend e820 early_res support 32bit -fix #4 Yinghai Lu
2008-05-29 19:58 ` [PATCH] x86: extend e820 early_res support 32bit -fix #5 Yinghai Lu
2008-05-29 23:25 ` [PATCH] x86: 32bit numa srat fix early_ioremap leak Yinghai Lu
2008-05-31 8:01 ` Ingo Molnar
2008-06-01 5:51 ` [PATCH] x86: 32bit numa increase max_elements to 1024 Yinghai Lu
2008-06-01 5:52 ` [PATCH] x86: change propagate_e820_map back to find_max_pfn -32bit Yinghai Lu
2008-06-01 5:53 ` [PATCH] x86: set node_remap_size[0] in fallback path Yinghai Lu
2008-06-01 5:56 ` [PATCH] x86: numa_32 print out debug info all kva Yinghai Lu
2008-06-01 20:15 ` [PATCH] x86: numa_32 print out debug info all kva v2 Yinghai Lu
2008-06-03 2:16 ` [PATCH] x86: change propagate_e820_map back to find_max_pfn -32bit -v2 Yinghai Lu
2008-06-02 4:06 ` [PATCH] x86: numa_32 avoid clash between ramdisk and kva Yinghai Lu
2008-06-02 6:53 ` [PATCH] x86: cleanup max_pfn_mapped usage - 32bit Yinghai Lu
2008-06-02 6:55 ` [PATCH] x86: cleanup max_pfn_mapped usage - 64bit Yinghai Lu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox