From: Yinghai Lu
Subject: [PATCH 5/6] early_res: separate common memmap func from e820.c to fw_memmap.c
Date: Wed, 10 Mar 2010 13:24:26 -0800
Message-ID: <1268256267-3769-6-git-send-email-yinghai@kernel.org>
References: <1268256267-3769-1-git-send-email-yinghai@kernel.org>
Mime-Version: 1.0
Content-Type: TEXT/PLAIN
Content-Transfer-Encoding: 7BIT
Return-path:
In-reply-to: <1268256267-3769-1-git-send-email-yinghai@kernel.org>
Sender: linux-kernel-owner@vger.kernel.org
To: Ingo Molnar , Thomas Gleixner , "H. Peter Anvin" , Andrew Morton , David Miller
Cc: linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org, Yinghai Lu
List-Id: linux-arch.vger.kernel.org

Move the common memmap handling code from arch/x86/kernel/e820.c to kernel/fw_memmap.c.

Signed-off-by: Yinghai Lu
---
 arch/x86/include/asm/e820.h |  130 -----
 arch/x86/kernel/e820.c      | 1142 --------------------------------------------
 include/linux/bootmem.h     |    2
 include/linux/fw_memmap.h   |  114 ++++
 kernel/Makefile             |    2
 kernel/fw_memmap.c          | 1134 +++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 1290 insertions(+), 1234 deletions(-)

Index: linux-2.6/arch/x86/kernel/e820.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/e820.c +++ linux-2.6/arch/x86/kernel/e820.c @@ -12,31 +12,11 @@ #include #include #include -#include #include -#include #include -#include #include -/* - * The e820 map is the map that gets modified e.g. with command line parameters - * and that is also registered with modifications in the kernel resource tree - * with the iomem_resource as parent. - * - * The e820_saved is directly saved after the BIOS-provided memory map is - * copied. It doesn't get modified afterwards. It's registered for the - * /sys/firmware/memmap interface. - * - * That memory map is not modified and is used as base for kexec. The kexec'd - * kernel should get the same memory map as the firmware provides. Then the - * user can e.g. boot the original kernel with mem=1G while still booting the - * next kernel with full memory. - */ -static struct e820map __initdata e820; -static struct e820map __initdata e820_saved; - /* For PCI or other memory-mapped resources */ unsigned long pci_mem_start = 0xaeedbabe; #ifdef CONFIG_PCI @@ -44,577 +24,6 @@ EXPORT_SYMBOL(pci_mem_start); #endif /* - * This function checks if any part of the range is mapped - * with type. - * phys_pud_init() is using it and is _meminit, but we have !after_bootmem - * so could use refok here - */ -int __init_refok e820_any_mapped(u64 start, u64 end, unsigned type) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (type && ei->type != type) - continue; - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - return 1; - } - return 0; -} - -/* - * This function checks if the entire range is mapped with type.
- * - * Note: this function only works correct if the e820 table is sorted and - * not-overlapping, which is the case - */ -int __init e820_all_mapped(u64 start, u64 end, unsigned type) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (type && ei->type != type) - continue; - /* is the region (part) in overlap with the current region ?*/ - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - - /* if the region is at the beginning of we move - * start to the end of the region since it's ok until there - */ - if (ei->addr <= start) - start = ei->addr + ei->size; - /* - * if start is now at or beyond end, we're done, full - * coverage - */ - if (start >= end) - return 1; - } - return 0; -} - -/* - * Add a memory region to the kernel e820 map. - */ -static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, - int type) -{ - int x = e820x->nr_map; - - if (x >= ARRAY_SIZE(e820x->map)) { - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); - return; - } - - e820x->map[x].addr = start; - e820x->map[x].size = size; - e820x->map[x].type = type; - e820x->nr_map++; -} - -void __init e820_add_region(u64 start, u64 size, int type) -{ - __e820_add_region(&e820, start, size, type); -} - -static void __init e820_print_type(u32 type) -{ - switch (type) { - case E820_RAM: - case E820_RESERVED_KERN: - printk(KERN_CONT "(usable)"); - break; - case E820_RESERVED: - printk(KERN_CONT "(reserved)"); - break; - case E820_ACPI: - printk(KERN_CONT "(ACPI data)"); - break; - case E820_NVS: - printk(KERN_CONT "(ACPI NVS)"); - break; - case E820_UNUSABLE: - printk(KERN_CONT "(unusable)"); - break; - default: - printk(KERN_CONT "type %u", type); - break; - } -} - -void __init e820_print_map(char *who) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - printk(KERN_INFO " %s: %016Lx - %016Lx ", who, - (unsigned long long) e820.map[i].addr, - (unsigned long long) - (e820.map[i].addr + e820.map[i].size)); - e820_print_type(e820.map[i].type); - printk(KERN_CONT "\n"); - } -} - -/* - * Sanitize the BIOS e820 map. - * - * Some e820 responses include overlapping entries. The following - * replaces the original e820 map with a new one, removing overlaps, - * and resolving conflicting memory types in favor of highest - * numbered type. - * - * The input parameter biosmap points to an array of 'struct - * e820entry' which on entry has elements in the range [0, *pnr_map) - * valid, and which has space for up to max_nr_map entries. - * On return, the resulting sanitized e820 map entries will be in - * overwritten in the same location, starting at biosmap. - * - * The integer pointed to by pnr_map must be valid on entry (the - * current number of valid entries located at biosmap) and will - * be updated on return, with the new number of valid entries - * (something no more than max_nr_map.) - * - * The return value from sanitize_e820_map() is zero if it - * successfully 'sanitized' the map entries passed in, and is -1 - * if it did nothing, which can happen if either of (1) it was - * only passed one map entry, or (2) any of the input map entries - * were invalid (start + size < start, meaning that the size was - * so big the described memory range wrapped around through zero.) - * - * Visually we're performing the following - * (1,2,3,4 = memory types)... 
- * - * Sample memory map (w/overlaps): - * ____22__________________ - * ______________________4_ - * ____1111________________ - * _44_____________________ - * 11111111________________ - * ____________________33__ - * ___________44___________ - * __________33333_________ - * ______________22________ - * ___________________2222_ - * _________111111111______ - * _____________________11_ - * _________________4______ - * - * Sanitized equivalent (no overlap): - * 1_______________________ - * _44_____________________ - * ___1____________________ - * ____22__________________ - * ______11________________ - * _________1______________ - * __________3_____________ - * ___________44___________ - * _____________33_________ - * _______________2________ - * ________________1_______ - * _________________4______ - * ___________________2____ - * ____________________33__ - * ______________________4_ - */ - -static int __init __sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, - u32 *pnr_map) -{ - struct change_member { - struct e820entry *pbios; /* pointer to original bios entry */ - unsigned long long addr; /* address for this change point */ - }; - static struct change_member change_point_list[2*E820_X_MAX] __initdata; - static struct change_member *change_point[2*E820_X_MAX] __initdata; - static struct e820entry *overlap_list[E820_X_MAX] __initdata; - static struct e820entry new_bios[E820_X_MAX] __initdata; - struct change_member *change_tmp; - unsigned long current_type, last_type; - unsigned long long last_addr; - int chgidx, still_changing; - int overlap_entries; - int new_bios_entry; - int old_nr, new_nr, chg_nr; - int i; - - /* if there's only one memory region, don't bother */ - if (*pnr_map < 2) - return -1; - - old_nr = *pnr_map; - BUG_ON(old_nr > max_nr_map); - - /* bail out if we find any unreasonable addresses in bios map */ - for (i = 0; i < old_nr; i++) - if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) - return -1; - - /* create pointers for initial change-point information (for sorting) */ - for (i = 0; i < 2 * old_nr; i++) - change_point[i] = &change_point_list[i]; - - /* record all known change-points (starting and ending addresses), - omitting those that are for empty memory regions */ - chgidx = 0; - for (i = 0; i < old_nr; i++) { - if (biosmap[i].size != 0) { - change_point[chgidx]->addr = biosmap[i].addr; - change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + - biosmap[i].size; - change_point[chgidx++]->pbios = &biosmap[i]; - } - } - chg_nr = chgidx; - - /* sort change-point list by memory addresses (low -> high) */ - still_changing = 1; - while (still_changing) { - still_changing = 0; - for (i = 1; i < chg_nr; i++) { - unsigned long long curaddr, lastaddr; - unsigned long long curpbaddr, lastpbaddr; - - curaddr = change_point[i]->addr; - lastaddr = change_point[i - 1]->addr; - curpbaddr = change_point[i]->pbios->addr; - lastpbaddr = change_point[i - 1]->pbios->addr; - - /* - * swap entries, when: - * - * curaddr > lastaddr or - * curaddr == lastaddr and curaddr == curpbaddr and - * lastaddr != lastpbaddr - */ - if (curaddr < lastaddr || - (curaddr == lastaddr && curaddr == curpbaddr && - lastaddr != lastpbaddr)) { - change_tmp = change_point[i]; - change_point[i] = change_point[i-1]; - change_point[i-1] = change_tmp; - still_changing = 1; - } - } - } - - /* create a new bios memory map, removing overlaps */ - overlap_entries = 0; /* number of entries in the overlap table */ - new_bios_entry = 0; /* index for creating new 
bios map entries */ - last_type = 0; /* start with undefined memory type */ - last_addr = 0; /* start with 0 as last starting address */ - - /* loop through change-points, determining affect on the new bios map */ - for (chgidx = 0; chgidx < chg_nr; chgidx++) { - /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == - change_point[chgidx]->pbios->addr) { - /* - * add map entry to overlap list (> 1 entry - * implies an overlap) - */ - overlap_list[overlap_entries++] = - change_point[chgidx]->pbios; - } else { - /* - * remove entry from list (order independent, - * so swap with last) - */ - for (i = 0; i < overlap_entries; i++) { - if (overlap_list[i] == - change_point[chgidx]->pbios) - overlap_list[i] = - overlap_list[overlap_entries-1]; - } - overlap_entries--; - } - /* - * if there are overlapping entries, decide which - * "type" to use (larger value takes precedence -- - * 1=usable, 2,3,4,4+=unusable) - */ - current_type = 0; - for (i = 0; i < overlap_entries; i++) - if (overlap_list[i]->type > current_type) - current_type = overlap_list[i]->type; - /* - * continue building up new bios map based on this - * information - */ - if (current_type != last_type) { - if (last_type != 0) { - new_bios[new_bios_entry].size = - change_point[chgidx]->addr - last_addr; - /* - * move forward only if the new size - * was non-zero - */ - if (new_bios[new_bios_entry].size != 0) - /* - * no more space left for new - * bios entries ? - */ - if (++new_bios_entry >= max_nr_map) - break; - } - if (current_type != 0) { - new_bios[new_bios_entry].addr = - change_point[chgidx]->addr; - new_bios[new_bios_entry].type = current_type; - last_addr = change_point[chgidx]->addr; - } - last_type = current_type; - } - } - /* retain count for new bios entries */ - new_nr = new_bios_entry; - - /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); - *pnr_map = new_nr; - - return 0; -} - -int __init sanitize_e820_map(void) -{ - return __sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); -} - -static int __init __append_e820_map(struct e820entry *biosmap, int nr_map) -{ - while (nr_map) { - u64 start = biosmap->addr; - u64 size = biosmap->size; - u64 end = start + size; - u32 type = biosmap->type; - - /* Overflow in 64 bits? Ignore the memory map. */ - if (start > end) - return -1; - - e820_add_region(start, size, type); - - biosmap++; - nr_map--; - } - return 0; -} - -/* - * Copy the BIOS e820 map into a safe place. - * - * Sanity-check it while we're at it.. - * - * If we're lucky and live on a modern system, the setup code - * will have given us a memory map that we can use to properly - * set up memory. If we aren't, we'll fake a memory map. - */ -static int __init append_e820_map(struct e820entry *biosmap, int nr_map) -{ - /* Only one memory region (or negative)? 
Ignore it */ - if (nr_map < 2) - return -1; - - return __append_e820_map(biosmap, nr_map); -} - -static u64 __init __e820_update_range(struct e820map *e820x, u64 start, - u64 size, unsigned old_type, - unsigned new_type) -{ - u64 end; - unsigned int i; - u64 real_updated_size = 0; - - BUG_ON(old_type == new_type); - - if (size > (ULLONG_MAX - start)) - size = ULLONG_MAX - start; - - end = start + size; - printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ", - (unsigned long long) start, - (unsigned long long) end); - e820_print_type(old_type); - printk(KERN_CONT " ==> "); - e820_print_type(new_type); - printk(KERN_CONT "\n"); - - for (i = 0; i < e820x->nr_map; i++) { - struct e820entry *ei = &e820x->map[i]; - u64 final_start, final_end; - u64 ei_end; - - if (ei->type != old_type) - continue; - - ei_end = ei->addr + ei->size; - /* totally covered by new range? */ - if (ei->addr >= start && ei_end <= end) { - ei->type = new_type; - real_updated_size += ei->size; - continue; - } - - /* new range is totally covered? */ - if (ei->addr < start && ei_end > end) { - __e820_add_region(e820x, start, size, new_type); - __e820_add_region(e820x, end, ei_end - end, ei->type); - ei->size = start - ei->addr; - real_updated_size += size; - continue; - } - - /* partially covered */ - final_start = max(start, ei->addr); - final_end = min(end, ei_end); - if (final_start >= final_end) - continue; - - __e820_add_region(e820x, final_start, final_end - final_start, - new_type); - - real_updated_size += final_end - final_start; - - /* - * left range could be head or tail, so need to update - * size at first. - */ - ei->size -= final_end - final_start; - if (ei->addr < final_start) - continue; - ei->addr = final_end; - } - return real_updated_size; -} - -u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, - unsigned new_type) -{ - return __e820_update_range(&e820, start, size, old_type, new_type); -} - -static u64 __init e820_update_range_saved(u64 start, u64 size, - unsigned old_type, unsigned new_type) -{ - return __e820_update_range(&e820_saved, start, size, old_type, - new_type); -} - -/* make e820 not cover the range */ -u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, - int checktype) -{ - int i; - u64 end; - u64 real_removed_size = 0; - - if (size > (ULLONG_MAX - start)) - size = ULLONG_MAX - start; - - end = start + size; - printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ", - (unsigned long long) start, - (unsigned long long) end); - e820_print_type(old_type); - printk(KERN_CONT "\n"); - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - u64 final_start, final_end; - - if (checktype && ei->type != old_type) - continue; - /* totally covered? 
*/ - if (ei->addr >= start && - (ei->addr + ei->size) <= (start + size)) { - real_removed_size += ei->size; - memset(ei, 0, sizeof(struct e820entry)); - continue; - } - /* partially covered */ - final_start = max(start, ei->addr); - final_end = min(start + size, ei->addr + ei->size); - if (final_start >= final_end) - continue; - real_removed_size += final_end - final_start; - - ei->size -= final_end - final_start; - if (ei->addr < final_start) - continue; - ei->addr = final_end; - } - return real_removed_size; -} - -void __init update_e820(void) -{ - u32 nr_map; - - nr_map = e820.nr_map; - if (__sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) - return; - e820.nr_map = nr_map; - printk(KERN_INFO "modified physical RAM map:\n"); - e820_print_map("modified"); -} -static void __init update_e820_saved(void) -{ - u32 nr_map; - - nr_map = e820_saved.nr_map; - if (__sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map)) - return; - e820_saved.nr_map = nr_map; -} -#define MAX_GAP_END 0x100000000ull -/* - * Search for a gap in the e820 memory space from start_addr to end_addr. - */ -__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, - unsigned long start_addr, unsigned long long end_addr) -{ - unsigned long long last; - int i = e820.nr_map; - int found = 0; - - last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END; - - while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; - - if (end < start_addr) - continue; - - /* - * Since "last" is at most 4GB, we know we'll - * fit in 32 bits if this condition is true - */ - if (last > end) { - unsigned long gap = last - end; - - if (gap >= *gapsize) { - *gapsize = gap; - *gapstart = end; - found = 1; - } - } - if (start < last) - last = start; - } - return found; -} - -/* * Search for the biggest gap in the low 32 bits of the e820 * memory space. We pass this space to PCI to assign MMIO resources * for hotplug or unconfigured devices in. @@ -648,6 +57,15 @@ __init void e820_setup_gap(void) pci_mem_start, gapstart, gapsize); } +u64 __init get_max_mapped(void) +{ + u64 end = max_pfn_mapped; + + end <<= PAGE_SHIFT; + + return end; +} + /** * Because of the size limitation of struct boot_params, only first * 128 E820 memory entries are passed to kernel via @@ -673,483 +91,22 @@ void __init parse_e820_ext(struct setup_ e820_print_map("extended"); } -#if defined(CONFIG_X86_64) || \ - (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) -/** - * Find the ranges of physical addresses that do not correspond to - * e820 RAM areas and mark the corresponding pages as nosave for - * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). - * - * This function requires the e820 map to be sorted and without any - * overlapping entries and assumes the first e820 area to be RAM. 
- */ -void __init e820_mark_nosave_regions(unsigned long limit_pfn) -{ - int i; - unsigned long pfn; - - pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); - for (i = 1; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (pfn < PFN_UP(ei->addr)) - register_nosave_region(pfn, PFN_UP(ei->addr)); - - pfn = PFN_DOWN(ei->addr + ei->size); - if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN) - register_nosave_region(PFN_UP(ei->addr), pfn); - - if (pfn >= limit_pfn) - break; - } -} -#endif - -#ifdef CONFIG_HIBERNATION -/** - * Mark ACPI NVS memory region, so that we can save/restore it during - * hibernation and the subsequent resume. - */ -static int __init e820_mark_nvs_memory(void) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (ei->type == E820_NVS) - hibernate_nvs_register(ei->addr, ei->size); - } - - return 0; -} -core_initcall(e820_mark_nvs_memory); -#endif - -/* - * Find a free area with specified alignment in a specific range. - */ -u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - u64 addr; - u64 ei_start, ei_last; - - if (ei->type != E820_RAM) - continue; - - ei_last = ei->addr + ei->size; - ei_start = ei->addr; - addr = find_early_area(ei_start, ei_last, start, end, - size, align); - - if (addr != -1ULL) - return addr; - } - return -1ULL; -} - -u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align) -{ - return find_e820_area(start, end, size, align); -} - -u64 __init get_max_mapped(void) -{ - u64 end = max_pfn_mapped; - - end <<= PAGE_SHIFT; - - return end; -} -/* - * Find next free range after *start - */ -u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - u64 addr; - u64 ei_start, ei_last; - - if (ei->type != E820_RAM) - continue; - - ei_last = ei->addr + ei->size; - ei_start = ei->addr; - addr = find_early_area_size(ei_start, ei_last, start, - sizep, align); - - if (addr != -1ULL) - return addr; - } - - return -1ULL; -} - -/* - * pre allocated 4k and reserved it in e820 - */ -u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) -{ - u64 size = 0; - u64 addr; - u64 start; - - for (start = startt; ; start += size) { - start = find_e820_area_size(start, &size, align); - if (!(start + 1)) - return 0; - if (size >= sizet) - break; - } - -#ifdef CONFIG_X86_32 - if (start >= MAXMEM) - return 0; - if (start + size > MAXMEM) - size = MAXMEM - start; -#endif - - addr = round_down(start + size - sizet, align); - if (addr < start) - return 0; - e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); - e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); - printk(KERN_INFO "update e820 for early_reserve_e820\n"); - update_e820(); - update_e820_saved(); - - return addr; -} - -#ifdef CONFIG_X86_32 -# ifdef CONFIG_X86_PAE -# define MAX_ARCH_PFN (1ULL<<(36-PAGE_SHIFT)) -# else -# define MAX_ARCH_PFN (1ULL<<(32-PAGE_SHIFT)) -# endif -#else /* CONFIG_X86_32 */ -# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT -#endif - -/* - * Find the highest page frame number we have available - */ -static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) -{ - int i; - unsigned long last_pfn = 0; - unsigned long max_arch_pfn = MAX_ARCH_PFN; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long start_pfn; - unsigned long end_pfn; 
- - if (ei->type != type) - continue; - - start_pfn = ei->addr >> PAGE_SHIFT; - end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT; - - if (start_pfn >= limit_pfn) - continue; - if (end_pfn > limit_pfn) { - last_pfn = limit_pfn; - break; - } - if (end_pfn > last_pfn) - last_pfn = end_pfn; - } - - if (last_pfn > max_arch_pfn) - last_pfn = max_arch_pfn; - - printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n", - last_pfn, max_arch_pfn); - return last_pfn; -} -unsigned long __init e820_end_of_ram_pfn(void) -{ - return e820_end_pfn(MAX_ARCH_PFN, E820_RAM); -} - -unsigned long __init e820_end_of_low_ram_pfn(void) -{ - return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); -} /* - * Finds an active region in the address range from start_pfn to last_pfn and - * returns its range in ei_startpfn and ei_endpfn for the e820 entry. - */ -int __init e820_find_active_region(const struct e820entry *ei, - unsigned long start_pfn, - unsigned long last_pfn, - unsigned long *ei_startpfn, - unsigned long *ei_endpfn) -{ - u64 align = PAGE_SIZE; - - *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT; - *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT; - - /* Skip map entries smaller than a page */ - if (*ei_startpfn >= *ei_endpfn) - return 0; - - /* Skip if map is outside the node */ - if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || - *ei_startpfn >= last_pfn) - return 0; - - /* Check for overlaps */ - if (*ei_startpfn < start_pfn) - *ei_startpfn = start_pfn; - if (*ei_endpfn > last_pfn) - *ei_endpfn = last_pfn; - - return 1; -} - -/* Walk the e820 map and register active regions within a node */ -void __init e820_register_active_regions(int nid, unsigned long start_pfn, - unsigned long last_pfn) -{ - unsigned long ei_startpfn; - unsigned long ei_endpfn; - int i; - - for (i = 0; i < e820.nr_map; i++) - if (e820_find_active_region(&e820.map[i], - start_pfn, last_pfn, - &ei_startpfn, &ei_endpfn)) - add_active_range(nid, ei_startpfn, ei_endpfn); -} - -/* - * Find the hole size (in bytes) in the memory range. - * @start: starting address of the memory range to scan - * @end: ending address of the memory range to scan - */ -u64 __init e820_hole_size(u64 start, u64 end) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long last_pfn = end >> PAGE_SHIFT; - unsigned long ei_startpfn, ei_endpfn, ram = 0; - int i; - - for (i = 0; i < e820.nr_map; i++) { - if (e820_find_active_region(&e820.map[i], - start_pfn, last_pfn, - &ei_startpfn, &ei_endpfn)) - ram += ei_endpfn - ei_startpfn; - } - return end - start - ((u64)ram << PAGE_SHIFT); -} - -static void early_panic(char *msg) -{ - early_printk(msg); - panic(msg); -} - -static int userdef __initdata; - -/* "mem=nopentium" disables the 4MB page tables. */ -static int __init parse_memopt(char *p) -{ - u64 mem_size; - - if (!p) - return -EINVAL; - -#ifdef CONFIG_X86_32 - if (!strcmp(p, "nopentium")) { - setup_clear_cpu_cap(X86_FEATURE_PSE); - return 0; - } -#endif - - userdef = 1; - mem_size = memparse(p, &p); - e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); - - return 0; -} -early_param("mem", parse_memopt); - -static int __init parse_memmap_opt(char *p) -{ - char *oldp; - u64 start_at, mem_size; - - if (!p) - return -EINVAL; - - if (!strncmp(p, "exactmap", 8)) { -#ifdef CONFIG_CRASH_DUMP - /* - * If we are doing a crash dump, we still need to know - * the real mem size before original memory map is - * reset. 
- */ - saved_max_pfn = e820_end_of_ram_pfn(); -#endif - e820.nr_map = 0; - userdef = 1; - return 0; - } - - oldp = p; - mem_size = memparse(p, &p); - if (p == oldp) - return -EINVAL; - - userdef = 1; - if (*p == '@') { - start_at = memparse(p+1, &p); - e820_add_region(start_at, mem_size, E820_RAM); - } else if (*p == '#') { - start_at = memparse(p+1, &p); - e820_add_region(start_at, mem_size, E820_ACPI); - } else if (*p == '$') { - start_at = memparse(p+1, &p); - e820_add_region(start_at, mem_size, E820_RESERVED); - } else - e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); - - return *p == '\0' ? 0 : -EINVAL; -} -early_param("memmap", parse_memmap_opt); - -void __init finish_e820_parsing(void) -{ - if (userdef) { - u32 nr = e820.nr_map; - - if (__sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) - early_panic("Invalid user supplied memory map"); - e820.nr_map = nr; - - printk(KERN_INFO "user-defined physical RAM map:\n"); - e820_print_map("user"); - } -} - -static inline const char *e820_type_to_string(int e820_type) -{ - switch (e820_type) { - case E820_RESERVED_KERN: - case E820_RAM: return "System RAM"; - case E820_ACPI: return "ACPI Tables"; - case E820_NVS: return "ACPI Non-volatile Storage"; - case E820_UNUSABLE: return "Unusable memory"; - default: return "reserved"; - } -} - -/* - * Mark e820 reserved areas as busy for the resource manager. + * Copy the BIOS e820 map into a safe place. + * + * Sanity-check it while we're at it.. + * + * If we're lucky and live on a modern system, the setup code + * will have given us a memory map that we can use to properly + * set up memory. If we aren't, we'll fake a memory map. */ -static struct resource __initdata *e820_res; -void __init e820_reserve_resources(void) -{ - int i; - struct resource *res; - u64 end; - - res = alloc_bootmem(sizeof(struct resource) * e820.nr_map); - e820_res = res; - for (i = 0; i < e820.nr_map; i++) { - end = e820.map[i].addr + e820.map[i].size - 1; - if (end != (resource_size_t)end) { - res++; - continue; - } - res->name = e820_type_to_string(e820.map[i].type); - res->start = e820.map[i].addr; - res->end = end; - - res->flags = IORESOURCE_MEM; - - /* - * don't register the region that could be conflicted with - * pci device BAR resource and insert them later in - * pcibios_resource_survey() - */ - if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) { - res->flags |= IORESOURCE_BUSY; - insert_resource(&iomem_resource, res); - } - res++; - } - - for (i = 0; i < e820_saved.nr_map; i++) { - struct e820entry *entry = &e820_saved.map[i]; - firmware_map_add_early(entry->addr, - entry->addr + entry->size - 1, - e820_type_to_string(entry->type)); - } -} - -/* How much should we pad RAM ending depending on where it is? */ -static unsigned long ram_alignment(resource_size_t pos) -{ - unsigned long mb = pos >> 20; - - /* To 64kB in the first megabyte */ - if (!mb) - return 64*1024; - - /* To 1MB in the first 16MB */ - if (mb < 16) - return 1024*1024; - - /* To 64MB for anything above that */ - return 64*1024*1024; -} - -#define MAX_RESOURCE_SIZE ((resource_size_t)-1) - -void __init e820_reserve_resources_late(void) +static int __init append_e820_map(struct e820entry *biosmap, int nr_map) { - int i; - struct resource *res; - - res = e820_res; - for (i = 0; i < e820.nr_map; i++) { - if (!res->parent && res->end) - insert_resource_expand_to_fit(&iomem_resource, res); - res++; - } + /* Only one memory region (or negative)? 
Ignore it */ + if (nr_map < 2) + return -1; - /* - * Try to bump up RAM regions to reasonable boundaries to - * avoid stolen RAM: - */ - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *entry = &e820.map[i]; - u64 start, end; - - if (entry->type != E820_RAM) - continue; - start = entry->addr + entry->size; - end = round_up(start, ram_alignment(start)) - 1; - if (end > MAX_RESOURCE_SIZE) - end = MAX_RESOURCE_SIZE; - if (start >= end) - continue; - printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ", - start, end); - reserve_region_with_split(&iomem_resource, start, end, - "RAM buffer"); - } + return __append_e820_map(biosmap, nr_map); } char *__init default_machine_specific_memory_setup(void) @@ -1181,7 +138,7 @@ char *__init default_machine_specific_me who = "BIOS-e801"; } - e820.nr_map = 0; + clear_e820_map(); e820_add_region(0, LOWMEMSIZE(), E820_RAM); e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM); } @@ -1190,11 +147,6 @@ char *__init default_machine_specific_me return who; } -void __init save_e820_map(void) -{ - memcpy(&e820_saved, &e820, sizeof(struct e820map)); -} - void __init setup_memory_map(void) { char *who; @@ -1206,58 +158,12 @@ void __init setup_memory_map(void) } #ifdef CONFIG_X86_OOSTORE -/* - * Figure what we can cover with MCR's - * - * Shortcut: We know you can't put 4Gig of RAM on a winchip - */ +int centaur_ram_top; void __init get_centaur_ram_top(void) { - u32 clip = 0xFFFFFFFFUL; - u32 top = 0; - int i; - if (boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR) return; - for (i = 0; i < e820.nr_map; i++) { - unsigned long start, end; - - if (e820.map[i].addr > 0xFFFFFFFFUL) - continue; - /* - * Don't MCR over reserved space. Ignore the ISA hole - * we frob around that catastrophe already - */ - if (e820.map[i].type == E820_RESERVED) { - if (e820.map[i].addr >= 0x100000UL && - e820.map[i].addr < clip) - clip = e820.map[i].addr; - continue; - } - start = e820.map[i].addr; - end = e820.map[i].addr + e820.map[i].size; - if (start >= end) - continue; - if (end > top) - top = end; - } - /* - * Everything below 'top' should be RAM except for the ISA hole. - * Because of the limited MCR's we want to map NV/ACPI into our - * MCR range for gunk in RAM - * - * Clip might cause us to MCR insufficient RAM but that is an - * acceptable failure mode and should only bite obscure boxes with - * a VESA hole at 15Mb - * - * The second case Clip sometimes kicks in is when the EBDA is marked - * as reserved. Again we fail safe with reasonable results - */ - if (top > clip) - top = clip; - - centaur_ram_top = top; + centaur_ram_top = __get_special_low_ram_top(); } #endif - Index: linux-2.6/kernel/fw_memmap.c =================================================================== --- /dev/null +++ linux-2.6/kernel/fw_memmap.c @@ -0,0 +1,1134 @@ +/* + * Handle the memory map. + * The functions here do the job until bootmem takes over. + * + * Getting sanitize_e820_map() in sync with i386 version by applying change: + * - Provisions for empty E820 memory regions (reported by certain BIOSes). + * Alex Achenbach , December 2002. + * Venkatesh Pallipadi + * + */ +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * The e820 map is the map that gets modified e.g. with command line parameters + * and that is also registered with modifications in the kernel resource tree + * with the iomem_resource as parent. + * + * The e820_saved is directly saved after the BIOS-provided memory map is + * copied. It doesn't get modified afterwards. 
It's registered for the + * /sys/firmware/memmap interface. + * + * That memory map is not modified and is used as base for kexec. The kexec'd + * kernel should get the same memory map as the firmware provides. Then the + * user can e.g. boot the original kernel with mem=1G while still booting the + * next kernel with full memory. + */ +static struct e820map __initdata e820; +static struct e820map __initdata e820_saved; + +/* + * This function checks if any part of the range is mapped + * with type. + * phys_pud_init() is using it and is _meminit, but we have !after_bootmem + * so could use refok here + */ +int __init_refok e820_any_mapped(u64 start, u64 end, unsigned type) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (type && ei->type != type) + continue; + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + return 1; + } + return 0; +} + +/* + * This function checks if the entire range is mapped with type. + * + * Note: this function only works correct if the e820 table is sorted and + * not-overlapping, which is the case + */ +int __init e820_all_mapped(u64 start, u64 end, unsigned type) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (type && ei->type != type) + continue; + /* is the region (part) in overlap with the current region ?*/ + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + + /* if the region is at the beginning of we move + * start to the end of the region since it's ok until there + */ + if (ei->addr <= start) + start = ei->addr + ei->size; + /* + * if start is now at or beyond end, we're done, full + * coverage + */ + if (start >= end) + return 1; + } + return 0; +} + +/* + * Add a memory region to the kernel e820 map. + */ +static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, + int type) +{ + int x = e820x->nr_map; + + if (x >= ARRAY_SIZE(e820x->map)) { + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); + return; + } + + e820x->map[x].addr = start; + e820x->map[x].size = size; + e820x->map[x].type = type; + e820x->nr_map++; +} + +void __init e820_add_region(u64 start, u64 size, int type) +{ + __e820_add_region(&e820, start, size, type); +} + +static void __init e820_print_type(u32 type) +{ + switch (type) { + case E820_RAM: + case E820_RESERVED_KERN: + printk(KERN_CONT "(usable)"); + break; + case E820_RESERVED: + printk(KERN_CONT "(reserved)"); + break; + case E820_ACPI: + printk(KERN_CONT "(ACPI data)"); + break; + case E820_NVS: + printk(KERN_CONT "(ACPI NVS)"); + break; + case E820_UNUSABLE: + printk(KERN_CONT "(unusable)"); + break; + default: + printk(KERN_CONT "type %u", type); + break; + } +} + +void __init e820_print_map(char *who) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + printk(KERN_INFO " %s: %016Lx - %016Lx ", who, + (unsigned long long) e820.map[i].addr, + (unsigned long long) + (e820.map[i].addr + e820.map[i].size)); + e820_print_type(e820.map[i].type); + printk(KERN_CONT "\n"); + } +} + +/* + * Sanitize the BIOS e820 map. + * + * Some e820 responses include overlapping entries. The following + * replaces the original e820 map with a new one, removing overlaps, + * and resolving conflicting memory types in favor of highest + * numbered type. + * + * The input parameter biosmap points to an array of 'struct + * e820entry' which on entry has elements in the range [0, *pnr_map) + * valid, and which has space for up to max_nr_map entries. 
+ * On return, the resulting sanitized e820 map entries will be in + * overwritten in the same location, starting at biosmap. + * + * The integer pointed to by pnr_map must be valid on entry (the + * current number of valid entries located at biosmap) and will + * be updated on return, with the new number of valid entries + * (something no more than max_nr_map.) + * + * The return value from sanitize_e820_map() is zero if it + * successfully 'sanitized' the map entries passed in, and is -1 + * if it did nothing, which can happen if either of (1) it was + * only passed one map entry, or (2) any of the input map entries + * were invalid (start + size < start, meaning that the size was + * so big the described memory range wrapped around through zero.) + * + * Visually we're performing the following + * (1,2,3,4 = memory types)... + * + * Sample memory map (w/overlaps): + * ____22__________________ + * ______________________4_ + * ____1111________________ + * _44_____________________ + * 11111111________________ + * ____________________33__ + * ___________44___________ + * __________33333_________ + * ______________22________ + * ___________________2222_ + * _________111111111______ + * _____________________11_ + * _________________4______ + * + * Sanitized equivalent (no overlap): + * 1_______________________ + * _44_____________________ + * ___1____________________ + * ____22__________________ + * ______11________________ + * _________1______________ + * __________3_____________ + * ___________44___________ + * _____________33_________ + * _______________2________ + * ________________1_______ + * _________________4______ + * ___________________2____ + * ____________________33__ + * ______________________4_ + */ + +int __init __sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, + u32 *pnr_map) +{ + struct change_member { + struct e820entry *pbios; /* pointer to original bios entry */ + unsigned long long addr; /* address for this change point */ + }; + static struct change_member change_point_list[2*E820_X_MAX] __initdata; + static struct change_member *change_point[2*E820_X_MAX] __initdata; + static struct e820entry *overlap_list[E820_X_MAX] __initdata; + static struct e820entry new_bios[E820_X_MAX] __initdata; + struct change_member *change_tmp; + unsigned long current_type, last_type; + unsigned long long last_addr; + int chgidx, still_changing; + int overlap_entries; + int new_bios_entry; + int old_nr, new_nr, chg_nr; + int i; + + /* if there's only one memory region, don't bother */ + if (*pnr_map < 2) + return -1; + + old_nr = *pnr_map; + BUG_ON(old_nr > max_nr_map); + + /* bail out if we find any unreasonable addresses in bios map */ + for (i = 0; i < old_nr; i++) + if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) + return -1; + + /* create pointers for initial change-point information (for sorting) */ + for (i = 0; i < 2 * old_nr; i++) + change_point[i] = &change_point_list[i]; + + /* record all known change-points (starting and ending addresses), + omitting those that are for empty memory regions */ + chgidx = 0; + for (i = 0; i < old_nr; i++) { + if (biosmap[i].size != 0) { + change_point[chgidx]->addr = biosmap[i].addr; + change_point[chgidx++]->pbios = &biosmap[i]; + change_point[chgidx]->addr = biosmap[i].addr + + biosmap[i].size; + change_point[chgidx++]->pbios = &biosmap[i]; + } + } + chg_nr = chgidx; + + /* sort change-point list by memory addresses (low -> high) */ + still_changing = 1; + while (still_changing) { + still_changing = 0; + for (i = 1; i < 
chg_nr; i++) { + unsigned long long curaddr, lastaddr; + unsigned long long curpbaddr, lastpbaddr; + + curaddr = change_point[i]->addr; + lastaddr = change_point[i - 1]->addr; + curpbaddr = change_point[i]->pbios->addr; + lastpbaddr = change_point[i - 1]->pbios->addr; + + /* + * swap entries, when: + * + * curaddr > lastaddr or + * curaddr == lastaddr and curaddr == curpbaddr and + * lastaddr != lastpbaddr + */ + if (curaddr < lastaddr || + (curaddr == lastaddr && curaddr == curpbaddr && + lastaddr != lastpbaddr)) { + change_tmp = change_point[i]; + change_point[i] = change_point[i-1]; + change_point[i-1] = change_tmp; + still_changing = 1; + } + } + } + + /* create a new bios memory map, removing overlaps */ + overlap_entries = 0; /* number of entries in the overlap table */ + new_bios_entry = 0; /* index for creating new bios map entries */ + last_type = 0; /* start with undefined memory type */ + last_addr = 0; /* start with 0 as last starting address */ + + /* loop through change-points, determining affect on the new bios map */ + for (chgidx = 0; chgidx < chg_nr; chgidx++) { + /* keep track of all overlapping bios entries */ + if (change_point[chgidx]->addr == + change_point[chgidx]->pbios->addr) { + /* + * add map entry to overlap list (> 1 entry + * implies an overlap) + */ + overlap_list[overlap_entries++] = + change_point[chgidx]->pbios; + } else { + /* + * remove entry from list (order independent, + * so swap with last) + */ + for (i = 0; i < overlap_entries; i++) { + if (overlap_list[i] == + change_point[chgidx]->pbios) + overlap_list[i] = + overlap_list[overlap_entries-1]; + } + overlap_entries--; + } + /* + * if there are overlapping entries, decide which + * "type" to use (larger value takes precedence -- + * 1=usable, 2,3,4,4+=unusable) + */ + current_type = 0; + for (i = 0; i < overlap_entries; i++) + if (overlap_list[i]->type > current_type) + current_type = overlap_list[i]->type; + /* + * continue building up new bios map based on this + * information + */ + if (current_type != last_type) { + if (last_type != 0) { + new_bios[new_bios_entry].size = + change_point[chgidx]->addr - last_addr; + /* + * move forward only if the new size + * was non-zero + */ + if (new_bios[new_bios_entry].size != 0) + /* + * no more space left for new + * bios entries ? + */ + if (++new_bios_entry >= max_nr_map) + break; + } + if (current_type != 0) { + new_bios[new_bios_entry].addr = + change_point[chgidx]->addr; + new_bios[new_bios_entry].type = current_type; + last_addr = change_point[chgidx]->addr; + } + last_type = current_type; + } + } + /* retain count for new bios entries */ + new_nr = new_bios_entry; + + /* copy new bios mapping into original location */ + memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); + *pnr_map = new_nr; + + return 0; +} + +int __init sanitize_e820_map(void) +{ + int max_nr_map = ARRAY_SIZE(e820.map); + + return __sanitize_e820_map(e820.map, max_nr_map, &e820.nr_map); +} + +int __init __append_e820_map(struct e820entry *biosmap, int nr_map) +{ + while (nr_map) { + u64 start = biosmap->addr; + u64 size = biosmap->size; + u64 end = start + size; + u32 type = biosmap->type; + + /* Overflow in 64 bits? Ignore the memory map. 
*/ + if (start > end) + return -1; + + e820_add_region(start, size, type); + + biosmap++; + nr_map--; + } + return 0; +} + +void __init clear_e820_map(void) +{ + e820.nr_map = 0; +} + +static u64 __init __e820_update_range(struct e820map *e820x, u64 start, + u64 size, unsigned old_type, + unsigned new_type) +{ + u64 end; + unsigned int i; + u64 real_updated_size = 0; + + BUG_ON(old_type == new_type); + + if (size > (ULLONG_MAX - start)) + size = ULLONG_MAX - start; + + end = start + size; + printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ", + (unsigned long long) start, + (unsigned long long) end); + e820_print_type(old_type); + printk(KERN_CONT " ==> "); + e820_print_type(new_type); + printk(KERN_CONT "\n"); + + for (i = 0; i < e820x->nr_map; i++) { + struct e820entry *ei = &e820x->map[i]; + u64 final_start, final_end; + u64 ei_end; + + if (ei->type != old_type) + continue; + + ei_end = ei->addr + ei->size; + /* totally covered by new range? */ + if (ei->addr >= start && ei_end <= end) { + ei->type = new_type; + real_updated_size += ei->size; + continue; + } + + /* new range is totally covered? */ + if (ei->addr < start && ei_end > end) { + __e820_add_region(e820x, start, size, new_type); + __e820_add_region(e820x, end, ei_end - end, ei->type); + ei->size = start - ei->addr; + real_updated_size += size; + continue; + } + + /* partially covered */ + final_start = max(start, ei->addr); + final_end = min(end, ei_end); + if (final_start >= final_end) + continue; + + __e820_add_region(e820x, final_start, final_end - final_start, + new_type); + + real_updated_size += final_end - final_start; + + /* + * left range could be head or tail, so need to update + * size at first. + */ + ei->size -= final_end - final_start; + if (ei->addr < final_start) + continue; + ei->addr = final_end; + } + return real_updated_size; +} + +u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, + unsigned new_type) +{ + return __e820_update_range(&e820, start, size, old_type, new_type); +} + +static u64 __init e820_update_range_saved(u64 start, u64 size, + unsigned old_type, unsigned new_type) +{ + return __e820_update_range(&e820_saved, start, size, old_type, + new_type); +} + +/* make e820 not cover the range */ +u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, + int checktype) +{ + int i; + u64 end; + u64 real_removed_size = 0; + + if (size > (ULLONG_MAX - start)) + size = ULLONG_MAX - start; + + end = start + size; + printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ", + (unsigned long long) start, + (unsigned long long) end); + e820_print_type(old_type); + printk(KERN_CONT "\n"); + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 final_start, final_end; + + if (checktype && ei->type != old_type) + continue; + /* totally covered? 
*/ + if (ei->addr >= start && + (ei->addr + ei->size) <= (start + size)) { + real_removed_size += ei->size; + memset(ei, 0, sizeof(struct e820entry)); + continue; + } + /* partially covered */ + final_start = max(start, ei->addr); + final_end = min(start + size, ei->addr + ei->size); + if (final_start >= final_end) + continue; + real_removed_size += final_end - final_start; + + ei->size -= final_end - final_start; + if (ei->addr < final_start) + continue; + ei->addr = final_end; + } + return real_removed_size; +} + +void __init update_e820(void) +{ + u32 nr_map; + + nr_map = e820.nr_map; + if (__sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) + return; + e820.nr_map = nr_map; + printk(KERN_INFO "modified physical RAM map:\n"); + e820_print_map("modified"); +} + +static void __init update_e820_saved(void) +{ + u32 nr_map; + int max_nr_map = ARRAY_SIZE(e820_saved.map); + + nr_map = e820_saved.nr_map; + if (__sanitize_e820_map(e820_saved.map, max_nr_map, &nr_map)) + return; + e820_saved.nr_map = nr_map; +} + +/* + * Search for a gap in the e820 memory space from start_addr to end_addr. + */ +__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, + unsigned long start_addr, unsigned long long end_addr) +{ + unsigned long long last; + int i = e820.nr_map; + int found = 0; + + last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END; + + while (--i >= 0) { + unsigned long long start = e820.map[i].addr; + unsigned long long end = start + e820.map[i].size; + + if (end < start_addr) + continue; + + /* + * Since "last" is at most 4GB, we know we'll + * fit in 32 bits if this condition is true + */ + if (last > end) { + unsigned long gap = last - end; + + if (gap >= *gapsize) { + *gapsize = gap; + *gapstart = end; + found = 1; + } + } + if (start < last) + last = start; + } + return found; +} + +#if defined(CONFIG_X86_64) || \ + (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) +/** + * Find the ranges of physical addresses that do not correspond to + * e820 RAM areas and mark the corresponding pages as nosave for + * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). + * + * This function requires the e820 map to be sorted and without any + * overlapping entries and assumes the first e820 area to be RAM. + */ +void __init e820_mark_nosave_regions(unsigned long limit_pfn) +{ + int i; + unsigned long pfn; + + pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); + for (i = 1; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (pfn < PFN_UP(ei->addr)) + register_nosave_region(pfn, PFN_UP(ei->addr)); + + pfn = PFN_DOWN(ei->addr + ei->size); + if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN) + register_nosave_region(PFN_UP(ei->addr), pfn); + + if (pfn >= limit_pfn) + break; + } +} +#endif + +#ifdef CONFIG_HIBERNATION +/** + * Mark ACPI NVS memory region, so that we can save/restore it during + * hibernation and the subsequent resume. + */ +static int __init e820_mark_nvs_memory(void) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (ei->type == E820_NVS) + hibernate_nvs_register(ei->addr, ei->size); + } + + return 0; +} +core_initcall(e820_mark_nvs_memory); +#endif + +/* + * Find a free area with specified alignment in a specific range. 
+ */ +u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 addr; + u64 ei_start, ei_last; + + if (ei->type != E820_RAM) + continue; + + ei_last = ei->addr + ei->size; + ei_start = ei->addr; + addr = find_early_area(ei_start, ei_last, start, end, + size, align); + + if (addr != -1ULL) + return addr; + } + return -1ULL; +} + +u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align) +{ + return find_e820_area(start, end, size, align); +} + +/* + * Find next free range after *start + */ +u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 addr; + u64 ei_start, ei_last; + + if (ei->type != E820_RAM) + continue; + + ei_last = ei->addr + ei->size; + ei_start = ei->addr; + addr = find_early_area_size(ei_start, ei_last, start, + sizep, align); + + if (addr != -1ULL) + return addr; + } + + return -1ULL; +} + +/* + * pre allocated 4k and reserved it in e820 + */ +u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) +{ + u64 size = 0; + u64 addr; + u64 start; + + for (start = startt; ; start += size) { + start = find_e820_area_size(start, &size, align); + if (!(start + 1)) + return 0; + if (size >= sizet) + break; + } + +#ifdef CONFIG_X86_32 + if (start >= MAXMEM) + return 0; + if (start + size > MAXMEM) + size = MAXMEM - start; +#endif + + addr = round_down(start + size - sizet, align); + if (addr < start) + return 0; + e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); + e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); + printk(KERN_INFO "update e820 for early_reserve_e820\n"); + update_e820(); + update_e820_saved(); + + return addr; +} + +#ifdef CONFIG_X86_32 +# ifdef CONFIG_X86_PAE +# define MAX_ARCH_PFN (1ULL<<(36-PAGE_SHIFT)) +# else +# define MAX_ARCH_PFN (1ULL<<(32-PAGE_SHIFT)) +# endif +#else /* CONFIG_X86_32 */ +# define MAX_ARCH_PFN (MAXMEM>>PAGE_SHIFT) +#endif + +/* + * Find the highest page frame number we have available + */ +static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) +{ + int i; + unsigned long last_pfn = 0; + unsigned long max_arch_pfn = MAX_ARCH_PFN; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + unsigned long start_pfn; + unsigned long end_pfn; + + if (ei->type != type) + continue; + + start_pfn = ei->addr >> PAGE_SHIFT; + end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT; + + if (start_pfn >= limit_pfn) + continue; + if (end_pfn > limit_pfn) { + last_pfn = limit_pfn; + break; + } + if (end_pfn > last_pfn) + last_pfn = end_pfn; + } + + if (last_pfn > max_arch_pfn) + last_pfn = max_arch_pfn; + + printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n", + last_pfn, max_arch_pfn); + return last_pfn; +} +unsigned long __init e820_end_of_ram_pfn(void) +{ + return e820_end_pfn(MAX_ARCH_PFN, E820_RAM); +} + +unsigned long __init e820_end_of_low_ram_pfn(void) +{ + return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); +} +/* + * Finds an active region in the address range from start_pfn to last_pfn and + * returns its range in ei_startpfn and ei_endpfn for the e820 entry. 
+ */ +int __init e820_find_active_region(const struct e820entry *ei, + unsigned long start_pfn, + unsigned long last_pfn, + unsigned long *ei_startpfn, + unsigned long *ei_endpfn) +{ + u64 align = PAGE_SIZE; + + *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT; + *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT; + + /* Skip map entries smaller than a page */ + if (*ei_startpfn >= *ei_endpfn) + return 0; + + /* Skip if map is outside the node */ + if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || + *ei_startpfn >= last_pfn) + return 0; + + /* Check for overlaps */ + if (*ei_startpfn < start_pfn) + *ei_startpfn = start_pfn; + if (*ei_endpfn > last_pfn) + *ei_endpfn = last_pfn; + + return 1; +} + +/* Walk the e820 map and register active regions within a node */ +void __init e820_register_active_regions(int nid, unsigned long start_pfn, + unsigned long last_pfn) +{ + unsigned long ei_startpfn; + unsigned long ei_endpfn; + int i; + + for (i = 0; i < e820.nr_map; i++) + if (e820_find_active_region(&e820.map[i], + start_pfn, last_pfn, + &ei_startpfn, &ei_endpfn)) + add_active_range(nid, ei_startpfn, ei_endpfn); +} + +/* + * Find the hole size (in bytes) in the memory range. + * @start: starting address of the memory range to scan + * @end: ending address of the memory range to scan + */ +u64 __init e820_hole_size(u64 start, u64 end) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long last_pfn = end >> PAGE_SHIFT; + unsigned long ei_startpfn, ei_endpfn, ram = 0; + int i; + + for (i = 0; i < e820.nr_map; i++) { + if (e820_find_active_region(&e820.map[i], + start_pfn, last_pfn, + &ei_startpfn, &ei_endpfn)) + ram += ei_endpfn - ei_startpfn; + } + return end - start - ((u64)ram << PAGE_SHIFT); +} + +static void early_panic(char *msg) +{ + early_printk(msg); + panic(msg); +} + +static int userdef __initdata; + +/* "mem=nopentium" disables the 4MB page tables. */ +static int __init parse_memopt(char *p) +{ + u64 mem_size; + + if (!p) + return -EINVAL; + +#ifdef CONFIG_X86_32 + if (!strcmp(p, "nopentium")) { + setup_clear_cpu_cap(X86_FEATURE_PSE); + return 0; + } +#endif + + userdef = 1; + mem_size = memparse(p, &p); + e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); + + return 0; +} +early_param("mem", parse_memopt); + +static int __init parse_memmap_opt(char *p) +{ + char *oldp; + u64 start_at, mem_size; + + if (!p) + return -EINVAL; + + if (!strncmp(p, "exactmap", 8)) { +#ifdef CONFIG_CRASH_DUMP + /* + * If we are doing a crash dump, we still need to know + * the real mem size before original memory map is + * reset. + */ + saved_max_pfn = e820_end_of_ram_pfn(); +#endif + e820.nr_map = 0; + userdef = 1; + return 0; + } + + oldp = p; + mem_size = memparse(p, &p); + if (p == oldp) + return -EINVAL; + + userdef = 1; + if (*p == '@') { + start_at = memparse(p+1, &p); + e820_add_region(start_at, mem_size, E820_RAM); + } else if (*p == '#') { + start_at = memparse(p+1, &p); + e820_add_region(start_at, mem_size, E820_ACPI); + } else if (*p == '$') { + start_at = memparse(p+1, &p); + e820_add_region(start_at, mem_size, E820_RESERVED); + } else + e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); + + return *p == '\0' ? 
0 : -EINVAL; +} +early_param("memmap", parse_memmap_opt); + +void __init finish_e820_parsing(void) +{ + if (userdef) { + u32 nr = e820.nr_map; + int max_nr_map = ARRAY_SIZE(e820.map); + + if (__sanitize_e820_map(e820.map, max_nr_map, &nr) < 0) + early_panic("Invalid user supplied memory map"); + e820.nr_map = nr; + + printk(KERN_INFO "user-defined physical RAM map:\n"); + e820_print_map("user"); + } +} + +static inline const char *e820_type_to_string(int e820_type) +{ + switch (e820_type) { + case E820_RESERVED_KERN: + case E820_RAM: return "System RAM"; + case E820_ACPI: return "ACPI Tables"; + case E820_NVS: return "ACPI Non-volatile Storage"; + case E820_UNUSABLE: return "Unusable memory"; + default: return "reserved"; + } +} + +/* + * Mark e820 reserved areas as busy for the resource manager. + */ +static struct resource __initdata *e820_res; +void __init e820_reserve_resources(void) +{ + int i; + struct resource *res; + u64 end; + + res = alloc_bootmem(sizeof(struct resource) * e820.nr_map); + e820_res = res; + for (i = 0; i < e820.nr_map; i++) { + end = e820.map[i].addr + e820.map[i].size - 1; + if (end != (resource_size_t)end) { + res++; + continue; + } + res->name = e820_type_to_string(e820.map[i].type); + res->start = e820.map[i].addr; + res->end = end; + + res->flags = IORESOURCE_MEM; + + /* + * don't register the region that could be conflicted with + * pci device BAR resource and insert them later in + * pcibios_resource_survey() + */ + if (e820.map[i].type != E820_RESERVED || + res->start < (1ULL<<20)) { + res->flags |= IORESOURCE_BUSY; + insert_resource(&iomem_resource, res); + } + res++; + } + + for (i = 0; i < e820_saved.nr_map; i++) { + struct e820entry *entry = &e820_saved.map[i]; + firmware_map_add_early(entry->addr, + entry->addr + entry->size - 1, + e820_type_to_string(entry->type)); + } +} + +/* How much should we pad RAM ending depending on where it is? 
*/ +static unsigned long __init ram_alignment(resource_size_t pos) +{ + unsigned long mb = pos >> 20; + + /* To 64kB in the first megabyte */ + if (!mb) + return 64*1024; + + /* To 1MB in the first 16MB */ + if (mb < 16) + return 1024*1024; + + /* To 64MB for anything above that */ + return 64*1024*1024; +} + +#define MAX_RESOURCE_SIZE ((resource_size_t)-1) + +void __init e820_reserve_resources_late(void) +{ + int i; + struct resource *res; + + res = e820_res; + for (i = 0; i < e820.nr_map; i++) { + if (!res->parent && res->end) + insert_resource_expand_to_fit(&iomem_resource, res); + res++; + } + + /* + * Try to bump up RAM regions to reasonable boundaries to + * avoid stolen RAM: + */ + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *entry = &e820.map[i]; + u64 start, end; + + if (entry->type != E820_RAM) + continue; + start = entry->addr + entry->size; + end = round_up(start, ram_alignment(start)) - 1; + if (end > MAX_RESOURCE_SIZE) + end = MAX_RESOURCE_SIZE; + if (start >= end) + continue; + printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ", + start, end); + reserve_region_with_split(&iomem_resource, start, end, + "RAM buffer"); + } +} + +void __init save_e820_map(void) +{ + memcpy(&e820_saved, &e820, sizeof(struct e820map)); +} + +#ifdef CONFIG_X86_OOSTORE + +/* + * this one should stay in arch/x86/kernel/e820.c, + * but we want to keep e820 static here + */ +/* + * Figure what we can cover with MCR's + * + * Shortcut: We know you can't put 4Gig of RAM on a winchip + */ +int __init __get_special_low_ram_top(void) +{ + u32 clip = 0xFFFFFFFFUL; + u32 top = 0; + int i; + + for (i = 0; i < e820.nr_map; i++) { + unsigned long start, end; + + if (e820.map[i].addr > 0xFFFFFFFFUL) + continue; + /* + * Don't MCR over reserved space. Ignore the ISA hole + * we frob around that catastrophe already + */ + if (e820.map[i].type == E820_RESERVED) { + if (e820.map[i].addr >= 0x100000UL && + e820.map[i].addr < clip) + clip = e820.map[i].addr; + continue; + } + start = e820.map[i].addr; + end = e820.map[i].addr + e820.map[i].size; + if (start >= end) + continue; + if (end > top) + top = end; + } + /* + * Everything below 'top' should be RAM except for the ISA hole. + * Because of the limited MCR's we want to map NV/ACPI into our + * MCR range for gunk in RAM + * + * Clip might cause us to MCR insufficient RAM but that is an + * acceptable failure mode and should only bite obscure boxes with + * a VESA hole at 15Mb + * + * The second case Clip sometimes kicks in is when the EBDA is marked + * as reserved. Again we fail safe with reasonable results + */ + if (top > clip) + top = clip; + + return top; +} +#endif + Index: linux-2.6/arch/x86/include/asm/e820.h =================================================================== --- linux-2.6.orig/arch/x86/include/asm/e820.h +++ linux-2.6/arch/x86/include/asm/e820.h @@ -1,65 +1,9 @@ #ifndef _ASM_X86_E820_H #define _ASM_X86_E820_H -#define E820MAP 0x2d0 /* our map */ -#define E820MAX 128 /* number of entries in E820MAP */ -/* - * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the - * constrained space in the zeropage. If we have more nodes than - * that, and if we've booted off EFI firmware, then the EFI tables - * passed us from the EFI firmware can list more nodes. 
Size our - * internal memory map tables to have room for these additional - * nodes, based on up to three entries per node for which the - * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT), - * plus E820MAX, allowing space for the possible duplicate E820 - * entries that might need room in the same arrays, prior to the - * call to sanitize_e820_map() to remove duplicates. The allowance - * of three memory map entries per node is "enough" entries for - * the initial hardware platform motivating this mechanism to make - * use of additional EFI map entries. Future platforms may want - * to allow more than three entries per node or otherwise refine - * this size. - */ - -/* - * Odd: 'make headers_check' complains about numa.h if I try - * to collapse the next two #ifdef lines to a single line: - * #if defined(__KERNEL__) && defined(CONFIG_EFI) - */ -#ifdef __KERNEL__ -#ifdef CONFIG_EFI -#include -#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES) -#else /* ! CONFIG_EFI */ -#define E820_X_MAX E820MAX -#endif -#else /* ! __KERNEL__ */ -#define E820_X_MAX E820MAX -#endif - -#define E820NR 0x1e8 /* # entries in E820MAP */ - -#define E820_RAM 1 -#define E820_RESERVED 2 -#define E820_ACPI 3 -#define E820_NVS 4 -#define E820_UNUSABLE 5 - -/* reserved RAM used by kernel itself */ -#define E820_RESERVED_KERN 128 +#include #ifndef __ASSEMBLY__ -#include -struct e820entry { - __u64 addr; /* start of memory segment */ - __u64 size; /* size of memory segment */ - __u32 type; /* type of memory segment */ -} __attribute__((packed)); - -struct e820map { - __u32 nr_map; - struct e820entry map[E820_X_MAX]; -}; #define ISA_START_ADDRESS 0xa0000 #define ISA_END_ADDRESS 0x100000 @@ -69,73 +13,20 @@ struct e820map { #ifdef __KERNEL__ -#ifdef CONFIG_X86_OOSTORE -extern int centaur_ram_top; -void get_centaur_ram_top(void); +#ifdef CONFIG_MEMTEST +extern void early_memtest(unsigned long start, unsigned long end); #else -static inline void get_centaur_ram_top(void) +static inline void early_memtest(unsigned long start, unsigned long end) { } #endif extern unsigned long pci_mem_start; -extern int e820_any_mapped(u64 start, u64 end, unsigned type); -extern int e820_all_mapped(u64 start, u64 end, unsigned type); -extern void e820_add_region(u64 start, u64 size, int type); -extern void e820_print_map(char *who); -int sanitize_e820_map(void); -void save_e820_map(void); -extern u64 e820_update_range(u64 start, u64 size, unsigned old_type, - unsigned new_type); -extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type, - int checktype); -extern void update_e820(void); extern void e820_setup_gap(void); -extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, - unsigned long start_addr, unsigned long long end_addr); struct setup_data; extern void parse_e820_ext(struct setup_data *data, unsigned long pa_data); - -#if defined(CONFIG_X86_64) || \ - (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) -extern void e820_mark_nosave_regions(unsigned long limit_pfn); -#else -static inline void e820_mark_nosave_regions(unsigned long limit_pfn) -{ -} -#endif - -#ifdef CONFIG_MEMTEST -extern void early_memtest(unsigned long start, unsigned long end); -#else -static inline void early_memtest(unsigned long start, unsigned long end) -{ -} -#endif - -extern unsigned long end_user_pfn; - -extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); -extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); -extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); -#include - 
-extern unsigned long e820_end_of_ram_pfn(void); -extern unsigned long e820_end_of_low_ram_pfn(void); -extern int e820_find_active_region(const struct e820entry *ei, - unsigned long start_pfn, - unsigned long last_pfn, - unsigned long *ei_startpfn, - unsigned long *ei_endpfn); -extern void e820_register_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn); -extern u64 e820_hole_size(u64 start, u64 end); -extern void finish_e820_parsing(void); -extern void e820_reserve_resources(void); -extern void e820_reserve_resources_late(void); -extern void setup_memory_map(void); extern char *default_machine_specific_memory_setup(void); - +extern void setup_memory_map(void); /* * Returns true iff the specified range [s,e) is completely contained inside * the ISA region. @@ -145,7 +36,18 @@ static inline bool is_ISA_range(u64 s, u return s >= ISA_START_ADDRESS && e <= ISA_END_ADDRESS; } +#ifdef CONFIG_X86_OOSTORE +extern int centaur_ram_top; +int __get_special_low_ram_top(void); +void get_centaur_ram_top(void); +#else +static inline void get_centaur_ram_top(void) +{ +} +#endif + #endif /* __KERNEL__ */ + #endif /* __ASSEMBLY__ */ #ifdef __KERNEL__ Index: linux-2.6/include/linux/fw_memmap.h =================================================================== --- /dev/null +++ linux-2.6/include/linux/fw_memmap.h @@ -0,0 +1,114 @@ +#ifndef _LINUX_FW_MEMMAP_H +#define _LINUX_FW_MEMMAP_H +#define E820MAX 128 /* number of entries in E820MAP */ + +/* + * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the + * constrained space in the zeropage. If we have more nodes than + * that, and if we've booted off EFI firmware, then the EFI tables + * passed us from the EFI firmware can list more nodes. Size our + * internal memory map tables to have room for these additional + * nodes, based on up to three entries per node for which the + * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT), + * plus E820MAX, allowing space for the possible duplicate E820 + * entries that might need room in the same arrays, prior to the + * call to sanitize_e820_map() to remove duplicates. The allowance + * of three memory map entries per node is "enough" entries for + * the initial hardware platform motivating this mechanism to make + * use of additional EFI map entries. Future platforms may want + * to allow more than three entries per node or otherwise refine + * this size. + */ + +/* + * Odd: 'make headers_check' complains about numa.h if I try + * to collapse the next two #ifdef lines to a single line: + * #if defined(__KERNEL__) && defined(CONFIG_EFI) + */ +#ifdef __KERNEL__ +#ifdef CONFIG_EFI +#include +#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES) +#else /* ! CONFIG_EFI */ +#define E820_X_MAX E820MAX +#endif +#else /* ! 
__KERNEL__ */ +#define E820_X_MAX E820MAX +#endif + +#define E820_RAM 1 +#define E820_RESERVED 2 +#define E820_ACPI 3 +#define E820_NVS 4 +#define E820_UNUSABLE 5 + +/* reserved RAM used by kernel itself */ +#define E820_RESERVED_KERN 128 + +#ifndef __ASSEMBLY__ +#include +struct e820entry { + __u64 addr; /* start of memory segment */ + __u64 size; /* size of memory segment */ + __u32 type; /* type of memory segment */ +} __attribute__((packed)); + +struct e820map { + __u32 nr_map; + struct e820entry map[E820_X_MAX]; +}; + +#ifdef __KERNEL__ + +void clear_e820_map(void); +int __append_e820_map(struct e820entry *biosmap, int nr_map); +extern int e820_any_mapped(u64 start, u64 end, unsigned type); +extern int e820_all_mapped(u64 start, u64 end, unsigned type); +extern void e820_add_region(u64 start, u64 size, int type); +extern void e820_print_map(char *who); +int sanitize_e820_map(void); +int __sanitize_e820_map(struct e820entry *biosmap, int max_nr, u32 *pnr_map); +void save_e820_map(void); +extern u64 e820_update_range(u64 start, u64 size, unsigned old_type, + unsigned new_type); +extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type, + int checktype); +extern void update_e820(void); +#define MAX_GAP_END 0x100000000ull +extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, + unsigned long start_addr, unsigned long long end_addr); + +#if defined(CONFIG_X86_64) || \ + (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) +extern void e820_mark_nosave_regions(unsigned long limit_pfn); +#else +static inline void e820_mark_nosave_regions(unsigned long limit_pfn) +{ +} +#endif + +extern unsigned long end_user_pfn; + +extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); +extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); +extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); +#include + +extern unsigned long e820_end_of_ram_pfn(void); +extern unsigned long e820_end_of_low_ram_pfn(void); +extern int e820_find_active_region(const struct e820entry *ei, + unsigned long start_pfn, + unsigned long last_pfn, + unsigned long *ei_startpfn, + unsigned long *ei_endpfn); +extern void e820_register_active_regions(int nid, unsigned long start_pfn, + unsigned long end_pfn); +extern u64 e820_hole_size(u64 start, u64 end); +extern void finish_e820_parsing(void); +extern void e820_reserve_resources(void); +extern void e820_reserve_resources_late(void); + +#endif /* __KERNEL__ */ +#endif /* __ASSEMBLY__ */ + +#endif /* _LINUX_FW_MEMMAP_H */ Index: linux-2.6/kernel/Makefile =================================================================== --- linux-2.6.orig/kernel/Makefile +++ linux-2.6/kernel/Makefile @@ -11,7 +11,7 @@ obj-y = sched.o fork.o exec_domain.o hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ async.o range.o -obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o +obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o fw_memmap.o obj-y += groups.o ifdef CONFIG_FUNCTION_TRACER Index: linux-2.6/include/linux/bootmem.h =================================================================== --- linux-2.6.orig/include/linux/bootmem.h +++ linux-2.6/include/linux/bootmem.h @@ -6,7 +6,7 @@ #include #include - +#include /* * simple boot-time physical memory area allocator. 
*/
+ */ +int __init e820_find_active_region(const struct e820entry *ei, + unsigned long start_pfn, + unsigned long last_pfn, + unsigned long *ei_startpfn, + unsigned long *ei_endpfn) +{ + u64 align = PAGE_SIZE; + + *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT; + *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT; + + /* Skip map entries smaller than a page */ + if (*ei_startpfn >= *ei_endpfn) + return 0; + + /* Skip if map is outside the node */ + if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || + *ei_startpfn >= last_pfn) + return 0; + + /* Check for overlaps */ + if (*ei_startpfn < start_pfn) + *ei_startpfn = start_pfn; + if (*ei_endpfn > last_pfn) + *ei_endpfn = last_pfn; + + return 1; +} + +/* Walk the e820 map and register active regions within a node */ +void __init e820_register_active_regions(int nid, unsigned long start_pfn, + unsigned long last_pfn) +{ + unsigned long ei_startpfn; + unsigned long ei_endpfn; + int i; + + for (i = 0; i < e820.nr_map; i++) + if (e820_find_active_region(&e820.map[i], + start_pfn, last_pfn, + &ei_startpfn, &ei_endpfn)) + add_active_range(nid, ei_startpfn, ei_endpfn); +} + +/* + * Find the hole size (in bytes) in the memory range. + * @start: starting address of the memory range to scan + * @end: ending address of the memory range to scan + */ +u64 __init e820_hole_size(u64 start, u64 end) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long last_pfn = end >> PAGE_SHIFT; + unsigned long ei_startpfn, ei_endpfn, ram = 0; + int i; + + for (i = 0; i < e820.nr_map; i++) { + if (e820_find_active_region(&e820.map[i], + start_pfn, last_pfn, + &ei_startpfn, &ei_endpfn)) + ram += ei_endpfn - ei_startpfn; + } + return end - start - ((u64)ram << PAGE_SHIFT); +} + +static void early_panic(char *msg) +{ + early_printk(msg); + panic(msg); +} + +static int userdef __initdata; + +/* "mem=nopentium" disables the 4MB page tables. */ +static int __init parse_memopt(char *p) +{ + u64 mem_size; + + if (!p) + return -EINVAL; + +#ifdef CONFIG_X86_32 + if (!strcmp(p, "nopentium")) { + setup_clear_cpu_cap(X86_FEATURE_PSE); + return 0; + } +#endif + + userdef = 1; + mem_size = memparse(p, &p); + e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); + + return 0; +} +early_param("mem", parse_memopt); + +static int __init parse_memmap_opt(char *p) +{ + char *oldp; + u64 start_at, mem_size; + + if (!p) + return -EINVAL; + + if (!strncmp(p, "exactmap", 8)) { +#ifdef CONFIG_CRASH_DUMP + /* + * If we are doing a crash dump, we still need to know + * the real mem size before original memory map is + * reset. + */ + saved_max_pfn = e820_end_of_ram_pfn(); +#endif + e820.nr_map = 0; + userdef = 1; + return 0; + } + + oldp = p; + mem_size = memparse(p, &p); + if (p == oldp) + return -EINVAL; + + userdef = 1; + if (*p == '@') { + start_at = memparse(p+1, &p); + e820_add_region(start_at, mem_size, E820_RAM); + } else if (*p == '#') { + start_at = memparse(p+1, &p); + e820_add_region(start_at, mem_size, E820_ACPI); + } else if (*p == '$') { + start_at = memparse(p+1, &p); + e820_add_region(start_at, mem_size, E820_RESERVED); + } else + e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); + + return *p == '\0' ? 
0 : -EINVAL; +} +early_param("memmap", parse_memmap_opt); + +void __init finish_e820_parsing(void) +{ + if (userdef) { + u32 nr = e820.nr_map; + int max_nr_map = ARRAY_SIZE(e820.map); + + if (__sanitize_e820_map(e820.map, max_nr_map, &nr) < 0) + early_panic("Invalid user supplied memory map"); + e820.nr_map = nr; + + printk(KERN_INFO "user-defined physical RAM map:\n"); + e820_print_map("user"); + } +} + +static inline const char *e820_type_to_string(int e820_type) +{ + switch (e820_type) { + case E820_RESERVED_KERN: + case E820_RAM: return "System RAM"; + case E820_ACPI: return "ACPI Tables"; + case E820_NVS: return "ACPI Non-volatile Storage"; + case E820_UNUSABLE: return "Unusable memory"; + default: return "reserved"; + } +} + +/* + * Mark e820 reserved areas as busy for the resource manager. + */ +static struct resource __initdata *e820_res; +void __init e820_reserve_resources(void) +{ + int i; + struct resource *res; + u64 end; + + res = alloc_bootmem(sizeof(struct resource) * e820.nr_map); + e820_res = res; + for (i = 0; i < e820.nr_map; i++) { + end = e820.map[i].addr + e820.map[i].size - 1; + if (end != (resource_size_t)end) { + res++; + continue; + } + res->name = e820_type_to_string(e820.map[i].type); + res->start = e820.map[i].addr; + res->end = end; + + res->flags = IORESOURCE_MEM; + + /* + * don't register the region that could be conflicted with + * pci device BAR resource and insert them later in + * pcibios_resource_survey() + */ + if (e820.map[i].type != E820_RESERVED || + res->start < (1ULL<<20)) { + res->flags |= IORESOURCE_BUSY; + insert_resource(&iomem_resource, res); + } + res++; + } + + for (i = 0; i < e820_saved.nr_map; i++) { + struct e820entry *entry = &e820_saved.map[i]; + firmware_map_add_early(entry->addr, + entry->addr + entry->size - 1, + e820_type_to_string(entry->type)); + } +} + +/* How much should we pad RAM ending depending on where it is? 
 */ +static unsigned long __init ram_alignment(resource_size_t pos) +{ + unsigned long mb = pos >> 20; + + /* To 64kB in the first megabyte */ + if (!mb) + return 64*1024; + + /* To 1MB in the first 16MB */ + if (mb < 16) + return 1024*1024; + + /* To 64MB for anything above that */ + return 64*1024*1024; +} + +#define MAX_RESOURCE_SIZE ((resource_size_t)-1) + +void __init e820_reserve_resources_late(void) +{ + int i; + struct resource *res; + + res = e820_res; + for (i = 0; i < e820.nr_map; i++) { + if (!res->parent && res->end) + insert_resource_expand_to_fit(&iomem_resource, res); + res++; + } + + /* + * Try to bump up RAM regions to reasonable boundaries to + * avoid stolen RAM: + */ + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *entry = &e820.map[i]; + u64 start, end; + + if (entry->type != E820_RAM) + continue; + start = entry->addr + entry->size; + end = round_up(start, ram_alignment(start)) - 1; + if (end > MAX_RESOURCE_SIZE) + end = MAX_RESOURCE_SIZE; + if (start >= end) + continue; + printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ", + start, end); + reserve_region_with_split(&iomem_resource, start, end, + "RAM buffer"); + } +} + +void __init save_e820_map(void) +{ + memcpy(&e820_saved, &e820, sizeof(struct e820map)); +} + +#ifdef CONFIG_X86_OOSTORE + +/* + * this one should stay in arch/x86/kernel/e820.c, because we want + * to keep the e820 map static to this file + */ +/* + * Figure what we can cover with MCR's + * + * Shortcut: We know you can't put 4Gig of RAM on a winchip + */ +int __init __get_special_low_ram_top(void) +{ + u32 clip = 0xFFFFFFFFUL; + u32 top = 0; + int i; + + for (i = 0; i < e820.nr_map; i++) { + unsigned long start, end; + + if (e820.map[i].addr > 0xFFFFFFFFUL) + continue; + /* + * Don't MCR over reserved space. Ignore the ISA hole + * we frob around that catastrophe already + */ + if (e820.map[i].type == E820_RESERVED) { + if (e820.map[i].addr >= 0x100000UL && + e820.map[i].addr < clip) + clip = e820.map[i].addr; + continue; + } + start = e820.map[i].addr; + end = e820.map[i].addr + e820.map[i].size; + if (start >= end) + continue; + if (end > top) + top = end; + } + /* + * Everything below 'top' should be RAM except for the ISA hole. + * Because of the limited MCR's we want to map NV/ACPI into our + * MCR range for gunk in RAM + * + * Clip might cause us to MCR insufficient RAM but that is an + * acceptable failure mode and should only bite obscure boxes with + * a VESA hole at 15Mb + * + * The second case Clip sometimes kicks in is when the EBDA is marked + * as reserved. Again we fail safe with reasonable results + */ + if (top > clip) + top = clip; + + return top; +} +#endif + + Index: linux-2.6/arch/x86/include/asm/e820.h =================================================================== --- linux-2.6.orig/arch/x86/include/asm/e820.h +++ linux-2.6/arch/x86/include/asm/e820.h @@ -1,65 +1,9 @@ #ifndef _ASM_X86_E820_H #define _ASM_X86_E820_H -#define E820MAP 0x2d0 /* our map */ -#define E820MAX 128 /* number of entries in E820MAP */ -/* - * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the - * constrained space in the zeropage. If we have more nodes than - * that, and if we've booted off EFI firmware, then the EFI tables - * passed us from the EFI firmware can list more nodes. 
Size our - * internal memory map tables to have room for these additional - * nodes, based on up to three entries per node for which the - * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT), - * plus E820MAX, allowing space for the possible duplicate E820 - * entries that might need room in the same arrays, prior to the - * call to sanitize_e820_map() to remove duplicates. The allowance - * of three memory map entries per node is "enough" entries for - * the initial hardware platform motivating this mechanism to make - * use of additional EFI map entries. Future platforms may want - * to allow more than three entries per node or otherwise refine - * this size. - */ - -/* - * Odd: 'make headers_check' complains about numa.h if I try - * to collapse the next two #ifdef lines to a single line: - * #if defined(__KERNEL__) && defined(CONFIG_EFI) - */ -#ifdef __KERNEL__ -#ifdef CONFIG_EFI -#include -#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES) -#else /* ! CONFIG_EFI */ -#define E820_X_MAX E820MAX -#endif -#else /* ! __KERNEL__ */ -#define E820_X_MAX E820MAX -#endif - -#define E820NR 0x1e8 /* # entries in E820MAP */ - -#define E820_RAM 1 -#define E820_RESERVED 2 -#define E820_ACPI 3 -#define E820_NVS 4 -#define E820_UNUSABLE 5 - -/* reserved RAM used by kernel itself */ -#define E820_RESERVED_KERN 128 +#include #ifndef __ASSEMBLY__ -#include -struct e820entry { - __u64 addr; /* start of memory segment */ - __u64 size; /* size of memory segment */ - __u32 type; /* type of memory segment */ -} __attribute__((packed)); - -struct e820map { - __u32 nr_map; - struct e820entry map[E820_X_MAX]; -}; #define ISA_START_ADDRESS 0xa0000 #define ISA_END_ADDRESS 0x100000 @@ -69,73 +13,20 @@ struct e820map { #ifdef __KERNEL__ -#ifdef CONFIG_X86_OOSTORE -extern int centaur_ram_top; -void get_centaur_ram_top(void); +#ifdef CONFIG_MEMTEST +extern void early_memtest(unsigned long start, unsigned long end); #else -static inline void get_centaur_ram_top(void) +static inline void early_memtest(unsigned long start, unsigned long end) { } #endif extern unsigned long pci_mem_start; -extern int e820_any_mapped(u64 start, u64 end, unsigned type); -extern int e820_all_mapped(u64 start, u64 end, unsigned type); -extern void e820_add_region(u64 start, u64 size, int type); -extern void e820_print_map(char *who); -int sanitize_e820_map(void); -void save_e820_map(void); -extern u64 e820_update_range(u64 start, u64 size, unsigned old_type, - unsigned new_type); -extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type, - int checktype); -extern void update_e820(void); extern void e820_setup_gap(void); -extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, - unsigned long start_addr, unsigned long long end_addr); struct setup_data; extern void parse_e820_ext(struct setup_data *data, unsigned long pa_data); - -#if defined(CONFIG_X86_64) || \ - (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) -extern void e820_mark_nosave_regions(unsigned long limit_pfn); -#else -static inline void e820_mark_nosave_regions(unsigned long limit_pfn) -{ -} -#endif - -#ifdef CONFIG_MEMTEST -extern void early_memtest(unsigned long start, unsigned long end); -#else -static inline void early_memtest(unsigned long start, unsigned long end) -{ -} -#endif - -extern unsigned long end_user_pfn; - -extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); -extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); -extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); -#include - 
-extern unsigned long e820_end_of_ram_pfn(void); -extern unsigned long e820_end_of_low_ram_pfn(void); -extern int e820_find_active_region(const struct e820entry *ei, - unsigned long start_pfn, - unsigned long last_pfn, - unsigned long *ei_startpfn, - unsigned long *ei_endpfn); -extern void e820_register_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn); -extern u64 e820_hole_size(u64 start, u64 end); -extern void finish_e820_parsing(void); -extern void e820_reserve_resources(void); -extern void e820_reserve_resources_late(void); -extern void setup_memory_map(void); extern char *default_machine_specific_memory_setup(void); - +extern void setup_memory_map(void); /* * Returns true iff the specified range [s,e) is completely contained inside * the ISA region. @@ -145,7 +36,18 @@ static inline bool is_ISA_range(u64 s, u return s >= ISA_START_ADDRESS && e <= ISA_END_ADDRESS; } +#ifdef CONFIG_X86_OOSTORE +extern int centaur_ram_top; +int __get_special_low_ram_top(void); +void get_centaur_ram_top(void); +#else +static inline void get_centaur_ram_top(void) +{ +} +#endif + #endif /* __KERNEL__ */ + #endif /* __ASSEMBLY__ */ #ifdef __KERNEL__ Index: linux-2.6/include/linux/fw_memmap.h =================================================================== --- /dev/null +++ linux-2.6/include/linux/fw_memmap.h @@ -0,0 +1,114 @@ +#ifndef _LINUX_FW_MEMMAP_H +#define _LINUX_FW_MEMMAP_H +#define E820MAX 128 /* number of entries in E820MAP */ + +/* + * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the + * constrained space in the zeropage. If we have more nodes than + * that, and if we've booted off EFI firmware, then the EFI tables + * passed us from the EFI firmware can list more nodes. Size our + * internal memory map tables to have room for these additional + * nodes, based on up to three entries per node for which the + * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT), + * plus E820MAX, allowing space for the possible duplicate E820 + * entries that might need room in the same arrays, prior to the + * call to sanitize_e820_map() to remove duplicates. The allowance + * of three memory map entries per node is "enough" entries for + * the initial hardware platform motivating this mechanism to make + * use of additional EFI map entries. Future platforms may want + * to allow more than three entries per node or otherwise refine + * this size. + */ + +/* + * Odd: 'make headers_check' complains about numa.h if I try + * to collapse the next two #ifdef lines to a single line: + * #if defined(__KERNEL__) && defined(CONFIG_EFI) + */ +#ifdef __KERNEL__ +#ifdef CONFIG_EFI +#include +#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES) +#else /* ! CONFIG_EFI */ +#define E820_X_MAX E820MAX +#endif +#else /* ! 
__KERNEL__ */ +#define E820_X_MAX E820MAX +#endif + +#define E820_RAM 1 +#define E820_RESERVED 2 +#define E820_ACPI 3 +#define E820_NVS 4 +#define E820_UNUSABLE 5 + +/* reserved RAM used by kernel itself */ +#define E820_RESERVED_KERN 128 + +#ifndef __ASSEMBLY__ +#include +struct e820entry { + __u64 addr; /* start of memory segment */ + __u64 size; /* size of memory segment */ + __u32 type; /* type of memory segment */ +} __attribute__((packed)); + +struct e820map { + __u32 nr_map; + struct e820entry map[E820_X_MAX]; +}; + +#ifdef __KERNEL__ + +void clear_e820_map(void); +int __append_e820_map(struct e820entry *biosmap, int nr_map); +extern int e820_any_mapped(u64 start, u64 end, unsigned type); +extern int e820_all_mapped(u64 start, u64 end, unsigned type); +extern void e820_add_region(u64 start, u64 size, int type); +extern void e820_print_map(char *who); +int sanitize_e820_map(void); +int __sanitize_e820_map(struct e820entry *biosmap, int max_nr, u32 *pnr_map); +void save_e820_map(void); +extern u64 e820_update_range(u64 start, u64 size, unsigned old_type, + unsigned new_type); +extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type, + int checktype); +extern void update_e820(void); +#define MAX_GAP_END 0x100000000ull +extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, + unsigned long start_addr, unsigned long long end_addr); + +#if defined(CONFIG_X86_64) || \ + (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) +extern void e820_mark_nosave_regions(unsigned long limit_pfn); +#else +static inline void e820_mark_nosave_regions(unsigned long limit_pfn) +{ +} +#endif + +extern unsigned long end_user_pfn; + +extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); +extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); +extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); +#include + +extern unsigned long e820_end_of_ram_pfn(void); +extern unsigned long e820_end_of_low_ram_pfn(void); +extern int e820_find_active_region(const struct e820entry *ei, + unsigned long start_pfn, + unsigned long last_pfn, + unsigned long *ei_startpfn, + unsigned long *ei_endpfn); +extern void e820_register_active_regions(int nid, unsigned long start_pfn, + unsigned long end_pfn); +extern u64 e820_hole_size(u64 start, u64 end); +extern void finish_e820_parsing(void); +extern void e820_reserve_resources(void); +extern void e820_reserve_resources_late(void); + +#endif /* __KERNEL__ */ +#endif /* __ASSEMBLY__ */ + +#endif /* _LINUX_FW_MEMMAP_H */ Index: linux-2.6/kernel/Makefile =================================================================== --- linux-2.6.orig/kernel/Makefile +++ linux-2.6/kernel/Makefile @@ -11,7 +11,7 @@ obj-y = sched.o fork.o exec_domain.o hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ async.o range.o -obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o +obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o fw_memmap.o obj-y += groups.o ifdef CONFIG_FUNCTION_TRACER Index: linux-2.6/include/linux/bootmem.h =================================================================== --- linux-2.6.orig/include/linux/bootmem.h +++ linux-2.6/include/linux/bootmem.h @@ -6,7 +6,7 @@ #include #include - +#include /* * simple boot-time physical memory area allocator. */
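
Not part of the patch -- an illustrative usage sketch only. To show how the
now-generic interface is meant to be driven, here is a minimal example of
early boot code that uses only the declarations this patch places in
<linux/fw_memmap.h>. The function name example_register_fw_memmap() and every
address/size constant below are made up for illustration; a real caller would
take its ranges from the firmware-provided table.

#include <linux/types.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/fw_memmap.h>

void __init example_register_fw_memmap(void)
{
	u64 addr;

	/* feed the firmware-provided ranges into the map */
	e820_add_region(0x00000000ULL, 0x0009f000ULL, E820_RAM);
	e820_add_region(0x000f0000ULL, 0x00010000ULL, E820_RESERVED);
	e820_add_region(0x00100000ULL, 0x3ff00000ULL, E820_RAM);

	/* sort the table and resolve overlapping entries */
	sanitize_e820_map();
	e820_print_map("example");

	/* snapshot the current map into e820_saved */
	save_e820_map();

	/* carve a 64K, 4K-aligned scratch area out of usable RAM */
	addr = find_e820_area(0x100000ULL, 0x40000000ULL, 0x10000ULL, 0x1000ULL);
	if (addr != -1ULL)
		e820_update_range(addr, 0x10000ULL, E820_RAM, E820_RESERVED);
}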