From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
To: Yinghai Lu <yinghai@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>, Thomas Gleixner <tglx@linutronix.de>,
"H. Peter Anvin" <hpa@zytor.com>,
Andrew Morton <akpm@linux-foundation.org>,
David Miller <davem@davemloft.net>,
Jesse Barnes <jbarnes@virtuousgeek.org>,
"Eric W. Biederman" <ebiederm@xmission.com>,
linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org
Subject: Re: [PATCH 06/20] early_res: seperate common memmap func from e820.c to fw_memmap.c
Date: Mon, 22 Mar 2010 13:37:15 +1100 [thread overview]
Message-ID: <1269225435.8599.70.camel@pasglop> (raw)
In-Reply-To: <1269155601-18247-7-git-send-email-yinghai@kernel.org>
On Sun, 2010-03-21 at 00:13 -0700, Yinghai Lu wrote:
> move it to kernel/fw_memmap.c from arch/x86/kernel/e820.c
>
> -v2: add fw_memmap wrapper to some func...
> move some functions back to e820.c
NAK
This is even worse than before. You are now moving that entire pile of
x86 gunk into "generic" code, and you even keep it named e820 there!
What happened to the discussion we had earlier, which, IIRC, concluded
that a better approach would be to adapt x86 to use LMB?
Cheers,
Ben.
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> ---
> arch/x86/include/asm/e820.h | 176 ++++++-------
> arch/x86/kernel/e820.c | 638 ++----------------------------------------
> include/linux/bootmem.h | 2 +-
> include/linux/early_res.h | 1 +
> include/linux/fw_memmap.h | 40 +++
> kernel/Makefile | 2 +-
> kernel/fw_memmap.c | 625 +++++++++++++++++++++++++++++++++++++++++
> kernel/fw_memmap_internals.h | 49 ++++
> 8 files changed, 822 insertions(+), 711 deletions(-)
> create mode 100644 include/linux/fw_memmap.h
> create mode 100644 kernel/fw_memmap.c
> create mode 100644 kernel/fw_memmap_internals.h
>
> diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
> index 71c0348..c038616 100644
> --- a/arch/x86/include/asm/e820.h
> +++ b/arch/x86/include/asm/e820.h
> @@ -1,65 +1,10 @@
> #ifndef _ASM_X86_E820_H
> #define _ASM_X86_E820_H
> -#define E820MAP 0x2d0 /* our map */
> -#define E820MAX 128 /* number of entries in E820MAP */
> -
> -/*
> - * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the
> - * constrained space in the zeropage. If we have more nodes than
> - * that, and if we've booted off EFI firmware, then the EFI tables
> - * passed us from the EFI firmware can list more nodes. Size our
> - * internal memory map tables to have room for these additional
> - * nodes, based on up to three entries per node for which the
> - * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT),
> - * plus E820MAX, allowing space for the possible duplicate E820
> - * entries that might need room in the same arrays, prior to the
> - * call to sanitize_e820_map() to remove duplicates. The allowance
> - * of three memory map entries per node is "enough" entries for
> - * the initial hardware platform motivating this mechanism to make
> - * use of additional EFI map entries. Future platforms may want
> - * to allow more than three entries per node or otherwise refine
> - * this size.
> - */
> -
> -/*
> - * Odd: 'make headers_check' complains about numa.h if I try
> - * to collapse the next two #ifdef lines to a single line:
> - * #if defined(__KERNEL__) && defined(CONFIG_EFI)
> - */
> -#ifdef __KERNEL__
> -#ifdef CONFIG_EFI
> -#include <linux/numa.h>
> -#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES)
> -#else /* ! CONFIG_EFI */
> -#define E820_X_MAX E820MAX
> -#endif
> -#else /* ! __KERNEL__ */
> -#define E820_X_MAX E820MAX
> -#endif
> -
> -#define E820NR 0x1e8 /* # entries in E820MAP */
> -
> -#define E820_RAM 1
> -#define E820_RESERVED 2
> -#define E820_ACPI 3
> -#define E820_NVS 4
> -#define E820_UNUSABLE 5
>
> /* reserved RAM used by kernel itself */
> #define E820_RESERVED_KERN 128
>
> #ifndef __ASSEMBLY__
> -#include <linux/types.h>
> -struct e820entry {
> - __u64 addr; /* start of memory segment */
> - __u64 size; /* size of memory segment */
> - __u32 type; /* type of memory segment */
> -} __attribute__((packed));
> -
> -struct e820map {
> - __u32 nr_map;
> - struct e820entry map[E820_X_MAX];
> -};
>
> #define ISA_START_ADDRESS 0xa0000
> #define ISA_END_ADDRESS 0x100000
> @@ -69,32 +14,18 @@ struct e820map {
>
> #ifdef __KERNEL__
>
> -#ifdef CONFIG_X86_OOSTORE
> -extern int centaur_ram_top;
> -void get_centaur_ram_top(void);
> +#include <linux/fw_memmap.h>
> +
> +#ifdef CONFIG_MEMTEST
> +extern void early_memtest(unsigned long start, unsigned long end);
> #else
> -static inline void get_centaur_ram_top(void)
> +static inline void early_memtest(unsigned long start, unsigned long end)
> {
> }
> #endif
>
> extern unsigned long pci_mem_start;
> -extern int e820_any_mapped(u64 start, u64 end, unsigned type);
> -extern int e820_all_mapped(u64 start, u64 end, unsigned type);
> -extern void e820_add_region(u64 start, u64 size, int type);
> -extern void e820_print_map(char *who);
> -int sanitize_e820_map(void);
> -void save_e820_map(void);
> -extern u64 e820_update_range(u64 start, u64 size, unsigned old_type,
> - unsigned new_type);
> -extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type,
> - int checktype);
> -extern void update_e820(void);
> extern void e820_setup_gap(void);
> -extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
> - unsigned long start_addr, unsigned long long end_addr);
> -struct setup_data;
> -extern void parse_e820_ext(struct setup_data *data, unsigned long pa_data);
>
> #if defined(CONFIG_X86_64) || \
> (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
> @@ -105,37 +36,80 @@ static inline void e820_mark_nosave_regions(unsigned long limit_pfn)
> }
> #endif
>
> -#ifdef CONFIG_MEMTEST
> -extern void early_memtest(unsigned long start, unsigned long end);
> -#else
> -static inline void early_memtest(unsigned long start, unsigned long end)
> +static inline void e820_add_region(u64 start, u64 size, int type)
> {
> + fw_memmap_add_region(start, size, type);
> +}
> +
> +static inline void e820_print_map(char *who)
> +{
> + fw_memmap_print_map(who);
> +}
> +
> +static inline int sanitize_e820_map(void)
> +{
> + return sanitize_fw_memmap();
> +}
> +
> +static inline void finish_e820_parsing(void)
> +{
> + finish_fw_memmap_parsing();
> +}
> +
> +static inline void e820_register_active_regions(int nid,
> + unsigned long start_pfn,
> + unsigned long end_pfn)
> +{
> + fw_memmap_register_active_regions(nid, start_pfn, end_pfn);
> +}
> +
> +static inline u64 e820_hole_size(u64 start, u64 end)
> +{
> + return fw_memmap_hole_size(start, end);
> +}
> +
> +static inline u64 find_e820_area(u64 start, u64 end, u64 size, u64 align)
> +{
> + return find_fw_memmap_area(start, end, size, align);
> +}
> +
> +static inline u64 find_e820_area_node(int nid, u64 start, u64 end,
> + u64 size, u64 align)
> +{
> + return find_fw_memmap_area_node(nid, start, end, size, align);
> }
> -#endif
>
> -extern unsigned long end_user_pfn;
> +static inline unsigned long e820_end_of_ram_pfn(void)
> +{
> + return fw_memmap_end_of_ram_pfn();
> +}
> +
> +void clear_e820_map(void);
> +
> +extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type,
> + int checktype);
> +struct e820entry;
> +int __sanitize_e820_map(struct e820entry *biosmap, int max_nr, u32 *pnr_map);
> +extern unsigned long e820_end_of_low_ram_pfn(void);
> +
> +extern int e820_any_mapped(u64 start, u64 end, unsigned type);
> +extern int e820_all_mapped(u64 start, u64 end, unsigned type);
> +extern u64 e820_update_range(u64 start, u64 size, unsigned old_type,
> + unsigned new_type);
> +
> +extern void update_e820(void);
> +void save_e820_map(void);
> +struct setup_data;
> +extern void parse_e820_ext(struct setup_data *data, unsigned long pa_data);
> +extern char *default_machine_specific_memory_setup(void);
> +extern void setup_memory_map(void);
>
> -extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
> extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
> -u64 find_e820_area_node(int nid, u64 start, u64 end, u64 size, u64 align);
> +
> extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
> -#include <linux/early_res.h>
>
> -extern unsigned long e820_end_of_ram_pfn(void);
> -extern unsigned long e820_end_of_low_ram_pfn(void);
> -extern int e820_find_active_region(const struct e820entry *ei,
> - unsigned long start_pfn,
> - unsigned long last_pfn,
> - unsigned long *ei_startpfn,
> - unsigned long *ei_endpfn);
> -extern void e820_register_active_regions(int nid, unsigned long start_pfn,
> - unsigned long end_pfn);
> -extern u64 e820_hole_size(u64 start, u64 end);
> -extern void finish_e820_parsing(void);
> extern void e820_reserve_resources(void);
> extern void e820_reserve_resources_late(void);
> -extern void setup_memory_map(void);
> -extern char *default_machine_specific_memory_setup(void);
>
> /*
> * Returns true iff the specified range [s,e) is completely contained inside
> @@ -146,7 +120,17 @@ static inline bool is_ISA_range(u64 s, u64 e)
> return s >= ISA_START_ADDRESS && e <= ISA_END_ADDRESS;
> }
>
> +#ifdef CONFIG_X86_OOSTORE
> +extern int centaur_ram_top;
> +void get_centaur_ram_top(void);
> +#else
> +static inline void get_centaur_ram_top(void)
> +{
> +}
> +#endif
> +
> #endif /* __KERNEL__ */
> +
> #endif /* __ASSEMBLY__ */
>
> #ifdef __KERNEL__
> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
> index a558609..9f125ca 100644
> --- a/arch/x86/kernel/e820.c
> +++ b/arch/x86/kernel/e820.c
> @@ -12,18 +12,15 @@
> #include <linux/types.h>
> #include <linux/init.h>
> #include <linux/bootmem.h>
> -#include <linux/pfn.h>
> #include <linux/suspend.h>
> #include <linux/firmware-map.h>
>
> #include <asm/e820.h>
> -#include <asm/proto.h>
> #include <asm/setup.h>
>
> +#include "../../../kernel/fw_memmap_internals.h"
> +
> /*
> - * The e820 map is the map that gets modified e.g. with command line parameters
> - * and that is also registered with modifications in the kernel resource tree
> - * with the iomem_resource as parent.
> *
> * The e820_saved is directly saved after the BIOS-provided memory map is
> * copied. It doesn't get modified afterwards. It's registered for the
> @@ -34,7 +31,6 @@
> * user can e.g. boot the original kernel with mem=1G while still booting the
> * next kernel with full memory.
> */
> -static struct e820map __initdata e820;
> static struct e820map __initdata e820_saved;
>
> /* For PCI or other memory-mapped resources */
> @@ -99,295 +95,6 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type)
> return 0;
> }
>
> -/*
> - * Add a memory region to the kernel e820 map.
> - */
> -static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
> - int type)
> -{
> - int x = e820x->nr_map;
> -
> - if (x >= ARRAY_SIZE(e820x->map)) {
> - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
> - return;
> - }
> -
> - e820x->map[x].addr = start;
> - e820x->map[x].size = size;
> - e820x->map[x].type = type;
> - e820x->nr_map++;
> -}
> -
> -void __init e820_add_region(u64 start, u64 size, int type)
> -{
> - __e820_add_region(&e820, start, size, type);
> -}
> -
> -static void __init e820_print_type(u32 type)
> -{
> - switch (type) {
> - case E820_RAM:
> - case E820_RESERVED_KERN:
> - printk(KERN_CONT "(usable)");
> - break;
> - case E820_RESERVED:
> - printk(KERN_CONT "(reserved)");
> - break;
> - case E820_ACPI:
> - printk(KERN_CONT "(ACPI data)");
> - break;
> - case E820_NVS:
> - printk(KERN_CONT "(ACPI NVS)");
> - break;
> - case E820_UNUSABLE:
> - printk(KERN_CONT "(unusable)");
> - break;
> - default:
> - printk(KERN_CONT "type %u", type);
> - break;
> - }
> -}
> -
> -void __init e820_print_map(char *who)
> -{
> - int i;
> -
> - for (i = 0; i < e820.nr_map; i++) {
> - printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
> - (unsigned long long) e820.map[i].addr,
> - (unsigned long long)
> - (e820.map[i].addr + e820.map[i].size));
> - e820_print_type(e820.map[i].type);
> - printk(KERN_CONT "\n");
> - }
> -}
> -
> -/*
> - * Sanitize the BIOS e820 map.
> - *
> - * Some e820 responses include overlapping entries. The following
> - * replaces the original e820 map with a new one, removing overlaps,
> - * and resolving conflicting memory types in favor of highest
> - * numbered type.
> - *
> - * The input parameter biosmap points to an array of 'struct
> - * e820entry' which on entry has elements in the range [0, *pnr_map)
> - * valid, and which has space for up to max_nr_map entries.
> - * On return, the resulting sanitized e820 map entries will be in
> - * overwritten in the same location, starting at biosmap.
> - *
> - * The integer pointed to by pnr_map must be valid on entry (the
> - * current number of valid entries located at biosmap) and will
> - * be updated on return, with the new number of valid entries
> - * (something no more than max_nr_map.)
> - *
> - * The return value from sanitize_e820_map() is zero if it
> - * successfully 'sanitized' the map entries passed in, and is -1
> - * if it did nothing, which can happen if either of (1) it was
> - * only passed one map entry, or (2) any of the input map entries
> - * were invalid (start + size < start, meaning that the size was
> - * so big the described memory range wrapped around through zero.)
> - *
> - * Visually we're performing the following
> - * (1,2,3,4 = memory types)...
> - *
> - * Sample memory map (w/overlaps):
> - * ____22__________________
> - * ______________________4_
> - * ____1111________________
> - * _44_____________________
> - * 11111111________________
> - * ____________________33__
> - * ___________44___________
> - * __________33333_________
> - * ______________22________
> - * ___________________2222_
> - * _________111111111______
> - * _____________________11_
> - * _________________4______
> - *
> - * Sanitized equivalent (no overlap):
> - * 1_______________________
> - * _44_____________________
> - * ___1____________________
> - * ____22__________________
> - * ______11________________
> - * _________1______________
> - * __________3_____________
> - * ___________44___________
> - * _____________33_________
> - * _______________2________
> - * ________________1_______
> - * _________________4______
> - * ___________________2____
> - * ____________________33__
> - * ______________________4_
> - */
> -
> -static int __init __sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
> - u32 *pnr_map)
> -{
> - struct change_member {
> - struct e820entry *pbios; /* pointer to original bios entry */
> - unsigned long long addr; /* address for this change point */
> - };
> - static struct change_member change_point_list[2*E820_X_MAX] __initdata;
> - static struct change_member *change_point[2*E820_X_MAX] __initdata;
> - static struct e820entry *overlap_list[E820_X_MAX] __initdata;
> - static struct e820entry new_bios[E820_X_MAX] __initdata;
> - struct change_member *change_tmp;
> - unsigned long current_type, last_type;
> - unsigned long long last_addr;
> - int chgidx, still_changing;
> - int overlap_entries;
> - int new_bios_entry;
> - int old_nr, new_nr, chg_nr;
> - int i;
> -
> - /* if there's only one memory region, don't bother */
> - if (*pnr_map < 2)
> - return -1;
> -
> - old_nr = *pnr_map;
> - BUG_ON(old_nr > max_nr_map);
> -
> - /* bail out if we find any unreasonable addresses in bios map */
> - for (i = 0; i < old_nr; i++)
> - if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
> - return -1;
> -
> - /* create pointers for initial change-point information (for sorting) */
> - for (i = 0; i < 2 * old_nr; i++)
> - change_point[i] = &change_point_list[i];
> -
> - /* record all known change-points (starting and ending addresses),
> - omitting those that are for empty memory regions */
> - chgidx = 0;
> - for (i = 0; i < old_nr; i++) {
> - if (biosmap[i].size != 0) {
> - change_point[chgidx]->addr = biosmap[i].addr;
> - change_point[chgidx++]->pbios = &biosmap[i];
> - change_point[chgidx]->addr = biosmap[i].addr +
> - biosmap[i].size;
> - change_point[chgidx++]->pbios = &biosmap[i];
> - }
> - }
> - chg_nr = chgidx;
> -
> - /* sort change-point list by memory addresses (low -> high) */
> - still_changing = 1;
> - while (still_changing) {
> - still_changing = 0;
> - for (i = 1; i < chg_nr; i++) {
> - unsigned long long curaddr, lastaddr;
> - unsigned long long curpbaddr, lastpbaddr;
> -
> - curaddr = change_point[i]->addr;
> - lastaddr = change_point[i - 1]->addr;
> - curpbaddr = change_point[i]->pbios->addr;
> - lastpbaddr = change_point[i - 1]->pbios->addr;
> -
> - /*
> - * swap entries, when:
> - *
> - * curaddr > lastaddr or
> - * curaddr == lastaddr and curaddr == curpbaddr and
> - * lastaddr != lastpbaddr
> - */
> - if (curaddr < lastaddr ||
> - (curaddr == lastaddr && curaddr == curpbaddr &&
> - lastaddr != lastpbaddr)) {
> - change_tmp = change_point[i];
> - change_point[i] = change_point[i-1];
> - change_point[i-1] = change_tmp;
> - still_changing = 1;
> - }
> - }
> - }
> -
> - /* create a new bios memory map, removing overlaps */
> - overlap_entries = 0; /* number of entries in the overlap table */
> - new_bios_entry = 0; /* index for creating new bios map entries */
> - last_type = 0; /* start with undefined memory type */
> - last_addr = 0; /* start with 0 as last starting address */
> -
> - /* loop through change-points, determining affect on the new bios map */
> - for (chgidx = 0; chgidx < chg_nr; chgidx++) {
> - /* keep track of all overlapping bios entries */
> - if (change_point[chgidx]->addr ==
> - change_point[chgidx]->pbios->addr) {
> - /*
> - * add map entry to overlap list (> 1 entry
> - * implies an overlap)
> - */
> - overlap_list[overlap_entries++] =
> - change_point[chgidx]->pbios;
> - } else {
> - /*
> - * remove entry from list (order independent,
> - * so swap with last)
> - */
> - for (i = 0; i < overlap_entries; i++) {
> - if (overlap_list[i] ==
> - change_point[chgidx]->pbios)
> - overlap_list[i] =
> - overlap_list[overlap_entries-1];
> - }
> - overlap_entries--;
> - }
> - /*
> - * if there are overlapping entries, decide which
> - * "type" to use (larger value takes precedence --
> - * 1=usable, 2,3,4,4+=unusable)
> - */
> - current_type = 0;
> - for (i = 0; i < overlap_entries; i++)
> - if (overlap_list[i]->type > current_type)
> - current_type = overlap_list[i]->type;
> - /*
> - * continue building up new bios map based on this
> - * information
> - */
> - if (current_type != last_type) {
> - if (last_type != 0) {
> - new_bios[new_bios_entry].size =
> - change_point[chgidx]->addr - last_addr;
> - /*
> - * move forward only if the new size
> - * was non-zero
> - */
> - if (new_bios[new_bios_entry].size != 0)
> - /*
> - * no more space left for new
> - * bios entries ?
> - */
> - if (++new_bios_entry >= max_nr_map)
> - break;
> - }
> - if (current_type != 0) {
> - new_bios[new_bios_entry].addr =
> - change_point[chgidx]->addr;
> - new_bios[new_bios_entry].type = current_type;
> - last_addr = change_point[chgidx]->addr;
> - }
> - last_type = current_type;
> - }
> - }
> - /* retain count for new bios entries */
> - new_nr = new_bios_entry;
> -
> - /* copy new bios mapping into original location */
> - memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
> - *pnr_map = new_nr;
> -
> - return 0;
> -}
> -
> -int __init sanitize_e820_map(void)
> -{
> - return __sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
> -}
> -
> static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
> {
> while (nr_map) {
> @@ -509,52 +216,6 @@ static u64 __init e820_update_range_saved(u64 start, u64 size,
> new_type);
> }
>
> -/* make e820 not cover the range */
> -u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
> - int checktype)
> -{
> - int i;
> - u64 end;
> - u64 real_removed_size = 0;
> -
> - if (size > (ULLONG_MAX - start))
> - size = ULLONG_MAX - start;
> -
> - end = start + size;
> - printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
> - (unsigned long long) start,
> - (unsigned long long) end);
> - e820_print_type(old_type);
> - printk(KERN_CONT "\n");
> -
> - for (i = 0; i < e820.nr_map; i++) {
> - struct e820entry *ei = &e820.map[i];
> - u64 final_start, final_end;
> -
> - if (checktype && ei->type != old_type)
> - continue;
> - /* totally covered? */
> - if (ei->addr >= start &&
> - (ei->addr + ei->size) <= (start + size)) {
> - real_removed_size += ei->size;
> - memset(ei, 0, sizeof(struct e820entry));
> - continue;
> - }
> - /* partially covered */
> - final_start = max(start, ei->addr);
> - final_end = min(start + size, ei->addr + ei->size);
> - if (final_start >= final_end)
> - continue;
> - real_removed_size += final_end - final_start;
> -
> - ei->size -= final_end - final_start;
> - if (ei->addr < final_start)
> - continue;
> - ei->addr = final_end;
> - }
> - return real_removed_size;
> -}
> -
> void __init update_e820(void)
> {
> u32 nr_map;
> @@ -566,20 +227,24 @@ void __init update_e820(void)
> printk(KERN_INFO "modified physical RAM map:\n");
> e820_print_map("modified");
> }
> +
> static void __init update_e820_saved(void)
> {
> u32 nr_map;
> + int max_nr_map = ARRAY_SIZE(e820_saved.map);
>
> nr_map = e820_saved.nr_map;
> - if (__sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
> + if (__sanitize_e820_map(e820_saved.map, max_nr_map, &nr_map))
> return;
> e820_saved.nr_map = nr_map;
> }
> +
> #define MAX_GAP_END 0x100000000ull
> /*
> * Search for a gap in the e820 memory space from start_addr to end_addr.
> */
> -__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
> +static int __init
> +e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
> unsigned long start_addr, unsigned long long end_addr)
> {
> unsigned long long last;
> @@ -726,37 +391,6 @@ static int __init e820_mark_nvs_memory(void)
> core_initcall(e820_mark_nvs_memory);
> #endif
>
> -/*
> - * Find a free area with specified alignment in a specific range.
> - */
> -u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
> -{
> - int i;
> -
> - for (i = 0; i < e820.nr_map; i++) {
> - struct e820entry *ei = &e820.map[i];
> - u64 addr;
> - u64 ei_start, ei_last;
> -
> - if (ei->type != E820_RAM)
> - continue;
> -
> - ei_last = ei->addr + ei->size;
> - ei_start = ei->addr;
> - addr = find_early_area(ei_start, ei_last, start, end,
> - size, align);
> -
> - if (addr != -1ULL)
> - return addr;
> - }
> - return -1ULL;
> -}
> -
> -u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align)
> -{
> - return find_e820_area(start, end, size, align);
> -}
> -
> u64 __init get_max_mapped(void)
> {
> u64 end = max_pfn_mapped;
> @@ -765,6 +399,7 @@ u64 __init get_max_mapped(void)
>
> return end;
> }
> +
> /*
> * Find next free range after *start
> */
> @@ -792,21 +427,6 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
> return -1ULL;
> }
>
> -u64 __init find_e820_area_node(int nid, u64 start, u64 end, u64 size, u64 align)
> -{
> - u64 addr;
> - /*
> - * need to call this function after e820_register_active_regions
> - * so early_node_map[] is set
> - */
> - addr = find_memory_core_early(nid, size, align, start, end);
> - if (addr != -1ULL)
> - return addr;
> -
> - /* fallback, should already have start end in the node range */
> - return find_e820_area(start, end, size, align);
> -}
> -
> /*
> * pre allocated 4k and reserved it in e820
> */
> @@ -843,220 +463,6 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
> return addr;
> }
>
> -#ifdef CONFIG_X86_32
> -# ifdef CONFIG_X86_PAE
> -# define MAX_ARCH_PFN (1ULL<<(36-PAGE_SHIFT))
> -# else
> -# define MAX_ARCH_PFN (1ULL<<(32-PAGE_SHIFT))
> -# endif
> -#else /* CONFIG_X86_32 */
> -# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT
> -#endif
> -
> -/*
> - * Find the highest page frame number we have available
> - */
> -static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
> -{
> - int i;
> - unsigned long last_pfn = 0;
> - unsigned long max_arch_pfn = MAX_ARCH_PFN;
> -
> - for (i = 0; i < e820.nr_map; i++) {
> - struct e820entry *ei = &e820.map[i];
> - unsigned long start_pfn;
> - unsigned long end_pfn;
> -
> - if (ei->type != type)
> - continue;
> -
> - start_pfn = ei->addr >> PAGE_SHIFT;
> - end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
> -
> - if (start_pfn >= limit_pfn)
> - continue;
> - if (end_pfn > limit_pfn) {
> - last_pfn = limit_pfn;
> - break;
> - }
> - if (end_pfn > last_pfn)
> - last_pfn = end_pfn;
> - }
> -
> - if (last_pfn > max_arch_pfn)
> - last_pfn = max_arch_pfn;
> -
> - printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
> - last_pfn, max_arch_pfn);
> - return last_pfn;
> -}
> -unsigned long __init e820_end_of_ram_pfn(void)
> -{
> - return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
> -}
> -
> -unsigned long __init e820_end_of_low_ram_pfn(void)
> -{
> - return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
> -}
> -/*
> - * Finds an active region in the address range from start_pfn to last_pfn and
> - * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
> - */
> -int __init e820_find_active_region(const struct e820entry *ei,
> - unsigned long start_pfn,
> - unsigned long last_pfn,
> - unsigned long *ei_startpfn,
> - unsigned long *ei_endpfn)
> -{
> - u64 align = PAGE_SIZE;
> -
> - *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
> - *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;
> -
> - /* Skip map entries smaller than a page */
> - if (*ei_startpfn >= *ei_endpfn)
> - return 0;
> -
> - /* Skip if map is outside the node */
> - if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
> - *ei_startpfn >= last_pfn)
> - return 0;
> -
> - /* Check for overlaps */
> - if (*ei_startpfn < start_pfn)
> - *ei_startpfn = start_pfn;
> - if (*ei_endpfn > last_pfn)
> - *ei_endpfn = last_pfn;
> -
> - return 1;
> -}
> -
> -/* Walk the e820 map and register active regions within a node */
> -void __init e820_register_active_regions(int nid, unsigned long start_pfn,
> - unsigned long last_pfn)
> -{
> - unsigned long ei_startpfn;
> - unsigned long ei_endpfn;
> - int i;
> -
> - for (i = 0; i < e820.nr_map; i++)
> - if (e820_find_active_region(&e820.map[i],
> - start_pfn, last_pfn,
> - &ei_startpfn, &ei_endpfn))
> - add_active_range(nid, ei_startpfn, ei_endpfn);
> -}
> -
> -/*
> - * Find the hole size (in bytes) in the memory range.
> - * @start: starting address of the memory range to scan
> - * @end: ending address of the memory range to scan
> - */
> -u64 __init e820_hole_size(u64 start, u64 end)
> -{
> - unsigned long start_pfn = start >> PAGE_SHIFT;
> - unsigned long last_pfn = end >> PAGE_SHIFT;
> - unsigned long ei_startpfn, ei_endpfn, ram = 0;
> - int i;
> -
> - for (i = 0; i < e820.nr_map; i++) {
> - if (e820_find_active_region(&e820.map[i],
> - start_pfn, last_pfn,
> - &ei_startpfn, &ei_endpfn))
> - ram += ei_endpfn - ei_startpfn;
> - }
> - return end - start - ((u64)ram << PAGE_SHIFT);
> -}
> -
> -static void early_panic(char *msg)
> -{
> - early_printk(msg);
> - panic(msg);
> -}
> -
> -static int userdef __initdata;
> -
> -/* "mem=nopentium" disables the 4MB page tables. */
> -static int __init parse_memopt(char *p)
> -{
> - u64 mem_size;
> -
> - if (!p)
> - return -EINVAL;
> -
> -#ifdef CONFIG_X86_32
> - if (!strcmp(p, "nopentium")) {
> - setup_clear_cpu_cap(X86_FEATURE_PSE);
> - return 0;
> - }
> -#endif
> -
> - userdef = 1;
> - mem_size = memparse(p, &p);
> - e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
> -
> - return 0;
> -}
> -early_param("mem", parse_memopt);
> -
> -static int __init parse_memmap_opt(char *p)
> -{
> - char *oldp;
> - u64 start_at, mem_size;
> -
> - if (!p)
> - return -EINVAL;
> -
> - if (!strncmp(p, "exactmap", 8)) {
> -#ifdef CONFIG_CRASH_DUMP
> - /*
> - * If we are doing a crash dump, we still need to know
> - * the real mem size before original memory map is
> - * reset.
> - */
> - saved_max_pfn = e820_end_of_ram_pfn();
> -#endif
> - e820.nr_map = 0;
> - userdef = 1;
> - return 0;
> - }
> -
> - oldp = p;
> - mem_size = memparse(p, &p);
> - if (p == oldp)
> - return -EINVAL;
> -
> - userdef = 1;
> - if (*p == '@') {
> - start_at = memparse(p+1, &p);
> - e820_add_region(start_at, mem_size, E820_RAM);
> - } else if (*p == '#') {
> - start_at = memparse(p+1, &p);
> - e820_add_region(start_at, mem_size, E820_ACPI);
> - } else if (*p == '$') {
> - start_at = memparse(p+1, &p);
> - e820_add_region(start_at, mem_size, E820_RESERVED);
> - } else
> - e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
> -
> - return *p == '\0' ? 0 : -EINVAL;
> -}
> -early_param("memmap", parse_memmap_opt);
> -
> -void __init finish_e820_parsing(void)
> -{
> - if (userdef) {
> - u32 nr = e820.nr_map;
> -
> - if (__sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
> - early_panic("Invalid user supplied memory map");
> - e820.nr_map = nr;
> -
> - printk(KERN_INFO "user-defined physical RAM map:\n");
> - e820_print_map("user");
> - }
> -}
> -
> static inline const char *e820_type_to_string(int e820_type)
> {
> switch (e820_type) {
> @@ -1098,7 +504,8 @@ void __init e820_reserve_resources(void)
> * pci device BAR resource and insert them later in
> * pcibios_resource_survey()
> */
> - if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) {
> + if (e820.map[i].type != E820_RESERVED ||
> + res->start < (1ULL<<20)) {
> res->flags |= IORESOURCE_BUSY;
> insert_resource(&iomem_resource, res);
> }
> @@ -1114,7 +521,7 @@ void __init e820_reserve_resources(void)
> }
>
> /* How much should we pad RAM ending depending on where it is? */
> -static unsigned long ram_alignment(resource_size_t pos)
> +static unsigned long __init ram_alignment(resource_size_t pos)
> {
> unsigned long mb = pos >> 20;
>
> @@ -1196,7 +603,7 @@ char *__init default_machine_specific_memory_setup(void)
> who = "BIOS-e801";
> }
>
> - e820.nr_map = 0;
> + clear_e820_map();
> e820_add_region(0, LOWMEMSIZE(), E820_RAM);
> e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
> }
> @@ -1204,7 +611,6 @@ char *__init default_machine_specific_memory_setup(void)
> /* In case someone cares... */
> return who;
> }
> -
> void __init save_e820_map(void)
> {
> memcpy(&e820_saved, &e820, sizeof(struct e820map));
> @@ -1221,20 +627,18 @@ void __init setup_memory_map(void)
> }
>
> #ifdef CONFIG_X86_OOSTORE
> +
> /*
> * Figure what we can cover with MCR's
> *
> * Shortcut: We know you can't put 4Gig of RAM on a winchip
> */
> -void __init get_centaur_ram_top(void)
> +static void __init __get_special_low_ram_top(void)
> {
> u32 clip = 0xFFFFFFFFUL;
> u32 top = 0;
> int i;
>
> - if (boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR)
> - return;
> -
> for (i = 0; i < e820.nr_map; i++) {
> unsigned long start, end;
>
> @@ -1272,7 +676,15 @@ void __init get_centaur_ram_top(void)
> if (top > clip)
> top = clip;
>
> - centaur_ram_top = top;
> + return top;
> }
> -#endif
>
> +int centaur_ram_top;
> +void __init get_centaur_ram_top(void)
> +{
> + if (boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR)
> + return;
> +
> + centaur_ram_top = __get_special_low_ram_top();
> +}
> +#endif
> diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
> index 266ab92..c341c18 100644
> --- a/include/linux/bootmem.h
> +++ b/include/linux/bootmem.h
> @@ -6,7 +6,7 @@
>
> #include <linux/mmzone.h>
> #include <asm/dma.h>
> -
> +#include <linux/early_res.h>
> /*
> * simple boot-time physical memory area allocator.
> */
> diff --git a/include/linux/early_res.h b/include/linux/early_res.h
> index 29c09f5..0f4590f 100644
> --- a/include/linux/early_res.h
> +++ b/include/linux/early_res.h
> @@ -14,6 +14,7 @@ u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
> u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start,
> u64 *sizep, u64 align);
> u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align);
> +u64 find_fw_memmap_area_node(int nid, u64 start, u64 end, u64 size, u64 align);
> u64 get_max_mapped(void);
> #include <linux/range.h>
> int get_free_all_memory_range(struct range **rangep, int nodeid);
> diff --git a/include/linux/fw_memmap.h b/include/linux/fw_memmap.h
> new file mode 100644
> index 0000000..e0fcc1b
> --- /dev/null
> +++ b/include/linux/fw_memmap.h
> @@ -0,0 +1,40 @@
> +#ifndef _LINUX_FW_MEMMAP_H
> +#define _LINUX_FW_MEMMAP_H
> +#define E820MAX 128 /* number of entries in E820MAP */
> +
> +#define FW_MEMMAP_RAM 1
> +#define FW_MEMMAP_RESERVED 2
> +
> +#define E820_RAM FW_MEMMAP_RAM
> +#define E820_RESERVED FW_MEMMAP_RESERVED
> +
> +#define E820_ACPI 3
> +#define E820_NVS 4
> +#define E820_UNUSABLE 5
> +
> +#ifndef __ASSEMBLY__
> +#include <linux/types.h>
> +struct e820entry {
> + __u64 addr; /* start of memory segment */
> + __u64 size; /* size of memory segment */
> + __u32 type; /* type of memory segment */
> +} __attribute__((packed));
> +
> +#ifdef __KERNEL__
> +
> +void fw_memmap_add_region(u64 start, u64 size, int type);
> +void fw_memmap_print_map(char *who);
> +int sanitize_fw_memmap(void);
> +void finish_fw_memmap_parsing(void);
> +
> +#include <linux/early_res.h>
> +
> +unsigned long fw_memmap_end_of_ram_pfn(void);
> +void fw_memmap_register_active_regions(int nid, unsigned long start_pfn,
> + unsigned long end_pfn);
> +u64 fw_memmap_hole_size(u64 start, u64 end);
> +
> +#endif /* __KERNEL__ */
> +#endif /* __ASSEMBLY__ */
> +
> +#endif /* _LINUX_FW_MEMMAP_H */
> diff --git a/kernel/Makefile b/kernel/Makefile
> index d5c3006..b0afaa5 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -11,7 +11,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
> hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
> notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
> async.o range.o
> -obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o
> +obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o fw_memmap.o
> obj-y += groups.o
>
> ifdef CONFIG_FUNCTION_TRACER
> diff --git a/kernel/fw_memmap.c b/kernel/fw_memmap.c
> new file mode 100644
> index 0000000..11067f3
> --- /dev/null
> +++ b/kernel/fw_memmap.c
> @@ -0,0 +1,625 @@
> +/*
> + * Handle the memory map.
> + * The functions here do the job until bootmem takes over.
> + *
> + * Getting sanitize_e820_map() in sync with i386 version by applying change:
> + * - Provisions for empty E820 memory regions (reported by certain BIOSes).
> + * Alex Achenbach <xela@slit.de>, December 2002.
> + * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
> + *
> + */
> +#include <linux/kernel.h>
> +#include <linux/types.h>
> +#include <linux/init.h>
> +#include <linux/bootmem.h>
> +#include <linux/suspend.h>
> +#include <linux/ioport.h>
> +
> +#include <linux/fw_memmap.h>
> +#include "fw_memmap_internals.h"
> +
> +/*
> + * The e820 map is the map that gets modified e.g. with command line parameters
> + * and that is also registered with modifications in the kernel resource tree
> + * with the iomem_resource as parent.
> + */
> +struct e820map __initdata e820;
> +
> +/*
> + * Add a memory region to the kernel e820 map.
> + */
> +void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
> + int type)
> +{
> + int x = e820x->nr_map;
> +
> + if (x >= ARRAY_SIZE(e820x->map)) {
> + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
> + return;
> + }
> +
> + e820x->map[x].addr = start;
> + e820x->map[x].size = size;
> + e820x->map[x].type = type;
> + e820x->nr_map++;
> +}
> +
> +void __init fw_memmap_add_region(u64 start, u64 size, int type)
> +{
> + __e820_add_region(&e820, start, size, type);
> +}
> +
> +/* make e820 not cover the range */
> +u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
> + int checktype)
> +{
> + int i;
> + u64 end;
> + u64 real_removed_size = 0;
> +
> + if (size > (ULLONG_MAX - start))
> + size = ULLONG_MAX - start;
> +
> + end = start + size;
> + printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
> + (unsigned long long) start,
> + (unsigned long long) end);
> + e820_print_type(old_type);
> + printk(KERN_CONT "\n");
> +
> + for (i = 0; i < e820.nr_map; i++) {
> + struct e820entry *ei = &e820.map[i];
> + u64 final_start, final_end;
> +
> + if (checktype && ei->type != old_type)
> + continue;
> + /* totally covered? */
> + if (ei->addr >= start &&
> + (ei->addr + ei->size) <= (start + size)) {
> + real_removed_size += ei->size;
> + memset(ei, 0, sizeof(struct e820entry));
> + continue;
> + }
> + /* partially covered */
> + final_start = max(start, ei->addr);
> + final_end = min(start + size, ei->addr + ei->size);
> + if (final_start >= final_end)
> + continue;
> + real_removed_size += final_end - final_start;
> +
> + ei->size -= final_end - final_start;
> + if (ei->addr < final_start)
> + continue;
> + ei->addr = final_end;
> + }
> + return real_removed_size;
> +}
> +
> +void __init e820_print_type(u32 type)
> +{
> + switch (type) {
> + case E820_RAM:
> + case E820_RESERVED_KERN:
> + printk(KERN_CONT "(usable)");
> + break;
> + case E820_RESERVED:
> + printk(KERN_CONT "(reserved)");
> + break;
> + case E820_ACPI:
> + printk(KERN_CONT "(ACPI data)");
> + break;
> + case E820_NVS:
> + printk(KERN_CONT "(ACPI NVS)");
> + break;
> + case E820_UNUSABLE:
> + printk(KERN_CONT "(unusable)");
> + break;
> + default:
> + printk(KERN_CONT "type %u", type);
> + break;
> + }
> +}
> +
> +void __init fw_memmap_print_map(char *who)
> +{
> + int i;
> +
> + for (i = 0; i < e820.nr_map; i++) {
> + printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
> + (unsigned long long) e820.map[i].addr,
> + (unsigned long long)
> + (e820.map[i].addr + e820.map[i].size));
> + e820_print_type(e820.map[i].type);
> + printk(KERN_CONT "\n");
> + }
> +}
> +
> +/*
> + * Sanitize the BIOS e820 map.
> + *
> + * Some e820 responses include overlapping entries. The following
> + * replaces the original e820 map with a new one, removing overlaps,
> + * and resolving conflicting memory types in favor of highest
> + * numbered type.
> + *
> + * The input parameter biosmap points to an array of 'struct
> + * e820entry' which on entry has elements in the range [0, *pnr_map)
> + * valid, and which has space for up to max_nr_map entries.
> + * On return, the resulting sanitized e820 map entries will be
> + * overwritten in the same location, starting at biosmap.
> + *
> + * The integer pointed to by pnr_map must be valid on entry (the
> + * current number of valid entries located at biosmap) and will
> + * be updated on return, with the new number of valid entries
> + * (something no more than max_nr_map.)
> + *
> + * The return value from sanitize_e820_map() is zero if it
> + * successfully 'sanitized' the map entries passed in, and is -1
> + * if it did nothing, which can happen if either of (1) it was
> + * only passed one map entry, or (2) any of the input map entries
> + * were invalid (start + size < start, meaning that the size was
> + * so big the described memory range wrapped around through zero.)
> + *
> + * Visually we're performing the following
> + * (1,2,3,4 = memory types)...
> + *
> + * Sample memory map (w/overlaps):
> + * ____22__________________
> + * ______________________4_
> + * ____1111________________
> + * _44_____________________
> + * 11111111________________
> + * ____________________33__
> + * ___________44___________
> + * __________33333_________
> + * ______________22________
> + * ___________________2222_
> + * _________111111111______
> + * _____________________11_
> + * _________________4______
> + *
> + * Sanitized equivalent (no overlap):
> + * 1_______________________
> + * _44_____________________
> + * ___1____________________
> + * ____22__________________
> + * ______11________________
> + * _________1______________
> + * __________3_____________
> + * ___________44___________
> + * _____________33_________
> + * _______________2________
> + * ________________1_______
> + * _________________4______
> + * ___________________2____
> + * ____________________33__
> + * ______________________4_
> + */
> +
> +int __init __sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
> + u32 *pnr_map)
> +{
> + struct change_member {
> + struct e820entry *pbios; /* pointer to original bios entry */
> + unsigned long long addr; /* address for this change point */
> + };
> + static struct change_member change_point_list[2*E820_X_MAX] __initdata;
> + static struct change_member *change_point[2*E820_X_MAX] __initdata;
> + static struct e820entry *overlap_list[E820_X_MAX] __initdata;
> + static struct e820entry new_bios[E820_X_MAX] __initdata;
> + struct change_member *change_tmp;
> + unsigned long current_type, last_type;
> + unsigned long long last_addr;
> + int chgidx, still_changing;
> + int overlap_entries;
> + int new_bios_entry;
> + int old_nr, new_nr, chg_nr;
> + int i;
> +
> + /* if there's only one memory region, don't bother */
> + if (*pnr_map < 2)
> + return -1;
> +
> + old_nr = *pnr_map;
> + BUG_ON(old_nr > max_nr_map);
> +
> + /* bail out if we find any unreasonable addresses in bios map */
> + for (i = 0; i < old_nr; i++)
> + if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
> + return -1;
> +
> + /* create pointers for initial change-point information (for sorting) */
> + for (i = 0; i < 2 * old_nr; i++)
> + change_point[i] = &change_point_list[i];
> +
> + /* record all known change-points (starting and ending addresses),
> + omitting those that are for empty memory regions */
> + chgidx = 0;
> + for (i = 0; i < old_nr; i++) {
> + if (biosmap[i].size != 0) {
> + change_point[chgidx]->addr = biosmap[i].addr;
> + change_point[chgidx++]->pbios = &biosmap[i];
> + change_point[chgidx]->addr = biosmap[i].addr +
> + biosmap[i].size;
> + change_point[chgidx++]->pbios = &biosmap[i];
> + }
> + }
> + chg_nr = chgidx;
> +
> + /* sort change-point list by memory addresses (low -> high) */
> + still_changing = 1;
> + while (still_changing) {
> + still_changing = 0;
> + for (i = 1; i < chg_nr; i++) {
> + unsigned long long curaddr, lastaddr;
> + unsigned long long curpbaddr, lastpbaddr;
> +
> + curaddr = change_point[i]->addr;
> + lastaddr = change_point[i - 1]->addr;
> + curpbaddr = change_point[i]->pbios->addr;
> + lastpbaddr = change_point[i - 1]->pbios->addr;
> +
> + /*
> + * swap entries, when:
> + *
> + * curaddr < lastaddr or
> + * curaddr == lastaddr and curaddr == curpbaddr and
> + * lastaddr != lastpbaddr
> + */
> + if (curaddr < lastaddr ||
> + (curaddr == lastaddr && curaddr == curpbaddr &&
> + lastaddr != lastpbaddr)) {
> + change_tmp = change_point[i];
> + change_point[i] = change_point[i-1];
> + change_point[i-1] = change_tmp;
> + still_changing = 1;
> + }
> + }
> + }
> +
> + /* create a new bios memory map, removing overlaps */
> + overlap_entries = 0; /* number of entries in the overlap table */
> + new_bios_entry = 0; /* index for creating new bios map entries */
> + last_type = 0; /* start with undefined memory type */
> + last_addr = 0; /* start with 0 as last starting address */
> +
> + /* loop through change-points, determining effect on the new bios map */
> + for (chgidx = 0; chgidx < chg_nr; chgidx++) {
> + /* keep track of all overlapping bios entries */
> + if (change_point[chgidx]->addr ==
> + change_point[chgidx]->pbios->addr) {
> + /*
> + * add map entry to overlap list (> 1 entry
> + * implies an overlap)
> + */
> + overlap_list[overlap_entries++] =
> + change_point[chgidx]->pbios;
> + } else {
> + /*
> + * remove entry from list (order independent,
> + * so swap with last)
> + */
> + for (i = 0; i < overlap_entries; i++) {
> + if (overlap_list[i] ==
> + change_point[chgidx]->pbios)
> + overlap_list[i] =
> + overlap_list[overlap_entries-1];
> + }
> + overlap_entries--;
> + }
> + /*
> + * if there are overlapping entries, decide which
> + * "type" to use (larger value takes precedence --
> + * 1=usable, 2,3,4,4+=unusable)
> + */
> + current_type = 0;
> + for (i = 0; i < overlap_entries; i++)
> + if (overlap_list[i]->type > current_type)
> + current_type = overlap_list[i]->type;
> + /*
> + * continue building up new bios map based on this
> + * information
> + */
> + if (current_type != last_type) {
> + if (last_type != 0) {
> + new_bios[new_bios_entry].size =
> + change_point[chgidx]->addr - last_addr;
> + /*
> + * move forward only if the new size
> + * was non-zero
> + */
> + if (new_bios[new_bios_entry].size != 0)
> + /*
> + * no more space left for new
> + * bios entries ?
> + */
> + if (++new_bios_entry >= max_nr_map)
> + break;
> + }
> + if (current_type != 0) {
> + new_bios[new_bios_entry].addr =
> + change_point[chgidx]->addr;
> + new_bios[new_bios_entry].type = current_type;
> + last_addr = change_point[chgidx]->addr;
> + }
> + last_type = current_type;
> + }
> + }
> + /* retain count for new bios entries */
> + new_nr = new_bios_entry;
> +
> + /* copy new bios mapping into original location */
> + memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
> + *pnr_map = new_nr;
> +
> + return 0;
> +}
> +
> +int __init sanitize_fw_memmap(void)
> +{
> + int max_nr_map = ARRAY_SIZE(e820.map);
> +
> + return __sanitize_e820_map(e820.map, max_nr_map, &e820.nr_map);
> +}
> +
> +void __init clear_e820_map(void)
> +{
> + e820.nr_map = 0;
> +}
> +
> +static int userdef __initdata;
> +
> +/* "mem=nopentium" disables the 4MB page tables. */
> +static int __init parse_memopt(char *p)
> +{
> + u64 mem_size;
> +
> + if (!p)
> + return -EINVAL;
> +
> +#ifdef CONFIG_X86_32
> + if (!strcmp(p, "nopentium")) {
> + setup_clear_cpu_cap(X86_FEATURE_PSE);
> + return 0;
> + }
> +#endif
> +
> + userdef = 1;
> + mem_size = memparse(p, &p);
> + e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
> +
> + return 0;
> +}
> +early_param("mem", parse_memopt);
> +
> +static int __init parse_memmap_opt(char *p)
> +{
> + char *oldp;
> + u64 start_at, mem_size;
> +
> + if (!p)
> + return -EINVAL;
> +
> + if (!strncmp(p, "exactmap", 8)) {
> +#ifdef CONFIG_CRASH_DUMP
> + /*
> + * If we are doing a crash dump, we still need to know
> + * the real mem size before original memory map is
> + * reset.
> + */
> + saved_max_pfn = fw_memmap_end_of_ram_pfn();
> +#endif
> + e820.nr_map = 0;
> + userdef = 1;
> + return 0;
> + }
> +
> + oldp = p;
> + mem_size = memparse(p, &p);
> + if (p == oldp)
> + return -EINVAL;
> +
> + userdef = 1;
> + if (*p == '@') {
> + start_at = memparse(p+1, &p);
> + e820_add_region(start_at, mem_size, E820_RAM);
> + } else if (*p == '#') {
> + start_at = memparse(p+1, &p);
> + e820_add_region(start_at, mem_size, E820_ACPI);
> + } else if (*p == '$') {
> + start_at = memparse(p+1, &p);
> + e820_add_region(start_at, mem_size, E820_RESERVED);
> + } else
> + e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
> +
> + return *p == '\0' ? 0 : -EINVAL;
> +}
> +early_param("memmap", parse_memmap_opt);
> +
> +static void early_panic(char *msg)
> +{
> + early_printk(msg);
> + panic(msg);
> +}
> +
> +void __init finish_fw_memmap_parsing(void)
> +{
> + if (userdef) {
> + u32 nr = e820.nr_map;
> + int max_nr_map = ARRAY_SIZE(e820.map);
> +
> + if (__sanitize_e820_map(e820.map, max_nr_map, &nr) < 0)
> + early_panic("Invalid user supplied memory map");
> + e820.nr_map = nr;
> +
> + printk(KERN_INFO "user-defined physical RAM map:\n");
> + e820_print_map("user");
> + }
> +}
> +
> +/*
> + * Find a free area with specified alignment in a specific range.
> + */
> +u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align)
> +{
> + int i;
> +
> + for (i = 0; i < e820.nr_map; i++) {
> + struct e820entry *ei = &e820.map[i];
> + u64 addr;
> + u64 ei_start, ei_last;
> +
> + if (ei->type != E820_RAM)
> + continue;
> +
> + ei_last = ei->addr + ei->size;
> + ei_start = ei->addr;
> + addr = find_early_area(ei_start, ei_last, start, end,
> + size, align);
> +
> + if (addr != -1ULL)
> + return addr;
> + }
> + return -1ULL;
> +}
> +
> +u64 __init
> +find_fw_memmap_area_node(int nid, u64 start, u64 end, u64 size, u64 align)
> +{
> + u64 addr;
> + /*
> + * need to call this function after e820_register_active_regions
> + * so early_node_map[] is set
> + */
> + addr = find_memory_core_early(nid, size, align, start, end);
> + if (addr != -1ULL)
> + return addr;
> +
> + /* fallback, should already have start end in the node range */
> + return find_fw_memmap_area(start, end, size, align);
> +}
> +
> +#ifdef CONFIG_X86_32
> +# ifdef CONFIG_X86_PAE
> +# define MAX_ARCH_PFN (1ULL<<(36-PAGE_SHIFT))
> +# else
> +# define MAX_ARCH_PFN (1ULL<<(32-PAGE_SHIFT))
> +# endif
> +#else /* CONFIG_X86_32 */
> +# define MAX_ARCH_PFN (MAXMEM>>PAGE_SHIFT)
> +#endif
> +
> +/*
> + * Find the highest page frame number we have available
> + */
> +static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
> +{
> + int i;
> + unsigned long last_pfn = 0;
> + unsigned long max_arch_pfn = MAX_ARCH_PFN;
> +
> + for (i = 0; i < e820.nr_map; i++) {
> + struct e820entry *ei = &e820.map[i];
> + unsigned long start_pfn;
> + unsigned long end_pfn;
> +
> + if (ei->type != type)
> + continue;
> +
> + start_pfn = ei->addr >> PAGE_SHIFT;
> + end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
> +
> + if (start_pfn >= limit_pfn)
> + continue;
> + if (end_pfn > limit_pfn) {
> + last_pfn = limit_pfn;
> + break;
> + }
> + if (end_pfn > last_pfn)
> + last_pfn = end_pfn;
> + }
> +
> + if (last_pfn > max_arch_pfn)
> + last_pfn = max_arch_pfn;
> +
> + printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
> + last_pfn, max_arch_pfn);
> + return last_pfn;
> +}
> +unsigned long __init fw_memmap_end_of_ram_pfn(void)
> +{
> + return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
> +}
> +
> +unsigned long __init e820_end_of_low_ram_pfn(void)
> +{
> + return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
> +}
> +/*
> + * Finds an active region in the address range from start_pfn to last_pfn and
> + * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
> + */
> +static int __init e820_find_active_region(const struct e820entry *ei,
> + unsigned long start_pfn,
> + unsigned long last_pfn,
> + unsigned long *ei_startpfn,
> + unsigned long *ei_endpfn)
> +{
> + u64 align = PAGE_SIZE;
> +
> + *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
> + *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;
> +
> + /* Skip map entries smaller than a page */
> + if (*ei_startpfn >= *ei_endpfn)
> + return 0;
> +
> + /* Skip if map is outside the node */
> + if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
> + *ei_startpfn >= last_pfn)
> + return 0;
> +
> + /* Check for overlaps */
> + if (*ei_startpfn < start_pfn)
> + *ei_startpfn = start_pfn;
> + if (*ei_endpfn > last_pfn)
> + *ei_endpfn = last_pfn;
> +
> + return 1;
> +}
> +
> +/* Walk the e820 map and register active regions within a node */
> +void __init fw_memmap_register_active_regions(int nid, unsigned long start_pfn,
> + unsigned long last_pfn)
> +{
> + unsigned long ei_startpfn;
> + unsigned long ei_endpfn;
> + int i;
> +
> + for (i = 0; i < e820.nr_map; i++)
> + if (e820_find_active_region(&e820.map[i],
> + start_pfn, last_pfn,
> + &ei_startpfn, &ei_endpfn))
> + add_active_range(nid, ei_startpfn, ei_endpfn);
> +}
> +
> +/*
> + * Find the hole size (in bytes) in the memory range.
> + * @start: starting address of the memory range to scan
> + * @end: ending address of the memory range to scan
> + */
> +u64 __init fw_memmap_hole_size(u64 start, u64 end)
> +{
> + unsigned long start_pfn = start >> PAGE_SHIFT;
> + unsigned long last_pfn = end >> PAGE_SHIFT;
> + unsigned long ei_startpfn, ei_endpfn, ram = 0;
> + int i;
> +
> + for (i = 0; i < e820.nr_map; i++) {
> + if (e820_find_active_region(&e820.map[i],
> + start_pfn, last_pfn,
> + &ei_startpfn, &ei_endpfn))
> + ram += ei_endpfn - ei_startpfn;
> + }
> + return end - start - ((u64)ram << PAGE_SHIFT);
> +}
> diff --git a/kernel/fw_memmap_internals.h b/kernel/fw_memmap_internals.h
> new file mode 100644
> index 0000000..f217602
> --- /dev/null
> +++ b/kernel/fw_memmap_internals.h
> @@ -0,0 +1,49 @@
> +#ifndef __KERNEL_FW_MEMMAP_INTERNALS_H
> +#define __KERNEL_FW_MEMMAP_INTERNALS_H
> +
> +/*
> + * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the
> + * constrained space in the zeropage. If we have more nodes than
> + * that, and if we've booted off EFI firmware, then the EFI tables
> + * passed us from the EFI firmware can list more nodes. Size our
> + * internal memory map tables to have room for these additional
> + * nodes, based on up to three entries per node for which the
> + * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT),
> + * plus E820MAX, allowing space for the possible duplicate E820
> + * entries that might need room in the same arrays, prior to the
> + * call to sanitize_e820_map() to remove duplicates. The allowance
> + * of three memory map entries per node is "enough" entries for
> + * the initial hardware platform motivating this mechanism to make
> + * use of additional EFI map entries. Future platforms may want
> + * to allow more than three entries per node or otherwise refine
> + * this size.
> + */
> +
> +/*
> + * Odd: 'make headers_check' complains about numa.h if I try
> + * to collapse the next two #ifdef lines to a single line:
> + * #if defined(__KERNEL__) && defined(CONFIG_EFI)
> + */
> +#ifdef __KERNEL__
> +#ifdef CONFIG_EFI
> +#include <linux/numa.h>
> +#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES)
> +#else /* ! CONFIG_EFI */
> +#define E820_X_MAX E820MAX
> +#endif
> +#else /* ! __KERNEL__ */
> +#define E820_X_MAX E820MAX
> +#endif
> +
> +#ifndef __ASSEMBLY__
> +struct e820map {
> + __u32 nr_map;
> + struct e820entry map[E820_X_MAX];
> +};
> +#endif
> +
> +extern struct e820map __initdata e820;
> +void e820_print_type(u32 type);
> +void __e820_add_region(struct e820map *e820x, u64 start, u64 size, int type);
> +
> +#endif
next prev parent reply other threads:[~2010-03-22 2:38 UTC|newest]
Thread overview: 102+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-03-21 7:13 [PATCH 00/20] x86: early_res and irq_desc Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-21 7:13 ` [PATCH 01/20] x86: add find_e820_area_node Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-21 7:13 ` [PATCH 02/20] x86: add get_centaur_ram_top Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-21 7:13 ` [PATCH 03/20] x86: make e820 to be static Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-21 7:13 ` [PATCH 04/20] x86: use wake_system_ram_range instead of e820_any_mapped in agp path Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-21 7:13 ` [PATCH 05/20] x86: make e820 to be initdata Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-21 7:13 ` [PATCH 06/20] early_res: seperate common memmap func from e820.c to fw_memmap.c Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-22 2:37 ` Benjamin Herrenschmidt [this message]
2010-03-22 2:46 ` Questions about SMP bootup control Zhu, Yijun (NSN - CN/Beijing)
2010-03-22 2:46 ` Zhu, Yijun (NSN - CN/Beijing)
2010-03-22 3:29 ` Andi Kleen
2010-03-22 7:45 ` Zhu, Yijun (NSN - CN/Beijing)
2010-03-22 3:56 ` [PATCH 06/20] early_res: seperate common memmap func from e820.c to fw_memmap.c Yinghai Lu
2010-03-22 4:00 ` David Miller
2010-03-22 4:28 ` Yinghai Lu
2010-03-22 4:33 ` David Miller
2010-03-22 9:28 ` Ingo Molnar
2010-03-22 9:28 ` Ingo Molnar
2010-03-22 11:30 ` Paul Mackerras
2010-03-22 13:05 ` Ingo Molnar
2010-03-22 13:05 ` Ingo Molnar
2010-03-22 21:04 ` Benjamin Herrenschmidt
2010-03-22 21:20 ` Ingo Molnar
2010-03-22 21:52 ` Benjamin Herrenschmidt
2010-03-22 22:14 ` Yinghai Lu
2010-03-22 18:18 ` [PATCH 06/20] early_res: seperate common memmap func from e820.c to fw_memmap.cy Thomas Gleixner
2010-03-22 19:37 ` Ingo Molnar
2010-03-22 20:07 ` Yinghai Lu
2010-03-22 21:08 ` Benjamin Herrenschmidt
2010-03-22 22:09 ` Thomas Gleixner
2010-03-22 22:25 ` Yinghai Lu
2010-03-22 22:53 ` Thomas Gleixner
2010-03-22 23:41 ` Yinghai Lu
2010-03-23 0:45 ` Thomas Gleixner
2010-03-23 1:04 ` Yinghai Lu
2010-03-23 1:36 ` Thomas Gleixner
2010-03-23 6:01 ` Yinghai Lu
2010-03-23 8:02 ` Ingo Molnar
2010-03-23 9:02 ` Yinghai Lu
2010-03-23 9:48 ` Ingo Molnar
2010-03-24 4:29 ` Benjamin Herrenschmidt
2010-03-24 4:44 ` Benjamin Herrenschmidt
2010-03-24 5:54 ` Yinghai Lu
2010-03-24 7:43 ` Benjamin Herrenschmidt
2010-03-24 18:37 ` Yinghai Lu
2010-03-24 9:00 ` Ingo Molnar
2010-03-24 9:32 ` Benjamin Herrenschmidt
2010-03-24 4:24 ` Benjamin Herrenschmidt
2010-03-24 6:05 ` Yinghai Lu
2010-03-22 20:47 ` [PATCH 06/20] early_res: seperate common memmap func from e820.c to fw_memmap.c Benjamin Herrenschmidt
2010-03-22 20:57 ` Ingo Molnar
2010-03-22 21:54 ` Benjamin Herrenschmidt
2010-03-23 8:53 ` Geert Uytterhoeven
2010-03-23 11:16 ` Ingo Molnar
2010-03-24 4:50 ` Benjamin Herrenschmidt
2010-03-24 5:47 ` Kyle Moffett
2010-03-22 21:57 ` Paul Mackerras
2010-03-22 21:07 ` Benjamin Herrenschmidt
2010-03-22 21:07 ` Benjamin Herrenschmidt
2010-03-22 21:01 ` Benjamin Herrenschmidt
2010-03-22 5:12 ` Benjamin Herrenschmidt
2010-03-22 6:09 ` Yinghai Lu
2010-03-22 7:05 ` Eric W. Biederman
2010-03-21 7:13 ` [PATCH 07/20] irq: move some interrupt arch_* functions into struct irq_chip Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-21 7:13 ` [PATCH 08/20] x86: fix out of order of gsi - full Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-21 7:13 ` [PATCH 09/20] x86: set nr_irqs_gsi only in probe_nr_irqs_gsi Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-21 7:13 ` [PATCH 10/20] x86: kill smpboot_hooks.h Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-21 7:13 ` [PATCH 11/20] x86: use vector_desc instead of vector_irq Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-21 7:13 ` [PATCH 12/20] genericirq: change ack/mask in irq_chip to take irq_desc instead of irq -- x86 and core Yinghai Lu
2010-03-21 7:13 ` [PATCH 13/20] genericirq: change ack/mask in irq_chip to take irq_desc instead of irq -- other arch Yinghai Lu
2010-03-21 7:13 ` [PATCH 14/20] genericirq: add set_irq_desc_chip/data Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-21 7:13 ` [PATCH 15/20] x86/iommu/dmar: update iommu/inter_remapping to use desc Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-21 7:13 ` [PATCH 16/20] x86: use num_processors for possible cpus Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-21 7:13 ` [PATCH 17/20] x86: make 32bit apic flat to physflat switch like 64bit Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-21 7:13 ` [PATCH 18/20] x86: remove arch_probe_nr_irqs Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-21 7:13 ` [PATCH 19/20] x86/pci: ioh new version read all at same time Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-22 16:16 ` Jesse Barnes
2010-03-22 16:16 ` Jesse Barnes
2010-03-22 19:32 ` Yinghai Lu
2010-03-22 19:32 ` Yinghai Lu
2010-03-21 7:13 ` [PATCH 20/20] x86/pci: add mmconf range into e820 for when it is from MSR with amd faml0h Yinghai Lu
2010-03-21 7:13 ` Yinghai Lu
2010-03-22 2:35 ` [PATCH 00/20] x86: early_res and irq_desc Benjamin Herrenschmidt
2010-03-22 3:26 ` Yinghai Lu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1269225435.8599.70.camel@pasglop \
--to=benh@kernel.crashing.org \
--cc=akpm@linux-foundation.org \
--cc=davem@davemloft.net \
--cc=ebiederm@xmission.com \
--cc=hpa@zytor.com \
--cc=jbarnes@virtuousgeek.org \
--cc=linux-arch@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=tglx@linutronix.de \
--cc=yinghai@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).