* [PATCH 4/6] Have x86_64 use add_active_range() and free_area_init_nodes
From: Mel Gorman @ 2006-05-08 14:11 UTC (permalink / raw)
To: akpm
Cc: davej, tony.luck, linux-mm, Mel Gorman, ak, bob.picco,
linux-kernel, linuxppc-dev
In-Reply-To: <20060508141030.26912.93090.sendpatchset@skynet>
Size zones and holes in an architecture-independent manner for x86_64.
arch/x86_64/Kconfig | 3 +
arch/x86_64/kernel/e820.c | 109 ++++++++++-----------------------------
arch/x86_64/kernel/setup.c | 7 ++
arch/x86_64/mm/init.c | 62 +---------------------
arch/x86_64/mm/k8topology.c | 3 +
arch/x86_64/mm/numa.c | 18 +++---
arch/x86_64/mm/srat.c | 11 ++-
include/asm-x86_64/e820.h | 5 -
include/asm-x86_64/proto.h | 2
9 files changed, 63 insertions(+), 157 deletions(-)
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/x86_64/Kconfig linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/arch/x86_64/Kconfig
--- linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/x86_64/Kconfig 2006-05-01 11:36:58.000000000 +0100
+++ linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/arch/x86_64/Kconfig 2006-05-08 09:20:01.000000000 +0100
@@ -73,6 +73,9 @@ config ARCH_MAY_HAVE_PC_FDC
bool
default y
+config ARCH_POPULATES_NODE_MAP
+ def_bool y
+
config DMI
bool
default y
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/x86_64/kernel/e820.c linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/arch/x86_64/kernel/e820.c
--- linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/x86_64/kernel/e820.c 2006-04-27 03:19:25.000000000 +0100
+++ linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/arch/x86_64/kernel/e820.c 2006-05-08 09:20:01.000000000 +0100
@@ -18,6 +18,7 @@
#include <linux/string.h>
#include <linux/kexec.h>
#include <linux/module.h>
+#include <linux/mm.h>
#include <asm/page.h>
#include <asm/e820.h>
@@ -155,58 +156,14 @@ unsigned long __init find_e820_area(unsi
return -1UL;
}
-/*
- * Free bootmem based on the e820 table for a node.
- */
-void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end)
-{
- int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- unsigned long last, addr;
-
- if (ei->type != E820_RAM ||
- ei->addr+ei->size <= start ||
- ei->addr >= end)
- continue;
-
- addr = round_up(ei->addr, PAGE_SIZE);
- if (addr < start)
- addr = start;
-
- last = round_down(ei->addr + ei->size, PAGE_SIZE);
- if (last >= end)
- last = end;
-
- if (last > addr && last-addr >= PAGE_SIZE)
- free_bootmem_node(pgdat, addr, last-addr);
- }
-}
-
/*
* Find the highest page frame number we have available
*/
unsigned long __init e820_end_of_ram(void)
{
- int i;
unsigned long end_pfn = 0;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- unsigned long start, end;
-
- start = round_up(ei->addr, PAGE_SIZE);
- end = round_down(ei->addr + ei->size, PAGE_SIZE);
- if (start >= end)
- continue;
- if (ei->type == E820_RAM) {
- if (end > end_pfn<<PAGE_SHIFT)
- end_pfn = end>>PAGE_SHIFT;
- } else {
- if (end > end_pfn_map<<PAGE_SHIFT)
- end_pfn_map = end>>PAGE_SHIFT;
- }
- }
+ end_pfn = find_max_pfn_with_active_regions();
if (end_pfn > end_pfn_map)
end_pfn_map = end_pfn;
@@ -220,40 +177,6 @@ unsigned long __init e820_end_of_ram(voi
return end_pfn;
}
-/*
- * Compute how much memory is missing in a range.
- * Unlike the other functions in this file the arguments are in page numbers.
- */
-unsigned long __init
-e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
-{
- unsigned long ram = 0;
- unsigned long start = start_pfn << PAGE_SHIFT;
- unsigned long end = end_pfn << PAGE_SHIFT;
- int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- unsigned long last, addr;
-
- if (ei->type != E820_RAM ||
- ei->addr+ei->size <= start ||
- ei->addr >= end)
- continue;
-
- addr = round_up(ei->addr, PAGE_SIZE);
- if (addr < start)
- addr = start;
-
- last = round_down(ei->addr + ei->size, PAGE_SIZE);
- if (last >= end)
- last = end;
-
- if (last > addr)
- ram += last - addr;
- }
- return ((end - start) - ram) >> PAGE_SHIFT;
-}
-
/*
* Mark e820 reserved areas as busy for the resource manager.
*/
@@ -288,6 +211,34 @@ void __init e820_reserve_resources(void)
}
}
+/* Walk the e820 map and register active regions within a node */
+void __init
+e820_register_active_regions(int nid, unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ int i;
+ unsigned long ei_startpfn, ei_endpfn;
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
+ ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE)
+ >> PAGE_SHIFT;
+ /* Skip if map is outside the node */
+ if (ei->type != E820_RAM ||
+ ei_endpfn <= start_pfn ||
+ ei_startpfn >= end_pfn)
+ continue;
+
+ /* Check for overlaps */
+ if (ei_startpfn < start_pfn)
+ ei_startpfn = start_pfn;
+ if (ei_endpfn > end_pfn)
+ ei_endpfn = end_pfn;
+
+ add_active_range(nid, ei_startpfn, ei_endpfn);
+ }
+}
+
/*
* Add a memory region to the kernel e820 map.
*/
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/x86_64/kernel/setup.c linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/arch/x86_64/kernel/setup.c
--- linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/x86_64/kernel/setup.c 2006-05-01 11:36:58.000000000 +0100
+++ linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/arch/x86_64/kernel/setup.c 2006-05-08 09:20:01.000000000 +0100
@@ -475,7 +475,8 @@ contig_initmem_init(unsigned long start_
if (bootmap == -1L)
panic("Cannot find bootmem map of size %ld\n",bootmap_size);
bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
- e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
+ e820_register_active_regions(0, start_pfn, end_pfn);
+ free_bootmem_with_active_regions(0, end_pfn);
reserve_bootmem(bootmap, bootmap_size);
}
#endif
@@ -645,6 +646,7 @@ void __init setup_arch(char **cmdline_p)
early_identify_cpu(&boot_cpu_data);
+ e820_register_active_regions(0, 0, -1UL);
/*
* partially used pages are not usable - thus
* we are rounding upwards:
@@ -668,6 +670,9 @@ void __init setup_arch(char **cmdline_p)
acpi_boot_table_init();
#endif
+ /* Remove active ranges so rediscovery with NUMA-awareness happens */
+ remove_all_active_ranges();
+
#ifdef CONFIG_ACPI_NUMA
/*
* Parse SRAT to discover nodes.
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/x86_64/mm/init.c linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/arch/x86_64/mm/init.c
--- linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/x86_64/mm/init.c 2006-05-01 11:36:58.000000000 +0100
+++ linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/arch/x86_64/mm/init.c 2006-05-08 09:20:01.000000000 +0100
@@ -406,69 +406,12 @@ void __cpuinit zap_low_mappings(int cpu)
__flush_tlb_all();
}
-/* Compute zone sizes for the DMA and DMA32 zones in a node. */
-__init void
-size_zones(unsigned long *z, unsigned long *h,
- unsigned long start_pfn, unsigned long end_pfn)
-{
- int i;
- unsigned long w;
-
- for (i = 0; i < MAX_NR_ZONES; i++)
- z[i] = 0;
-
- if (start_pfn < MAX_DMA_PFN)
- z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
- if (start_pfn < MAX_DMA32_PFN) {
- unsigned long dma32_pfn = MAX_DMA32_PFN;
- if (dma32_pfn > end_pfn)
- dma32_pfn = end_pfn;
- z[ZONE_DMA32] = dma32_pfn - start_pfn;
- }
- z[ZONE_NORMAL] = end_pfn - start_pfn;
-
- /* Remove lower zones from higher ones. */
- w = 0;
- for (i = 0; i < MAX_NR_ZONES; i++) {
- if (z[i])
- z[i] -= w;
- w += z[i];
- }
-
- /* Compute holes */
- w = start_pfn;
- for (i = 0; i < MAX_NR_ZONES; i++) {
- unsigned long s = w;
- w += z[i];
- h[i] = e820_hole_size(s, w);
- }
-
- /* Add the space pace needed for mem_map to the holes too. */
- for (i = 0; i < MAX_NR_ZONES; i++)
- h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
-
- /* The 16MB DMA zone has the kernel and other misc mappings.
- Account them too */
- if (h[ZONE_DMA]) {
- h[ZONE_DMA] += dma_reserve;
- if (h[ZONE_DMA] >= z[ZONE_DMA]) {
- printk(KERN_WARNING
- "Kernel too large and filling up ZONE_DMA?\n");
- h[ZONE_DMA] = z[ZONE_DMA];
- }
- }
-}
-
#ifndef CONFIG_NUMA
void __init paging_init(void)
{
- unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
-
memory_present(0, 0, end_pfn);
sparse_init();
- size_zones(zones, holes, 0, end_pfn);
- free_area_init_node(0, NODE_DATA(0), zones,
- __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
+ free_area_init_nodes(MAX_DMA_PFN, MAX_DMA32_PFN, end_pfn, end_pfn);
}
#endif
@@ -620,7 +563,8 @@ void __init mem_init(void)
#else
totalram_pages = free_all_bootmem();
#endif
- reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
+ reservedpages = end_pfn - totalram_pages -
+ absent_pages_in_range(0, end_pfn);
after_bootmem = 1;
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/x86_64/mm/k8topology.c linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/arch/x86_64/mm/k8topology.c
--- linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/x86_64/mm/k8topology.c 2006-04-27 03:19:25.000000000 +0100
+++ linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/arch/x86_64/mm/k8topology.c 2006-05-08 09:20:01.000000000 +0100
@@ -146,6 +146,9 @@ int __init k8_scan_nodes(unsigned long s
nodes[nodeid].start = base;
nodes[nodeid].end = limit;
+ e820_register_active_regions(nodeid,
+ nodes[nodeid].start >> PAGE_SHIFT,
+ nodes[nodeid].end >> PAGE_SHIFT);
prevbase = base;
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/x86_64/mm/numa.c linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/arch/x86_64/mm/numa.c
--- linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/x86_64/mm/numa.c 2006-04-27 03:19:25.000000000 +0100
+++ linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/arch/x86_64/mm/numa.c 2006-05-08 09:20:01.000000000 +0100
@@ -161,7 +161,7 @@ void __init setup_node_bootmem(int nodei
bootmap_start >> PAGE_SHIFT,
start_pfn, end_pfn);
- e820_bootmem_free(NODE_DATA(nodeid), start, end);
+ free_bootmem_with_active_regions(nodeid, end);
reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
@@ -175,13 +175,11 @@ void __init setup_node_bootmem(int nodei
void __init setup_node_zones(int nodeid)
{
unsigned long start_pfn, end_pfn, memmapsize, limit;
- unsigned long zones[MAX_NR_ZONES];
- unsigned long holes[MAX_NR_ZONES];
start_pfn = node_start_pfn(nodeid);
end_pfn = node_end_pfn(nodeid);
- Dprintk(KERN_INFO "Setting up node %d %lx-%lx\n",
+ Dprintk(KERN_INFO "Setting up memmap for node %d %lx-%lx\n",
nodeid, start_pfn, end_pfn);
/* Try to allocate mem_map at end to not fill up precious <4GB
@@ -195,10 +193,6 @@ void __init setup_node_zones(int nodeid)
round_down(limit - memmapsize, PAGE_SIZE),
limit);
#endif
-
- size_zones(zones, holes, start_pfn, end_pfn);
- free_area_init_node(nodeid, NODE_DATA(nodeid), zones,
- start_pfn, holes);
}
void __init numa_init_array(void)
@@ -259,8 +253,11 @@ static int numa_emulation(unsigned long
printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
return -1;
}
- for_each_online_node(i)
+ for_each_online_node(i) {
+ e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
+ nodes[i].end >> PAGE_SHIFT);
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+ }
numa_init_array();
return 0;
}
@@ -299,6 +296,7 @@ void __init numa_initmem_init(unsigned l
for (i = 0; i < NR_CPUS; i++)
numa_set_node(i, 0);
node_to_cpumask[0] = cpumask_of_cpu(0);
+ e820_register_active_regions(0, start_pfn, end_pfn);
setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
}
@@ -346,6 +344,8 @@ void __init paging_init(void)
for_each_online_node(i) {
setup_node_zones(i);
}
+
+ free_area_init_nodes(MAX_DMA_PFN, MAX_DMA32_PFN, end_pfn, end_pfn);
}
/* [numa=off] */
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/x86_64/mm/srat.c linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/arch/x86_64/mm/srat.c
--- linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/x86_64/mm/srat.c 2006-05-01 11:36:58.000000000 +0100
+++ linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/arch/x86_64/mm/srat.c 2006-05-08 09:20:01.000000000 +0100
@@ -87,6 +87,7 @@ static __init void bad_srat(void)
apicid_to_node[i] = NUMA_NO_NODE;
for (i = 0; i < MAX_NUMNODES; i++)
nodes_add[i].start = nodes[i].end = 0;
+ remove_all_active_ranges();
}
static __init inline int srat_disabled(void)
@@ -168,7 +169,7 @@ static int hotadd_enough_memory(struct b
if (mem < 0)
return 0;
- allowed = (end_pfn - e820_hole_size(0, end_pfn)) * PAGE_SIZE;
+ allowed = (end_pfn - absent_pages_in_range(0, end_pfn)) * PAGE_SIZE;
allowed = (allowed / 100) * hotadd_percent;
if (allocated + mem > allowed) {
/* Give them at least part of their hotadd memory upto hotadd_percent
@@ -216,7 +217,7 @@ static int reserve_hotadd(int node, unsi
}
/* This check might be a bit too strict, but I'm keeping it for now. */
- if (e820_hole_size(s_pfn, e_pfn) != e_pfn - s_pfn) {
+ if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
printk(KERN_ERR "SRAT: Hotplug area has existing memory\n");
return -1;
}
@@ -310,6 +311,8 @@ acpi_numa_memory_affinity_init(struct ac
printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
nd->start, nd->end);
+ e820_register_active_regions(node, nd->start >> PAGE_SHIFT,
+ nd->end >> PAGE_SHIFT);
#ifdef RESERVE_HOTADD
if (ma->flags.hot_pluggable && reserve_hotadd(node, start, end) < 0) {
@@ -334,13 +337,13 @@ static int nodes_cover_memory(void)
unsigned long s = nodes[i].start >> PAGE_SHIFT;
unsigned long e = nodes[i].end >> PAGE_SHIFT;
pxmram += e - s;
- pxmram -= e820_hole_size(s, e);
+ pxmram -= absent_pages_in_range(s, e);
pxmram -= nodes_add[i].end - nodes_add[i].start;
if ((long)pxmram < 0)
pxmram = 0;
}
- e820ram = end_pfn - e820_hole_size(0, end_pfn);
+ e820ram = end_pfn - absent_pages_in_range(0, end_pfn);
/* We seem to lose 3 pages somewhere. Allow a bit of slack. */
if ((long)(e820ram - pxmram) >= 1*1024*1024) {
printk(KERN_ERR
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/include/asm-x86_64/e820.h linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/include/asm-x86_64/e820.h
--- linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/include/asm-x86_64/e820.h 2006-04-27 03:19:25.000000000 +0100
+++ linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/include/asm-x86_64/e820.h 2006-05-08 09:20:01.000000000 +0100
@@ -50,10 +50,9 @@ extern void e820_print_map(char *who);
extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
-extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
extern void e820_setup_gap(void);
-extern unsigned long e820_hole_size(unsigned long start_pfn,
- unsigned long end_pfn);
+extern void e820_register_active_regions(int nid,
+ unsigned long start_pfn, unsigned long end_pfn);
extern void __init parse_memopt(char *p, char **end);
extern void __init parse_memmapopt(char *p, char **end);
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/include/asm-x86_64/proto.h linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/include/asm-x86_64/proto.h
--- linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/include/asm-x86_64/proto.h 2006-05-01 11:37:01.000000000 +0100
+++ linux-2.6.17-rc3-mm1-104-x86_64_use_init_nodes/include/asm-x86_64/proto.h 2006-05-08 09:20:01.000000000 +0100
@@ -24,8 +24,6 @@ extern void mtrr_bp_init(void);
#define mtrr_bp_init() do {} while (0)
#endif
extern void init_memory_mapping(unsigned long start, unsigned long end);
-extern void size_zones(unsigned long *z, unsigned long *h,
- unsigned long start_pfn, unsigned long end_pfn);
extern void system_call(void);
extern int kernel_syscall(void);
^ permalink raw reply
* [PATCH 3/6] Have x86 use add_active_range() and free_area_init_nodes
From: Mel Gorman @ 2006-05-08 14:11 UTC (permalink / raw)
To: akpm
Cc: davej, tony.luck, linuxppc-dev, Mel Gorman, linux-kernel,
bob.picco, ak, linux-mm
In-Reply-To: <20060508141030.26912.93090.sendpatchset@skynet>
Size zones and holes in an architecture-independent manner for x86.
Kconfig | 8 +---
kernel/setup.c | 19 +++------
kernel/srat.c | 100 +---------------------------------------------------
mm/discontig.c | 65 +++++++--------------------------
4 files changed, 25 insertions(+), 167 deletions(-)
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/Kconfig linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/Kconfig
--- linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/Kconfig 2006-05-01 11:36:54.000000000 +0100
+++ linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/Kconfig 2006-05-08 09:18:57.000000000 +0100
@@ -577,12 +577,10 @@ config ARCH_SELECT_MEMORY_MODEL
def_bool y
depends on ARCH_SPARSEMEM_ENABLE
-source "mm/Kconfig"
+config ARCH_POPULATES_NODE_MAP
+ def_bool y
-config HAVE_ARCH_EARLY_PFN_TO_NID
- bool
- default y
- depends on NUMA
+source "mm/Kconfig"
config HIGHPTE
bool "Allocate 3rd-level pagetables from highmem"
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/kernel/setup.c linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/kernel/setup.c
--- linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/kernel/setup.c 2006-05-01 11:36:54.000000000 +0100
+++ linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/kernel/setup.c 2006-05-08 09:18:57.000000000 +0100
@@ -1207,22 +1207,15 @@ static unsigned long __init setup_memory
void __init zone_sizes_init(void)
{
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
- unsigned int max_dma, low;
+ unsigned int max_dma;
+#ifndef CONFIG_HIGHMEM
+ unsigned long highend_pfn = max_low_pfn;
+#endif
max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
- low = max_low_pfn;
- if (low < max_dma)
- zones_size[ZONE_DMA] = low;
- else {
- zones_size[ZONE_DMA] = max_dma;
- zones_size[ZONE_NORMAL] = low - max_dma;
-#ifdef CONFIG_HIGHMEM
- zones_size[ZONE_HIGHMEM] = highend_pfn - low;
-#endif
- }
- free_area_init(zones_size);
+ add_active_range(0, 0, highend_pfn);
+ free_area_init_nodes(max_dma, max_dma, max_low_pfn, highend_pfn);
}
#else
extern unsigned long __init setup_memory(void);
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/kernel/srat.c linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/kernel/srat.c
--- linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/kernel/srat.c 2006-05-01 11:36:54.000000000 +0100
+++ linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/kernel/srat.c 2006-05-08 09:18:57.000000000 +0100
@@ -55,8 +55,6 @@ struct node_memory_chunk_s {
static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS];
static int num_memory_chunks; /* total number of memory chunks */
-static int zholes_size_init;
-static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES];
extern void * boot_ioremap(unsigned long, unsigned long);
@@ -136,50 +134,6 @@ static void __init parse_memory_affinity
"enabled and removable" : "enabled" ) );
}
-#if MAX_NR_ZONES != 4
-#error "MAX_NR_ZONES != 4, chunk_to_zone requires review"
-#endif
-/* Take a chunk of pages from page frame cstart to cend and count the number
- * of pages in each zone, returned via zones[].
- */
-static __init void chunk_to_zones(unsigned long cstart, unsigned long cend,
- unsigned long *zones)
-{
- unsigned long max_dma;
- extern unsigned long max_low_pfn;
-
- int z;
- unsigned long rend;
-
- /* FIXME: MAX_DMA_ADDRESS and max_low_pfn are trying to provide
- * similarly scoped information and should be handled in a consistant
- * manner.
- */
- max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
- /* Split the hole into the zones in which it falls. Repeatedly
- * take the segment in which the remaining hole starts, round it
- * to the end of that zone.
- */
- memset(zones, 0, MAX_NR_ZONES * sizeof(long));
- while (cstart < cend) {
- if (cstart < max_dma) {
- z = ZONE_DMA;
- rend = (cend < max_dma)? cend : max_dma;
-
- } else if (cstart < max_low_pfn) {
- z = ZONE_NORMAL;
- rend = (cend < max_low_pfn)? cend : max_low_pfn;
-
- } else {
- z = ZONE_HIGHMEM;
- rend = cend;
- }
- zones[z] += rend - cstart;
- cstart = rend;
- }
-}
-
/*
* The SRAT table always lists ascending addresses, so can always
* assume that the first "start" address that you see is the real
@@ -224,7 +178,6 @@ static int __init acpi20_parse_srat(stru
memset(pxm_bitmap, 0, sizeof(pxm_bitmap)); /* init proximity domain bitmap */
memset(node_memory_chunk, 0, sizeof(node_memory_chunk));
- memset(zholes_size, 0, sizeof(zholes_size));
num_memory_chunks = 0;
while (p < end) {
@@ -288,6 +241,7 @@ static int __init acpi20_parse_srat(stru
printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
node_read_chunk(chunk->nid, chunk);
+ add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn);
}
for_each_online_node(nid) {
@@ -396,57 +350,7 @@ int __init get_memcfg_from_srat(void)
return acpi20_parse_srat((struct acpi_table_srat *)header);
}
out_err:
+ remove_all_active_ranges();
printk("failed to get NUMA memory information from SRAT table\n");
return 0;
}
-
-/* For each node run the memory list to determine whether there are
- * any memory holes. For each hole determine which ZONE they fall
- * into.
- *
- * NOTE#1: this requires knowledge of the zone boundries and so
- * _cannot_ be performed before those are calculated in setup_memory.
- *
- * NOTE#2: we rely on the fact that the memory chunks are ordered by
- * start pfn number during setup.
- */
-static void __init get_zholes_init(void)
-{
- int nid;
- int c;
- int first;
- unsigned long end = 0;
-
- for_each_online_node(nid) {
- first = 1;
- for (c = 0; c < num_memory_chunks; c++){
- if (node_memory_chunk[c].nid == nid) {
- if (first) {
- end = node_memory_chunk[c].end_pfn;
- first = 0;
-
- } else {
- /* Record any gap between this chunk
- * and the previous chunk on this node
- * against the zones it spans.
- */
- chunk_to_zones(end,
- node_memory_chunk[c].start_pfn,
- &zholes_size[nid * MAX_NR_ZONES]);
- }
- }
- }
- }
-}
-
-unsigned long * __init get_zholes_size(int nid)
-{
- if (!zholes_size_init) {
- zholes_size_init++;
- get_zholes_init();
- }
- if (nid >= MAX_NUMNODES || !node_online(nid))
- printk("%s: nid = %d is invalid/offline. num_online_nodes = %d",
- __FUNCTION__, nid, num_online_nodes());
- return &zholes_size[nid * MAX_NR_ZONES];
-}
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/mm/discontig.c linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/mm/discontig.c
--- linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/mm/discontig.c 2006-04-27 03:19:25.000000000 +0100
+++ linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/mm/discontig.c 2006-05-08 09:18:57.000000000 +0100
@@ -157,21 +157,6 @@ static void __init find_max_pfn_node(int
BUG();
}
-/* Find the owning node for a pfn. */
-int early_pfn_to_nid(unsigned long pfn)
-{
- int nid;
-
- for_each_node(nid) {
- if (node_end_pfn[nid] == 0)
- break;
- if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn)
- return nid;
- }
-
- return 0;
-}
-
/*
* Allocate memory for the pg_data_t for this node via a crude pre-bootmem
* method. For node zero take this from the bottom of memory, for
@@ -227,6 +212,8 @@ static unsigned long calculate_numa_rema
unsigned long pfn;
for_each_online_node(nid) {
+ unsigned old_end_pfn = node_end_pfn[nid];
+
/*
* The acpi/srat node info can show hot-add memroy zones
* where memory could be added but not currently present.
@@ -276,6 +263,7 @@ static unsigned long calculate_numa_rema
node_end_pfn[nid] -= size;
node_remap_start_pfn[nid] = node_end_pfn[nid];
+ shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]);
}
printk("Reserving total of %ld pages for numa KVA remap\n",
reserve_pages);
@@ -352,45 +340,20 @@ unsigned long __init setup_memory(void)
void __init zone_sizes_init(void)
{
int nid;
+ unsigned long max_dma_pfn;
-
- for_each_online_node(nid) {
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
- unsigned long *zholes_size;
- unsigned int max_dma;
-
- unsigned long low = max_low_pfn;
- unsigned long start = node_start_pfn[nid];
- unsigned long high = node_end_pfn[nid];
-
- max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
- if (node_has_online_mem(nid)){
- if (start > low) {
-#ifdef CONFIG_HIGHMEM
- BUG_ON(start > high);
- zones_size[ZONE_HIGHMEM] = high - start;
-#endif
- } else {
- if (low < max_dma)
- zones_size[ZONE_DMA] = low;
- else {
- BUG_ON(max_dma > low);
- BUG_ON(low > high);
- zones_size[ZONE_DMA] = max_dma;
- zones_size[ZONE_NORMAL] = low - max_dma;
-#ifdef CONFIG_HIGHMEM
- zones_size[ZONE_HIGHMEM] = high - low;
-#endif
- }
- }
+ /* If SRAT has not registered memory, register it now */
+ if (find_max_pfn_with_active_regions() == 0) {
+ for_each_online_node(nid) {
+ if (node_has_online_mem(nid))
+ add_active_range(nid, node_start_pfn[nid],
+ node_end_pfn[nid]);
}
-
- zholes_size = get_zholes_size(nid);
-
- free_area_init_node(nid, NODE_DATA(nid), zones_size, start,
- zholes_size);
}
+
+ max_dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+ free_area_init_nodes(max_dma_pfn, max_dma_pfn,
+ max_low_pfn, highend_pfn);
return;
}
^ permalink raw reply
* [PATCH 2/6] Have Power use add_active_range() and free_area_init_nodes()
From: Mel Gorman @ 2006-05-08 14:11 UTC (permalink / raw)
To: akpm
Cc: davej, tony.luck, linux-mm, Mel Gorman, ak, bob.picco,
linux-kernel, linuxppc-dev
In-Reply-To: <20060508141030.26912.93090.sendpatchset@skynet>
Size zones and holes in an architecture-independent manner for Power.
powerpc/Kconfig | 13 ++--
powerpc/mm/mem.c | 53 ++++++----------
powerpc/mm/numa.c | 157 ++++---------------------------------------------
ppc/Kconfig | 3
ppc/mm/init.c | 26 ++++----
5 files changed, 62 insertions(+), 190 deletions(-)
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/powerpc/Kconfig linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/powerpc/Kconfig
--- linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/powerpc/Kconfig 2006-05-01 11:36:58.000000000 +0100
+++ linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/powerpc/Kconfig 2006-05-08 09:17:57.000000000 +0100
@@ -676,11 +676,16 @@ config ARCH_SPARSEMEM_DEFAULT
def_bool y
depends on SMP && PPC_PSERIES
-source "mm/Kconfig"
-
-config HAVE_ARCH_EARLY_PFN_TO_NID
+config ARCH_POPULATES_NODE_MAP
def_bool y
- depends on NEED_MULTIPLE_NODES
+
+# Value of 256 is MAX_LMB_REGIONS * 2
+config MAX_ACTIVE_REGIONS
+ int
+ default 256
+ depends on ARCH_POPULATES_NODE_MAP
+
+source "mm/Kconfig"
config ARCH_MEMORY_PROBE
def_bool y
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/powerpc/mm/mem.c linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/powerpc/mm/mem.c
--- linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/powerpc/mm/mem.c 2006-05-01 11:36:58.000000000 +0100
+++ linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/powerpc/mm/mem.c 2006-05-08 09:17:57.000000000 +0100
@@ -257,20 +257,22 @@ void __init do_init_bootmem(void)
boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);
+ /* Add active regions with valid PFNs */
+ for (i = 0; i < lmb.memory.cnt; i++) {
+ unsigned long start_pfn, end_pfn;
+ start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
+ end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
+ add_active_range(0, start_pfn, end_pfn);
+ }
+
/* Add all physical memory to the bootmem map, mark each area
* present.
*/
- for (i = 0; i < lmb.memory.cnt; i++) {
- unsigned long base = lmb.memory.region[i].base;
- unsigned long size = lmb_size_bytes(&lmb.memory, i);
#ifdef CONFIG_HIGHMEM
- if (base >= total_lowmem)
- continue;
- if (base + size > total_lowmem)
- size = total_lowmem - base;
+ free_bootmem_with_active_regions(0, total_lowmem >> PAGE_SHIFT);
+#else
+ free_bootmem_with_active_regions(0, max_pfn);
#endif
- free_bootmem(base, size);
- }
/* reserve the sections we're already using */
for (i = 0; i < lmb.reserved.cnt; i++)
@@ -278,9 +280,8 @@ void __init do_init_bootmem(void)
lmb_size_bytes(&lmb.reserved, i));
/* XXX need to clip this if using highmem? */
- for (i = 0; i < lmb.memory.cnt; i++)
- memory_present(0, lmb_start_pfn(&lmb.memory, i),
- lmb_end_pfn(&lmb.memory, i));
+ sparse_memory_present_with_active_regions(0);
+
init_bootmem_done = 1;
}
@@ -289,8 +290,6 @@ void __init do_init_bootmem(void)
*/
void __init paging_init(void)
{
- unsigned long zones_size[MAX_NR_ZONES];
- unsigned long zholes_size[MAX_NR_ZONES];
unsigned long total_ram = lmb_phys_mem_size();
unsigned long top_of_ram = lmb_end_of_DRAM();
@@ -308,26 +307,18 @@ void __init paging_init(void)
top_of_ram, total_ram);
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
(top_of_ram - total_ram) >> 20);
- /*
- * All pages are DMA-able so we put them all in the DMA zone.
- */
- memset(zones_size, 0, sizeof(zones_size));
- memset(zholes_size, 0, sizeof(zholes_size));
-
- zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
- zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT;
-
#ifdef CONFIG_HIGHMEM
- zones_size[ZONE_DMA] = total_lowmem >> PAGE_SHIFT;
- zones_size[ZONE_HIGHMEM] = (total_memory - total_lowmem) >> PAGE_SHIFT;
- zholes_size[ZONE_HIGHMEM] = (top_of_ram - total_ram) >> PAGE_SHIFT;
+ free_area_init_nodes(total_lowmem >> PAGE_SHIFT,
+ total_lowmem >> PAGE_SHIFT,
+ total_lowmem >> PAGE_SHIFT,
+ top_of_ram >> PAGE_SHIFT);
#else
- zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
- zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT;
-#endif /* CONFIG_HIGHMEM */
+ free_area_init_nodes(top_of_ram >> PAGE_SHIFT,
+ top_of_ram >> PAGE_SHIFT,
+ top_of_ram >> PAGE_SHIFT,
+ top_of_ram >> PAGE_SHIFT);
+#endif
- free_area_init_node(0, NODE_DATA(0), zones_size,
- __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
}
#endif /* ! CONFIG_NEED_MULTIPLE_NODES */
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/powerpc/mm/numa.c linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/powerpc/mm/numa.c
--- linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/powerpc/mm/numa.c 2006-05-01 11:36:58.000000000 +0100
+++ linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/powerpc/mm/numa.c 2006-05-08 09:17:57.000000000 +0100
@@ -39,96 +39,6 @@ static bootmem_data_t __initdata plat_no
static int min_common_depth;
static int n_mem_addr_cells, n_mem_size_cells;
-/*
- * We need somewhere to store start/end/node for each region until we have
- * allocated the real node_data structures.
- */
-#define MAX_REGIONS (MAX_LMB_REGIONS*2)
-static struct {
- unsigned long start_pfn;
- unsigned long end_pfn;
- int nid;
-} init_node_data[MAX_REGIONS] __initdata;
-
-int __init early_pfn_to_nid(unsigned long pfn)
-{
- unsigned int i;
-
- for (i = 0; init_node_data[i].end_pfn; i++) {
- unsigned long start_pfn = init_node_data[i].start_pfn;
- unsigned long end_pfn = init_node_data[i].end_pfn;
-
- if ((start_pfn <= pfn) && (pfn < end_pfn))
- return init_node_data[i].nid;
- }
-
- return -1;
-}
-
-void __init add_region(unsigned int nid, unsigned long start_pfn,
- unsigned long pages)
-{
- unsigned int i;
-
- dbg("add_region nid %d start_pfn 0x%lx pages 0x%lx\n",
- nid, start_pfn, pages);
-
- for (i = 0; init_node_data[i].end_pfn; i++) {
- if (init_node_data[i].nid != nid)
- continue;
- if (init_node_data[i].end_pfn == start_pfn) {
- init_node_data[i].end_pfn += pages;
- return;
- }
- if (init_node_data[i].start_pfn == (start_pfn + pages)) {
- init_node_data[i].start_pfn -= pages;
- return;
- }
- }
-
- /*
- * Leave last entry NULL so we dont iterate off the end (we use
- * entry.end_pfn to terminate the walk).
- */
- if (i >= (MAX_REGIONS - 1)) {
- printk(KERN_ERR "WARNING: too many memory regions in "
- "numa code, truncating\n");
- return;
- }
-
- init_node_data[i].start_pfn = start_pfn;
- init_node_data[i].end_pfn = start_pfn + pages;
- init_node_data[i].nid = nid;
-}
-
-/* We assume init_node_data has no overlapping regions */
-void __init get_region(unsigned int nid, unsigned long *start_pfn,
- unsigned long *end_pfn, unsigned long *pages_present)
-{
- unsigned int i;
-
- *start_pfn = -1UL;
- *end_pfn = *pages_present = 0;
-
- for (i = 0; init_node_data[i].end_pfn; i++) {
- if (init_node_data[i].nid != nid)
- continue;
-
- *pages_present += init_node_data[i].end_pfn -
- init_node_data[i].start_pfn;
-
- if (init_node_data[i].start_pfn < *start_pfn)
- *start_pfn = init_node_data[i].start_pfn;
-
- if (init_node_data[i].end_pfn > *end_pfn)
- *end_pfn = init_node_data[i].end_pfn;
- }
-
- /* We didnt find a matching region, return start/end as 0 */
- if (*start_pfn == -1UL)
- *start_pfn = 0;
-}
-
static void __cpuinit map_cpu_to_node(int cpu, int node)
{
numa_cpu_lookup_table[cpu] = node;
@@ -471,8 +381,8 @@ new_range:
continue;
}
- add_region(nid, start >> PAGE_SHIFT,
- size >> PAGE_SHIFT);
+ add_active_range(nid, start >> PAGE_SHIFT,
+ (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
if (--ranges)
goto new_range;
@@ -485,6 +395,7 @@ static void __init setup_nonnuma(void)
{
unsigned long top_of_ram = lmb_end_of_DRAM();
unsigned long total_ram = lmb_phys_mem_size();
+ unsigned long start_pfn, end_pfn;
unsigned int i;
printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
@@ -492,9 +403,11 @@ static void __init setup_nonnuma(void)
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
(top_of_ram - total_ram) >> 20);
- for (i = 0; i < lmb.memory.cnt; ++i)
- add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT,
- lmb_size_pages(&lmb.memory, i));
+ for (i = 0; i < lmb.memory.cnt; ++i) {
+ start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
+ end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
+ add_active_range(0, start_pfn, end_pfn);
+ }
node_set_online(0);
}
@@ -632,11 +545,11 @@ void __init do_init_bootmem(void)
(void *)(unsigned long)boot_cpuid);
for_each_online_node(nid) {
- unsigned long start_pfn, end_pfn, pages_present;
+ unsigned long start_pfn, end_pfn;
unsigned long bootmem_paddr;
unsigned long bootmap_pages;
- get_region(nid, &start_pfn, &end_pfn, &pages_present);
+ get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
/* Allocate the node structure node local if possible */
NODE_DATA(nid) = careful_allocation(nid,
@@ -669,19 +582,7 @@ void __init do_init_bootmem(void)
init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
start_pfn, end_pfn);
- /* Add free regions on this node */
- for (i = 0; init_node_data[i].end_pfn; i++) {
- unsigned long start, end;
-
- if (init_node_data[i].nid != nid)
- continue;
-
- start = init_node_data[i].start_pfn << PAGE_SHIFT;
- end = init_node_data[i].end_pfn << PAGE_SHIFT;
-
- dbg("free_bootmem %lx %lx\n", start, end - start);
- free_bootmem_node(NODE_DATA(nid), start, end - start);
- }
+ free_bootmem_with_active_regions(nid, end_pfn);
/* Mark reserved regions on this node */
for (i = 0; i < lmb.reserved.cnt; i++) {
@@ -712,44 +613,14 @@ void __init do_init_bootmem(void)
}
}
- /* Add regions into sparsemem */
- for (i = 0; init_node_data[i].end_pfn; i++) {
- unsigned long start, end;
-
- if (init_node_data[i].nid != nid)
- continue;
-
- start = init_node_data[i].start_pfn;
- end = init_node_data[i].end_pfn;
-
- memory_present(nid, start, end);
- }
+ sparse_memory_present_with_active_regions(nid);
}
}
void __init paging_init(void)
{
- unsigned long zones_size[MAX_NR_ZONES];
- unsigned long zholes_size[MAX_NR_ZONES];
- int nid;
-
- memset(zones_size, 0, sizeof(zones_size));
- memset(zholes_size, 0, sizeof(zholes_size));
-
- for_each_online_node(nid) {
- unsigned long start_pfn, end_pfn, pages_present;
-
- get_region(nid, &start_pfn, &end_pfn, &pages_present);
-
- zones_size[ZONE_DMA] = end_pfn - start_pfn;
- zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - pages_present;
-
- dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid,
- zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]);
-
- free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn,
- zholes_size);
- }
+ unsigned long end_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
+ free_area_init_nodes(end_pfn, end_pfn, end_pfn, end_pfn);
}
static int __init early_numa(char *p)
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/ppc/Kconfig linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/ppc/Kconfig
--- linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/ppc/Kconfig 2006-04-27 03:19:25.000000000 +0100
+++ linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/ppc/Kconfig 2006-05-08 09:17:57.000000000 +0100
@@ -949,6 +949,9 @@ config NR_CPUS
config HIGHMEM
bool "High memory support"
+config ARCH_POPULATES_NODE_MAP
+ def_bool y
+
source kernel/Kconfig.hz
source kernel/Kconfig.preempt
source "mm/Kconfig"
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/ppc/mm/init.c linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/ppc/mm/init.c
--- linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/ppc/mm/init.c 2006-04-27 03:19:25.000000000 +0100
+++ linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/ppc/mm/init.c 2006-05-08 09:17:57.000000000 +0100
@@ -359,8 +359,7 @@ void __init do_init_bootmem(void)
*/
void __init paging_init(void)
{
- unsigned long zones_size[MAX_NR_ZONES], i;
-
+ unsigned long start_pfn, end_pfn;
#ifdef CONFIG_HIGHMEM
map_page(PKMAP_BASE, 0, 0); /* XXX gross */
pkmap_page_table = pte_offset_kernel(pmd_offset(pgd_offset_k
@@ -370,19 +369,22 @@ void __init paging_init(void)
(KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), KMAP_FIX_BEGIN);
kmap_prot = PAGE_KERNEL;
#endif /* CONFIG_HIGHMEM */
-
- /*
- * All pages are DMA-able so we put them all in the DMA zone.
- */
- zones_size[ZONE_DMA] = total_lowmem >> PAGE_SHIFT;
- for (i = 1; i < MAX_NR_ZONES; i++)
- zones_size[i] = 0;
+ /* All pages are DMA-able so we put them all in the DMA zone. */
+ start_pfn = __pa(PAGE_OFFSET) >> PAGE_SHIFT;
+ end_pfn = start_pfn + (total_memory >> PAGE_SHIFT);
+ add_active_range(0, start_pfn, end_pfn);
#ifdef CONFIG_HIGHMEM
- zones_size[ZONE_HIGHMEM] = (total_memory - total_lowmem) >> PAGE_SHIFT;
+ free_area_init_nodes(total_lowmem >> PAGE_SHIFT,
+ total_lowmem >> PAGE_SHIFT,
+ total_lowmem >> PAGE_SHIFT,
+ total_memory >> PAGE_SHIFT);
+#else
+ free_area_init_nodes(total_memory >> PAGE_SHIFT,
+ total_memory >> PAGE_SHIFT,
+ total_memory >> PAGE_SHIFT,
+ total_memory >> PAGE_SHIFT);
#endif /* CONFIG_HIGHMEM */
-
- free_area_init(zones_size);
}
void __init mem_init(void)
^ permalink raw reply
* [PATCH 1/6] Introduce mechanism for registering active regions of memory
From: Mel Gorman @ 2006-05-08 14:10 UTC (permalink / raw)
To: akpm
Cc: davej, tony.luck, linuxppc-dev, Mel Gorman, linux-kernel,
bob.picco, ak, linux-mm
In-Reply-To: <20060508141030.26912.93090.sendpatchset@skynet>
This patch defines the structure to represent an active range of page
frames within a node in an architecture independent manner. Architectures
are expected to register active ranges of PFNs using add_active_range(nid,
start_pfn, end_pfn) and call free_area_init_nodes() passing the PFNs of
the end of each zone.
include/linux/mm.h | 34 +++
include/linux/mmzone.h | 10 -
mm/page_alloc.c | 403 +++++++++++++++++++++++++++++++++++++++++---
3 files changed, 422 insertions(+), 25 deletions(-)
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-clean/include/linux/mm.h linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/include/linux/mm.h
--- linux-2.6.17-rc3-mm1-clean/include/linux/mm.h 2006-05-01 11:37:01.000000000 +0100
+++ linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/include/linux/mm.h 2006-05-08 09:16:57.000000000 +0100
@@ -916,6 +916,40 @@ extern void free_area_init(unsigned long
extern void free_area_init_node(int nid, pg_data_t *pgdat,
unsigned long * zones_size, unsigned long zone_start_pfn,
unsigned long *zholes_size);
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+/*
+ * Any architecture that supports CONFIG_ARCH_POPULATES_NODE_MAP can
+ * initialise zone and hole information by
+ *
+ * for_all_memory_regions()
+ * add_active_range(nid, start, end)
+ * free_area_init_nodes(max_dma, max_dma32, max_low_pfn, max_pfn);
+ *
+ * Optionally, free_bootmem_with_active_regions() can be used to call
+ * free_bootmem_node() after active regions have been registered with
+ * add_active_range(). Similarly, sparse_memory_present_with_active_regions()
+ * calls memory_present() for active regions when SPARSEMEM is enabled
+ */
+extern void free_area_init_nodes(unsigned long max_dma_pfn,
+ unsigned long max_dma32_pfn,
+ unsigned long max_low_pfn,
+ unsigned long max_high_pfn);
+extern void add_active_range(unsigned int nid, unsigned long start_pfn,
+ unsigned long end_pfn);
+extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
+ unsigned long new_end_pfn);
+extern void remove_all_active_ranges(void);
+extern unsigned long absent_pages_in_range(unsigned long start_pfn,
+ unsigned long end_pfn);
+extern void get_pfn_range_for_nid(unsigned int nid,
+ unsigned long *start_pfn, unsigned long *end_pfn);
+extern unsigned long find_min_pfn_with_active_regions(void);
+extern unsigned long find_max_pfn_with_active_regions(void);
+extern int early_pfn_to_nid(unsigned long pfn);
+extern void free_bootmem_with_active_regions(int nid,
+ unsigned long max_low_pfn);
+extern void sparse_memory_present_with_active_regions(int nid);
+#endif
extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
extern void setup_per_zone_pages_min(void);
extern void mem_init(void);
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-clean/include/linux/mmzone.h linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/include/linux/mmzone.h
--- linux-2.6.17-rc3-mm1-clean/include/linux/mmzone.h 2006-05-01 11:37:01.000000000 +0100
+++ linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/include/linux/mmzone.h 2006-05-08 09:16:57.000000000 +0100
@@ -271,6 +271,13 @@ struct zonelist {
struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited
};
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+struct node_active_region {
+ unsigned long start_pfn;
+ unsigned long end_pfn;
+ int nid;
+};
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
/*
* The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
@@ -468,7 +475,8 @@ extern struct zone *next_zone(struct zon
#endif
-#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
+ !defined(CONFIG_ARCH_POPULATES_NODE_MAP)
#define early_pfn_to_nid(nid) (0UL)
#endif
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-clean/mm/page_alloc.c linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/mm/page_alloc.c
--- linux-2.6.17-rc3-mm1-clean/mm/page_alloc.c 2006-05-01 11:37:01.000000000 +0100
+++ linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/mm/page_alloc.c 2006-05-08 10:56:43.000000000 +0100
@@ -38,6 +38,8 @@
#include <linux/vmalloc.h>
#include <linux/mempolicy.h>
#include <linux/stop_machine.h>
+#include <linux/sort.h>
+#include <linux/pfn.h>
#include <asm/tlbflush.h>
#include "internal.h"
@@ -86,6 +88,18 @@ int min_free_kbytes = 1024;
unsigned long __meminitdata nr_kernel_pages;
unsigned long __meminitdata nr_all_pages;
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+ #ifdef CONFIG_MAX_ACTIVE_REGIONS
+ #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS
+ #else
+ #define MAX_ACTIVE_REGIONS (MAX_NR_ZONES * MAX_NUMNODES + 1)
+ #endif
+
+ struct node_active_region __initdata early_node_map[MAX_ACTIVE_REGIONS];
+ unsigned long __initdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
+ unsigned long __initdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+
#ifdef CONFIG_DEBUG_VM
static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
{
@@ -1864,25 +1878,6 @@ static inline unsigned long wait_table_b
#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
-static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
- unsigned long *zones_size, unsigned long *zholes_size)
-{
- unsigned long realtotalpages, totalpages = 0;
- int i;
-
- for (i = 0; i < MAX_NR_ZONES; i++)
- totalpages += zones_size[i];
- pgdat->node_spanned_pages = totalpages;
-
- realtotalpages = totalpages;
- if (zholes_size)
- for (i = 0; i < MAX_NR_ZONES; i++)
- realtotalpages -= zholes_size[i];
- pgdat->node_present_pages = realtotalpages;
- printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages);
-}
-
-
/*
* Initially all pages are reserved - free ones are freed
* up by free_all_bootmem() once the early boot process is
@@ -2200,6 +2195,215 @@ __meminit int init_currently_empty_zone(
return 0;
}
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+/* Note: nid == MAX_NUMNODES returns first region */
+static int __init first_active_region_index_in_nid(int nid)
+{
+ int i;
+ for (i = 0; early_node_map[i].end_pfn; i++) {
+ if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
+ return i;
+ }
+
+ return MAX_ACTIVE_REGIONS;
+}
+
+/* Note: nid == MAX_NUMNODES returns next region */
+static int __init next_active_region_index_in_nid(unsigned int index, int nid)
+{
+ for (index = index + 1; early_node_map[index].end_pfn; index++) {
+ if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
+ return index;
+ }
+
+ return MAX_ACTIVE_REGIONS;
+}
+
+#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+int __init early_pfn_to_nid(unsigned long pfn)
+{
+ int i;
+
+ for (i = 0; early_node_map[i].end_pfn; i++) {
+ unsigned long start_pfn = early_node_map[i].start_pfn;
+ unsigned long end_pfn = early_node_map[i].end_pfn;
+
+ if ((start_pfn <= pfn) && (pfn < end_pfn))
+ return early_node_map[i].nid;
+ }
+
+ return -1;
+}
+#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
+
+#define for_each_active_range_index_in_nid(i, nid) \
+ for (i = first_active_region_index_in_nid(nid); \
+ i != MAX_ACTIVE_REGIONS; \
+ i = next_active_region_index_in_nid(i, nid))
+
+void __init free_bootmem_with_active_regions(int nid,
+ unsigned long max_low_pfn)
+{
+ unsigned int i;
+ for_each_active_range_index_in_nid(i, nid) {
+ unsigned long size_pages = 0;
+ unsigned long end_pfn = early_node_map[i].end_pfn;
+ if (early_node_map[i].start_pfn >= max_low_pfn)
+ continue;
+
+ if (end_pfn > max_low_pfn)
+ end_pfn = max_low_pfn;
+
+ size_pages = end_pfn - early_node_map[i].start_pfn;
+ free_bootmem_node(NODE_DATA(early_node_map[i].nid),
+ PFN_PHYS(early_node_map[i].start_pfn),
+ size_pages << PAGE_SHIFT);
+ }
+}
+
+void __init sparse_memory_present_with_active_regions(int nid)
+{
+ unsigned int i;
+ for_each_active_range_index_in_nid(i, nid)
+ memory_present(early_node_map[i].nid,
+ early_node_map[i].start_pfn,
+ early_node_map[i].end_pfn);
+}
+
+void __init get_pfn_range_for_nid(unsigned int nid,
+ unsigned long *start_pfn, unsigned long *end_pfn)
+{
+ unsigned int i;
+ *start_pfn = -1UL;
+ *end_pfn = 0;
+
+ for_each_active_range_index_in_nid(i, nid) {
+ *start_pfn = min(*start_pfn, early_node_map[i].start_pfn);
+ *end_pfn = max(*end_pfn, early_node_map[i].end_pfn);
+ }
+
+ if (*start_pfn == -1UL) {
+ printk(KERN_WARNING "Node %u active with no memory\n", nid);
+ *start_pfn = 0;
+ }
+}
+
+unsigned long __init zone_present_pages_in_node(int nid,
+ unsigned long zone_type,
+ unsigned long *ignored)
+{
+ unsigned long node_start_pfn, node_end_pfn;
+ unsigned long zone_start_pfn, zone_end_pfn;
+
+ /* Get the start and end of the node and zone */
+ get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
+ zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
+ zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
+
+ /* Check that this node has pages within the zone's required range */
+ if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn)
+ return 0;
+
+ /* Move the zone boundaries inside the node if necessary */
+ zone_end_pfn = min(zone_end_pfn, node_end_pfn);
+ zone_start_pfn = max(zone_start_pfn, node_start_pfn);
+
+ /* Return the spanned pages */
+ return zone_end_pfn - zone_start_pfn;
+}
+
+unsigned long __init __absent_pages_in_range(int nid,
+ unsigned long range_start_pfn,
+ unsigned long range_end_pfn)
+{
+ int i = 0;
+ unsigned long prev_end_pfn = 0, hole_pages = 0;
+ unsigned long start_pfn;
+
+ /* Find the end_pfn of the first active range of pfns in the node */
+ i = first_active_region_index_in_nid(nid);
+ if (i == MAX_ACTIVE_REGIONS)
+ return 0;
+ prev_end_pfn = early_node_map[i].start_pfn;
+
+ /* Find all holes for the zone within the node */
+ for (; i != MAX_ACTIVE_REGIONS;
+ i = next_active_region_index_in_nid(i, nid)) {
+
+ /* No need to continue if prev_end_pfn is outside the zone */
+ if (prev_end_pfn >= range_end_pfn)
+ break;
+
+ /* Make sure the end of the zone is not within the hole */
+ start_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
+ prev_end_pfn = max(prev_end_pfn, range_start_pfn);
+
+ /* Update the hole size cound and move on */
+ if (start_pfn > range_start_pfn) {
+ BUG_ON(prev_end_pfn > start_pfn);
+ hole_pages += start_pfn - prev_end_pfn;
+ }
+ prev_end_pfn = early_node_map[i].end_pfn;
+ }
+
+ return hole_pages;
+}
+
+unsigned long __init absent_pages_in_range(unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ return __absent_pages_in_range(MAX_NUMNODES, start_pfn, end_pfn);
+}
+
+unsigned long __init zone_absent_pages_in_node(int nid,
+ unsigned long zone_type,
+ unsigned long *ignored)
+{
+ return __absent_pages_in_range(nid,
+ arch_zone_lowest_possible_pfn[zone_type],
+ arch_zone_highest_possible_pfn[zone_type]);
+}
+#else
+static inline unsigned long zone_present_pages_in_node(int nid,
+ unsigned long zone_type,
+ unsigned long *zones_size)
+{
+ return zones_size[zone_type];
+}
+
+static inline unsigned long zone_absent_pages_in_node(int nid,
+ unsigned long zone_type,
+ unsigned long *zholes_size)
+{
+ if (!zholes_size)
+ return 0;
+
+ return zholes_size[zone_type];
+}
+#endif
+
+static void __init calculate_node_totalpages(struct pglist_data *pgdat,
+ unsigned long *zones_size, unsigned long *zholes_size)
+{
+ unsigned long realtotalpages, totalpages = 0;
+ int i;
+
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ totalpages += zone_present_pages_in_node(pgdat->node_id, i,
+ zones_size);
+ }
+ pgdat->node_spanned_pages = totalpages;
+
+ realtotalpages = totalpages;
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ realtotalpages -=
+ zone_absent_pages_in_node(pgdat->node_id, i, zholes_size);
+ }
+ pgdat->node_present_pages = realtotalpages;
+ printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
+ realtotalpages);
+}
+
/*
* Set up the zone data structures:
* - mark all pages reserved
@@ -2223,10 +2427,9 @@ static void __meminit free_area_init_cor
struct zone *zone = pgdat->node_zones + j;
unsigned long size, realsize;
- realsize = size = zones_size[j];
- if (zholes_size)
- realsize -= zholes_size[j];
-
+ size = zone_present_pages_in_node(nid, j, zones_size);
+ realsize = size - zone_absent_pages_in_node(nid, j,
+ zholes_size);
if (j < ZONE_HIGHMEM)
nr_kernel_pages += realsize;
nr_all_pages += realsize;
@@ -2294,13 +2497,165 @@ void __meminit free_area_init_node(int n
{
pgdat->node_id = nid;
pgdat->node_start_pfn = node_start_pfn;
- calculate_zone_totalpages(pgdat, zones_size, zholes_size);
+ calculate_node_totalpages(pgdat, zones_size, zholes_size);
alloc_node_mem_map(pgdat);
free_area_init_core(pgdat, zones_size, zholes_size);
}
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+void __init add_active_range(unsigned int nid, unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ unsigned int i;
+ printk(KERN_DEBUG "Range (%d) %lu -> %lu\n", nid, start_pfn, end_pfn);
+
+ /* Merge with existing active regions if possible */
+ for (i = 0; early_node_map[i].end_pfn; i++) {
+ if (early_node_map[i].nid != nid)
+ continue;
+
+ /* Skip if an existing region covers this new one */
+ if (start_pfn >= early_node_map[i].start_pfn &&
+ end_pfn <= early_node_map[i].end_pfn)
+ return;
+
+ /* Merge forward if suitable */
+ if (start_pfn <= early_node_map[i].end_pfn &&
+ end_pfn > early_node_map[i].end_pfn) {
+ early_node_map[i].end_pfn = end_pfn;
+ return;
+ }
+
+ /* Merge backward if suitable */
+ if (start_pfn < early_node_map[i].end_pfn &&
+ end_pfn >= early_node_map[i].start_pfn) {
+ early_node_map[i].start_pfn = start_pfn;
+ return;
+ }
+ }
+
+ /* Leave last entry NULL, we use range.end_pfn to terminate the walk */
+ if (i >= MAX_ACTIVE_REGIONS - 1) {
+ printk(KERN_ERR "Too many memory regions, truncating\n");
+ return;
+ }
+
+ early_node_map[i].nid = nid;
+ early_node_map[i].start_pfn = start_pfn;
+ early_node_map[i].end_pfn = end_pfn;
+}
+
+void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
+ unsigned long new_end_pfn)
+{
+ unsigned int i;
+
+ /* Find the old active region end and shrink */
+ for_each_active_range_index_in_nid(i, nid) {
+ if (early_node_map[i].end_pfn == old_end_pfn) {
+ early_node_map[i].end_pfn = new_end_pfn;
+ break;
+ }
+ }
+}
+
+void __init remove_all_active_ranges()
+{
+ memset(early_node_map, 0, sizeof(early_node_map));
+}
+
+/* Compare two active node_active_regions */
+static int __init cmp_node_active_region(const void *a, const void *b)
+{
+ struct node_active_region *arange = (struct node_active_region *)a;
+ struct node_active_region *brange = (struct node_active_region *)b;
+
+ /* Done this way to avoid overflows */
+ if (arange->start_pfn > brange->start_pfn)
+ return 1;
+ if (arange->start_pfn < brange->start_pfn)
+ return -1;
+
+ return 0;
+}
+
+/* sort the node_map by start_pfn */
+static void __init sort_node_map(void)
+{
+ size_t num = 0;
+ while (early_node_map[num].end_pfn)
+ num++;
+
+ sort(early_node_map, num, sizeof(struct node_active_region),
+ cmp_node_active_region, NULL);
+}
+
+/* Find the lowest pfn for a node. This depends on a sorted early_node_map */
+unsigned long __init find_min_pfn_for_node(unsigned long nid)
+{
+ int i;
+
+ /* Assuming a sorted map, the first range found has the starting pfn */
+ for_each_active_range_index_in_nid(i, nid)
+ return early_node_map[i].start_pfn;
+
+ printk(KERN_WARNING "Could not find start_pfn for node %lu\n", nid);
+ return 0;
+}
+
+unsigned long __init find_min_pfn_with_active_regions(void)
+{
+ return find_min_pfn_for_node(MAX_NUMNODES);
+}
+
+unsigned long __init find_max_pfn_with_active_regions(void)
+{
+ int i;
+ unsigned long max_pfn = 0;
+
+ for (i = 0; early_node_map[i].end_pfn; i++)
+ max_pfn = max(max_pfn, early_node_map[i].end_pfn);
+
+ return max_pfn;
+}
+
+void __init free_area_init_nodes(unsigned long arch_max_dma_pfn,
+ unsigned long arch_max_dma32_pfn,
+ unsigned long arch_max_low_pfn,
+ unsigned long arch_max_high_pfn)
+{
+ unsigned long nid;
+ int zone_index;
+
+ /* Record where the zone boundaries are */
+ memset(arch_zone_lowest_possible_pfn, 0,
+ sizeof(arch_zone_lowest_possible_pfn));
+ memset(arch_zone_highest_possible_pfn, 0,
+ sizeof(arch_zone_highest_possible_pfn));
+ arch_zone_lowest_possible_pfn[ZONE_DMA] =
+ find_min_pfn_with_active_regions();
+ arch_zone_highest_possible_pfn[ZONE_DMA] = arch_max_dma_pfn;
+ arch_zone_highest_possible_pfn[ZONE_DMA32] = arch_max_dma32_pfn;
+ arch_zone_highest_possible_pfn[ZONE_NORMAL] = arch_max_low_pfn;
+ arch_zone_highest_possible_pfn[ZONE_HIGHMEM] = arch_max_high_pfn;
+ for (zone_index = 1; zone_index < MAX_NR_ZONES; zone_index++) {
+ arch_zone_lowest_possible_pfn[zone_index] =
+ arch_zone_highest_possible_pfn[zone_index-1];
+ }
+
+ /* Regions in the early_node_map can be in any order */
+ sort_node_map();
+
+ for_each_online_node(nid) {
+ pg_data_t *pgdat = NODE_DATA(nid);
+ free_area_init_node(nid, pgdat, NULL,
+ find_min_pfn_for_node(nid), NULL);
+ }
+}
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+
#ifndef CONFIG_NEED_MULTIPLE_NODES
static bootmem_data_t contig_bootmem_data;
struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data };
^ permalink raw reply
* [PATCH 0/6] Sizing zones and holes in an architecture independent manner V6
From: Mel Gorman @ 2006-05-08 14:10 UTC (permalink / raw)
To: akpm
Cc: davej, tony.luck, linux-mm, Mel Gorman, ak, bob.picco,
linux-kernel, linuxppc-dev
This is V6 of the patchset to size zones and memory holes in an
architecture-independent manner. This is based against 2.6.17-rc3-mm1,
and there have been no objections to these patches since V4. Please merge.
The reasons why I'd like to see this merged include:
o Less architecture-specific code - particularly for x86 and ppc64
o More maintainable. Changes to zone layout need only be made in one place
o Zone-sizing and memory hole calculation is one less job that needs to be
done for new architecture ports
o With the architecture-independent representation, zone-based
anti-fragmentation needs a lot less architecture-specific code making it
more portable between architectures. This will be important for future
hugepage-availability work
o Nigel Cunningham has stated that software suspend could potentially
use the architecture-independent representation to discover what pages
need to be saved during suspend
When testing on powerpc, I found compile errors that looked like this;
arch/powerpc/kernel/built-in.o(.init.text+0x7778): In function `vrsqrtefp':
: undefined reference to `__udivdi3'
arch/powerpc/kernel/built-in.o(.init.text+0x77c4): In function `vrsqrtefp':
: undefined reference to `__udivdi3'
make: *** [.tmp_vmlinux1] Error 1
A workaround that is *very likely wrong* and blatantly stolen from arch/sh
is at http://www.csn.ul.ie/~mel/udivdi3-powerpc-workaround.diff .
Changelog since V5
o Add a missing #include to mm/mem_init.c
o Drop the verbose debugging part of the set
o Report active range registration when loglevel is set for KERN_DEBUG
Changelog since V4
o Rebase to 2.6.17-rc3-mm1
o Calculate holes on x86 with SRAT correctly
Changelog since V3
o Rebase to 2.6.17-rc2
o Allow the active regions to be cleared. Needed by x86_64 when it decides
the SRAT table is bad half way through the registering of active regions
o Fix for flatmem x86_64 machines booting
Changelog since V2
o Fix a bug where holes in lower zones get double counted
o Catch the case where a new range is registered that is within an existing range
o Catch the case where a zone boundary is within a hole
o Use the EFI map for registering ranges on x86_64+numa
o On IA64+NUMA, add the active ranges before rounding for granules
o On x86_64, remove e820_hole_size and e820_bootmem_free and use
arch-independent equivalents
o On x86_64, remove the map walk in e820_end_of_ram()
o Rename memory_present_with_active_regions, name ambiguous
o Add absent_pages_in_range() for arches to call
Changelog since V1
o Correctly convert virtual and physical addresses to PFNs on ia64
o Correctly convert physical addresses to PFN on older ppc
o When add_active_range() is called with overlapping pfn ranges, merge them
o When a zone boundary occurs within a memory hole, account correctly
o Minor whitespace damage cleanup
o Debugging patch temporarily included
At a basic level, architectures define structures to record where active
ranges of page frames are located. Once located, the code to calculate
zone sizes and holes in each architecture is very similar. Some of this
zone and hole sizing code is difficult to read for no good reason. This
set of patches eliminates the similar-looking architecture-specific code.
The patches introduce a mechanism where architectures register where the
active ranges of page frames are with add_active_range(). When all areas
have been discovered, free_area_init_nodes() is called to initialise
the pgdat and zones. The zone sizes and holes are then calculated in an
architecture independent manner.
Patch 1 introduces the mechanism for registering and initialising PFN ranges
Patch 2 changes ppc to use the mechanism - 128 arch-specific LOC removed
Patch 3 changes x86 to use the mechanism - 142 arch-specific LOC removed
Patch 4 changes x86_64 to use the mechanism - 94 arch-specific LOC removed
Patch 5 changes ia64 to use the mechanism - 57 arch-specific LOC removed
At this point, there is a reduction of 421 architecture-specific lines of code
and a net reduction of 25 lines. The arch-independent code is a lot easier
to read in comparison to some of the arch-specific stuff, particularly in
arch/i386/ .
For Patch 6, it was also noted that page_alloc.c has a *lot* of
initialisation code which makes the file harder to read than it needs to
be. Patch 6 creates a new file mem_init.c and moves a lot of initialisation
code from page_alloc.c to it. After the patch is applied, there is still a net
loss of 7 lines.
The patches have been successfully boot tested by me and verified that the
zones are the correct size on
o x86, flatmem with 1.5GiB of RAM
o x86, NUMAQ
o x86, NUMA, with SRAT
o x86 with SRAT CONFIG_NUMA=n
o PPC64, NUMA
o PPC64, CONFIG_NUMA=n
o PPC64, CONFIG_64BIT=N
o Power, RS6000 (Had difficulty here with missing __udivdi3 symbol)
o x86_64, NUMA with SRAT
o x86_64, NUMA with broken SRAT that falls back to k8topology discovery
o x86_64, ACPI_NUMA, ACPI_MEMORY_HOTPLUG && !SPARSEMEM to trigger the
hotadd path without sparsemem fun in srat.c (SRAT broken on test machine and
I'm pretty sure the machine does not support physical memory hotadd anyway
so test may not have been effective other than being a compile test.)
o x86_64, CONFIG_NUMA=n
o x86_64, CONFIG_64=n
o x86_64, CONFIG_64=n, CONFIG_NUMA=n
o x86_64, AMD64 desktop machine with flatmem
o ia64 (Itanium 2)
o ia64 (Itanium 2), CONFIG_64=N
Tony Luck has successfully tested for ia64 on Itanium with tiger_defconfig,
gensparse_defconfig and defconfig. Bob Picco has also tested and debugged
on IA64. Jack Steiner successfully boot tested on a mammoth SGI IA64-based
machine. These were on patches against 2.6.17-rc1 and release 3 of these
patches but there have been no ia64-changes since release 3.
There are differences in the zone sizes for x86_64 as the arch-specific code
for x86_64 accounts the kernel image and the starting mem_maps as memory
holes but the architecture-independent code accounts the memory as present.
The net reduction seems small but the big benefit of this set of patches
is the reduction of 421 lines of architecture-specific code, some of
which is very hairy. There should be a greater net reduction when other
architectures use the same mechanisms for zone and hole sizing but I lack
the hardware to test on.
Comments?
Additional credit;
Dave Hansen for the initial suggestion and comments on early patches
Andy Whitcroft for reviewing early versions and catching numerous errors
Tony Luck for testing and debugging on IA64
Bob Picco for testing and fixing bugs related to pfn registration
Jack Steiner and Yasunori for testing on IA64
Andi Kleen for reviewing and feeding back about x86_64
arch/i386/Kconfig | 8
arch/i386/kernel/setup.c | 19
arch/i386/kernel/srat.c | 101 ---
arch/i386/mm/discontig.c | 65 --
arch/ia64/Kconfig | 3
arch/ia64/mm/contig.c | 60 --
arch/ia64/mm/discontig.c | 41 -
arch/ia64/mm/init.c | 12
arch/powerpc/Kconfig | 13
arch/powerpc/mm/mem.c | 53 --
arch/powerpc/mm/numa.c | 157 ------
arch/ppc/Kconfig | 3
arch/ppc/mm/init.c | 26 -
arch/x86_64/Kconfig | 3
arch/x86_64/kernel/e820.c | 109 +---
arch/x86_64/kernel/setup.c | 7
arch/x86_64/mm/init.c | 62 --
arch/x86_64/mm/k8topology.c | 3
arch/x86_64/mm/numa.c | 18
arch/x86_64/mm/srat.c | 11
include/asm-ia64/meminit.h | 1
include/asm-x86_64/e820.h | 5
include/asm-x86_64/proto.h | 2
include/linux/mm.h | 34 +
include/linux/mmzone.h | 10
mm/Makefile | 2
mm/mem_init.c | 1121 ++++++++++++++++++++++++++++++++++++++++++
mm/page_alloc.c | 750 -----------------------------
--
--
Mel Gorman
Part-time Phd Student Linux Technology Center
University of Limerick IBM Dublin Software Lab
^ permalink raw reply
* Re: Large Page Support, 2.6 kernel , PPC440
From: Ralph Blach @ 2006-05-08 12:36 UTC (permalink / raw)
To: moris dong; +Cc: linuxppc-embedded
In-Reply-To: <BAY19-F90D433E50C500C4EF93CBF8B10@phx.gbl>
[-- Attachment #1: Type: text/plain, Size: 1802 bytes --]
Morris,
A long time ago, I did this for a 405 and it involves much more than just
changing the page shift.
From my memory, I will give you what I think you have to do.
1) You must go into the TLB handling code and change it. That is in the
header file 44x.S, I believe.
2) You have to change the pte directory sizes.
3) You have to change the linker script.
4) You have to change the size of the zero page.
5) You have to change the dcache flush routines to cover the new page size.
You have to find where 4096/4095 is hard coded as the page size and change
it.
This is no small job, and what you come up with will be very unsupported.
I believe if you pin the kernel TLB you might get
the performance you need, and this should be in the kernel config scripts.
Good luck,
Chip
On 5/1/06, moris dong <moris_dong@hotmail.com> wrote:
>
> Friends,
> My PPC440 (32bit) MMU supports multiple page sizes.
> For the default 4K pages, my 2.6.11 kernel compiles and boots just fine.
> I want to re-build it with large pages, to improve my application
> performance.
> I tried modifying PAGE_SHIFT in "page.h" to 13 (8K pages) and re-build my
> kernel.
> Compilation worked out fine, but my kernel does NOT boot, nor it prints
> anything to the console.
>
> Has anyone successfully compiled & booted a 2.6 kernel with pages larger
> than 4K ?
> What am I doing wrong ?
>
> Thanks a lot.
>
> _________________________________________________________________
> Express yourself instantly with MSN Messenger! Download today it's FREE!
> http://messenger.msn.click-url.com/go/onm00200471ave/direct/01/
>
> _______________________________________________
> Linuxppc-embedded mailing list
> Linuxppc-embedded@ozlabs.org
> https://ozlabs.org/mailman/listinfo/linuxppc-embedded
>
[-- Attachment #2: Type: text/html, Size: 2352 bytes --]
^ permalink raw reply
* Setting I&D cache enable in the same mtspr instruction
From: Assaf Hoffman @ 2006-05-08 10:39 UTC (permalink / raw)
To: linuxppc-embedded; +Cc: Rita Shtern, Ronen Shitrit
Hi,
I think the implementation of setup_common_caches() in file
cpu_setup_6xx.S is not according to the spec as far as the MPC74xx is concerned.
Looking in the spec (MPC7450 RISC Microprocessor Family Reference
Manual, MPC7450UM Rev. 5 1/2005), section 3.4.1.5, "L1 Instruction and Data
Cache Flash Invalidation", says:
"Note that HID0[ICFI] and HID0[DCFI] must not both be set with the same
mtspr instruction, due to the synchronization requirements described in
Section 2.4.2.4.1, "Context Synchronization."
But in the code those two are set together.
Also, the same section says:
"An isync must precede the setting of the HID0[ICFI] in order for the
setting to take effect."
But in the code, only 'sync' can be found.
/* Enable caches for 603's, 604, 750 & 7400 */
setup_common_caches:
mfspr r11,SPRN_HID0
andi. r0,r11,HID0_DCE
ori r11,r11,HID0_ICE|HID0_DCE
ori r8,r11,HID0_ICFI
bne 1f /* don't invalidate the D-cache
*/
ori r8,r8,HID0_DCI /* unless it wasn't enabled */
1: sync
mtspr SPRN_HID0,r8 /* enable and invalidate caches
*/
^^^^^^^^^^^^^^^^^^^ Here we set both ICFI and DCFI in the same
mtspr instruction. Also, no isync before setting ICFI.
sync
mtspr SPRN_HID0,r11 /* enable caches */
sync
isync
blr
Please advise.
Thanks.
^ permalink raw reply
* Low latency Patch
From: Prabhat_Singh @ 2006-05-08 6:35 UTC (permalink / raw)
To: linuxppc-dev
[-- Attachment #1: Type: text/plain, Size: 698 bytes --]
Hi ,
Can any one please tell me from where I can get Low-latency and
rtc-debug patches for linux-2.4.24 kernel source and for MPC8248
processor platform.
Thanks
Prabhat Singh
DISCLAIMER:
This email (including any attachments) is intended for the sole use of the intended recipient/s and may contain material that is CONFIDENTIAL AND PRIVATE COMPANY INFORMATION. Any review or reliance by others or copying or distribution or forwarding of any or all of the contents in this message is STRICTLY PROHIBITED. If you are not the intended recipient, please contact the sender by email and delete all copies; your cooperation in this regard is appreciated.
[-- Attachment #2: Type: text/html, Size: 2884 bytes --]
^ permalink raw reply
* Low latecy patch
From: Prabhat_Singh @ 2006-05-08 6:02 UTC (permalink / raw)
To: linuxppc-embedded
[-- Attachment #1: Type: text/plain, Size: 689 bytes --]
Hi ,
Can any one please tell me from where I can get Low-latency and
rtc-debug patches for linux-2.4.24 kernel source and MPC8248 processor
platform.
Thanks
Prabhat Singh
DISCLAIMER:
This email (including any attachments) is intended for the sole use of the intended recipient/s and may contain material that is CONFIDENTIAL AND PRIVATE COMPANY INFORMATION. Any review or reliance by others or copying or distribution or forwarding of any or all of the contents in this message is STRICTLY PROHIBITED. If you are not the intended recipient, please contact the sender by email and delete all copies; your cooperation in this regard is appreciated.
[-- Attachment #2: Type: text/html, Size: 2748 bytes --]
^ permalink raw reply
* Re: [patch] powerpc: remove do-nothing cpu setup routines
From: Segher Boessenkool @ 2006-05-08 0:12 UTC (permalink / raw)
To: Benjamin Herrenschmidt
Cc: Arnd Bergmann, linuxppc-dev, Paul Mackerras, cbe-oss-dev
In-Reply-To: <1147041095.4777.170.camel@localhost.localdomain>
>> Removes the processor specific do-nothing routines
>> __setup_cpu_power3 and
>> __setup_cpu_power4 with the generic routine __setup_cpu_null.
>
> Why not just change the caller to test for NULL ?
Yes, please do (as Paul suggested already).
Segher
^ permalink raw reply
* Re: [patch] powerpc: remove do-nothing cpu setup routines
From: Benjamin Herrenschmidt @ 2006-05-07 22:31 UTC (permalink / raw)
To: Geoff Levand; +Cc: linuxppc-dev, Paul Mackerras, cbe-oss-dev, Arnd Bergmann
In-Reply-To: <445BE72D.3050205@am.sony.com>
On Fri, 2006-05-05 at 17:00 -0700, Geoff Levand wrote:
> Removes the processor specific do-nothing routines __setup_cpu_power3 and
> __setup_cpu_power4 with the generic routine __setup_cpu_null.
Why not just change the caller to test for NULL ?
Cheers,
Ben.
^ permalink raw reply
* [PATCH] ppc32 8xx: Fix r3 thrashing due to 8MB TLB page instantiation (!CONFIG_8xx_CPU6)
From: Marcelo Tosatti @ 2006-05-05 20:09 UTC (permalink / raw)
To: Paul Mackerras; +Cc: David Jander, linux-ppc-embedded
Instantiation of 8MB pages on the TLB cache for the kernel static
mapping thrashes r3 register on !CONFIG_8xx_CPU6 configurations.
Signed-off-by: Marcelo Tosatti <marcelo@kvack.org>
diff --git a/arch/ppc/kernel/head_8xx.S b/arch/ppc/kernel/head_8xx.S
index ec53c7d..7a2f205 100644
--- a/arch/ppc/kernel/head_8xx.S
+++ b/arch/ppc/kernel/head_8xx.S
@@ -355,9 +355,7 @@ #endif
. = 0x1200
DataStoreTLBMiss:
-#ifdef CONFIG_8xx_CPU6
stw r3, 8(r0)
-#endif
DO_8xx_CPU6(0x3f80, r3)
mtspr SPRN_M_TW, r10 /* Save a couple of working registers */
mfcr r10
@@ -417,9 +415,7 @@ #endif
lwz r11, 0(r0)
mtcr r11
lwz r11, 4(r0)
-#ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)
-#endif
rfi
/* This is an instruction TLB error on the MPC8xx. This could be due
^ permalink raw reply related
* Re: Calculating virtual address from physical address
From: Grant Likely @ 2006-05-06 19:38 UTC (permalink / raw)
To: Chris Dumoulin; +Cc: dhlii, linuxppc-embedded
In-Reply-To: <200605061843.k46Ih5Vd032048@www-webmail1.magma.ca>
On 5/6/06, Chris Dumoulin <cdumoulin@ics-ltd.com> wrote:
> You said that the temporary TLB entries setup in head_4xx.S will eventual=
ly be replaced.
> Where is the code that creates these new TLB entries later on? Are the 'r=
eal' TLB entries
> only created once, and persist for as long as the system is running, or d=
o TLB entries
> change often while the system is running?
The kernel maintains a list of mappings between virtual and physical
space. When the processor takes a TLB miss exception, the exception
handler loads the needed mapping into the TLB and returns from the
exception. (Look in head_4xx.S, specifically at finish_tlb_load.)
TLB entries are loaded in a round-robin fashion as needed. Since your
early TLB mappings aren't in the kernel page tables; they get
overwritten and can't be reloaded by the TLB miss exception handler.
> Can you point me to some information about Grant's platform bus changes t=
hat you were
> talking about? I am using a custom V2Pro board, and I'd be interested to =
see if this code
> is something I should be looking at.
The platform bus changes move the devices to use the "Platform Bus"
infrastructure. It's kind of a move away from multiple
processor-specific bus management schemes for "simple" busses, i.e.
if the processor can access it without special setup code, the device
can go on the platform bus.
Check out http://patchwork.ozlabs.org/linuxppc/ and search for my
name. There's about 9 patches grouped together.
Cheers,
g.
--
Grant Likely, B.Sc. P.Eng.
Secret Lab Technologies Ltd.
(403) 399-0195
^ permalink raw reply
* Re: Calculating virtual address from physical address
From: Chris Dumoulin @ 2006-05-06 18:43 UTC (permalink / raw)
To: dhlii, Chris Dumoulin; +Cc: linuxppc-embedded
Thanks for your reply; I found it very useful and interesting. Now, I have a whole bunch
of questions.
You said that the temporary TLB entries setup in head_4xx.S will eventually be replaced.
Where is the code that creates these new TLB entries later on? Are the 'real' TLB entries
only created once, and persist for as long as the system is running, or do TLB entries
change often while the system is running?
Do you know what state the MSR will be in at this point in the code? I know what the
power-on reset state is, but I'm wondering if it'll be in a different state by the time we
get to this point in head_4xx.S.
When you suggest disabling instruction or data address translation, is that just so I
could access my hardware directly, or is there some other reason?
You were enabling the MSR bits, one at a time, and found that the machine check was
causing the hang (I'm assuming that's what you meant by 'sent me to space'). Was the idea
there to just isolate what type of exception was causing the hang, or were you looking to
make some permanent changes to the MSR? Is a machine check interrupt caused by trying to
access an address that doesn't have a TLB entry?
Can you point me to some information about Grant's platform bus changes that you were
talking about? I am using a custom V2Pro board, and I'd be interested to see if this code
is something I should be looking at.
Thanks a lot,
Chris
On May 05, "David H. Lynch Jr." <dhlii@dlasys.net> wrote:
>
> Chris Dumoulin wrote:
> > My LEDs are at address 0x4F600000 and my CONFIG_KERNEL_START is
> > 0xC0000000. If this address were low enough, I would just add 0xC0000000
> > to the address to get the virtual address, but since my LED address is
> > so high, the sum will be well past the 32-bit maximum address value. How
> > is a virtual address calculated for a high address like 0x4F600000?
> >
> There are macros tophys and tovirt that convert addresses between
> physical and virtual. There are use example in the head_4xx.S file you
> are already in.
>
> If you are going to use a port for debugging you need to create a
> tlb entry for it.
> Same file in initial_mmu the code inside the if
> defined(CONFIG_SERIAL_TEXT_DEBUG) should provide an example how to do that.
>
> Be forwarned that any entries you create now will eventually
> disappear (took 2 weeks to figure that out once), but that may not
> happen intil after /init starts.
>
> Also with a little of jiggering arround the bits in MSR_KERNEL you
> can enable Data address translation independently of instruction address
> translation as well as disable or enable a variety of
> checks. It took me three weeks to get a new Xilinx V4 board through
> the rfi and to start_here in the same turn_on_mmu code you are working on.
>
> Eventually I ended up enabling the MSR bits one at a time until I
> discovered that enabling the Machine Check sent me to space.
>
> Regardless, once I relialized I could test the code with the MSR
> bits enabled one at a time isolating the problem became easier.
>
>
> The two issues I addressed above which relate specifically to your
> efforts with the ml300, constituted more than 80% of my effort to get a
> Xilinx Virtex 4 running.
>
> Finally, I started prior to grants platform bus changes. I have been
> adapting my V4 code to fit with Grants changes (the client has what they
> want so they do not care)
> I have not put alot of effort into this, but I currently get
> waylayed much later in new platform bus specific initialization code.
> I had no problem with the older board specific initialization code.
>
> If you are running on a real ml300 I am surprised you are having any
> problems though I do not have an ml300 to check that.
> But if you are running on a custom V2Pro board you have to get the
> board specific initalization right and therefore could trip over the
> issue I am currently having migrating from old to new.
>
>
>
>
>
>
>
> > BTW, he is the assembly code that I'm working with (from
> > arch/ppc/kernel/head_4xx.S):
> >
> > .text
> > _GLOBAL(_stext)
> > _GLOBAL(_start)
> >
> > /* Save parameters we are passed.
> > */
> > mr r31,r3
> > mr r30,r4
> > mr r29,r5
> > mr r28,r6
> > mr r27,r7
> >
> > /* CRD: set LED state here */
> > lis r26,0x4F600000@h
> > ori r26,r26,0x4F600000@l
> > li r25,LED_STATE_0
> > stw r25,0(r26)
> >
> > /* We have to turn on the MMU right away so we get cache modes
> > * set correctly.
> > */
> > bl initial_mmu
> >
> > /* CRD: set LED state here */
> > lis r26,0x4F600000@h
> > ori r26,r26,0x4F600000@l
> > li r25,LED_STATE_1
> > stw r25,0(r26)
> >
> > /* We now have the lower 16 Meg mapped into TLB entries, and the caches
> > * ready to work.
> > */
> > turn_on_mmu:
> > lis r0,MSR_KERNEL@h
> > ori r0,r0,MSR_KERNEL@l
> > mtspr SPRN_SRR1,r0
> > lis r0,start_here@h
> > ori r0,r0,start_here@l
> > mtspr SPRN_SRR0,r0
> > SYNC
> >
> > /* CRD: set LED state here */
> > lis r26,0x4F600000@h
> > ori r26,r26,0x4F600000@l
> > li r25,LED_STATE_2
> > stw r25,0(r26)
> >
> > rfi /* enables MMU */
> >
> > /* CRD: set LED state here */
> > /* This address should be a virtual address */
> > lis r26,0x4F600000@h
> > ori r26,r26,0x4F600000@l
> > li r25,LED_STATE_3
> > stw r25,0(r26)
> >
> > b . /* prevent prefetch past rfi */
> >
> > Regards,
> > Chris Dumoulin
> >
>
>
> --
> Dave Lynch DLA Systems
> Software Development: Embedded Linux
> 717.627.3770 dhlii@dlasys.net <a
href='http://www.dlasys.net'>http://www.dlasys.net</a>
> fax: 1.253.369.9244 Cell: 1.717.587.7774
> Over 25 years' experience in platforms, languages, and technologies too numerous to list.
>
> "Any intelligent fool can make things bigger and more complex... It takes a touch of
genius - and a lot of courage to move in the opposite direction."
> Albert Einstein
>
>
^ permalink raw reply
* RE: Calculating virtual address from physical address
From: Alexander Szekely @ 2006-05-06 8:43 UTC (permalink / raw)
To: linuxppc-embedded
> How is a virtual address=20
> calculated for a high address like 0x4F600000?
Add this to the MMU initialization code (initial_mmu):
/* map leds - 1:1 phys=virt */
lis r3,0x4F600000@h
ori r3,r3,0x4F600000@l
mr r4,r3
clrrwi r4,r4,12
ori r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G)
clrrwi r3,r3,12
ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K))
li r0,0 /* TLB slot 0 */
tlbwe r4,r0,TLB_DATA
tlbwe r3,r0,TLB_TAG
Then you have a one-to-one mapping between physical and virtual memory,
and you can access the LEDs at the same address after turning on the
MMU.
BTW, we had a similar problem, but the cause was not the MMU being
turned on. The MMU is enabled by writing MSR_KERNEL to the status register. This
also enables the machine state exception interrupt.
Our hardware design produced PLB bus exceptions, which went unnoticed in
standalone applications. But these bus exceptions also trigger machine
state exceptions. So instead of jumping to start_here the processor
branched to 0x200 - unfortunately the trap table is not set up at this
point in the boot process...
Alex
^ permalink raw reply
* [patch] powerpc: remove do-nothing cpu setup routines
From: Geoff Levand @ 2006-05-06 0:00 UTC (permalink / raw)
To: Paul Mackerras; +Cc: linuxppc-dev, cbe-oss-dev, Arnd Bergmann
Replaces the processor-specific do-nothing routines __setup_cpu_power3 and
__setup_cpu_power4 with the generic routine __setup_cpu_null.
Signed-off-by: Geoff Levand <geoffrey.levand@am.sony.com>
Index: cell--alp--3/arch/powerpc/kernel/cpu_setup_power4.S
===================================================================
--- cell--alp--3.orig/arch/powerpc/kernel/cpu_setup_power4.S 2006-05-05 15:59:58.000000000 -0700
+++ cell--alp--3/arch/powerpc/kernel/cpu_setup_power4.S 2006-05-05 16:40:44.000000000 -0700
@@ -73,9 +73,6 @@
isync
blr
-_GLOBAL(__setup_cpu_power4)
- blr
-
_GLOBAL(__setup_cpu_ppc970)
mfspr r0,SPRN_HID0
li r11,5 /* clear DOZE and SLEEP */
Index: cell--alp--3/arch/powerpc/kernel/cputable.c
===================================================================
--- cell--alp--3.orig/arch/powerpc/kernel/cputable.c 2006-05-05 16:29:06.000000000 -0700
+++ cell--alp--3/arch/powerpc/kernel/cputable.c 2006-05-05 16:40:41.000000000 -0700
@@ -32,8 +32,6 @@
*/
#ifdef CONFIG_PPC64
extern void __setup_cpu_null(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_power3(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_power4(unsigned long offset, struct cpu_spec* spec);
#else
extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec);
@@ -80,7 +78,7 @@
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
- .cpu_setup = __setup_cpu_power3,
+ .cpu_setup = __setup_cpu_null,
.oprofile_cpu_type = "ppc64/power3",
.oprofile_type = PPC_OPROFILE_RS64,
.platform = "power3",
@@ -94,7 +92,7 @@
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
- .cpu_setup = __setup_cpu_power3,
+ .cpu_setup = __setup_cpu_null,
.oprofile_cpu_type = "ppc64/power3",
.oprofile_type = PPC_OPROFILE_RS64,
.platform = "power3",
@@ -108,7 +106,7 @@
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
- .cpu_setup = __setup_cpu_power3,
+ .cpu_setup = __setup_cpu_null,
.oprofile_cpu_type = "ppc64/rs64",
.oprofile_type = PPC_OPROFILE_RS64,
.platform = "rs64",
@@ -122,7 +120,7 @@
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
- .cpu_setup = __setup_cpu_power3,
+ .cpu_setup = __setup_cpu_null,
.oprofile_cpu_type = "ppc64/rs64",
.oprofile_type = PPC_OPROFILE_RS64,
.platform = "rs64",
@@ -136,7 +134,7 @@
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
- .cpu_setup = __setup_cpu_power3,
+ .cpu_setup = __setup_cpu_null,
.oprofile_cpu_type = "ppc64/rs64",
.oprofile_type = PPC_OPROFILE_RS64,
.platform = "rs64",
@@ -150,7 +148,7 @@
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
- .cpu_setup = __setup_cpu_power3,
+ .cpu_setup = __setup_cpu_null,
.oprofile_cpu_type = "ppc64/rs64",
.oprofile_type = PPC_OPROFILE_RS64,
.platform = "rs64",
@@ -164,7 +162,7 @@
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
- .cpu_setup = __setup_cpu_power4,
+ .cpu_setup = __setup_cpu_null,
.oprofile_cpu_type = "ppc64/power4",
.oprofile_type = PPC_OPROFILE_POWER4,
.platform = "power4",
@@ -178,7 +176,7 @@
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
- .cpu_setup = __setup_cpu_power4,
+ .cpu_setup = __setup_cpu_null,
.oprofile_cpu_type = "ppc64/power4",
.oprofile_type = PPC_OPROFILE_POWER4,
.platform = "power4",
@@ -244,7 +242,7 @@
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
- .cpu_setup = __setup_cpu_power4,
+ .cpu_setup = __setup_cpu_null,
.oprofile_cpu_type = "ppc64/power5",
.oprofile_type = PPC_OPROFILE_POWER4,
.platform = "power5",
@@ -258,7 +256,7 @@
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
- .cpu_setup = __setup_cpu_power4,
+ .cpu_setup = __setup_cpu_null,
.oprofile_cpu_type = "ppc64/power5+",
.oprofile_type = PPC_OPROFILE_POWER4,
.platform = "power5+",
@@ -285,7 +283,7 @@
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
- .cpu_setup = __setup_cpu_power4,
+ .cpu_setup = __setup_cpu_null,
.platform = "power4",
}
#endif /* CONFIG_PPC64 */
Index: cell--alp--3/arch/powerpc/kernel/misc_64.S
===================================================================
--- cell--alp--3.orig/arch/powerpc/kernel/misc_64.S 2006-05-05 16:04:59.000000000 -0700
+++ cell--alp--3/arch/powerpc/kernel/misc_64.S 2006-05-05 16:40:47.000000000 -0700
@@ -771,9 +771,6 @@
_GLOBAL(__setup_cpu_null)
blr
-_GLOBAL(__setup_cpu_power3)
- blr
-
_GLOBAL(execve)
li r0,__NR_execve
sc
^ permalink raw reply
* Re: [PATCH 04/13] cell: remove broken __setup_cpu_be function
From: Geoff Levand @ 2006-05-06 0:00 UTC (permalink / raw)
To: Paul Mackerras
Cc: Arnd Bergmann, Levand, Geoffrey, linux-kernel, linuxppc-dev,
Arnd Bergmann, cbe-oss-dev
In-Reply-To: <17498.60066.92373.6527@cargo.ozlabs.ibm.com>
Paul Mackerras wrote:
> Arnd Bergmann writes:
>
>> From: Geoff Levand <geoffrey.levand@am.sony.com>
>>
>> This patch removes the incorrect Cell processor setup routine
>> __setup_cpu_be. This routine improperly accesses the hypervisor
>> page size configuration at SPR HID6. The correct behavior is for
>> firmware, or if needed, platform setup code, to set the correct
>> page size.
>
>> - .cpu_setup = __setup_cpu_be,
>> + .cpu_setup = __setup_cpu_power4,
>
> That looks a bit dodgy. Either just remove the contents of
> __setup_cpu_be (leaving only the blr), or define a __setup_cpu_null
> that does nothing, or make the identify_cpu not call the cpu setup
> function if the pointer is NULL.
OK, I set it up with __setup_cpu_null. An updated patch follows.
It falls out from this that we can replace the do-nothing routines
__setup_cpu_power3 and __setup_cpu_power4 with __setup_cpu_null also.
I'll post a separate patch for consideration.
-Geoff
Replaced the Cell processor specific routine __setup_cpu_be with
a new generic routine __setup_cpu_null. __setup_cpu_be improperly
accessed the hypervisor page size configuration at SPR HID6. Correct
behavior is for firmware, or if needed, platform setup code, to set
the correct page size.
Signed-off-by: Geoff Levand <geoffrey.levand@am.sony.com>
Index: cell--alp--3/arch/powerpc/kernel/cpu_setup_power4.S
===================================================================
--- cell--alp--3.orig/arch/powerpc/kernel/cpu_setup_power4.S 2006-04-26 19:19:25.000000000 -0700
+++ cell--alp--3/arch/powerpc/kernel/cpu_setup_power4.S 2006-05-05 15:59:58.000000000 -0700
@@ -76,20 +76,6 @@
_GLOBAL(__setup_cpu_power4)
blr
-_GLOBAL(__setup_cpu_be)
- /* Set large page sizes LP=0: 16MB, LP=1: 64KB */
- addi r3, 0, 0
- ori r3, r3, HID6_LB
- sldi r3, r3, 32
- nor r3, r3, r3
- mfspr r4, SPRN_HID6
- and r4, r4, r3
- addi r3, 0, 0x02000
- sldi r3, r3, 32
- or r4, r4, r3
- mtspr SPRN_HID6, r4
- blr
-
_GLOBAL(__setup_cpu_ppc970)
mfspr r0,SPRN_HID0
li r11,5 /* clear DOZE and SLEEP */
Index: cell--alp--3/arch/powerpc/kernel/cputable.c
===================================================================
--- cell--alp--3.orig/arch/powerpc/kernel/cputable.c 2006-04-26 19:19:25.000000000 -0700
+++ cell--alp--3/arch/powerpc/kernel/cputable.c 2006-05-05 16:29:06.000000000 -0700
@@ -31,9 +31,9 @@
* and ppc64
*/
#ifdef CONFIG_PPC64
+extern void __setup_cpu_null(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_power3(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_power4(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_be(unsigned long offset, struct cpu_spec* spec);
#else
extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec);
@@ -273,7 +273,7 @@
PPC_FEATURE_SMT,
.icache_bsize = 128,
.dcache_bsize = 128,
- .cpu_setup = __setup_cpu_be,
+ .cpu_setup = __setup_cpu_null,
.platform = "ppc-cell-be",
},
{ /* default match */
Index: cell--alp--3/arch/powerpc/kernel/misc_64.S
===================================================================
--- cell--alp--3.orig/arch/powerpc/kernel/misc_64.S 2006-04-26 19:19:25.000000000 -0700
+++ cell--alp--3/arch/powerpc/kernel/misc_64.S 2006-05-05 16:04:59.000000000 -0700
@@ -768,6 +768,9 @@
#endif /* CONFIG_ALTIVEC */
+_GLOBAL(__setup_cpu_null)
+ blr
+
_GLOBAL(__setup_cpu_power3)
blr
^ permalink raw reply
* Re: Calculating virtual address from physical address
From: David H. Lynch Jr. @ 2006-05-05 23:35 UTC (permalink / raw)
To: Chris Dumoulin; +Cc: linuxppc-embedded
In-Reply-To: <445B6A80.2090202@ics-ltd.com>
Chris Dumoulin wrote:
> My LEDs are at address 0x4F600000 and my CONFIG_KERNEL_START is
> 0xC0000000. If this address were low enough, I would just add 0xC0000000
> to the address to get the virtual address, but since my LED address is
> so high, the sum will be well past the 32-bit maximum address value. How
> is a virtual address calculated for a high address like 0x4F600000?
>
There are macros tophys and tovirt that convert addresses between
physical and virtual. There are usage examples in the head_4xx.S file you
are already in.
If you are going to use a port for debugging you need to create a
tlb entry for it.
Same file in initial_mmu the code inside the if
defined(CONFIG_SERIAL_TEXT_DEBUG) should provide an example how to do that.
Be forewarned that any entries you create now will eventually
disappear (took 2 weeks to figure that out once), but that may not
happen until after /init starts.
Also, with a little jiggering around of the bits in MSR_KERNEL you
can enable data address translation independently of instruction address
translation, as well as disable or enable a variety of
checks. It took me three weeks to get a new Xilinx V4 board through
the rfi and to start_here in the same turn_on_mmu code you are working on.
Eventually I ended up enabling the MSR bits one at a time until I
discovered that enabling the Machine Check sent me to space.
Regardless, once I realized I could test the code with the MSR
bits enabled one at a time, isolating the problem became easier.
The two issues I addressed above which relate specifically to your
efforts with the ml300, constituted more than 80% of my effort to get a
Xilinx Virtex 4 running.
Finally, I started prior to Grant's platform bus changes. I have been
adapting my V4 code to fit with Grant's changes (the client has what they
want so they do not care).
I have not put a lot of effort into this, but I currently get
waylaid much later in the new platform-bus-specific initialization code.
I had no problem with the older board-specific initialization code.
If you are running on a real ml300 I am surprised you are having any
problems, though I do not have an ml300 to check that.
But if you are running on a custom V2Pro board you have to get the
board-specific initialization right and therefore could trip over the
issue I am currently having migrating from old to new.
> BTW, he is the assembly code that I'm working with (from
> arch/ppc/kernel/head_4xx.S):
>
> .text
> _GLOBAL(_stext)
> _GLOBAL(_start)
>
> /* Save parameters we are passed.
> */
> mr r31,r3
> mr r30,r4
> mr r29,r5
> mr r28,r6
> mr r27,r7
>
> /* CRD: set LED state here */
> lis r26,0x4F600000@h
> ori r26,r26,0x4F600000@l
> li r25,LED_STATE_0
> stw r25,0(r26)
>
> /* We have to turn on the MMU right away so we get cache modes
> * set correctly.
> */
> bl initial_mmu
>
> /* CRD: set LED state here */
> lis r26,0x4F600000@h
> ori r26,r26,0x4F600000@l
> li r25,LED_STATE_1
> stw r25,0(r26)
>
> /* We now have the lower 16 Meg mapped into TLB entries, and the caches
> * ready to work.
> */
> turn_on_mmu:
> lis r0,MSR_KERNEL@h
> ori r0,r0,MSR_KERNEL@l
> mtspr SPRN_SRR1,r0
> lis r0,start_here@h
> ori r0,r0,start_here@l
> mtspr SPRN_SRR0,r0
> SYNC
>
> /* CRD: set LED state here */
> lis r26,0x4F600000@h
> ori r26,r26,0x4F600000@l
> li r25,LED_STATE_2
> stw r25,0(r26)
>
> rfi /* enables MMU */
>
> /* CRD: set LED state here */
> /* This address should be a virtual address */
> lis r26,0x4F600000@h
> ori r26,r26,0x4F600000@l
> li r25,LED_STATE_3
> stw r25,0(r26)
>
> b . /* prevent prefetch past rfi */
>
> Regards,
> Chris Dumoulin
>
--
Dave Lynch DLA Systems
Software Development: Embedded Linux
717.627.3770 dhlii@dlasys.net http://www.dlasys.net
fax: 1.253.369.9244 Cell: 1.717.587.7774
Over 25 years' experience in platforms, languages, and technologies too numerous to list.
"Any intelligent fool can make things bigger and more complex... It takes a touch of genius - and a lot of courage to move in the opposite direction."
Albert Einstein
^ permalink raw reply
* Re: Calculating virtual address from physical address
From: Josh Boyer @ 2006-05-05 20:58 UTC (permalink / raw)
To: Matt Porter; +Cc: Sylvain Munaut, Chris Dumoulin, linuxppc-embedded
In-Reply-To: <20060505132441.B30563@cox.net>
On Fri, 2006-05-05 at 13:24 -0700, Matt Porter wrote:
>
> There are no BATs on 4xx. However, the same conceptual thing can be
> done by wiring a fixed TLB entry to cover those LEDs temporarily
> during bringup debug. The temporary TLB entry will be wiped out by
> normal tlb misses after things are running whenever the fixed entry
> is clobbered by the round robin replacement.
At which point you should be able to call ioremap to generate a virtual
address inside a device driver or board file.
josh
^ permalink raw reply
* Re: Calculating virtual address from physical address
From: Matt Porter @ 2006-05-05 20:24 UTC (permalink / raw)
To: Sylvain Munaut; +Cc: Chris Dumoulin, linuxppc-embedded
In-Reply-To: <445BA736.30208@246tNt.com>
On Fri, May 05, 2006 at 09:27:50PM +0200, Sylvain Munaut wrote:
> Chris Dumoulin wrote:
> > I'm using a Virtex II Pro-based board with a PPC405. The board is
> > hanging somewhere very early in the kernel boot process. I believe it
> > may be dying at the point where the MMU is enabled. In order to
> > determine the exact point at which my board hangs, I'm blinking two LEDs
> > in the assembly code found in arch/ppc/kernel/head_4xx.S, . Currently I
> > am only able to successfully access the LEDs before the MMU is turned
> > on, but I can't be sure that I'm calculating the virtual address
> > properly when I try to access the LED after the MMU is turned on.
>
> Typical when trying to bring up board ...
>
> Once the MMU is turned on, you leds register are most likely ... nowhere
> ... i.e.
> if you don't create a mapping your self there is just no virtual address
> that will
> access your leds physical address.
>
> What I did on some ppc work was tu use a quick BAT mapping to map some leds.
> It's pretty easy to setup. Be aware though that this mapping will get
> wiped out when
> the kernel sets up the BAT for itself.
There are no BATs on 4xx. However, the same conceptual thing can be
done by wiring a fixed TLB entry to cover those LEDs temporarily
during bringup debug. The temporary TLB entry will be wiped out by
normal tlb misses after things are running whenever the fixed entry
is clobbered by the round robin replacement.
-Matt
^ permalink raw reply
* [PATCH] ppc32 8xx: Fix r3 thrashing due to 8MB TLB page instantiation (!CONFIG_8xx_CPU6)
From: Marcelo Tosatti @ 2006-05-05 20:22 UTC (permalink / raw)
To: Paul Mackerras; +Cc: David Jander, linux-ppc-embedded
(please ignore the last patch, it's incomplete)
Instantiation of 8MB pages on the TLB cache for the kernel static
mapping thrashes r3 register on !CONFIG_8xx_CPU6 configurations.
Signed-off-by: Marcelo Tosatti <marcelo@kvack.org>
diff --git a/arch/ppc/kernel/head_8xx.S b/arch/ppc/kernel/head_8xx.S
index ec53c7d..09b3adc 100644
--- a/arch/ppc/kernel/head_8xx.S
+++ b/arch/ppc/kernel/head_8xx.S
@@ -355,9 +355,7 @@ #endif
. = 0x1200
DataStoreTLBMiss:
-#ifdef CONFIG_8xx_CPU6
stw r3, 8(r0)
-#endif
DO_8xx_CPU6(0x3f80, r3)
mtspr SPRN_M_TW, r10 /* Save a couple of working registers */
mfcr r10
@@ -417,9 +415,7 @@ #endif
lwz r11, 0(r0)
mtcr r11
lwz r11, 4(r0)
-#ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)
-#endif
rfi
/* This is an instruction TLB error on the MPC8xx. This could be due
@@ -500,9 +496,7 @@ LoadLargeDTLB:
lwz r11, 4(r0)
lwz r12, 16(r0)
-#ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)
-#endif
rfi
/* This is the data TLB error on the MPC8xx. This could be due to
^ permalink raw reply related
* Re: Calculating virtual address from physical address
From: Sylvain Munaut @ 2006-05-05 19:27 UTC (permalink / raw)
To: Chris Dumoulin, linuxppc-embedded
In-Reply-To: <445B6A80.2090202@ics-ltd.com>
Chris Dumoulin wrote:
> I'm using a Virtex II Pro-based board with a PPC405. The board is
> hanging somewhere very early in the kernel boot process. I believe it
> may be dying at the point where the MMU is enabled. In order to
> determine the exact point at which my board hangs, I'm blinking two LEDs
> in the assembly code found in arch/ppc/kernel/head_4xx.S, . Currently I
> am only able to successfully access the LEDs before the MMU is turned
> on, but I can't be sure that I'm calculating the virtual address
> properly when I try to access the LED after the MMU is turned on.
Typical when trying to bring up board ...
Once the MMU is turned on, your LED registers are most likely ... nowhere
... i.e.
if you don't create a mapping yourself there is just no virtual address
that will
access your LEDs' physical address.
What I did on some ppc work was to use a quick BAT mapping to map some LEDs.
It's pretty easy to setup. Be aware though that this mapping will get
wiped out when
the kernel sets up the BAT for itself.
Sylvain
^ permalink raw reply
* Re: [PATCH] powermac: U4 DART improvements
From: Olof Johansson @ 2006-05-05 15:46 UTC (permalink / raw)
To: Johannes Berg; +Cc: linuxppc-dev, paulus
In-Reply-To: <1146840992.16487.0.camel@localhost>
On Fri, May 05, 2006 at 04:56:32PM +0200, Johannes Berg wrote:
>
> > This boots happily on my quad at home,
>
> Boots fine on my quad and seems to survive a bunch of normal tasks.
> Haven't gotten to do any benchmarking yet.
Thanks for testing. I've been running with it all week here as well. I'll
try to do some benchmarking this weekend; there are a couple of other
things that should be changed in the patch as well.
-Olof
^ permalink raw reply
* Re: Moving from 2.4 to 2.6 kernel
From: Grant Likely @ 2006-05-05 15:31 UTC (permalink / raw)
To: Chris Dumoulin; +Cc: linuxppc-embedded
In-Reply-To: <445B5597.9010801@ics-ltd.com>
On 5/5/06, Chris Dumoulin <cdumoulin@ics-ltd.com> wrote:
> I generated the xparameters_ml300.h file and built my kernel configured
> for the ML300 board, but I still had the same problem. I'm wondering if
> there's something in the hardware that's not being setup by u-boot as
> the linux kernel expects it. I'm looking through the ML300 code in
> u-boot 1.1.4 to see if can spot anything that's being done in there
> that's not being done in my u-boot code. Do you have any ideas about
> some hardware (or other) initialization that I could be neglecting to
> do in u-boot that might cause things to hang?
Probably not; The nice thing about the V2Pro is that there is pretty
much no setup that needs to be done. The bitstream takes care of
configuring SDRAM. :) For my daily work, I don't even use u-boot.
I'm downloading a zImage directly through my BDI 2000. (But u-boot
does work for me too)
Can you post your xparameters file? (Or email it to me directly)
Are you using a full UART or UART lite?
> On a similar note, is there any guide to porting u-boot that I could
> read that would enumerate all the steps I should be taking in u-boot? I
> know the README in the u-boot source basically says to browse the
> mailing list (which I'm doing), read the DULG (which I've done), and
> read the source (which I'm also doing), but some kind of succinct guide
> would be nice, if it exists.
Not that I know of; I had to pile through the same stuff to learn it.
> Interactive Circuits and Systems Ltd.
> 5430 Canotek Road
> Ottawa, ON
Hey; you going to OLS this year?
Cheers,
g.
--
Grant Likely, B.Sc. P.Eng.
Secret Lab Technologies Ltd.
(403) 399-0195
^ permalink raw reply
* Calculating virtual address from physical address
From: Chris Dumoulin @ 2006-05-05 15:08 UTC (permalink / raw)
To: linuxppc-embedded
I'm using a Virtex II Pro-based board with a PPC405. The board is
hanging somewhere very early in the kernel boot process. I believe it
may be dying at the point where the MMU is enabled. In order to
determine the exact point at which my board hangs, I'm blinking two LEDs
in the assembly code found in arch/ppc/kernel/head_4xx.S. Currently I
am only able to successfully access the LEDs before the MMU is turned
on, but I can't be sure that I'm calculating the virtual address
properly when I try to access the LED after the MMU is turned on.
My LEDs are at address 0x4F600000 and my CONFIG_KERNEL_START is
0xC0000000. If this address were low enough, I would just add 0xC0000000
to the address to get the virtual address, but since my LED address is
so high, the sum will be well past the 32-bit maximum address value. How
is a virtual address calculated for a high address like 0x4F600000?
BTW, here is the assembly code that I'm working with (from
arch/ppc/kernel/head_4xx.S):
.text
_GLOBAL(_stext)
_GLOBAL(_start)
/* Save parameters we are passed.
*/
mr r31,r3
mr r30,r4
mr r29,r5
mr r28,r6
mr r27,r7
/* CRD: set LED state here */
lis r26,0x4F600000@h
ori r26,r26,0x4F600000@l
li r25,LED_STATE_0
stw r25,0(r26)
/* We have to turn on the MMU right away so we get cache modes
* set correctly.
*/
bl initial_mmu
/* CRD: set LED state here */
lis r26,0x4F600000@h
ori r26,r26,0x4F600000@l
li r25,LED_STATE_1
stw r25,0(r26)
/* We now have the lower 16 Meg mapped into TLB entries, and the caches
* ready to work.
*/
turn_on_mmu:
lis r0,MSR_KERNEL@h
ori r0,r0,MSR_KERNEL@l
mtspr SPRN_SRR1,r0
lis r0,start_here@h
ori r0,r0,start_here@l
mtspr SPRN_SRR0,r0
SYNC
/* CRD: set LED state here */
lis r26,0x4F600000@h
ori r26,r26,0x4F600000@l
li r25,LED_STATE_2
stw r25,0(r26)
rfi /* enables MMU */
/* CRD: set LED state here */
/* This address should be a virtual address */
lis r26,0x4F600000@h
ori r26,r26,0x4F600000@l
li r25,LED_STATE_3
stw r25,0(r26)
b . /* prevent prefetch past rfi */
Regards,
Chris Dumoulin
--
*--Christopher Dumoulin--*
Software Team Leader
<http://ics-ltd.com/>
<http://ics-ltd.com/>
Interactive Circuits and Systems Ltd.
5430 Canotek Road
Ottawa, ON
K1J 9G2
(613)749-9241
1-800-267-9794 (USA only)
------------------------------------------------------------------------
This e-mail is private and confidential and is for the addressee only.
If misdirected, please notify us by telephone and confirm that it has
been deleted from your system and any hard copies destroyed. You are
strictly prohibited from using, printing, distributing or disseminating
it or any information contained in it save to the intended recipient.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox